In [1]:
import numpy as np
import time
import matplotlib.pyplot as plt
import cvxpy as cp

## Generate the random Dataset


In [None]:
# Build a random problem: n = 100 instances with m = 10 features each,
# a label vector y with entries in {-1, 1}, and a random weight column w.

n = 100  # Number of instances
m = 10   # Number of features

X = np.random.rand(n, m)
y = np.random.rand(n)  # uniform draws, thresholded into labels below
# Draws of at least 0.5 become +1, everything else becomes -1.
ybin = np.where(y >= 0.5, 1, -1).tolist()
y = np.array(ybin)
w = np.random.rand(m, 1)  # weights stored as an (m, 1) column
print(y)
print(X)

## An Implementation of the Logistic Loss 


In [3]:
def LogisticLossNaive(w, X, y, lam):
    """Logistic loss and its gradient, computed with explicit Python loops.

    Evaluates f = sum_i log(1 + exp(-y_i * <x_i, w>)) and
    g = sum_i -y_i / (1 + exp(y_i * <x_i, w>)) * x_i.

    Parameters
    ----------
    w : array indexed as w[j][0], one weight per column of X.
    X : 2-D array with one sample per row.
    y : labels, one per row of X.
    lam : accepted for a uniform signature across the loss functions; not used.

    Returns
    -------
    list
        [f, g] where f is the scalar loss and g is a 1-D gradient with
        X.shape[1] entries.
    """
    f = 0.0
    m = X.shape[1]
    g = np.zeros(m)
    for i in range(X.shape[0]):
        # Inner product <x_i, w>, accumulated feature by feature.
        ycap = 0.0
        for j in range(m):
            ycap += X[i][j] * w[j][0]
        margin = y[i] * ycap
        # log(1 + exp(-margin)); logaddexp avoids exp() overflowing to inf
        # when the margin is strongly negative.
        f += np.logaddexp(0.0, -margin)
        # 1 / (1 + exp(margin)) written as exp(-log(1 + exp(margin))) so that
        # large positive margins do not overflow either.
        gtemp = (-y[i] * np.exp(-np.logaddexp(0.0, margin))) * X[i]
        g = np.add(g, gtemp)
    return [f, g]

In [4]:
# Time one evaluation of the loop-based logistic loss and show its outputs.
start = time.time()
f, g = LogisticLossNaive(w, X, y, 1)
end = time.time()
print("Time Taken = ", end - start, sep="")
print("Function value = ", f, sep="")
print("Printing Gradient:")
print(g)

Time Taken = 0.02316737174987793
Function value = 118.56552583078296
Printing Gradient:
[11.65803187 19.06595187 19.03966253 22.12460452 16.8768076  18.96573872
 17.80817511 17.57146729 17.92158761 17.45561131]


## An Implementation of the Least Squares 


In [5]:
def LeastSquaresNaive(w, X, y, lam):
    """Sum-of-squares loss and its gradient, computed with explicit loops.

    Returns [f, g] with f = sum_i (y_i - <x_i, w>)^2 and
    g = sum_i -2 * (y_i - <x_i, w>) * x_i. `w` is indexed as w[j][0];
    `lam` is accepted but not used.
    """
    num_rows = X.shape[0]
    num_cols = X.shape[1]
    loss = 0.0
    grad = np.zeros(num_cols)
    for row_idx in range(num_rows):
        sample = X[row_idx]
        # Prediction <x_i, w>, accumulated one feature at a time.
        prediction = 0.0
        for col_idx in range(num_cols):
            prediction += sample[col_idx] * w[col_idx][0]
        residual = y[row_idx] - prediction
        loss += residual ** 2
        grad = grad + (-2 * residual) * sample
    return [loss, grad]

In [6]:
# Time one evaluation of the loop-based least-squares loss and show its outputs.
start = time.time()
f, g = LeastSquaresNaive(w, X, y, 1)
end = time.time()
print("Time Taken = ", end - start, sep="")
print("Function value = ", f, sep="")
print("Printing Gradient:")
print(g)

Time Taken = 0.004540443420410156
Function value = 754.7921362033273
Printing Gradient:
[220.60374254 273.002289   278.40304601 292.93624624 265.86519272
 273.3260766  261.56691221 270.20037876 258.52008275 277.6539484 ]


## An Implementation of the Hinge Loss 

In [7]:
def HingeLossNaive(w, X, y, lam):
    """Hinge loss and a subgradient, computed with explicit loops.

    Returns [f, g] with f = sum_i max(0, 1 - y_i * <x_i, w>). A sample adds
    -y_i * x_i to g unless its margin y_i * <x_i, w> is at least 1, in which
    case it adds nothing. `w` is indexed as w[j][0]; `lam` is accepted but
    not used.
    """
    num_features = X.shape[1]
    total = 0.0
    grad = np.zeros(num_features)
    for idx in range(X.shape[0]):
        sample = X[idx]
        # Score <x_i, w>, accumulated one feature at a time.
        score = 0.0
        for k in range(num_features):
            score += sample[k] * w[k][0]
        margin = y[idx] * score
        total += max(0, 1 - margin)
        # Samples that meet the margin contribute a zero vector.
        step = np.zeros(num_features) if margin >= 1 else -y[idx] * sample
        grad = grad + step
    return [total, grad]

In [8]:
# Time one evaluation of the loop-based hinge loss and show its outputs.
start = time.time()
f, g = HingeLossNaive(w, X, y, 1)
end = time.time()
print("Time Taken = ", end - start, sep="")
print("Function value = ", f, sep="")
print("Printing Gradient:")
print(g)

Time Taken = 0.003775358200073242
Function value = 152.1091667168557
Printing Gradient:
[15.17202671 22.3563076  22.35262402 25.91180185 20.14939943 22.36155626
 21.3017856  20.98242982 21.10311422 20.76445159]


## Scalability of the code

In [9]:
n = 100
m = 1000

X = np.random.rand(n, m)
y = np.random.rand(n)
# Draws of at least 0.5 become +1, everything else becomes -1.
ybin = np.where(y >= 0.5, 1, -1).tolist()
y = np.array(ybin)
w = np.random.rand(m, 1)

# Time each loop-based loss on the wider problem and print value and gradient.
for label, loss_fn in (
    ("Logistic Loss", LogisticLossNaive),
    ("Least Square", LeastSquaresNaive),
    ("Hinge Loss", HingeLossNaive),
):
    start = time.time()
    f, g = loss_fn(w, X, y, 1)
    end = time.time()
    print(label)
    print("Time Taken = ", end - start, sep="")
    print("Function value = ", f, sep="")
    print("Printing Gradient:")
    print(g)

Logistic Loss
Time Taken = 0.07466983795166016
Function value = 11871.01258365134
Printing Gradient:
[23.08786115 22.57082797 22.63030865 20.91170222 25.84210549 26.78276446
 21.92937224 25.14726584 22.69142455 20.90890921 27.27608073 23.83988559
 26.74370936 27.09762346 24.77517442 22.39837592 24.28696975 25.57480989
 23.0460725  25.31654429 25.55859501 24.79753048 28.71057199 25.21734896
 20.51569552 21.12466192 24.34399929 25.59369029 25.38180578 24.11584593
 23.83917179 22.83568797 23.34116812 25.63388968 23.75963535 22.993462
 23.53694787 26.27578427 21.07885    23.24677895 22.27688027 22.27172945
 22.50492098 21.7278018  23.1457755  27.17867295 23.94825093 24.76653226
 25.13841223 24.95523657 21.96594418 24.78435036 22.52690774 21.34136972
 22.06283613 25.04230312 21.070516   23.38804919 22.09704734 26.25070933
 22.27733934 20.93349924 23.03083118 24.40387149 26.67332244 22.92856468
 23.45607065 22.91132532 24.22193041 23.40864908 25.65656798 22.46792943
 22.59430055 26.41202383 

## Implement a vectorized version 

In [10]:
def LogisticLossVec(w, X, y, lam):
    """Vectorized logistic loss and gradient.

    Evaluates f = sum_i log(1 + exp(-y_i * (Xw)_i)) and
    g = X^T (-y / (1 + exp(y * Xw))).

    Parameters
    ----------
    w : weights; X @ w must reshape to a vector of X.shape[0] entries.
    X : 2-D array with one sample per row.
    y : NumPy array of labels, one per row of X.
    lam : accepted for a uniform signature across the loss functions; not used.

    Returns
    -------
    list
        [f, g] where f is the scalar loss and g the gradient vector.
    """
    n = X.shape[0]
    # Per-sample margins y_i * <x_i, w>, computed once and reused below.
    margins = np.multiply(y, (X @ w).reshape(n))
    # logaddexp(0, -t) = log(1 + exp(-t)) without overflowing for very negative t.
    f = np.sum(np.logaddexp(0.0, -margins))
    # 1 / (1 + exp(t)) written as exp(-log(1 + exp(t))) to avoid overflow for large t.
    g = X.T @ (-y * np.exp(-np.logaddexp(0.0, margins)))
    return [f, g]

In [11]:
def LeastSquaresVec(w, X, y, lam):
    """Vectorized sum-of-squares loss and gradient.

    Returns [f, g] with f = sum((y - Xw)^2) and g = X^T (-2 * (y - Xw)).
    `lam` is accepted but not used.
    """
    num_samples = X.shape[0]
    predictions = (X @ w).reshape(num_samples)
    residuals = y - predictions
    loss = np.square(residuals).sum()
    grad = X.T @ (-2 * residuals)
    return [loss, grad]

In [12]:
def HingeLossVec(w, X, y, lam):
    """Vectorized hinge loss and a subgradient.

    Returns [f, g] with f = sum(max(0, 1 - y * Xw)); g sums -y_i * x_i over
    the samples whose margin y_i * (Xw)_i is below 1. `lam` is accepted but
    not used.
    """
    num_samples = X.shape[0]
    margins = np.multiply(y, (X @ w).reshape(num_samples))
    loss = np.maximum(0, 1 - margins).sum()
    # Keep the label where the margin is violated, zero elsewhere.
    active_labels = np.where(margins < 1, y, 0)
    grad = (-X.T) @ active_labels
    return [loss, grad]

In [13]:
n = 100
m = 1000

X = np.random.rand(n, m)
y = np.random.rand(n)
# Draws of at least 0.5 become +1, everything else becomes -1.
ybin = np.where(y >= 0.5, 1, -1).tolist()
y = np.array(ybin)
w = np.random.rand(m, 1)

# Time each vectorized loss on the same problem size and print value and gradient.
for label, loss_fn in (
    ("Logistic Loss", LogisticLossVec),
    ("Least Square", LeastSquaresVec),
    ("Hinge Loss", HingeLossVec),
):
    start = time.time()
    f, g = loss_fn(w, X, y, 1)
    end = time.time()
    print(label)
    print("Time Taken = ", end - start, sep="")
    print("Function value = ", f, sep="")
    print("Printing Gradient:")
    print(g)

Logistic Loss
Time Taken = 0.006608724594116211
Function value = 11981.710661682804
Printing Gradient:
[21.63265165 23.58940766 24.55236863 23.38503111 21.69764221 22.55988964
 21.53534042 21.98838552 22.46800868 25.4961085  19.55068377 26.3877893
 24.58456669 29.8802185  22.96114534 25.83811552 26.08841968 25.20630475
 24.00312082 24.70497329 25.38122047 24.10163343 26.14203029 21.1774336
 20.70612476 27.10187237 22.0435343  21.29192114 23.9487409  24.84645105
 20.6996062  27.72947246 26.33895781 24.04438256 20.54298117 28.44961709
 24.98571318 23.7813091  27.95773954 22.38762335 29.93301082 24.00938758
 24.36599326 23.50004003 23.91275863 21.95311352 25.92946087 25.3962996
 26.409667   20.83855527 25.35095372 24.75687201 24.91790022 26.40399108
 21.65336285 23.12460101 27.15475054 22.59148302 27.52049406 25.6807719
 23.33943942 25.27620279 26.97541892 23.91702607 27.99636839 22.95083609
 30.33058985 26.23043488 25.81228773 22.66232946 26.85582773 26.44074268
 24.04501416 23.11119565 

## Let us code the above Loss Functions in CVXPY!

CVXPY is an open source Python-embedded modeling language for convex optimization problems. Link: https://www.cvxpy.org/

In [14]:
def LogisticLossCVXPY(w, X, y, lam):
    """Logistic loss and gradient built from CVXPY atoms.

    f = sum_i logistic(-y_i * (Xw)_i), where cp.logistic(t) = log(1 + exp(t)).
    g = X^T (-y / (1 + exp(y * Xw))), the same formula the NumPy versions use.

    Parameters
    ----------
    w : weights; X @ w must support .reshape(n) with n = X.shape[0].
    X : 2-D array with one sample per row.
    y : NumPy array of labels, one per row of X.
    lam : accepted for a uniform signature across the loss functions; not used.

    Returns
    -------
    list
        [f, g] as CVXPY expressions; read `.value` for the numbers.
    """
    n = X.shape[0]
    margins = cp.multiply(y, (X @ w).reshape(n))
    f = cp.sum(cp.logistic(-margins))
    # d/dt log(1 + exp(-t)) = -1 / (1 + exp(t)); the denominator is 1 + exp(margin),
    # not logistic(-margin).
    g = X.T @ ((-y) / (1 + cp.exp(margins)))
    return [f, g]

In [15]:
def LeastSquaresCVXPY(w, X, y, lam):
    """Sum-of-squares loss via cp.sum_squares, plus its closed-form gradient.

    Returns [f, g] with f = cp.sum_squares(y - Xw) and g = X^T (-2 * (y - Xw)).
    `lam` is accepted but not used.
    """
    num_samples = X.shape[0]
    residual = y - (X @ w).reshape(num_samples)
    loss = cp.sum_squares(residual)
    grad = X.T @ (-2 * residual)
    return [loss, grad]

In [16]:
def HingeLossCVXPY(w, X, y, lam):
    """Hinge loss built from CVXPY atoms, plus a numeric subgradient.

    f = sum_i max(0, 1 - y_i * (Xw)_i) as a CVXPY expression.
    g sums -y_i * x_i over the samples whose margin is below 1, matching the
    NumPy hinge implementations in this notebook.

    Parameters
    ----------
    w : weights; X @ w must support .reshape(n) with n = X.shape[0], and the
        margins must have a numeric value available when this is called.
    X : 2-D array with one sample per row.
    y : NumPy array of labels, one per row of X.
    lam : accepted for a uniform signature across the loss functions; not used.

    Returns
    -------
    list
        [f, g] where f is a CVXPY expression and g is a NumPy array.

    Raises
    ------
    ValueError
        If the margins have no numeric value yet.
    """
    n = X.shape[0]
    margins = cp.multiply(y, (X @ w).reshape(n))
    f = cp.sum(cp.maximum(0, 1 - margins))
    # Comparing a CVXPY expression with `<=` builds a constraint object rather
    # than a boolean mask, so the mask is taken from the numeric margins.
    margin_values = margins.value
    if margin_values is None:
        raise ValueError("HingeLossCVXPY needs numeric margins to form the subgradient")
    g = -X.T @ np.where(margin_values < 1, y, 0)
    return [f, g]

In [None]:
import numpy as np
n = 100
m = 10

X = np.random.rand(n,m)
y = np.random.rand(n)
ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
y = np.array(ybin)
w = np.random.rand(m, 1)

# The CVXPY helpers may return expression objects; getattr(..., "value", ...)
# prints the number behind an expression and leaves plain NumPy results as they are.
start = time.time()
[f1,g1] = LogisticLossCVXPY(w,X,y,1)
end = time.time()
print("Logistic Loss (CVXPY)")
print("Time Taken = " + str(end - start))
print("Function value = " + str(getattr(f1, "value", f1)))
print("Printing Gradient:")
print(getattr(g1, "value", g1))

start = time.time()
[f2,g2] = LeastSquaresCVXPY(w,X,y,1)
end = time.time()
print("Least Squares (CVXPY)")
print("Time Taken = " + str(end - start))
print("Function value = " + str(getattr(f2, "value", f2)))
print("Printing Gradient:")
print(getattr(g2, "value", g2))

start = time.time()
[f2,g2] = HingeLossCVXPY(w,X,y,1)
end = time.time()
print("Hinge Loss (CVXPY)")
print("Time Taken = " + str(end - start))
print("Function value = " + str(getattr(f2, "value", f2)))
print("Printing Gradient:")
print(getattr(g2, "value", g2))

## Compare the losses with a graph



In [None]:
def LogisticLossFun(w, X, y, lam):
    """Placeholder for the logistic-loss error series.

    None of the arguments are used. The body returns the module-level name
    `error_ll`, so the call raises NameError unless that name was assigned
    beforehand.
    NOTE(review): presumably meant to be filled in with the real computation — confirm.
    """
    return error_ll

def LeastSquaresFun(w, X, y, lam):
    """Placeholder for the least-squares error series.

    None of the arguments are used. The body returns the module-level name
    `error_ls`, so the call raises NameError unless that name was assigned
    beforehand.
    NOTE(review): presumably meant to be filled in with the real computation — confirm.
    """
    return error_ls

def HingeLossFun(w, X, y, lam):
    """Placeholder for the hinge-loss error series.

    None of the arguments are used. The body returns the module-level name
    `error_hl`, so the call raises NameError unless that name was assigned
    beforehand.
    NOTE(review): presumably meant to be filled in with the real computation — confirm.
    """
    return error_hl

def plot_errors(error_ll, error_ls, error_hl, num):
    """Draw the three loss curves on one figure and display it.

    Parameters
    ----------
    error_ll, error_ls, error_hl : y-values for the logistic, least-squares
        and hinge curves respectively.
    num : x-values shared by all three curves; passed to plt.plot unchanged.
        NOTE(review): the notebook calls this with the scalar 100 — confirm
        whether a sequence of x positions was intended.

    Returns
    -------
    None
    """
    plt.plot(num, error_ll, label="Logistic Loss")
    plt.plot(num, error_ls, label="Least Squares")
    plt.plot(num, error_hl, label="Hinge Loss")
    # The labels above are only drawn once a legend is requested.
    plt.legend()
    plt.show()

In [None]:
n = 100
m = 10000

X = np.random.rand(n, m)
y = np.random.rand(n)
# Draws of at least 0.5 become +1, everything else becomes -1.
ybin = np.where(y >= 0.5, 1, -1).tolist()
y = np.array(ybin)
w = np.random.rand(m, 1)

# Collect one error result per loss, then draw them on a shared figure.
error_ll = LogisticLossFun(w, X, y, 1)
error_ls = LeastSquaresFun(w, X, y, 1)
error_hl = HingeLossFun(w, X, y, 1)
plot_errors(error_ll, error_ls, error_hl, 100)