In [2]:
import numpy as np
from tqdm import tqdm
from random import randint
import matplotlib.pyplot as plt
from sklearn.metrics import log_loss
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

% matplotlib inline

UsageError: Line magic function `%` not found.


In [3]:
# Load the comma-separated table: column 0 holds the class label, the rest are features
data = np.loadtxt('wine.data', delimiter=',')
X = data[:, 1:]
y = data[:, 0]

# Transform the problem into a binary classification task: keep only classes 1 and 2
idxs = [i for i, label in enumerate(y) if label in (1, 2)]
X = X[idxs]
y = y[idxs]

# Normalize data: centre every feature on its mean and scale by its range (max - min)
X = (X - X.mean(axis=0)) / np.ptp(X, axis=0)
# Append a constant column of ones so the last weight acts as the intercept
X = np.column_stack((X, np.ones(len(X))))

# Transform the target variable: class 1 -> 0, everything else (class 2) -> 1
y = np.where(y == 1, 0, 1)

print(X)
print(y)

[[ 0.33840081 -0.05100334  0.04788153 ...  0.40067028  0.19608252
   1.        ]
 [ 0.06734818 -0.03716935 -0.10719868 ...  0.18490265  0.18538352
   1.        ]
 [ 0.05682186  0.07745515  0.17622378 ...  0.08946696  0.28167453
   1.        ]
 ...
 [-0.30370445  0.03200061  0.23504731 ... -0.2134376  -0.23116427
   1.        ]
 [-0.15107287 -0.06681362 -0.02163719 ... -0.07235876 -0.31960935
   1.        ]
 [-0.23791498  0.46085436  0.02114356 ... -0.15949569 -0.14985186
   1.        ]]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [10]:
# scikit-learn implementation (our benchmark).
# C is the inverse regularization strength, so C=100000 leaves the fit almost unregularized.
reg = LogisticRegression(solver='sag', C=100000, max_iter=10000)
reg.fit(X, y)
print(reg.coef_)

# Reference loss L* reached by the library solver on the training data
L_star = log_loss(y, reg.predict_proba(X))
print("Loss L* = {:<16f}".format(L_star))

[[-23.68460805 -10.12066599 -21.12039252  22.84649101  -1.88806639
    3.1722054   -7.75475242   0.25009727   1.29090767 -11.88895965
    5.2113917  -10.49802832 -31.92757813  -0.11872858]]
Loss L* = 0.000387        


In [4]:
# helper functions
def sigmoid(x):
    """Numerically stable logistic function 1 / (1 + exp(-x)).

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    Scalar for scalar input, otherwise an ndarray with the same shape as ``x``.

    The naive form evaluates ``exp(-x)``, which overflows (RuntimeWarning) for
    large negative ``x``. Here only ``exp(-|x|)`` is evaluated and the
    algebraically equivalent branch is chosen by the sign of ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it can never overflow
    e = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x))
    out = np.where(x >= 0, 1 / (1 + e), e / (1 + e))
    # Indexing with () turns a 0-d result back into a scalar and leaves arrays as arrays
    return out[()]

def cross_entropy_loss(y, y_pred, eps=1e-15):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Parameters
    ----------
    y : array-like
        Binary labels (0 or 1); ``len(y)`` is used as the number of samples.
    y_pred : array-like
        Predicted probabilities, broadcastable against ``y``.
    eps : float, optional
        Probabilities are clipped to ``[eps, 1 - eps]`` before taking logs so
        that predictions saturated at exactly 0 or 1 give a large but finite
        loss instead of ``inf``/``nan`` from ``log(0)``.

    Returns
    -------
    float
        The loss summed over all entries and divided by ``len(y)``.
    """
    y = np.asarray(y)
    # Keep the log arguments strictly inside (0, 1)
    y_pred = np.clip(y_pred, eps, 1 - eps)
    loss = -(1/len(y))*np.sum(y*np.log(y_pred) + (1 - y)*np.log(1 - y_pred))
    return loss

def cross_entropy_grad(y, y_pred, x):
    """Gradient of the summed binary cross-entropy with respect to the weights.

    Parameters
    ----------
    y : array-like, n values
        Binary labels.
    y_pred : array-like, n values (any of shape (n,), (1, n) or (n, 1))
        Predicted probabilities for the same samples.
    x : ndarray of shape (n, d)
        Design matrix whose rows are the samples.

    Returns
    -------
    list of d floats
        ``(y_pred - y) @ x``. Note this is the gradient of the *summed* loss;
        it is not divided by the number of samples.
    """
    # Flatten both operands so the residual is always a single (1, n) row,
    # independent of whether the caller passed 1-D or 2-D predictions.
    residual = (np.ravel(y_pred) - np.ravel(y)).reshape(1, -1)
    # Use the design matrix that was passed in, not a module-level global.
    return list(np.dot(residual, x)[0])

In [7]:
# Initial weights: one per column of X (the last column of X is the constant bias feature)
w = np.zeros((X.shape[1], 1))
# Independent copy: plain assignment would alias w, so the in-place updates below
# would silently change w_rand as well.
w_rand = w.copy()

print(w.shape, X.shape, y.shape)

eta = 0.01  # step size
loss = []
loss_rand = []
num_iter = 10 # just take a bunch of iterations
for t in tqdm(range(num_iter)):

    # predict step
    y_pred = sigmoid(np.dot(w.T, X.T))
    grad = cross_entropy_grad(y, y_pred, X)

    largest = np.argmax(np.abs(grad)) # idx with largest magnitude
    # The loss is recorded before the update, so loss[-1] belongs to the
    # weights as they were before the final step.
    loss.append(cross_entropy_loss(y, y_pred))

    # update that coordinate with largest gradient in magnitude
    w[largest] = w[largest] - eta*grad[largest]

# Post-processing: threshold the predicted probabilities at 0.5 to get class labels
y_pred = sigmoid(np.dot(w.T, X.T))
y_pred = (y_pred.flatten() >= 0.5).astype(int)

print("Accuracy for best coordinate descent {0}".format(accuracy_score(y, y_pred)))
print("Loss L = {0}".format(loss[-1]))

(14, 1) (130, 14) (130,)


100%|██████████| 10/10 [00:00<00:00, 6720.56it/s]

Accuracy for best coordinate descent 0.9307692307692308
Loss L = 0.5832867593722367



