In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.utils import shuffle

In [151]:
regularization_strength = 100
learning_rate = 0.000001

In [152]:
def compute_cost(W, X, Y):
    # calculate hinge loss
    N = X.shape[0]
    distances = 1 - Y * (np.dot(X, W))
    distances[distances < 0] = 0  # equivalent to max(0, distance)
    hinge_loss = regularization_strength * (np.sum(distances))

    # calculate cost
    cost = 1 / 2 * np.dot(W, W) + hinge_loss
    return cost


In [153]:
def calculate_cost_gradient(W, X_batch, Y_batch):
    # if only one example is passed (eg. in case of SGD)
    if type(Y_batch) == np.float64:
        Y_batch = np.array([Y_batch])
        X_batch = np.array([X_batch])  # gives multidimensional array

    distance = 1 - (Y_batch * np.dot(X_batch, W))
    dw = np.zeros(len(W))

    for ind, d in enumerate(distance):
        if max(0, d) == 0:
            di = W
        else:
            di = W - (regularization_strength * Y_batch[ind] * X_batch[ind])
        dw += di

    dw = dw/len(Y_batch)  # average
    return dw


In [154]:
def sgd(features, outputs, max_ep):
    max_epochs = max_ep
    weights = np.zeros(features.shape[1])
    nth = 0
    prev_cost = float("inf")
    cost_threshold = 0.001  # in percent
    # stochastic gradient descent
    for epoch in range(1, max_epochs):
        # shuffle to prevent repeating update cycles
        X, Y = shuffle(features, outputs)
        for ind, x in enumerate(X):
            ascent = calculate_cost_gradient(weights, x, Y[ind])
            weights = weights - (learning_rate * ascent)

        # convergence check on 2^nth epoch
        if epoch == 2 ** nth or epoch == max_epochs - 1:
            cost = compute_cost(weights, features, outputs)
            print("Epoch is: {} and Cost is: {}".format(epoch, cost))
            # stoppage criterion
            if abs(prev_cost - cost) < cost_threshold * prev_cost:
                return weights
            prev_cost = cost
            nth += 1
    return weights



In [155]:

    print("reading dataset...")
    # read data in pandas (pd) data frame
    data = pd.read_csv('diagnosis.csv')
    data = data[0:20]
    # drop last column (extra column added by pd)
    # and unnecessary first column (id)
    data.drop(data.columns[[-1, 0]], axis=1, inplace=True)

    print("applying feature engineering...")
    # convert categorical labels to numbers
    diag_map = {'M': 1.0, 'B': -1.0}
    data['diagnosis'] = data['diagnosis'].map(diag_map)

    # put features & outputs in different data frames
    Y = data.loc[:, 'diagnosis']
    X = data.iloc[:, 1:]

    # filter features
    #remove_correlated_features(X)
    #remove_less_significant_features(X, Y)

    # normalize data for better convergence and to prevent overflow
    X_normalized = StandardScaler().fit_transform(X.values)
    X = pd.DataFrame(X_normalized)

    # insert 1 in every row for intercept b
    X.insert(loc=len(X.columns), column='intercept', value=1)

    # split data into train and test set
    #tts(X, Y, test_size=0.2, random_state=42)

    # train the model
    
    

reading dataset...
applying feature engineering...


In [160]:
print("training started...")
W = sgd(X.to_numpy(), Y.to_numpy(),10000)
print("training finished.")
print("weights are: {}".format(W))

training started...
Epoch is: 1 and Cost is: 1995.6072213251364
Epoch is: 2 and Cost is: 1991.2145239887507
Epoch is: 4 and Cost is: 1982.4294021188355
Epoch is: 8 and Cost is: 1964.8602762876633
Epoch is: 16 and Cost is: 1929.7264813307427
Epoch is: 32 and Cost is: 1859.4765667647155
Epoch is: 64 and Cost is: 1719.0475983154815
Epoch is: 128 and Cost is: 1438.472473705783
Epoch is: 256 and Cost is: 1013.8879328373757
Epoch is: 512 and Cost is: 475.5122298786198
Epoch is: 1024 and Cost is: 176.20124757547916
Epoch is: 2048 and Cost is: 145.5042227787332
Epoch is: 4096 and Cost is: 90.84989454932878
Epoch is: 8192 and Cost is: 1.8276173613147784
Epoch is: 9999 and Cost is: 1.8199497736331431
training finished.
weights are: [ 0.09730647  0.46453706  0.08958624  0.14322724  0.28121743  0.02533176
  0.17830529  0.09708504 -0.27149414  0.21589785  0.05561843 -0.08874279
 -0.20886031 -0.06547136  0.0793413   0.02376994  0.17541641 -0.14457728
  0.02866021  0.03937472  0.17712402  0.40526407 

In [110]:
type(X)

pandas.core.frame.DataFrame

In [157]:
from sklearn.linear_model import SGDClassifier
clf = SGDClassifier(loss="hinge", penalty="l1", max_iter=4000000,alpha=100,verbose=True)
clf.fit(X.to_numpy(), Y.to_numpy())
clf.coef_

##not sure why this doesn't work to verify??? will solve ourselves and compare 
#to the SGD implementation above, sklearn doesn't seem to beat it anyways

-- Epoch 1
Norm: 1.81, NNZs: 0, Bias: 0.348723, T: 20, Avg. loss: 0.711331
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 1.81, NNZs: 0, Bias: 0.354974, T: 40, Avg. loss: 0.683178
Total training time: 0.00 seconds.
-- Epoch 3
Norm: 1.81, NNZs: 0, Bias: 0.358683, T: 60, Avg. loss: 0.678840
Total training time: 0.00 seconds.
-- Epoch 4
Norm: 1.81, NNZs: 0, Bias: 0.361306, T: 80, Avg. loss: 0.676019
Total training time: 0.00 seconds.
-- Epoch 5
Norm: 1.81, NNZs: 0, Bias: 0.363336, T: 100, Avg. loss: 0.673933
Total training time: 0.00 seconds.
-- Epoch 6
Norm: 1.81, NNZs: 0, Bias: 0.364969, T: 120, Avg. loss: 0.672288
Total training time: 0.00 seconds.
-- Epoch 7
Norm: 1.81, NNZs: 0, Bias: 0.366363, T: 140, Avg. loss: 0.670922
Total training time: 0.00 seconds.
-- Epoch 8
Norm: 1.81, NNZs: 0, Bias: 0.367576, T: 160, Avg. loss: 0.669750
Total training time: 0.01 seconds.
-- Epoch 9
Norm: 1.81, NNZs: 0, Bias: 0.368638, T: 180, Avg. loss: 0.668723
Total training time: 0.01 seconds.
-- Ep

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [None]:
print(compute_cost(clf.coef_[0],X,Y))

In [None]:
print(X.shape)

In [158]:
import math

def dual_coord_l1(X, y, max_steps, C=regularization_strength):
    u=C
    n_features = X.shape[1]
    print(n_features)
    n_samples = X.shape[0]
    print(n_samples)
    w = np.zeros(n_features)
    alpha = np.zeros(n_samples)
    for o in range(max_steps) : #outer iteration
        for i in range(n_samples): #inner iteration
            assert(alpha[i] >= 0)
            assert(alpha[i] <= C)
            Dii = 0
            #step a
            G = y[i]*w.transpose().dot(X[i])-1+(alpha[i] * Dii) #G = yiwT xi − 1 + Diiαi
            #step b of algorithm 1
            PG = 0
            if (alpha[i]==0):
                PG = min(G,0)
            if (alpha[i]==u):
                PG = max(G,0)
            if (alpha[i] < u and alpha[i] > 0):
                PG=G
            #step c
            if PG != 0: 
                ahat = alpha[i]
                #for L1, Qhatii = Qii+Dii = Qii+0
                qhatii = ((y[i]**2)*X[i].dot(X[i])) + (Dii)
                alpha[i] = min(max(alpha[i]-(G/qhatii),0),u)
                w = w + ((alpha[i]-ahat)*y[i])*X[i]
        if o % 100 == 0:
            print(compute_cost(w,X,Y))
    return w

In [159]:
dual_coord_l1(X.to_numpy(),Y.to_numpy(),500)

31
20
703.4437502071513
1.922690912457123
1.7616251860433203
1.76160051883621
1.761600515057428


array([ 0.07039113,  0.44879001,  0.04728298,  0.13259928,  0.34147383,
       -0.01335348,  0.17317654,  0.09194354, -0.3341727 ,  0.2648162 ,
        0.08851806, -0.15256927, -0.27753869, -0.04237613,  0.09315064,
        0.01280408,  0.22197679, -0.19233484,  0.09052776, -0.00815736,
        0.23704206,  0.3972551 ,  0.08933121,  0.20202064, -0.07289512,
        0.08745164,  0.03345898, -0.21803711,  0.05848978,  0.2433788 ,
        1.53610418])

In [169]:
l2c = 100

In [172]:
def compute_cost_l2(W,X,Y):
    # calculate hinge loss
    N = X.shape[0]
    distances = 1 - Y * (np.dot(X, W))
    distances[distances < 0] = 0  # equivalent to max(0, distance)
    hinge_loss = l2c * (np.sum(distances.dot(distances)))

    # calculate cost
    cost = 1 / 2 * np.dot(W, W) + hinge_loss
    return cost

In [174]:
def dual_coord_l2(X, y, max_steps, C=regularization_strength):
    u=float('inf')
    n_features = X.shape[1]
    print(n_features)
    n_samples = X.shape[0]
    print(n_samples)
    w = np.zeros(n_features)
    Dii=1/(2*C)
    alpha = np.zeros(n_samples)
    for o in range(max_steps) : #outer iteration
        for i in range(n_samples): #inner iteration
            assert(alpha[i] >= 0)
            assert(alpha[i] <= C)
            #step a
            G = y[i]*w.transpose().dot(X[i])-1+(alpha[i] * Dii) #G = yiwT xi − 1 + Diiαi
            #step b of algorithm 1
            PG = 0
            if (alpha[i]==0):
                PG = min(G,0)
            if (alpha[i]==u):
                PG = max(G,0)
            if (alpha[i] < u and alpha[i] > 0):
                PG=G
            #step c
            if PG != 0: 
                ahat = alpha[i]
                #for L1, Qhatii = Qii+Dii = Qii+0
                qii = ((y[i]**2)*X[i].dot(X[i]))
                qhatii = qii + (Dii)
                alpha[i] = min(max(alpha[i]-(G/qhatii),0),u)
                w = w + ((alpha[i]-ahat)*y[i])*X[i]
        if o % 100 == 0:
            print(compute_cost_l2(w,X,Y))
    return w

In [175]:
dual_coord_l2(X.to_numpy(),Y.to_numpy(),500,C=10)

31
20
630.1628241510977
2.2501356010053337
2.2471667420716197
2.2471664682029053
2.2471664681773547


array([ 0.06637404,  0.42601365,  0.04468828,  0.12470193,  0.32704678,
       -0.01249464,  0.15806512,  0.08251113, -0.32697307,  0.25051643,
        0.09676299, -0.14170624, -0.25831514, -0.03539776,  0.09750556,
        0.0206437 ,  0.20820938, -0.18568193,  0.07841842, -0.0049623 ,
        0.23074528,  0.38297832,  0.08950464,  0.19388967, -0.07119158,
        0.09340938,  0.02739781, -0.212044  ,  0.05531137,  0.23513165,
        1.48551897])

In [181]:
clf = SGDClassifier(loss="squared_hinge", penalty="l2", max_iter=4000000,alpha=100,verbose=True)
clf.fit(X.to_numpy(), Y.to_numpy())
clf.coef_

-- Epoch 1
Norm: 0.02, NNZs: 31, Bias: 0.778735, T: 20, Avg. loss: 2.829549
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 0.01, NNZs: 31, Bias: 0.780019, T: 40, Avg. loss: 0.199644
Total training time: 0.00 seconds.
-- Epoch 3
Norm: 0.01, NNZs: 31, Bias: 0.780993, T: 60, Avg. loss: 0.197584
Total training time: 0.00 seconds.
-- Epoch 4
Norm: 0.01, NNZs: 31, Bias: 0.781636, T: 80, Avg. loss: 0.196119
Total training time: 0.00 seconds.
-- Epoch 5
Norm: 0.01, NNZs: 31, Bias: 0.782107, T: 100, Avg. loss: 0.194952
Total training time: 0.00 seconds.
-- Epoch 6
Norm: 0.01, NNZs: 31, Bias: 0.782519, T: 120, Avg. loss: 0.195059
Total training time: 0.00 seconds.
-- Epoch 7
Norm: 0.01, NNZs: 31, Bias: 0.782877, T: 140, Avg. loss: 0.195082
Total training time: 0.00 seconds.
-- Epoch 8
Norm: 0.01, NNZs: 31, Bias: 0.783175, T: 160, Avg. loss: 0.194474
Total training time: 0.00 seconds.
-- Epoch 9
Norm: 0.01, NNZs: 31, Bias: 0.783448, T: 180, Avg. loss: 0.194568
Total training time: 0.00 secon

array([[ 0.00125716,  0.00278436,  0.00145754,  0.00123748,  0.00149985,
         0.0021502 ,  0.00179826,  0.00186172,  0.00113349,  0.0019119 ,
         0.00246267,  0.00131632,  0.00164295,  0.00183459, -0.00100817,
         0.00154351,  0.00092643,  0.00044065, -0.00019068,  0.0017813 ,
         0.00196218,  0.00277277,  0.00191713,  0.00170542,  0.0008361 ,
         0.0016962 ,  0.0012356 ,  0.00200162,  0.00085185,  0.00192288,
         0.00307417]])

In [182]:
print(compute_cost_l2(clf.coef_[0],X,Y))

1970.676688627001


In [185]:
def chang_descent(X,y,max_steps,c=regularization_parameter,beta=.5): # from https://www.csie.ntu.edu.tw/~cjlin/papers/cdl2.pdf
    n_features = X.shape[1]
    n_samples = X.shape[0]
    w = np.zeros(n_features)
    for o in range(max_steps) : #outer iteration
        for i in range(n_features): #inner iteration
            #start of Newtons method sub problem
            z = 0
            while(True):
                bj=1-y
                dprime = w[i]+z-2*c*
            
            for 
                
            
            
    
    

SyntaxError: invalid syntax (4062854192.py, line 8)