In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.utils import shuffle

In [None]:
# Weight applied to the hinge-loss term; read as a module-level global by
# compute_cost and calculate_cost_gradient.
regularization_strength = 100
# Step size multiplying each per-sample gradient inside sgd.
learning_rate = 0.000001

In [None]:
def compute_cost(W, X, Y):
    """Return the soft-margin SVM objective for the weight vector ``W``.

    The value computed is::

        0.5 * W.W + regularization_strength * sum_i max(0, 1 - Y_i * (X_i . W))

    Note that the hinge term is *summed* over the samples, not averaged.

    Args:
        W: 1-D weight vector, one entry per column of ``X``.
        X: 2-D sample matrix (rows are samples).
        Y: 1-D vector of labels, one per row of ``X``; the hinge formula
            assumes labels of +1 / -1.

    Returns:
        The scalar cost.
    """
    # max(0, 1 - y * f(x)) for every sample, without mutating a temporary.
    hinge_terms = np.maximum(0, 1 - Y * np.dot(X, W))
    hinge_loss = regularization_strength * np.sum(hinge_terms)

    return 0.5 * np.dot(W, W) + hinge_loss


In [None]:
def calculate_cost_gradient(W, X_batch, Y_batch):
    """Return the (sub)gradient of the SVM cost averaged over a batch.

    For each sample the contribution is ``W`` when the margin is satisfied
    (``1 - y * (x . W) <= 0``) and ``W - regularization_strength * y * x``
    otherwise; the contributions are averaged over the batch.

    Args:
        W: 1-D weight vector.
        X_batch: either a 2-D array of samples or a single 1-D sample.
        Y_batch: either a 1-D array of labels or a single scalar label
            (any numeric scalar type, not only ``np.float64``).

    Returns:
        A new 1-D array with the same length as ``W``.
    """
    W = np.asarray(W, dtype=float)
    # Promote a single example (as passed by SGD) to a batch of one. Using
    # atleast_* instead of a type check makes Python floats/ints work too.
    Y_batch = np.atleast_1d(np.asarray(Y_batch, dtype=float))
    X_batch = np.atleast_2d(np.asarray(X_batch, dtype=float))

    distance = 1 - Y_batch * X_batch.dot(W)
    # Only samples inside the margin contribute a hinge term.
    violating = distance > 0
    hinge_part = regularization_strength * (Y_batch[violating] @ X_batch[violating])

    # mean_i(W - [violating_i] * C * y_i * x_i) == W - C * sum_violating(y x) / n
    return W - hinge_part / len(Y_batch)


In [None]:
def sgd(features, outputs, max_epochs=5000, cost_threshold=0.001, random_state=None):
    """Train linear SVM weights with per-sample stochastic gradient descent.

    Each epoch shuffles the data and applies one update per sample using
    ``calculate_cost_gradient`` and the global ``learning_rate``. The cost is
    evaluated on epochs 1, 2, 4, 8, ... (and on the last epoch); training
    stops early once the cost changes by less than ``cost_threshold`` times
    the previously recorded cost.

    Args:
        features: 2-D sample matrix (rows are samples).
        outputs: 1-D vector of labels, one per row of ``features``.
        max_epochs: exclusive upper bound of the epoch counter, which starts
            at 1 (so at most ``max_epochs - 1`` epochs are run).
        cost_threshold: relative cost change, as a fraction (0.001 == 0.1 %),
            below which training is considered converged.
        random_state: ``None`` (use NumPy's global RNG, the previous
            behaviour), an int seed, or a ``np.random.RandomState`` for
            reproducible shuffling.

    Returns:
        The learned 1-D weight vector.
    """
    features = np.asarray(features)
    # Float labels so each element handed to the gradient is a np.float64.
    outputs = np.asarray(outputs, dtype=float)

    # Build the generator once: passing the same int seed to every shuffle
    # call would repeat the identical permutation each epoch.
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    weights = np.zeros(features.shape[1])
    nth = 0
    prev_cost = float("inf")
    # stochastic gradient descent
    for epoch in range(1, max_epochs):
        # shuffle to prevent repeating update cycles
        X, Y = shuffle(features, outputs, random_state=rng)
        for x, y in zip(X, Y):
            gradient = calculate_cost_gradient(weights, x, y)
            weights = weights - (learning_rate * gradient)

        # convergence check on 2^nth epoch
        if epoch == 2 ** nth or epoch == max_epochs - 1:
            cost = compute_cost(weights, features, outputs)
            print("Epoch is: {} and Cost is: {}".format(epoch, cost))
            # stoppage criterion
            if abs(prev_cost - cost) < cost_threshold * prev_cost:
                return weights
            prev_cost = cost
            nth += 1
    return weights



In [None]:

    print("reading dataset...")
    # read data in pandas (pd) data frame
    # NOTE(review): only the first 20 rows are kept -- presumably to keep the
    # experiments below fast; confirm before drawing conclusions.
    data_raw = pd.read_csv('diagnosis.csv').iloc[0:20]
    # drop last column (extra column added by pd)
    # and unnecessary first column (id). drop() returns a new frame, so the
    # column assignment below does not write into a slice of the raw data.
    data = data_raw.drop(columns=data_raw.columns[[-1, 0]])

    print("applying feature engineering...")
    # convert categorical labels to numbers
    diag_map = {'M': 1.0, 'B': -1.0}
    data['diagnosis'] = data['diagnosis'].map(diag_map)
    # map() turns any label missing from diag_map into NaN; fail loudly
    # instead of silently training on NaN targets.
    if data['diagnosis'].isna().any():
        raise ValueError("diagnosis column contains missing or unexpected labels")

    # put features & outputs in different data frames
    Y = data.loc[:, 'diagnosis']
    X = data.drop(columns='diagnosis')

    # filter features
    #remove_correlated_features(X)
    #remove_less_significant_features(X, Y)

    # normalize data for better convergence and to prevent overflow;
    # keep the original feature names and row index on the scaled frame
    X_normalized = StandardScaler().fit_transform(X.values)
    X = pd.DataFrame(X_normalized, columns=X.columns, index=X.index)

    # insert 1 in every row for intercept b
    X.insert(loc=len(X.columns), column='intercept', value=1)

    # split data into train and test set
    #tts(X, Y, test_size=0.2, random_state=42)

    # train the model
    
    

In [None]:
# Train the SVM on the NumPy views of the prepared features and labels.
# (All statements are at the same indentation level; the previous mix of
# column-0 and indented lines raised an IndentationError.)
print("training started...")
W = sgd(X.to_numpy(), Y.to_numpy())
print("training finished.")
print("weights are: {}".format(W))

In [110]:
# Sanity check: X is still a pandas DataFrame at this point (the training
# cell only passes X.to_numpy() to sgd).
type(X)

pandas.core.frame.DataFrame

In [None]:
from sklearn.linear_model import SGDClassifier

# Cross-check against scikit-learn.
# sklearn minimises   alpha * 0.5 * ||w||^2 + mean_i(hinge_i)   for penalty="l2",
# while compute_cost is   0.5 * ||w||^2 + C * sum_i(hinge_i)   with
# C = regularization_strength. Dividing the latter by C * n shows both have
# the same minimiser when penalty="l2" and alpha = 1 / (C * n).
# The earlier settings (penalty="l1", alpha=10000, fitted intercept) solved a
# different, far more heavily regularised problem, which is why the result
# could not be compared with the SGD implementation above.
n_train = X.shape[0]
clf = SGDClassifier(
    loss="hinge",
    penalty="l2",
    alpha=1.0 / (regularization_strength * n_train),
    fit_intercept=False,  # X already carries an explicit intercept column
    max_iter=4000000,
    verbose=True,
)
# Plain arrays keep the fit independent of the DataFrame's column labels.
clf.fit(X.to_numpy(), Y.to_numpy())
clf.coef_

In [None]:
# Score sklearn's first coefficient row with the same compute_cost used by
# sgd, so the two solutions can be compared on one objective.
print(compute_cost(clf.coef_[0],X,Y))

In [None]:
# (rows, columns) of the feature frame; the column count includes the
# 'intercept' column appended during data preparation.
print(X.shape)

In [None]:
import math

def dual_coord_l1(X, y, max_steps, C=.25):
    """Dual coordinate descent for a linear SVM with L1 (hinge) loss.

    Solves ``min_w 0.5 * w.w + C * sum_i max(0, 1 - y_i * (w . x_i))`` through
    its dual, updating one dual variable ``alpha_i`` at a time while keeping
    ``w = sum_i alpha_i * y_i * x_i`` in sync. For the L1 loss the box
    constraint is ``0 <= alpha_i <= C`` and the diagonal shift ``D_ii`` is 0.

    Args:
        X: 2-D sample matrix (rows are samples).
        y: 1-D vector of +1 / -1 labels, one per row of ``X``.
        max_steps: number of outer passes over all samples.
        C: penalty on the hinge loss; also the upper bound of every alpha.

    Returns:
        The primal weight vector ``w`` as a 1-D NumPy array.

    Side effects:
        Prints the feature count, the sample count, and, every 100 outer
        passes, the value of ``compute_cost(w, X, y)``.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    upper = C  # U = C for the L1-loss dual
    n_samples, n_features = X.shape
    print(n_features)
    print(n_samples)
    w = np.zeros(n_features)
    alpha = np.zeros(n_samples)
    # Qbar_ii = x_i . x_i + D_ii with D_ii = 0; invariant across iterations.
    q_diag = (X * X).sum(axis=1)
    for o in range(max_steps):  # outer iteration
        for i in range(n_samples):  # inner iteration
            assert 0 <= alpha[i] <= upper
            if q_diag[i] == 0:
                # An all-zero sample cannot change w; skip to avoid 0-division.
                continue
            # step a: gradient of the dual w.r.t. alpha_i (D_ii * alpha_i == 0)
            G = y[i] * w.dot(X[i]) - 1
            # step b: projected gradient for the box constraint [0, U]
            if alpha[i] == 0:
                PG = min(G, 0)
            elif alpha[i] == upper:
                PG = max(G, 0)
            else:
                PG = G
            # step c: exact one-variable minimisation, clipped to the box
            if PG != 0:
                ahat = alpha[i]
                alpha[i] = min(max(ahat - G / q_diag[i], 0), upper)
                w = w + ((alpha[i] - ahat) * y[i]) * X[i]
        if o % 100 == 0:
            # Use the labels passed in, not a notebook-level global.
            print(compute_cost(w, X, y))
    return w

In [None]:
# Run 2 outer passes of dual_coord_l1 on the NumPy views of X and Y with C=10000.
dual_coord_l1(X.to_numpy(),Y.to_numpy(),2,C=10000)

In [None]:
def dual_coord_l2(X, y, max_steps, C=.25):
    """Dual coordinate descent for a linear SVM with L2 (squared hinge) loss.

    Solves ``min_w 0.5 * w.w + C * sum_i max(0, 1 - y_i * (w . x_i))**2``
    through its dual, updating one dual variable ``alpha_i`` at a time while
    keeping ``w = sum_i alpha_i * y_i * x_i`` in sync. For the L2 loss the
    dual variables are only bounded below (``alpha_i >= 0``) and the diagonal
    shift is ``D_ii = 1 / (2 * C)``.

    Args:
        X: 2-D sample matrix (rows are samples); a NumPy array or a pandas
            DataFrame.
        y: 1-D vector of +1 / -1 labels, one per row of ``X``; a NumPy array
            or a pandas Series.
        max_steps: number of outer passes over all samples.
        C: penalty on the squared hinge loss (must be > 0).

    Returns:
        The primal weight vector ``w`` as a 1-D NumPy array.

    Side effects:
        Prints the feature count, the sample count, and, every 100 outer
        passes, the value of ``compute_cost(w, X, y)``.
    """
    # Work on plain arrays so X[i] / y[i] always mean "i-th sample" by
    # position, whatever container or index the caller passed in.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    n_samples, n_features = X.shape
    print(n_features)
    print(n_samples)
    w = np.zeros(n_features)
    Dii = 1 / (2 * C)
    alpha = np.zeros(n_samples)
    # Qbar_ii = x_i . x_i + D_ii; strictly positive, invariant across passes.
    q_diag = (X * X).sum(axis=1) + Dii
    for o in range(max_steps):  # outer iteration
        for i in range(n_samples):  # inner iteration
            # The L2 dual has no upper bound (U = inf), only alpha_i >= 0.
            assert alpha[i] >= 0
            # step a: G = y_i w^T x_i - 1 + D_ii alpha_i
            G = y[i] * w.dot(X[i]) - 1 + (alpha[i] * Dii)
            # step b: projected gradient; with U = inf only the lower bound
            # can be active
            PG = min(G, 0) if alpha[i] == 0 else G
            # step c: exact one-variable minimisation, clipped at 0
            if PG != 0:
                ahat = alpha[i]
                alpha[i] = max(ahat - G / q_diag[i], 0)
                w = w + ((alpha[i] - ahat) * y[i]) * X[i]
        if o % 100 == 0:
            # Use the labels passed in, not a notebook-level global.
            print(compute_cost(w, X, y))
    return w

In [None]:
# Run 500 outer passes of dual_coord_l2 with C=10. Note that X and Y are
# passed as pandas objects here, whereas the dual_coord_l1 call above passes
# NumPy arrays.
dual_coord_l2(X,Y,500,C=10)