In [1]:
import numpy as np
import math
from scipy import stats as stats
from matplotlib import pyplot as plt
import time
import scipy as sp

# Classifier Superclass (Done?)

In [2]:
class Classifier(object):

    def fit(self, X_train, Y_train):
        """
        Classifier fitting function
            X_train: the features
            Y_train: the labels
        """
        self.X_train = X_train
        self.Y_train = Y_train
        
    def predict(self):
        """
        Classifier prediction function; to be filled in individually for each classifier
        If the subclass has no prediction function, raise an error
        """
        raise RuntimeError("This classifier does not have a prediction function")

    def score(self, X_pred, Y_pred):
        """
        Classifier score function.
            X_pred: Feature vectors in training set
            Y_pred: Corresponding labels for X_pred
        """
        return np.mean(np.equal(self.predict(X_pred), Y_pred))

# Classifier 1 - K-Nearest Neighbors (Done)

In [3]:
class KNNclassifier(Classifier):
    def __init__(self, k=2):
        self.k = k
        
    def predict(self, X,sparse=True):
        """
        k-NN prediction function. Adapted from code I had previously written in HW6
            X_pred: Feature vectors in training set.
        Return the predicted labels for X_pred. Shape: (len(X_pred), )
        """
        Y_pred = []
        
        #calculate distances
        if not sparse:
            distances = np.linalg.norm(self.X_train[:, np.newaxis] - X, axis = 2)
        if sparse:
            
        #append labels to each distance
        #labels = np.tile(self.Y_train, (distances.shape[1],1)).T
        #labeled_distances = np.stack((distances,labels), axis =2)
    
        #Sort Array based on Distances and find the k closest
        sort_indexes = np.argsort(distances,axis = 0)[0:self.k].T
        for indeces in sort_indexes:
            y_vals = []
            for index in indeces:
                y_vals.append(self.Y_train[index])
            Y_pred.append(stats.mode(y_vals)[0][0])
        
        return np.array(Y_pred)

# Classifier 2 - Random Forest

In [4]:
class RFClassifier(Classifier):
    def __init__(self, depth, trees):
        self.depth = depth
        self.trees = trees
#    def predict(self, X_test):
        

# Classifier 3 - Support Vector Machine (Done w/ linear; still need rbf implementation)

In [5]:
class SVMClassifier(Classifier):
    def __init__(self, kernel = 'linear',lmda = 5):
        valid_kernels = ['linear', 'rbf']
        if kernel not in valid_kernels:
            raise ValueError('bad kernel')
        self.kernels = kernel
        self.lmda = lmda
            
    def fit(self, X_train, Y_train, eta = 0.00005,threshold = 0.00000001, itr = 1000, filename = "svm_test.txt"):
        """
        SVM fitting function. Computes the optimal value of theta and stores it as a parameter of the object
            X_train: Feature vectors in training set.
            Y_train: Labels in training set.
            eta: learning rate. Initially set to be 0.05
            threshold: point at which the new theta and old theta differ to stop stop iteration
            itr: maximum number of iterations
        """
        tic = time.time()
        self.X_train = X_train
        self.Y_train = Y_train
        
        f= open(filename,"w+")
        
        #initialize theta* as a random matrix
        self.theta_star = np.random.random(X_train.shape[1])
        
        def h(x,y,theta):
            """
            helper function to calculate the above h(x_i) term in the gradient
                x: the i-th vector
                y: the label of x
                theta: paramter to be optimized
            """
            #if y*x.dot(theta) >= 1:
            #    return 0
            #else:
            #    return -y*x
            
            # Create a Sparse Diagonal Matrix whose entries correspond to 1 if y*<x,theta> >=1 and 0 else
            # That sparse matrix is then used to "zero out" any vectors that have low loss value
            # That new matrix is then summed over columnwise
            S = (((np.sign(y*x.dot(theta)-1)-1)/-2))
            S = sp.sparse.diags(S)
            S = -S*y
            return np.sum(S.dot(x),axis=0)
        
        #def SVM_grad(X,Y,theta):
        #    """
        #    helper function to calculate the gradient of the SVM loss function with respect to theta
        #        X: collection of vectors
        #        Y: collection of labels
        #        theta: parameter to be optimized
        #    """
        #    return 2*theta + self.lmda*h(X,Y,theta)
        
        
        # Lambda function to 
        gradient = lambda X,Y,theta: 2*theta + self.lmda*h(X,Y,theta)
        
        for i in range(itr):
#            new_theta = self.theta_star - eta*SVM_grad(self.X_train, self.Y_train, self.theta_star)
            new_theta = self.theta_star - eta*gradient(self.X_train, self.Y_train, self.theta_star)
            if np.linalg.norm(new_theta - self.theta_star, ord = 1) < threshold:
                print('broke at iteration ' + str(i))
                break
            self.theta_star = new_theta
            #normalizing theta to prevent overflow
            #self.theta_star = self.theta_star/np.linalg.norm(self.theta_star)
        toc = time.time()
        f.write("lambda = {}\neta = {}\nw = {}\ntraining accuracy is = {}\nApproximate time to run was : {}\nSize of data was {}".format(self.lmda, eta, self.theta_star,self.score(self.X_train, self.Y_train), (toc-tic), self.X_train.shape))
        
    def predict(self, X_Val):
        """
        SVM prediction function.
            X_Val: Feature vectors in training set.
        Return the predicted labels for X_pred. Shape: (len(X_Val), )
        """
        Y_pred = []
        
        for x in X_Val:
            pred = 2*int((x.dot(self.theta_star)).astype(np.float64) > 0)-1
            Y_pred.append(pred)
            
        return np.array(Y_pred)
    
    def normalize_theta(self):
        self.theta_star = self.theta_star/np.linalg.norm(self.theta_star)

# Classifier 4 - Linear Boosting

In [6]:
class Boosting(Classifier):
    

SyntaxError: unexpected EOF while parsing (<ipython-input-6-2104fa6adcb4>, line 2)

# Classifier 5 - Single-Layer Perceptron (Some issues still)

In [34]:
class Perceptron(Classifier):
    def __init__(self, epochs = 2):
        self.epochs = epochs
        
    def fit(self, X_train, Y_train, filename = 'perceptron_test.txt'):
        
#        f= open(filename,"w+")
        tic = time.time()
        self.X_train = X_train#np.c_[np.ones(X_train.shape[0]),X_train]
        self.Y_train = Y_train
        self.w = np.zeros(self.X_train.shape[1])
#        tuned = False
#        max_length = self.X_train.shape[0]**2
#        i = 0
        
#        while not tuned and i < max_length:
        for i in range(self.epochs):
            for x,y in zip(self.X_train, self.Y_train):
                if np.sign(x.dot(self.w)) != y:
                    new_w = self.w + 2*y*x
                    self.w = new_w
        toc = time.time()
#        f.write("epochs = {}\nw = {}\ntraining accuracy is = {}\nApproximate time to run was : {}\nSize of data was {}".format(self.epochs, self.w,self.score(self.X_train, self.Y_train), (toc-tic), self.X_train.shape))
        

    def predict(self, X_val):
        Y_pred = []
        
        for x in X_val:
            pred = 2*int((x.dot(self.w)).astype(np.float64) > 0)-1
            Y_pred.append(pred)
        return Y_pred

# Classifier 6 - Logistic Regression (maybe not)

In [None]:
class LogisticRegressionClassifier(Classifier):
    def fit(X_train, Y_train):
        self.X_train = X_train
        self.Y_train = Y_train
        
        self.w = np.zeros(self.X_train.shape[1])
        self.b = 0
        
        def logistic_prob(x,y,W,b):
            return 1/(1+np.exp( (-2*y+1)*(W.T.dot(x)+b) ))
        
        def logistic_loss_gradients(X,Y,W,b):
    
            P = logistic_prob(X.T, Y.T, w, b).T
            #w_grad = 
            for x,y in zip(X,Y):
                p = logistic_prob(x,y,w,b).T
                
            #w_grad = X.dot(Y-P.dot(X))
            #b_grad = Y-P.dot(X)
    
            return w_grad.T, np.float64(b_grad)

# Parameter Tuning Functions (CV and GridSearch)

In [7]:
def CrossValidation(X, Y, Classifier, fold = 3):
    #split data into k partitions
    Y_fold = np.array_split(Y,fold)
    X_fold = np.array_split(X,fold)
    
    #defining accuracies before going into loop
    train_acc = []
    val_acc = []
    
    for i in range(fold):
        
        #taking one value out for validation
        indeces = list(np.linspace(0, fold-1,fold).astype(int))
        indeces.remove(i)
        X_val = X_fold[i]
        Y_val = Y_fold[i]
        
        X_train = np.vstack((x for j,x in enumerate(X_fold) if j!=i))
        Y_train = np.hstack((y for j,y in enumerate(Y_fold) if j!=i))
        
        classifier.fit(X_train, Y_train)
        train_acc.append(classifier.score(X_train, Y_train))
        val_acc.append(classifier.score(X_val, Y_val))
    
    return np.mean(train_acc), np.mean(val_acc)
    

def GridSearch(X,Y, Classifier, param_list, folds = 3):
    train_accs = []
    val_accs = []
#    np.random.seed(0)
#    np.random.shuffle(Data)
#    X = Data.T[0:len(Data.T)-2].T
#    Y = Data.T[len(Data.T)-1]
    for param in param_list:
        a,b = CrossValidation(X,Y, Classifier, par = param, fold = folds)
        train_accs.append(a)
        val_accs.append(b)
    return np.matrix(train_accs).T, np.matrix(val_accs).T


# Loading in the Data

In [8]:
# Dataset 1 - Dota 2 data from UCI ML repo
Dota_Train = np.genfromtxt('./dota2Dataset/dota2Train.csv', delimiter=',')
Dota_Test = np.genfromtxt('./dota2Dataset/dota2Test.csv', delimiter=',')

Dota = np.vstack((Dota_Train,Dota_Test))
np.random.seed(0)
np.random.shuffle(Dota)
Dota_X = Dota.T[4:].T
Dota_Y = Dota.T[0]
Ones_Dota_X = np.c_[ np.ones(Dota_X.shape[0]), Dota_X ]  

In [24]:
Sparse_X1 = sp.sparse.csr_matrix(Ones_Dota_X)

In [31]:
Sparse_X1?

In [40]:
# Dataset 2 - Skin data from UCI ML repo
Skin = np.loadtxt('Skin_NonSkin.txt')
# Mapping labels from {1,2} (Skin, Not Skin) to {1,-1} (Skin, Not Skin)
Skin[:,3] = -2*Skin[:,3]+3

np.random.seed(4)
np.random.shuffle(Skin)
Skin_X = Skin.T[0:-1].T
Skin_Y = Skin.T[-1]
Ones_Skin_X = np.c_[ np.ones(Skin_X.shape[0]), Skin_X ]  

In [23]:
svm2 = SVMClassifier(lmda=5)
svm2.fit(Ones_Dota_X,Dota_Y)
#svm2.predict(Ones_Dota_X[0:n])

KeyboardInterrupt: 

In [86]:
Dota[0:100,:]

array([[ 2.,  0.,  1., ...,  0.,  0.,  0.],
       [ 2.,  0., -1., ...,  0.,  0.,  0.],
       [ 2.,  0.,  0., ...,  0.,  0.,  0.],
       ...,
       [ 2.,  0., -1., ...,  1.,  0.,  0.],
       [ 2.,  0.,  0., ...,  0.,  0.,  0.],
       [ 3.,  0.,  0., ...,  0.,  0.,  0.]])

In [49]:
n = 400

svm2 = SVMClassifier(lmda=5)
svm2.fit(Ones_Dota_X[0:n],Dota_Y[0:n])
svm2.predict(Ones_Dota_X[0:n])

array([ 1,  1,  1, -1,  1,  1,  1, -1,  1,  1,  1, -1,  1,  1, -1,  1, -1,
        1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1, -1,  1, -1,  1,  1, -1,
       -1,  1, -1,  1, -1,  1,  1, -1, -1,  1, -1, -1, -1, -1,  1,  1,  1,
        1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1, -1,  1, -1, -1,  1, -1,  1,  1, -1,  1, -1,  1,  1, -1,
        1,  1, -1, -1, -1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1, -1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1,
       -1,  1,  1,  1, -1,  1, -1,  1, -1,  1,  1,  1, -1,  1, -1,  1,  1,
       -1,  1,  1,  1, -1,  1,  1,  1,  1,  1, -1, -1,  1,  1,  1, -1, -1,
        1,  1, -1,  1,  1,  1, -1,  1, -1,  1,  1,  1, -1,  1, -1, -1,  1,
        1,  1,  1,  1, -1, -1, -1,  1,  1,  1, -1,  1,  1, -1, -1, -1,  1,
        1,  1,  1,  1,  1,  1,  1,  1, -1, -1,  1,  1,  1,  1, -1,  1, -1,
       -1,  1, -1,  1, -1,  1,  1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,
       -1,  1,  1, -1,  1

In [189]:
svm2.score(Ones_Dota_X[0:n],Dota_Y[0:n])

0.54

In [41]:
i_list = [1, 2, 4, 8, 16]
for i in i_list:
    tic = time.time()
    ptron = Perceptron(epochs = i)
    ptron.fit(Ones_Skin_X,Skin_Y)
    toc = time.time()
    print("{} seconds\n score = {}\n".format((toc-tic), ptron.score(Ones_Skin_X,Skin_Y)))

1.5077941417694092 seconds
 score = 0.8459705293054269

2.0896408557891846 seconds
 score = 0.766458415797141

3.5442380905151367 seconds
 score = 0.8937879758586778

6.346112966537476 seconds
 score = 0.934215305010671

11.776028871536255 seconds
 score = 0.9237973206233652



In [44]:
lmda_list = [1,2,3,4,5]
for i in lmda_list:
    tic = time.time()
    svm = SVMClassifier(lmda = i)
    svm.fit(Ones_Skin_X,Skin_Y)
    toc = time.time()
    print("{} seconds\n score = {}\n".format((toc-tic), svm.score(Ones_Skin_X,Skin_Y)))

9.005782127380371 seconds
 score = 0.20753947040892526

8.406216859817505 seconds
 score = 0.20753947040892526

8.386011123657227 seconds
 score = 0.20753947040892526

8.387519836425781 seconds
 score = 0.20753947040892526

10.513423919677734 seconds
 score = 0.20753947040892526



In [56]:
Ones_Skin_X

array([[  1., 200., 198., 164.],
       [  1., 129., 166., 218.],
       [  1., 146., 148.,  96.],
       ...,
       [  1., 200., 198., 158.],
       [  1.,  41.,  71.,  52.],
       [  1., 195., 193., 159.]])

In [52]:
svm.predict(Ones_Skin_X)

array([1, 1, 1, ..., 1, 1, 1])

In [53]:
np.mean((Skin_Y+1)/2)

0.20753947040892526

In [54]:
svm.theta_star

array([1758.85274567, 1758.97873174, 1758.66708517, 1758.7590157 ])

In [55]:
svm2.theta_star

array([0.35030402, 0.79162491, 0.43092038, 1.09024388, 0.64839485,
       0.34335682, 0.3444459 , 0.69249426, 0.51271492, 1.21674895,
       0.71747285, 0.85707379, 1.04883894, 1.06980197, 0.68727413,
       0.66722918, 0.82575396, 0.56230516, 0.87998898, 0.64982577,
       1.10265711, 0.31615285, 0.89484125, 0.57635154, 0.64275319,
       1.15655842, 0.31882848, 0.75205156, 0.47448649, 1.04267733,
       0.39247871, 0.81151419, 1.03865719, 0.42016916, 0.33655875,
       0.63233394, 1.06876154, 1.08036204, 0.84611903, 0.89617121,
       0.45044837, 0.9417726 , 1.11038354, 0.69868263, 0.92626105,
       1.12542344, 1.0915871 , 0.44556315, 1.06767705, 1.05734364,
       1.02612722, 0.84332004, 0.40187903, 0.50500369, 0.39342636,
       0.52480518, 1.16714717, 1.21976824, 0.67599529, 0.7317163 ,
       0.51556982, 0.50459345, 0.58950016, 0.96776532, 0.37885727,
       1.02108934, 0.45585733, 1.11807709, 1.16875204, 0.96599229,
       1.14354595, 0.34339676, 0.42503079, 0.78585483, 1.07870

In [None]:
ptron

In [74]:
svm2 = SVMClassifier()
svm2.fit(Ones_Skin_X,Skin_Y)


In [75]:
svm2.normalize_theta()

In [76]:
svm2.theta_star

array([-0.00480062, -0.61446193, -0.61843088, -0.48985379])

In [79]:
svm2.score(Ones_Skin_X,Skin_Y)

0.7924605295910747

In [81]:
np.mean((Skin_Y-1)/2)

-0.7924605295910747

In [93]:
ptron.w

array([ 0., -2.,  0.,  0.,  0.,  0.,  0., -2.,  0.,  2.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -2.,  0.,
        0.,  0.,  2.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0., -2.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  2.,  0.,  0.,
        0.,  0.,  2.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  2.,  0., -2.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [175]:
theta_hat=np.random.random(Ones_Dota_X.shape[1])

In [173]:
Ones_Dota_X.shape

(102944, 114)

In [169]:
S[99]

1.0

In [141]:
Ones_Dota_X[0].

array([ 1.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  1.,  0., -1.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,
        0.,  0.,  0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -1.,  0.,
        0.,  0.,  0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -1.,  0.,  1.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [83]:
np.unique(ptron.predict(X))

array([-1,  1])

In [77]:
np.unique(svm2.predict(X))

array([-1,  1])

In [84]:
svm2.theta_star

array([0.90483289, 0.90483289, 0.90483289, 0.90483289, 0.90483289,
       0.90483289, 0.90483289, 0.90483289, 0.90483289, 0.90483289,
       0.90483289, 0.90483289, 0.90483289, 0.90483289, 0.90483289,
       0.90483289, 0.90483289, 0.90483289, 0.90483289, 0.90483289,
       0.90483289, 0.90483289, 0.90483289, 0.90483289, 0.90483289,
       0.90483289, 0.90483289, 0.90483289, 0.90483289, 0.90483289,
       0.90483289, 0.90483289, 0.90483289, 0.90483289, 0.90483289,
       0.90483289, 0.90483289, 0.90483289, 0.90483289, 0.90483289,
       0.90483289, 0.90483289, 0.90483289, 0.90483289, 0.90483289,
       0.90483289, 0.90483289, 0.90483289, 0.90483289, 0.90483289,
       0.90483289, 0.90483289, 0.90483289, 0.90483289, 0.90483289,
       0.90483289, 0.90483289, 0.90483289, 0.90483289, 0.90483289,
       0.90483289, 0.90483289, 0.90483289, 0.90483289, 0.90483289,
       0.90483289, 0.90483289, 0.90483289, 0.90483289, 0.90483289,
       0.90483289, 0.90483289, 0.90483289, 0.90483289, 0.90483

In [63]:
# Dataset 3 - Occupancy Data from UCI ML repo
Occupancy_Train = np.genfromtxt('datatraining.csv', delimiter = ',')


In [65]:
Occupancy_Train.shape

(8143, 6)

# Tuning hyperparameters on each dataset

In [95]:
#param list for KNN
k_list = [1,2,3,4,5]
knn_params = {"k" : k_list}

#param list for perceptron
epoch_list = [1,2,4,8,16]
perceptron_params = {"epochs" : epoch_list}

#param list for SVM
lambda_list = [1,2,3,4,5]
svm_params = {"lambda" : lambda_list}


partition = [0.8, 0.5, 0.2]
num_trials = 3

In [97]:
for epoch in epoch_list:
    for i in partition:
        X_train = Ones_Skin_X[:-int(np.rint((1-i)*Ones_Skin_X.shape[0])),:] # Get features from train + val set.
        X_test  = Ones_Skin_X[-int(np.rint((1-i)*Ones_Skin_X.shape[0])):,:] # Get features from test set.     
        Y_train = Skin_Y[:-int(np.rint((1-i)*Skin_Y.shape[0]))] # Get labels from train + val set.
        Y_test  = Skin_Y[-int(np.rint((1-i)*Skin_Y.shape[0])):] # Get labels from test set.  
        for j in range(num_trials):
            filename = "./Perceptron/Skin/Perceptron_Skin_{}_epochs_{}{:1.1f}_trial_{}.txt".format(epoch,i,1-i,j+1)
            ptron = Perceptron(epochs = epoch)
            ptron.fit(X_train,Y_train)
            f= open(filename,"w+")
            f.write("epochs = {}\nw = {}\ntraining accuracy is = {}\ntesting accuracy is {}".format(ptron.epochs, ptron.w,ptron.score(ptron.X_train, ptron.Y_train), ptron.score(X_test,Y_test)))
            

In [None]:
for epoch in epoch_list:
    for i in partition:
        X_train = Ones_Dota_X[:-int(np.rint((1-i)*Ones_Dota_X.shape[0])),:] # Get features from train + val set.
        X_test  = Ones_Dota_X[-int(np.rint((1-i)*Ones_Dota_X.shape[0])):,:] # Get features from test set.     
        Y_train = Dota_Y[:-int(np.rint((1-i)*Dota_Y.shape[0]))] # Get labels from train + val set.
        Y_test  = Dota_Y[-int(np.rint((1-i)*Dota_Y.shape[0])):] # Get labels from test set.  
        for j in range(num_trials):
            filename = "./Perceptron/Dota/Perceptron_Dota_{}_epochs_{}{:1.1f}_trial_{}.txt".format(epoch,i,1-i,j+1)
            ptron = Perceptron(epochs = epoch)
            ptron.fit(X_train,Y_train)
            f= open(filename,"w+")
            f.write("epochs = {}\nw = {}\ntraining accuracy is = {}\ntesting accuracy is {}".format(ptron.epochs, ptron.w,ptron.score(ptron.X_train, ptron.Y_train), ptron.score(X_test,Y_test)))
            

In [None]:
for lmda in lmda_list:
    for i in partition:
        X_train = Ones_Skin_X[:-int(np.rint((1-i)*Ones_Skin_X.shape[0])),:] # Get features from train + val set.
        X_test  = Ones_Skin_X[-int(np.rint((1-i)*Ones_Skin_X.shape[0])):,:] # Get features from test set.     
        Y_train = Skin_Y[:-int(np.rint((1-i)*Skin_Y.shape[0]))] # Get labels from train + val set.
        Y_test  = Skin_Y[-int(np.rint((1-i)*Skin_Y.shape[0])):] # Get labels from test set.  
        for j in range(num_trials):
            filename = "./SVM/Skin/SVM_Skin_{}_lambda_{}{:1.1f}_trial_{}.txt".format(lmda,i,1-i,j+1)
            svm = SVMClassifier(lmda = lmda)
            svm.fit(X_train,Y_train)
            f= open(filename,"w+")
            f.write("epochs = {}\nw = {}\ntraining accuracy is = {}\ntesting accuracy is {}".format(svm.epochs, svm.w,svm.score(ptron.X_train, ptron.Y_train), svm.score(X_test,Y_test)))
            

In [None]:
for lmda in lmda_list:
    for i in partition:
        X_train = Ones_Dota_X[:-int(np.rint((1-i)*Ones_Dota_X.shape[0])),:] # Get features from train + val set.
        X_test  = Ones_Dota_X[-int(np.rint((1-i)*Ones_Dota_X.shape[0])):,:] # Get features from test set.     
        Y_train = Dota_Y[:-int(np.rint((1-i)*Dota_Y.shape[0]))] # Get labels from train + val set.
        Y_test  = Dota_Y[-int(np.rint((1-i)*Dota_Y.shape[0])):] # Get labels from test set.  
        for j in range(num_trials):
            filename = "./SVM/Dota/SVM_Dota_{}_lambda_{}{:1.1f}_trial_{}.txt".format(lmda,i,1-i,j+1)
            svm = SVMClassifier(lmda = lmda)
            svm.fit(X_train,Y_train)
            f= open(filename,"w+")
            f.write("epochs = {}\nw = {}\ntraining accuracy is = {}\ntesting accuracy is {}".format(svm.epochs, svm.w,svm.score(ptron.X_train, ptron.Y_train), svm.score(X_test,Y_test)))
            

In [68]:
svm.score(X,Y)

0.476278364936276

In [73]:
svm.theta_star[81]

0.9048328935585462

# Looking at Highest Accuracies

In [322]:
keyparam['someParam']

5