In [1]:
import numpy as np
from math import e
import math
import random
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


In [2]:


class LinearRegressionRegularization():
    """Closed-form linear regression with an L2 penalty on the weights.

    A column of ones is prepended to the features, and the weights are
    obtained as ``pinv(X^T X + Lambda * I) @ X^T @ y``. The penalty is applied
    to every weight, including the one attached to the column of ones.
    Every intermediate matrix is kept as an attribute.
    """

    def __init__(self,X,y, Lambda):
        """Fit the weights on (X, y) and cache the predictions for X.

        X      -- 2-D table of features, one row per sample, no ones column.
        y      -- targets, one per row of X.
        Lambda -- regularization strength multiplying the identity matrix.
        """
        self.X = self.addThresh(X)
        self.y = y
        self.Lambda = Lambda

        self.XT = self.X.T
        self.XXT = self.XT @ self.X
        n_weights = self.XXT.shape[0]
        self.lm = self.Lambda * np.identity(n_weights)
        self.XXT_plus_lambda = self.XXT + self.lm
        # Pseudo-inverse rather than inverse, so a singular matrix does not raise.
        self.iX = np.linalg.pinv(self.XXT_plus_lambda)
        projector = self.iX @ self.XT
        self.w = np.matmul(projector, self.y)
        # calc() prepends the ones column itself, so it gets the raw X.
        self.preds = self.calc(X)

    def addThresh(self, X):
        """Return X as an array with a leading column of ones."""
        ones_column = [[1]] * len(X)
        return np.concatenate([ones_column, X], axis=1)

    def calc(self,X):
        """Return the fitted model's raw (real-valued) outputs for features X."""
        augmented = self.addThresh(X)
        return np.matmul(augmented, self.w)

    


In [25]:
class create_test():
    """Run one digit-classification experiment with regularized linear regression.

    Constructing an instance loads the train/test CSVs, labels the rows
    (+1 for ``val1``; -1 for ``val2``, or for every other digit when ``val2``
    is None), appends the second-order features ``x1^2``, ``x2^2`` and
    ``x1*x2``, fits ``LinearRegressionRegularization`` and prints the
    in-sample and out-of-sample classification error.

    Parameters
    ----------
    val1 : digit labelled +1.
    val2 : digit labelled -1; ``None`` means "val1 versus all other digits".
    Lambda : regularization strength passed to the regression.
    train_path, test_path : CSV files read with the first column as index;
        the remaining three columns are renamed to ``digit``, ``x1``, ``x2``.

    After construction ``Ein`` and ``Eout`` hold the two error rates that
    were printed.
    """

    def __init__(self,val1,val2=None,Lambda=1,
                 train_path="../data/features.train.csv",
                 test_path="../data/features.test.csv"):
        self.val1 = val1
        self.val2 = val2

        self.train = pd.read_csv(train_path,index_col=0)
        self.train.columns = ["digit","x1","x2"]
        self.test = pd.read_csv(test_path,index_col=0)
        self.test.columns = ["digit","x1","x2"]

        self.initDataset()
        self.Xtrain = self.nonlinear_transform(self.Xtrain)
        self.Xtest = self.nonlinear_transform(self.Xtest)

        self.linreg = LinearRegressionRegularization(self.Xtrain,self.ytrain,Lambda)

        val2 = "all" if self.val2 is None else self.val2
        print("%s vs. %s" % (self.val1, val2))
        # Keep the errors on the instance so callers can use them, not just read them.
        self.Ein = self.E(self.Xtrain,self.ytrain)
        self.Eout = self.E(self.Xtest,self.ytest)
        print("Ein Error: %s" %np.round((self.Ein),3))
        print("Eout Error: %s" %np.round((self.Eout),3))

    def _labelled(self, frame):
        """Return a copy of ``frame`` with a +1/-1 ``y`` column for this experiment.

        In one-vs-one mode (``val2`` given) only rows whose digit is ``val1``
        or ``val2`` are kept; in one-vs-all mode every row is kept.
        """
        if self.val2 is not None:
            keep = (frame["digit"]==self.val1) | (frame["digit"]==self.val2)
            frame = frame[keep]
        # Work on an explicit copy: writing "y" into a boolean-filtered slice
        # is chained assignment and triggers pandas' SettingWithCopyWarning.
        frame = frame.copy()
        frame["y"] = -1
        frame.loc[frame["digit"]==self.val1,"y"] = 1
        return frame

    def initDataset(self):
        """Label ``self.train``/``self.test`` and split them into X and y parts.

        Sets ``Xtrain``/``Xtest`` (columns ``x1``, ``x2``) and
        ``ytrain``/``ytest`` (the ``y`` column).
        """
        self.train = self._labelled(self.train)
        self.test = self._labelled(self.test)

        self.Xtrain = self.train[["x1","x2"]]
        self.ytrain = self.train.y
        self.Xtest = self.test[["x1","x2"]]
        self.ytest = self.test.y

    def E(self,X,y):
        """Return the fraction of rows where sign(model output) differs from y.

        An output of exactly 0 has sign 0 and therefore counts as an error
        against either label.
        """
        preds = np.sign(self.linreg.calc(X))
        # Compare positionally against the raw label values.
        return np.count_nonzero(preds != np.asarray(y))/len(y)

    def nonlinear_transform(self,X):
        """Return a copy of X with ``x1^2``, ``x2^2`` and ``x1*x2`` columns appended."""
        X = X.copy()
        X["x1^2"] = X["x1"]**2
        X["x2^2"] = X["x2"]**2
        X["x1*x2"] = X["x1"]*X["x2"]
        return X
    

In [26]:
#problems 2-4
# One-vs-all experiment for each of the digits 5 through 9 with the default
# Lambda; every create_test() call prints its own Ein/Eout summary.
for target_digit in (5, 6, 7, 8, 9):
    create_test(val1=target_digit)
    print("\n")

5 vs. all
Ein Error: 0.076
Eout Error: 0.08


6 vs. all
Ein Error: 0.091
Eout Error: 0.085


7 vs. all
Ein Error: 0.088
Eout Error: 0.073


8 vs. all
Ein Error: 0.074
Eout Error: 0.083


9 vs. all
Ein Error: 0.088
Eout Error: 0.088




In [28]:
# problem 5
# Intended sweep: 1-vs-5 with a polynomial kernel of degree Q=2 over several C.
# NOTE(review): the create_test class defined in this notebook does not accept
# C, kernel or Q keyword arguments, so on a fresh kernel this call raises
# TypeError. The saved output below (kernel name, "Number of SV") presumably
# came from a different, SVM-based create_test that is not part of this
# notebook -- confirm, then either restore that definition or drop this cell.
for C in [.001,0.01,.1,1]:
    create_test(val1=1,val2=5,C=C,kernel="poly", Q=2)
    print("\n")

1 vs. 5
C is 0.001; Kernel is poly
Q is 2
Number of SV 76
Ein Error: 0.004
Eout Error: 0.017


1 vs. 5
C is 0.01; Kernel is poly
Q is 2
Number of SV 34
Ein Error: 0.004
Eout Error: 0.019


1 vs. 5
C is 0.1; Kernel is poly
Q is 2
Number of SV 24
Ein Error: 0.004
Eout Error: 0.019


1 vs. 5
C is 1; Kernel is poly
Q is 2
Number of SV 24
Ein Error: 0.003
Eout Error: 0.019




In [29]:
# problem 6
# Intended comparison: for each C, run 1-vs-5 with polynomial degree Q=2 and
# then Q=5 so the two summaries print one after the other.
# NOTE(review): the create_test class defined in this notebook does not accept
# C, kernel or Q keyword arguments, so on a fresh kernel these calls raise
# TypeError. The saved output below presumably came from a different,
# SVM-based create_test that is not part of this notebook -- confirm.
for C in [.001,0.01,.1,1]:
    create_test(val1=1,val2=5,C=C,kernel="poly", Q=2)
    print("\n")
    create_test(val1=1,val2=5,C=C,kernel="poly", Q=5)
    print("\n")

1 vs. 5
C is 0.001; Kernel is poly
Q is 2
Number of SV 76
Ein Error: 0.004
Eout Error: 0.017


1 vs. 5
C is 0.001; Kernel is poly
Q is 5
Number of SV 25
Ein Error: 0.004
Eout Error: 0.021


1 vs. 5
C is 0.01; Kernel is poly
Q is 2
Number of SV 34
Ein Error: 0.004
Eout Error: 0.019


1 vs. 5
C is 0.01; Kernel is poly
Q is 5
Number of SV 23
Ein Error: 0.004
Eout Error: 0.021


1 vs. 5
C is 0.1; Kernel is poly
Q is 2
Number of SV 24
Ein Error: 0.004
Eout Error: 0.019


1 vs. 5
C is 0.1; Kernel is poly
Q is 5
Number of SV 25
Ein Error: 0.003
Eout Error: 0.019


1 vs. 5
C is 1; Kernel is poly
Q is 2
Number of SV 24
Ein Error: 0.003
Eout Error: 0.019


1 vs. 5
C is 1; Kernel is poly
Q is 5
Number of SV 21
Ein Error: 0.003
Eout Error: 0.021




In [30]:
# Intended sweep: 1-vs-5 with an RBF kernel for C from 0.01 up to 10**6.
# NOTE(review): the create_test class defined in this notebook does not accept
# C or kernel keyword arguments, so on a fresh kernel this call raises
# TypeError. The saved output below presumably came from a different,
# SVM-based create_test that is not part of this notebook -- confirm.
for C in [0.01,1,100,10**4,10**6]:
    create_test(val1=1,val2=5,C=C,kernel="rbf")
    print("\n")

1 vs. 5
C is 0.01; Kernel is rbf
Number of SV 406
Ein Error: 0.004
Eout Error: 0.024


1 vs. 5
C is 1; Kernel is rbf
Number of SV 31
Ein Error: 0.004
Eout Error: 0.021


1 vs. 5
C is 100; Kernel is rbf
Number of SV 22
Ein Error: 0.003
Eout Error: 0.019


1 vs. 5
C is 10000; Kernel is rbf
Number of SV 19
Ein Error: 0.003
Eout Error: 0.024


1 vs. 5
C is 1000000; Kernel is rbf
Number of SV 17
Ein Error: 0.001
Eout Error: 0.024




In [54]:
# Scratch cell: displays the values 0.1, 10.1, ..., 90.1 (start 0.1, step 10,
# stop 100 excluded). The result is not assigned to any name.
np.arange(0.1,100,10)

array([ 0.1, 10.1, 20.1, 30.1, 40.1, 50.1, 60.1, 70.1, 80.1, 90.1])