In [1]:
import numpy as np
from math import e
import math
import random
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


In [2]:


class LinearRegressionRegularization():
    """Closed-form linear regression with an L2 penalty on the weights.

    The weights are computed as ``w = pinv(Z^T Z + Lambda * I) Z^T y`` where
    ``Z`` is ``X`` with a leading column of ones. The penalty is applied to
    every weight, including the one that multiplies the constant column.

    Parameters
    ----------
    X : 2-D array-like (rows are samples, columns are features)
    y : 1-D array-like of targets, one per row of ``X``
    Lambda : scalar multiplier of the identity added to ``Z^T Z``

    Attributes (names kept for backward compatibility)
    --------------------------------------------------
    X               : ``Z``, the input with the constant column prepended
    XT              : ``Z^T``
    XXT             : ``Z^T Z`` (despite the name, not ``Z Z^T``)
    lm              : ``Lambda * I``
    XXT_plus_lambda : ``Z^T Z + Lambda * I``
    iX              : pseudo-inverse of ``XXT_plus_lambda``
    w               : fitted weight vector, constant term first
    preds           : ``Z w``, the raw (unsigned) outputs on the training rows
    """
    def __init__(self,X,y, Lambda):
        self.X = self.addThresh(X)
        self.y = y
        self.Lambda = Lambda

        self.XT = np.transpose(self.X)
        self.XXT = np.matmul(self.XT,self.X)
        self.lm = np.multiply(self.Lambda, np.identity(self.XXT.shape[0]))
        self.XXT_plus_lambda = np.add(self.XXT, self.lm)
        self.iX = np.linalg.pinv(self.XXT_plus_lambda)
        self.w = np.matmul(np.matmul(self.iX,self.XT),self.y)
        # self.X already carries the constant column, so there is no need to
        # rebuild it from the raw input just to get the training outputs.
        self.preds = np.matmul(self.X,self.w)

    def addThresh(self, X):
        """Return ``X`` as an array with a column of ones prepended.

        Works for any 2-D array-like, including one with zero rows.
        """
        features = np.asarray(X)
        # Integer ones keep the dtype promotion identical to concatenating a
        # plain Python list of 1s: integer input stays integer, float stays float.
        constant = np.ones((features.shape[0], 1), dtype=int)
        return np.concatenate([constant, features], axis=1)

    def calc(self,X):
        """Return the raw linear outputs ``Z w`` for new rows ``X``."""
        return np.matmul(self.addThresh(X),self.w)

    


In [37]:
class create_test():
    """Train a regularized linear classifier on the digit features and report errors.

    Constructing an instance runs the whole experiment: it reads the train and
    test CSV files, labels the rows (+1 for ``val1``, -1 for the rest),
    optionally applies the second-order feature transform, fits
    ``LinearRegressionRegularization`` and prints the in-sample and
    out-of-sample classification error.

    Parameters
    ----------
    val1 : digit labelled +1
    val2 : digit labelled -1. When ``None`` (default) every digit other than
        ``val1`` is labelled -1 (one-vs-all); otherwise only rows whose digit
        is ``val1`` or ``val2`` are kept (one-vs-one).
    Lambda : regularization strength passed to the regression
    nonlinear : when true, append ``x1^2``, ``x2^2`` and ``x1*x2`` as features
    train_path, test_path : CSV files to read. The first column is used as the
        index and the remaining three are renamed to ``digit``, ``x1``, ``x2``.

    Attributes
    ----------
    train, test : labelled DataFrames (with a ``y`` column)
    Xtrain, ytrain, Xtest, ytest : feature frames and label series
    linreg : the fitted ``LinearRegressionRegularization``
    Ein, Eout : classification error on the train / test rows
    """
    def __init__(self,val1,val2=None,Lambda=1,nonlinear=False,
                 train_path="../data/features.train.csv",
                 test_path="../data/features.test.csv"):
        self.val1 = val1
        self.val2 = val2

        self.train = pd.read_csv(train_path,index_col=0)
        self.train.columns = ["digit","x1","x2"]
        self.test = pd.read_csv(test_path,index_col=0)
        self.test.columns = ["digit","x1","x2"]

        self.initDataset()

        if nonlinear:
            self.Xtrain = self.nonlinear_transform(self.Xtrain)
            self.Xtest = self.nonlinear_transform(self.Xtest)

        self.linreg = LinearRegressionRegularization(self.Xtrain,self.ytrain,Lambda)

        opponent = "all" if self.val2 is None else self.val2
        print("%s vs. %s" % (self.val1, opponent))
        print("Nonlinear is %s" % nonlinear)
        print("Lambda is %s" % Lambda)
        # Keep the errors on the instance so callers can use them, not just read them.
        self.Ein = self.E(self.Xtrain,self.ytrain)
        self.Eout = self.E(self.Xtest,self.ytest)
        print("Ein Error: %s" %np.round((self.Ein),3))
        print("Eout Error: %s" %np.round((self.Eout),3))

    def _with_labels(self, frame):
        """Return a labelled copy of ``frame`` with a ``y`` column of +1 / -1."""
        if self.val2 is None:
            # One-vs-all: everything is -1 except the target digit.
            labelled = frame.copy()
            labelled["y"] = -1
            labelled.loc[labelled["digit"]==self.val1,"y"] = 1
            return labelled

        # One-vs-one: keep only the two digits. The explicit copy means the
        # label assignment below writes to an independent frame rather than
        # to a slice of the original (avoids SettingWithCopyWarning).
        keep = (frame["digit"]==self.val1) | (frame["digit"]==self.val2)
        labelled = frame[keep].copy()
        # Every remaining row is val1 or val2; val2 wins if both are the same digit.
        labelled["y"] = np.where(labelled["digit"]==self.val2, -1, 1)
        return labelled

    def initDataset(self):
        """Label ``self.train`` / ``self.test`` and split them into X and y."""
        self.train = self._with_labels(self.train)
        self.test = self._with_labels(self.test)

        self.Xtrain = self.train[["x1","x2"]]
        self.ytrain = self.train.y
        self.Xtest = self.test[["x1","x2"]]
        self.ytest = self.test.y

    def E(self,X,y):
        """Return the fraction of rows whose predicted sign differs from ``y``.

        A raw output of exactly 0 has sign 0 and therefore counts as an error
        against either label.
        """
        preds = np.sign(self.linreg.calc(X))
        return np.count_nonzero(preds != y)/len(y)

    def nonlinear_transform(self,X):
        """Return a copy of ``X`` with ``x1^2``, ``x2^2`` and ``x1*x2`` columns appended."""
        X = X.copy()
        X["x1^2"] = X["x1"]**2
        X["x2^2"] = X["x2"]**2
        X["x1*x2"] = X["x1"]*X["x2"]
        return X
    

In [38]:
# problems 2-4: one-vs-all runs for digits 5..9 with the default settings
# (Lambda=1, no nonlinear transform); each run prints its own Ein / Eout.
for target_digit in range(5, 10):
    create_test(target_digit)
    print("\n")

5 vs. all
Nonlinear is False
Lambda is 1
Ein Error: 0.076
Eout Error: 0.08


6 vs. all
Nonlinear is False
Lambda is 1
Ein Error: 0.091
Eout Error: 0.085


7 vs. all
Nonlinear is False
Lambda is 1
Ein Error: 0.088
Eout Error: 0.073


8 vs. all
Nonlinear is False
Lambda is 1
Ein Error: 0.074
Eout Error: 0.083


9 vs. all
Nonlinear is False
Lambda is 1
Ein Error: 0.088
Eout Error: 0.088




In [39]:
# One-vs-all runs for digits 0..4 with the nonlinear feature transform enabled.
for target_digit in range(0, 5):
    create_test(target_digit, nonlinear=True)
    print("\n")

0 vs. all
Nonlinear is True
Lambda is 1
Ein Error: 0.102
Eout Error: 0.107


1 vs. all
Nonlinear is True
Lambda is 1
Ein Error: 0.012
Eout Error: 0.022


2 vs. all
Nonlinear is True
Lambda is 1
Ein Error: 0.1
Eout Error: 0.099


3 vs. all
Nonlinear is True
Lambda is 1
Ein Error: 0.09
Eout Error: 0.083


4 vs. all
Nonlinear is True
Lambda is 1
Ein Error: 0.089
Eout Error: 0.1




In [40]:
# Compare the nonlinear transform against the plain features for digits 0 and 9
# (one-vs-all). Order of runs: transform first, then plain, for each digit.
for target_digit in (0, 9):
    for use_transform in (True, False):
        if use_transform:
            create_test(target_digit, nonlinear=True)
        else:
            create_test(target_digit)
        print("\n")

0 vs. all
Nonlinear is True
Lambda is 1
Ein Error: 0.102
Eout Error: 0.107


0 vs. all
Nonlinear is False
Lambda is 1
Ein Error: 0.109
Eout Error: 0.115


9 vs. all
Nonlinear is True
Lambda is 1
Ein Error: 0.088
Eout Error: 0.088


9 vs. all
Nonlinear is False
Lambda is 1
Ein Error: 0.088
Eout Error: 0.088




In [41]:
# Same comparison for digit 5 (one-vs-all): transform first, then plain features.
for use_transform in (True, False):
    if use_transform:
        create_test(5, nonlinear=True)
    else:
        create_test(5)
    print("\n")

5 vs. all
Nonlinear is True
Lambda is 1
Ein Error: 0.076
Eout Error: 0.079


5 vs. all
Nonlinear is False
Lambda is 1
Ein Error: 0.076
Eout Error: 0.08




In [43]:
# 1-vs-5 with the nonlinear transform at two regularization strengths.
# Every run is followed by print("\n"), as in the other cells, so the cell no
# longer ends on a bare create_test(...) expression whose object repr the
# notebook would echo as the cell's output.
for weight_decay in (1, .01):
    create_test(1,5,nonlinear=True,Lambda=weight_decay)
    print("\n")

1 vs. 5
Nonlinear is True
Lambda is 1
Ein Error: 0.005
Eout Error: 0.026


1 vs. 5
Nonlinear is True
Lambda is 0.01
Ein Error: 0.004
Eout Error: 0.028


<__main__.create_test at 0x7ff787c3fd00>