In [1]:
import re
import numpy as np
from sklearn import datasets
from sklearn.multiclass import OutputCodeClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
import matplotlib.pyplot as plt
import sklearn

# a helper function to calculate the error
def error(Y_predict,test):
    """Return the fraction of predictions that differ from the true labels.

    Args:
        Y_predict: sequence (list or 1-D array) of predicted labels.
        test: sequence of true labels, same length as ``Y_predict``.

    Returns:
        float: number of mismatching positions divided by ``len(test)``.

    Raises:
        ValueError: if ``test`` is empty (the rate is undefined) or if the
            two sequences have different lengths.
    """
    total = len(test)
    if total == 0:
        # Previously this surfaced as a bare ZeroDivisionError.
        raise ValueError('cannot compute an error rate over zero samples')
    if len(Y_predict) != total:
        # A longer prediction vector used to be silently truncated.
        raise ValueError(
            'length mismatch: %d predictions for %d labels' % (len(Y_predict), total)
        )
    # Element-wise comparison keeps this usable for plain lists and numpy arrays.
    mismatches = sum(1 for predicted, actual in zip(Y_predict, test) if predicted != actual)
    return mismatches / total

class SVM(object):
    """One-vs-rest SVM experiments on a whitespace-delimited labelled dataset.

    Each data file holds one sample per line: numeric feature values followed
    by a class-label string as the last field. ``read_data`` loads both
    splits; every ``SVM*`` method then fits a ``OneVsRestClassifier`` wrapping
    an ``sklearn.svm.SVC``, writes a small text report to the working
    directory, and returns ``(train_error, test_error, support_vectors)``.
    """

    # Names used in the report lines for per-class estimators 0, 1 and 2.
    # NOTE(review): this presumes exactly three classes whose sorted label
    # order corresponds to setosa / versicolor / virginica -- confirm against
    # the data files.
    _CLASS_LABELS = ('setosa', 'versicolor', 'virginica')

    def __init__(self, training_dataset_, test_dataset_):
        """Store the two file paths; no file is read until ``read_data``.

        Args:
            training_dataset_: path of the training data file.
            test_dataset_: path of the test data file.
        """
        self.training_dataset = training_dataset_
        self.test_dataset = test_dataset_
        # Filled by read_data: class-label string -> integer class index.
        self.classes = {}
        self.X_train = None
        self.Y_train = None
        self.X_test = None
        self.Y_test = None

        # The attributes below are initialised here but not assigned anywhere
        # else in this class (the "indecies" spelling is kept because the
        # attribute name is part of the public surface).
        self.support_indecies = None
        self.train_errors = None
        self.val_errors = None
        self.train_loss = None
        self.val_loss = None

    @staticmethod
    def _load_rows(path):
        """Parse one data file into ``(feature_matrix, label_strings)``.

        Blank lines are skipped, and a missing trailing newline does not drop
        the last sample. Fields may be separated by any run of whitespace.

        Raises:
            ValueError: if the file holds no samples, a feature is not
                numeric, or rows have differing numbers of features.
        """
        features, labels = [], []
        with open(path, 'r') as handle:
            for line in handle:
                fields = line.split()
                if not fields:
                    continue
                features.append([float(value) for value in fields[:-1]])
                labels.append(fields[-1])
        if not features:
            raise ValueError('no samples found in %r' % (path,))
        # Build the matrix once instead of growing it row by row.
        return np.array(features, dtype=float), labels

    def read_data(self):
        """Load the training and test files into the ``X_*`` / ``Y_*`` arrays.

        Class indices come from the sorted unique labels of the TRAINING file
        and are reused for the test file, so an index always refers to the
        same class in both splits even when the test file lacks some class.

        Raises:
            ValueError: if a test label never occurs in the training file
                (plus anything raised by ``_load_rows``).
        """
        self.X_train, train_labels = self._load_rows(self.training_dataset)
        class_names, self.Y_train = np.unique(train_labels, return_inverse=True)
        self.classes = {str(name): index for index, name in enumerate(class_names)}

        self.X_test, test_labels = self._load_rows(self.test_dataset)
        unknown = sorted(set(test_labels) - set(self.classes))
        if unknown:
            raise ValueError('test labels absent from the training data: %s' % (unknown,))
        self.Y_test = np.array(
            [self.classes[label] for label in test_labels], dtype=self.Y_train.dtype
        )

    def _fit_and_report(self, base_estimator, report_path, include_weights):
        """Fit one-vs-rest around ``base_estimator``, write a report, return results.

        Args:
            base_estimator: unfitted binary estimator handed to
                ``OneVsRestClassifier``.
            report_path: text file to (over)write with the results.
            include_weights: also write each estimator's ``coef_``; only the
                linear-kernel callers request this.

        Returns:
            tuple: ``(train_error, test_error, support_vectors)`` where
            ``support_vectors`` lists ``support_vectors_`` per fitted estimator.
        """
        cl = OneVsRestClassifier(base_estimator)
        cl.fit(self.X_train, self.Y_train)

        train_loss = error(cl.predict(self.X_train), self.Y_train)
        test_loss = error(cl.predict(self.X_test), self.Y_test)

        estimators = cl.estimators_
        support_vectors = [estimator.support_vectors_ for estimator in estimators]

        # `with` closes the report even if one of the writes below raises.
        with open(report_path, 'w') as report:
            report.write('training_error:'+str(train_loss)+'\n')
            report.write('testing_error:'+str(test_loss)+'\n')
            for index, label in enumerate(self._CLASS_LABELS):
                estimator = estimators[index]
                if include_weights:
                    report.write('w_of_'+label+' = '+str(estimator.coef_.tolist()).strip('[]')+'\n')
                report.write('b_of_'+label+' = '+str(estimator.intercept_).strip('[]')+'\n')
                report.write('support_vector_indices_of_'+label+':'+str(estimator.support_.tolist())+'\n')

        return train_loss, test_loss, support_vectors

    def SVM(self):
        """Linear-kernel SVM with a fixed ``C=1e5``; report goes to ``SVM_linear.txt``."""
        return self._fit_and_report(
            sklearn.svm.SVC(kernel='linear', C=1e5), 'SVM_linear.txt', include_weights=True
        )

    def SVM_slack(self,C):
        """Linear-kernel SVM with penalty ``C``; report goes to ``SVM_slack_{C}.txt``."""
        return self._fit_and_report(
            sklearn.svm.SVC(kernel='linear', C=C), f'SVM_slack_{C}.txt', include_weights=True
        )

    # The kernel reports below use a fixed file name, so calling the same
    # method again with another C overwrites the previous report.

    def SVM_kernel_poly2(self,C):
        """Degree-2 polynomial-kernel SVM; report goes to ``SVM_poly2.txt``."""
        return self._fit_and_report(
            sklearn.svm.SVC(kernel='poly', degree=2, C=C), 'SVM_poly2.txt', include_weights=False
        )

    def SVM_kernel_poly3(self,C):
        """Degree-3 polynomial-kernel SVM; report goes to ``SVM_poly3.txt``."""
        return self._fit_and_report(
            sklearn.svm.SVC(kernel='poly', degree=3, C=C), 'SVM_poly3.txt', include_weights=False
        )

    def SVM_kernel_rbf(self,C):
        """RBF-kernel SVM with ``gamma=1``; report goes to ``SVM_rbf.txt``."""
        return self._fit_and_report(
            sklearn.svm.SVC(kernel='rbf', gamma=1, C=C), 'SVM_rbf.txt', include_weights=False
        )

    def SVM_kernel_sigmoid(self,C):
        """Sigmoid-kernel SVM with ``gamma=1``; report goes to ``SVM_sigmoid.txt``."""
        return self._fit_and_report(
            sklearn.svm.SVC(kernel='sigmoid', gamma=1, C=C), 'SVM_sigmoid.txt', include_weights=False
        )

In [2]:
# Create the experiment object from the training/test file paths; `svm` is
# the notebook-level instance the later cells call into.
svm = SVM("train.txt", "test.txt")
# Parse both files into svm.X_train / Y_train / X_test / Y_test.
svm.read_data()
# Print the parsed training feature matrix.
print(svm.X_train)

#####################################
## Call different SVM with value C ##
#####################################

[[5.4 3.7 1.5 0.2]
 [4.8 3.4 1.6 0.2]
 [4.8 3.  1.4 0.1]
 [4.3 3.  1.1 0.1]
 [5.8 4.  1.2 0.2]
 [5.7 4.4 1.5 0.4]
 [5.4 3.9 1.3 0.4]
 [5.1 3.5 1.4 0.3]
 [5.7 3.8 1.7 0.3]
 [5.1 3.8 1.5 0.3]
 [5.4 3.4 1.7 0.2]
 [5.1 3.7 1.5 0.4]
 [4.6 3.6 1.  0.2]
 [5.1 3.3 1.7 0.5]
 [4.8 3.4 1.9 0.2]
 [5.  3.  1.6 0.2]
 [5.  3.4 1.6 0.4]
 [5.2 3.5 1.5 0.2]
 [5.2 3.4 1.4 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.1 1.6 0.2]
 [5.4 3.4 1.5 0.4]
 [5.2 4.1 1.5 0.1]
 [5.5 4.2 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.  3.2 1.2 0.2]
 [5.5 3.5 1.3 0.2]
 [4.9 3.1 1.5 0.1]
 [4.4 3.  1.3 0.2]
 [5.1 3.4 1.5 0.2]
 [5.  3.5 1.3 0.3]
 [4.5 2.3 1.3 0.3]
 [4.4 3.2 1.3 0.2]
 [5.  3.5 1.6 0.6]
 [5.1 3.8 1.9 0.4]
 [4.8 3.  1.4 0.3]
 [5.1 3.8 1.6 0.2]
 [4.6 3.2 1.4 0.2]
 [5.3 3.7 1.5 0.2]
 [5.  3.3 1.4 0.2]
 [5.  2.  3.5 1. ]
 [5.9 3.  4.2 1.5]
 [6.  2.2 4.  1. ]
 [6.1 2.9 4.7 1.4]
 [5.6 2.9 3.6 1.3]
 [6.7 3.1 4.4 1.4]
 [5.6 3.  4.5 1.5]
 [5.8 2.7 4.1 1. ]
 [6.2 2.2 4.5 1.5]
 [5.6 2.5 3.9 1.1]
 [5.9 3.2 4.8 1.8]
 [6.1 2.8 4.  1.3]
 [6.3 2.5 4.

In [3]:
# question 1: linear SVM (svm.SVM() takes no C argument)
train_loss, test_loss, support_vectors = svm.SVM()
print(
    'linear SVM:',
    f'train loss:{train_loss}',
    f'test loss:{test_loss}',
    sep='\n',
)

linear SVM:
train loss:0.041666666666666664
test loss:0.0


In [14]:
# Side experiment: fit sklearn's LinearSVC directly on the loaded data and
# display its test error rate (the bare last expression is the cell output).
# NOTE(review): this cell's execution count (14) is out of sequence with the
# neighbouring cells -- confirm it still reproduces on Restart & Run All.
a = svm.X_train
b = svm.Y_train
cl = LinearSVC(C=0.2)
cl.fit(a,b)
# Training-set predictions; `d` is not used again within this cell.
d = cl.predict(a)
m = svm.X_test
n = svm.Y_test
error(cl.predict(m),n)

0.0

In [4]:
# question 2: sweep the slack penalty C over 0.1, 0.2, ..., 1.0
for i in range(1, 11):
    C = i / 10
    train_loss, test_loss, support_vectors = svm.SVM_slack(C)
    print(
        f'linear SVM with slack_{C}:',
        f'train loss:{train_loss}',
        f'test loss:{test_loss}',
        sep='\n',
    )

linear SVM with slack_0.1:
train loss:0.125
test loss:0.23333333333333334
linear SVM with slack_0.2:
train loss:0.058333333333333334
test loss:0.16666666666666666
linear SVM with slack_0.3:
train loss:0.05
test loss:0.13333333333333333
linear SVM with slack_0.4:
train loss:0.05
test loss:0.1
linear SVM with slack_0.5:
train loss:0.05
test loss:0.1
linear SVM with slack_0.6:
train loss:0.05
test loss:0.1
linear SVM with slack_0.7:
train loss:0.05
test loss:0.1
linear SVM with slack_0.8:
train loss:0.05
test loss:0.1
linear SVM with slack_0.9:
train loss:0.05
test loss:0.06666666666666667
linear SVM with slack_1.0:
train loss:0.05
test loss:0.06666666666666667


In [5]:
# question 3: run each non-linear kernel once with C=1, in the same order
# as before (poly2, poly3, rbf, sigmoid), printing the same three lines each.
for kernel_label, fit_kernel in (
    ('poly2', svm.SVM_kernel_poly2),
    ('poly3', svm.SVM_kernel_poly3),
    ('rbf', svm.SVM_kernel_rbf),
    ('sigmoid', svm.SVM_kernel_sigmoid),
):
    train_loss, test_loss, support_vectors = fit_kernel(C=1)
    print(
        f'{kernel_label} SVM:',
        f'train loss:{train_loss}',
        f'test loss:{test_loss}',
        sep='\n',
    )

poly2 SVM:
train loss:0.03333333333333333
test loss:0.03333333333333333
poly3 SVM:
train loss:0.025
test loss:0.0
rbf SVM:
train loss:0.025
test loss:0.03333333333333333
sigmoid SVM:
train loss:0.6666666666666666
test loss:0.6666666666666666
