# Assignment 2 sample solution

In [1]:
import re
import numpy as np
from sklearn import datasets
from sklearn.multiclass import OutputCodeClassifier
from sklearn.svm import LinearSVC
import matplotlib.pyplot as plt
from sklearn import svm, datasets

class SVM(object):
    """One-vs-rest SVM experiments on a whitespace-separated labelled dataset.

    Each data file holds one sample per line: numeric feature values followed
    by a class-name token in the last column. ``read_data`` loads both files;
    the ``SVM*`` methods train one binary ``svm.SVC`` per class and predict the
    class whose classifier yields the largest decision value.
    """

    def __init__(self, training_dataset_, test_dataset_):
        """Store the dataset paths; nothing is read until ``read_data`` runs.

        Args:
            training_dataset_: Path of the training data file.
            test_dataset_: Path of the test data file.
        """
        self.training_dataset = training_dataset_
        self.test_dataset = test_dataset_
        self.classes = {}  # class name -> integer label, filled by read_data()
        self.X_train = None
        self.Y_train = None
        self.X_test = None
        self.Y_test = None

        # Placeholders only: no method of this class assigns these attributes.
        self.support_indecies = None
        self.train_errors = None
        self.test_errors = None
        self.train_loss = None
        self.val_loss = None

    @staticmethod
    def _load_rows(path):
        """Parse one data file into a float feature matrix and a list of label strings."""
        features, labels = [], []
        with open(path, 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue  # blank line; nothing to parse
                features.append([float(value) for value in fields[:-1]])
                labels.append(fields[-1])
        if not features:
            return np.empty((0, 0), dtype=float), labels
        return np.array(features, dtype=float), labels

    def read_data(self):
        """Load the training and test files into ``X_*`` / ``Y_*`` arrays.

        Class names are sorted (via ``np.unique``) and numbered from 0 using
        the training file only; the test labels are encoded with that same
        mapping so an integer label means the same class in both sets even
        when the test file does not contain every class.

        Raises:
            ValueError: If a feature value is not numeric, the rows have
                different lengths, or the test file contains a class name that
                never occurs in the training file.
        """
        self.X_train, train_labels = self._load_rows(self.training_dataset)
        names, self.Y_train = np.unique(train_labels, return_inverse=True)
        self.classes = {str(name): index for index, name in enumerate(names)}

        self.X_test, test_labels = self._load_rows(self.test_dataset)
        unknown = sorted(set(test_labels) - set(self.classes))
        if unknown:
            raise ValueError(
                f"test labels not present in the training data: {unknown}")
        self.Y_test = np.array(
            [self.classes[label] for label in test_labels], dtype=np.intp)

    def gen_Y_aggregate(self, Y_labels):
        """Build one-vs-rest targets for every distinct label.

        Args:
            Y_labels: Sequence of class labels, one per sample.

        Returns:
            Dict mapping each distinct label (in ascending order) to a list
            with +1 where the sample has that label and -1 elsewhere.
        """
        # Sort explicitly so the dict order does not depend on set iteration order.
        Y_dict = {label: [2*(lbl == label)-1 for lbl in Y_labels]
                  for label in sorted(set(Y_labels))}
        return Y_dict

    def SVM(self, C=1e5, kernel='rbf', w_output=False, **kwargs):
        """Train one binary ``svm.SVC`` per class and evaluate one-vs-rest.

        Args:
            C: Regularisation parameter forwarded to ``svm.SVC``. The large
                default presumably approximates a hard margin (the notebook
                calls that run the "standard SVM").
            kernel: Kernel name forwarded to ``svm.SVC``.
            w_output: When true, each class also contributes its weight vector
                (``None`` for non-linear kernels) to the returned list.
            **kwargs: Extra keyword arguments for ``svm.SVC`` (e.g. ``gamma``,
                ``degree``).

        Returns:
            ``(train_loss, test_loss, support_vectors)`` where the losses are
            misclassification rates of the combined classifier and
            ``support_vectors`` is a flat list holding, per class in ascending
            label order, ``[w,] b, support_indices``.
        """
        Y_train_dict = self.gen_Y_aggregate(self.Y_train)
        class_labels = np.array(list(Y_train_dict))
        w = []
        b = []
        sv = []
        train_scores = []
        test_scores = []
        if kernel == 'linear':
            print("Linear case: test whether the classes are linearly separable")
        for Y_train in Y_train_dict.values():
            clf = svm.SVC(kernel=kernel, C=C, **kwargs)
            clf.fit(self.X_train, Y_train)
            binary_train_loss = 1 - clf.score(self.X_train, Y_train)
            # ravel() instead of a fixed reshape so any feature count works.
            w.append(clf.coef_.ravel() if kernel == 'linear' else None)
            # NOTE(review): the intercept is reported as 0 for the rbf and
            # sigmoid kernels although the fitted SVC has an intercept_ for
            # them too; kept unchanged to match the recorded outputs — confirm
            # against the assignment spec.
            b.append(clf.intercept_[0] if kernel not in ['rbf', 'sigmoid'] else 0)
            sv.append(clf.support_)
            train_scores.append(clf.decision_function(self.X_train))
            test_scores.append(clf.decision_function(self.X_test))
            if kernel == 'linear':
                # Zero training error of the binary problem means this class
                # was separated from the rest by the fitted linear classifier.
                print(f"Linearly separable: {binary_train_loss==0}")

        # The winning column index is translated back to its label so the
        # comparison with Y_* does not depend on labels being exactly 0..k-1.
        Y_train_predict = class_labels[np.argmax(np.column_stack(train_scores), axis=1)]
        Y_predict = class_labels[np.argmax(np.column_stack(test_scores), axis=1)]
        train_loss = np.mean(Y_train_predict != self.Y_train)
        test_loss = np.mean(Y_predict != self.Y_test)

        support_vectors = []
        for w_k, b_k, sv_k in zip(w, b, sv):
            if w_output:
                support_vectors.append(w_k)
            support_vectors.extend((b_k, sv_k))

        return train_loss, test_loss, support_vectors

    def SVM_slack(self, C):
        """Linear soft-margin SVM with slack penalty ``C``; weights are included in the output."""
        return self.SVM(C=C, kernel='linear', w_output=True)

    def SVM_kernel_poly2(self, C=1, gamma=1):
        """SVM with a degree-2 polynomial kernel; returns the same triple as ``SVM``."""
        return self.SVM(C=C, kernel='poly', degree=2, gamma=gamma)

    def SVM_kernel_poly3(self, C=1, gamma=1):
        """SVM with a degree-3 polynomial kernel; returns the same triple as ``SVM``."""
        return self.SVM(C=C, kernel='poly', degree=3, gamma=gamma)

    def SVM_kernel_rbf(self, C=1, gamma=0.5):
        """SVM with a radial basis function kernel; returns the same triple as ``SVM``."""
        return self.SVM(C=C, kernel='rbf', gamma=gamma)

    def SVM_kernel_sigmoid(self, C=1, gamma=1):
        """SVM with a sigmoid kernel; returns the same triple as ``SVM``."""
        return self.SVM(C=C, kernel='sigmoid', gamma=gamma)

In [2]:
def write_file(file_name, outputs):
    """Write each item of ``outputs`` to ``file_name`` on its own line.

    NumPy arrays are flattened to a comma-separated list of their elements;
    every other item is written using its default string formatting.
    """
    with open(file_name, 'w') as sink:
        for item in outputs:
            if isinstance(item, np.ndarray):
                text = ",".join(str(element) for element in item)
            else:
                text = item
            sink.write(f"{text}\n")

In [3]:
svm_clf = SVM("train.txt", "test.txt")
svm_clf.read_data()

# Linear SVM with the default (very large) C; weights are written as well.
print('\n------- standard SVM -------\n')
linear_outputs = svm_clf.SVM(kernel='linear', w_output=True)
write_file('SVM_linear.txt', linear_outputs)
print('Finished writing output to file SVM_linear.txt')

# Soft-margin linear SVM for C = 0.1, 0.2, ..., 1.0, one output file per C.
for t in range(1, 11):
    C = round(0.1 * t, 2)
    slack_file = f'SVM_slack_{C}.txt'
    print('\n------- SVM with slack variable C={}-------\n'.format(C))
    write_file(slack_file, svm_clf.SVM_slack(C))
    print(f'Finished writing output to file {slack_file}')

# Kernel SVMs with their default hyper-parameters: (banner, output file, fit method).
kernel_runs = (
    ('\n------- SVM with 2nd-order polynomial kernel -------\n',
     'SVM_poly2.txt', svm_clf.SVM_kernel_poly2),
    ('\n------- SVM with 3rd-order polynomial kernel -------\n',
     'SVM_poly3.txt', svm_clf.SVM_kernel_poly3),
    ('\n------- SVM with radial basis function kernel -------\n',
     'SVM_rbf.txt', svm_clf.SVM_kernel_rbf),
    ('\n------- SVM with sigmoidal kernel -------\n',
     'SVM_sigmoid.txt', svm_clf.SVM_kernel_sigmoid),
)
for banner, out_file, fit in kernel_runs:
    print(banner)
    write_file(out_file, fit())
    print(f'Finished writing output to file {out_file}')


------- standard SVM -------

Linear case: test whether the classes are linearly separable
Linearly separable: True
Linearly separable: False
Linearly separable: False
Finished writing output to file SVM_linear.txt

------- SVM with slack variable C=0.1-------

Linear case: test whether the classes are linearly separable
Linearly separable: True
Linearly separable: False
Linearly separable: False
Finished writing output to file SVM_slack_0.1.txt

------- SVM with slack variable C=0.2-------

Linear case: test whether the classes are linearly separable
Linearly separable: True
Linearly separable: False
Linearly separable: False
Finished writing output to file SVM_slack_0.2.txt

------- SVM with slack variable C=0.3-------

Linear case: test whether the classes are linearly separable
Linearly separable: True
Linearly separable: False
Linearly separable: False
Finished writing output to file SVM_slack_0.3.txt

------- SVM with slack variable C=0.4-------

Linear case: test whether the cl

## 1. standard SVM

In [4]:
print('------- standard SVM -------')
train_loss, test_loss, other_outputs = svm_clf.SVM(kernel='linear', w_output=True)
print('training_error:', train_loss)
print('testing_error:', test_loss)
# other_outputs is a flat list: (w, b, support indices) repeated once per class.
for position, species in enumerate(('setosa', 'versicolor', 'virginica')):
    base = 3 * position
    print(f'w_of_{species}:', other_outputs[base])
    print(f'b_of_{species}:', other_outputs[base + 1])
    print(f'support_vector_indices_of_{species}:', other_outputs[base + 2])

------- standard SVM -------
Linear case: test whether the classes are linearly separable
Linearly separable: True
Linearly separable: False
Linearly separable: False
training_error: 0.041666666666666664
testing_error: 0.0
w_of_setosa: [-0.04575352  0.52216766 -1.00294058 -0.46406882]
b_of_setosa: 1.4474641258143042
support_vector_indices_of_setosa: [78 13 31]
w_of_versicolor: [-0.75160958 -3.4187652   2.06714366 -4.63634689]
b_of_versicolor: 11.313568866810089
support_vector_indices_of_versicolor: [  1   2   3  14  15  20  28  31  32  81  82  83  84  86  88  89  91  92
  93  95  96  98  99 100 103 104 107 112 116 117 119  41  43  44  45  46
  47  50  52  54  55  56  57  58  59  62  64  65  66  68  69  71  73  74
  75  76  77  78  79]
w_of_virginica: [-4.26389247 -6.19330415  8.64141632 12.56275266]
b_of_virginica: -19.190666520692652
support_vector_indices_of_virginica: [ 50  52  57  63  97  99 103 108]


## 2. SVM with slack variables

In [5]:
# Report the linear soft-margin SVM for C = 0.1, 0.2, ..., 1.0.
for t in range(1, 11):
    C = round(0.1 * t, 2)
    print('\n------- SVM with slack variable C={}-------\n'.format(C))
    train_loss, test_loss, other_outputs = svm_clf.SVM_slack(C)
    print('training_error:', train_loss)
    print('testing_error:', test_loss)
    # other_outputs is a flat list: (w, b, support indices) repeated once per class.
    for position, species in enumerate(('setosa', 'versicolor', 'virginica')):
        base = 3 * position
        print(f'w_of_{species}:', other_outputs[base])
        print(f'b_of_{species}:', other_outputs[base + 1])
        print(f'support_vector_indices_of_{species}:', other_outputs[base + 2])


------- SVM with slack variable C=0.1-------

Linear case: test whether the classes are linearly separable
Linearly separable: True
Linearly separable: False
Linearly separable: False
training_error: 0.125
testing_error: 0.23333333333333334
w_of_setosa: [-0.14732687  0.35061427 -0.7191508  -0.3419995 ]
b_of_setosa: 1.894306660993957
support_vector_indices_of_setosa: [40 44 59 73 78 10 13 14 15 31 34]
w_of_versicolor: [-0.15533517 -0.58614184  0.18867405 -0.26726425]
b_of_versicolor: 1.4332329377798487
support_vector_indices_of_versicolor: [  2   3  14  15  19  20  24  27  28  31  32  35  37  81  82  83  84  86
  88  89  91  92  93  95  96  97  98  99 100 102 103 104 105 107 108 109
 112 113 115 116 117 119  40  41  42  43  44  45  46  47  48  49  50  51
  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69
  70  71  72  73  74  75  76  77  78  79]
w_of_virginica: [ 0.09176466 -0.08722691  1.10554618  0.89899156]
b_of_virginica: -7.220857460580062
support_vector_indi

## 3. SVM with kernel functions and slack variables

### (a) 2nd-order polynomial kernel

In [6]:
print('\n------- SVM with 2nd-order polynomial kernel -------\n')
train_loss, test_loss, other_outputs = svm_clf.SVM_kernel_poly2()
print('training_error:', train_loss)
print('testing_error:', test_loss)
# Without weights the flat list holds (b, support indices) once per class.
for position, species in enumerate(('setosa', 'versicolor', 'virginica')):
    print(f'b_of_{species}:', other_outputs[2 * position])
    print(f'support_vector_indices_of_{species}:', other_outputs[2 * position + 1])


------- SVM with 2nd-order polynomial kernel -------

training_error: 0.025
testing_error: 0.0
b_of_setosa: 1.2209413239487106
support_vector_indices_of_setosa: [78 13 31]
b_of_versicolor: 4.336670060758147
support_vector_indices_of_versicolor: [ 14  31  89  93  96  97  99 103 108  48  50  52  57  58  63  64]
b_of_virginica: -10.42876522538526
support_vector_indices_of_virginica: [ 50  52  57  63  96  97  99 103 108]


### (b) 3rd-order polynomial kernel

In [7]:
print('\n------- SVM with 3rd-order polynomial kernel -------\n')
train_loss, test_loss, other_outputs = svm_clf.SVM_kernel_poly3()
print('training_error:', train_loss)
print('testing_error:', test_loss)
# Without weights the flat list holds (b, support indices) once per class.
for position, species in enumerate(('setosa', 'versicolor', 'virginica')):
    print(f'b_of_{species}:', other_outputs[2 * position])
    print(f'support_vector_indices_of_{species}:', other_outputs[2 * position + 1])


------- SVM with 3rd-order polynomial kernel -------

training_error: 0.008333333333333333
testing_error: 0.0
b_of_setosa: 1.1343496320085693
support_vector_indices_of_setosa: [78 13 31]
b_of_versicolor: 1.5442602836516772
support_vector_indices_of_versicolor: [ 31  89  97  99 101 103 108 119  50  52  57  63  70]
b_of_virginica: -6.117889217455328
support_vector_indices_of_virginica: [ 50  52  57  63  89 103 108 119]


### (c) radial basis function kernel

In [8]:
print('\n------- SVM with radial basis function kernel -------\n')
train_loss, test_loss, other_outputs = svm_clf.SVM_kernel_rbf()
print('training_error:', train_loss)
print('testing_error:', test_loss)
# Without weights the flat list holds (b, support indices) once per class.
for position, species in enumerate(('setosa', 'versicolor', 'virginica')):
    print(f'b_of_{species}:', other_outputs[2 * position])
    print(f'support_vector_indices_of_{species}:', other_outputs[2 * position + 1])


------- SVM with radial basis function kernel -------

training_error: 0.03333333333333333
testing_error: 0.03333333333333333
b_of_setosa: 0
support_vector_indices_of_setosa: [ 42  45  78  84  87  88  89 101 104 106   4   5  12  14  31]
b_of_versicolor: 0
support_vector_indices_of_versicolor: [  4   5  13  14  31  80  88  89  91  93  96  97  99 101 103 108 116 119
  40  43  46  48  50  52  56  57  58  63  64  65  66  78]
b_of_virginica: 0
support_vector_indices_of_virginica: [  3   4   5  12  14  31  40  43  46  48  50  52  56  57  58  63  64  65
  66  80  87  88  89  91  93  96  97  99 101 103 104 108 111 116 119]


### (d) sigmoidal kernel

In [9]:
print('\n------- SVM with sigmoidal kernel -------\n')
train_loss, test_loss, other_outputs = svm_clf.SVM_kernel_sigmoid()
print('training_error:', train_loss)
print('testing_error:', test_loss)
# Without weights the flat list holds (b, support indices) once per class.
for position, species in enumerate(('setosa', 'versicolor', 'virginica')):
    print(f'b_of_{species}:', other_outputs[2 * position])
    print(f'support_vector_indices_of_{species}:', other_outputs[2 * position + 1])


------- SVM with sigmoidal kernel -------

training_error: 0.6666666666666666
testing_error: 0.6666666666666666
b_of_setosa: 0
support_vector_indices_of_setosa: [ 80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97
  98  99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
 116 117 118 119   0   1   2   3   4   5   6   7   8   9  10  11  12  13
  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31
  32  33  34  35  36  37  38  39]
b_of_versicolor: 0
support_vector_indices_of_versicolor: [ 80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97
  98  99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
 116 117 118 119  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79]
b_of_virginica: 0
support_vector_indices_of_virginica: [ 40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57
 