In [12]:
from loader import Loader
from sklearn import svm
from sklearn.model_selection import cross_val_score
import numpy as np

def flatten_image (X, length):
    X_flatten = np.empty([length,16*15])
    for i in range (length):
        X_flatten[i] = X[i].flatten()
    return X_flatten

def add_noise (X, intensity):
    examples, imsize = np.shape(X)
    for i in range (examples):
        noise = np.random.rand(imsize)
        X[i] += intensity*noise
    return X

def cross_val_with_noise (X,y,k,intensity,clf):
    
    scores = np.zeros(k)
    fold_len = int(len(X)/k)
    X_test = X[0:fold_len, :]
    y_test = y[0:fold_len]
    X_train = X[fold_len:, :]
    y_train = y[fold_len:]
    
    X_train = add_noise(X_train, intensity)
    
    for i in range(k):
        clf.fit(X_train,y_train)
        y_predict = clf.predict(X_test)
        mismatch = 0
        for j in range(len(y_test)):
            if y_test[j] != y_predict[j]:
                mismatch+=1
        scores[i] = (1-mismatch/len(y_test))*100
        X_test = X[i*fold_len:(i+1)*fold_len, :]
        y_test = y[i*fold_len:(i+1)*fold_len]
        X_train = np.append(X[0:i*fold_len, :], X[(i+1)*fold_len:, :], axis = 0)
        y_train = np.append(y[0:i*fold_len], y[(i+1)*fold_len:], axis = 0)
        
        X_train = add_noise(X_train, intensity)
        
    return scores;

In [3]:
dataset = Loader()
del dataset
dataset = Loader()

In [2]:
degree_opt = 1
acc_opt = 0
comp_opt = 0
noise_opt = 0.0
for n_comp in reversed(range(75,241)):
    dataset.pca(n_comp=n_comp)
        
    X, y = dataset.getWholeTrainSet(pca=True, shuffle=True)
    for degree in range (1,5):
        clf = svm.SVC(kernel = 'poly', degree = degree)            
        scores = cross_val_score(clf, X, y ,cv=5)
        #print("Validation accuracy for polynomial with degree, degree,": ", scores.mean())
        if scores.mean() >= acc_opt:
            acc_opt = scores.mean()
            degree_opt = degree
            comp_opt = n_comp
            noise_opt = noise_int
    print (n_comp)
print ("Optimal degree of", degree_opt, "with accuracy of", acc_opt, "and", comp_opt, "components")


NameError: name 'dataset' is not defined

In [None]:
dataset.pca(n_comp=comp_opt)
X, y = dataset.getWholeTrainSet(pca=True, shuffle=True)
clf = svm.SVC(kernel = 'poly', degree = degree_opt)
clf.fit(X,y)
X_test, y_test = dataset.getWholeTestSet(pca=True,shuffle=True,flat=False)
y_predict = clf.predict(X_test)
mismatch = 0
for i in range(len(y_test)):
    if y_test[i] != y_predict[i]:
        mismatch+=1
print ("Test accuracy:",  (1-mismatch/len(y_test))*100, "%")

In [13]:
c_opt = 1
acc_opt = 0
comp_opt = 0
noise_opt = 0.0
for n_comp in reversed(range(70,150)):
    dataset.pca(n_comp=n_comp)
        
    X, y = dataset.getWholeTrainSet(pca=True, shuffle=True)
    for C in [0.5,1,2,3,4]:
        for noise_int in [0.1,0.2,0.3,0.4,0.5]:
            clf = svm.SVC(C=C)
            scores = cross_val_with_noise(X,y,10,noise_int,clf)
            if np.mean(scores) >= acc_opt:
                acc_opt = np.mean(scores)
                comp_opt = n_comp
                noise_opt = noise_int
                c_opt = C
    
print ("Accuracy of", acc_opt, "and", comp_opt, "components and C:", c_opt, "noise intensity:", noise_opt)



149
148
147
146
145
144
143
142
141
140
139
138
137
136
135
134
133
132
131
130
129
128
127
126
125
124
123
122
121
120
119
118
117
116
115
114
113
112
111
110
109
108
107
106
105
104
103
102
101
100
99
98
97
96
95
94
93
92
91
90
89
88
87
86
85
84
83
82
81
80
79
78
77
76
75
74
73
72
71
70
Accuracy of 98.2 and 83 components and C: 2 noise intensity: 0.1


In [15]:
dataset.pca(n_comp=comp_opt)
#X, y = dataset.getWholeTrainSet(pca=True, shuffle=True)
X, y = dataset.getNoisySet(intensity = noise_opt, flat = False, pca=True, shuffle=True, set="train")
clf = svm.SVC(C=c_opt)
clf.fit(X,y)
X_test, y_test = dataset.getWholeTestSet(pca=True,shuffle=True,flat=False)
y_predict = clf.predict(X_test)
mismatch = 0
for i in range(len(y_test)):
    if y_test[i] != y_predict[i]:
        mismatch+=1
print ("Test accuracy:",  (1-mismatch/len(y_test))*100, "%")

Test accuracy: 97.89999999999999 %
