In [56]:
import pickle
import numpy as np
from sklearn.preprocessing import StandardScaler, normalize
from sklearn.decomposition import PCA

In [57]:
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn import svm, datasets

In [58]:
# Load the labelled feature matrix (X) and the label array (y) from disk.
# Context managers close the file handles deterministically instead of
# leaving them open until garbage collection.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open('labelled_data/x.pkl', 'rb') as x_file:
    X = pickle.load(x_file)
with open('labelled_data/y.pkl', 'rb') as y_file:
    # Labels are flattened to 1-D before use.
    y = pickle.load(y_file).flatten()

In [7]:
# Since frames are concatenated, double the number of points.
# NOTE(review): presumably column 0 holds the per-sample point count — confirm.
# The multiplication is in place, so re-running this cell doubles the column again.
X[:, 0] *=2  

In [31]:
# Fit a 10-component PCA on the feature matrix and rebind X to the projected
# data. The un-projected X is no longer available after this cell, so
# re-running it applies PCA again to the already-projected data.
# NOTE(review): in file order this fit happens before the train/test split, so
# held-out rows influence the projection; consider fitting on the training
# split only. No cell visible here pickles `pca` alongside the scaler and
# model — confirm whether inference code needs it.
pca = PCA(n_components=10)
X = pca.fit_transform(X)

In [70]:
# Show the fraction of total variance captured by each retained component.
print(pca.explained_variance_ratio_)

[7.56585153e-01 2.24810043e-01 1.62757859e-02 2.01557336e-03
 1.28631384e-04 1.10067604e-04 3.53910129e-05 3.30577094e-05
 3.87496228e-06 1.14162989e-06]


In [71]:
# Smallest share of variance explained by any retained component.
print(pca.explained_variance_ratio_.min())

1.1416298928246452e-06


In [59]:
# Standardise the feature columns with a default StandardScaler and overwrite
# X with the scaled values. Fitting and transforming are written as two
# explicit steps; the fitted `scaler` is kept because it is pickled later.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics leak into it — consider fitting on X_train only.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [60]:
# Hold out 20% of the rows for evaluation; the seed is fixed so the split is
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=0,
)

In [66]:
# Train the RBF-kernel SVC used for class-probability predictions.
# probability=True makes scikit-learn run an internal cross-validated
# calibration that shuffles the data, so random_state is pinned (same seed as
# the train/test split) to make predict_proba reproducible across kernel
# restarts.
model = svm.SVC(
    kernel='rbf',
    decision_function_shape='ovo',
    C=1000,
    gamma=0.001,
    probability=True,
    random_state=0,
).fit(X_train, y_train)

In [71]:
# Per-class probability estimates for the held-out rows.
# NOTE(review): despite the name, the model fitted in the preceding cell uses
# kernel='rbf', not a linear kernel.
linear_pred = model.predict_proba(X_test)

In [72]:
# Display the predicted class-probability matrix.
linear_pred

array([[0.15778389, 0.50393936, 0.00432127, 0.33395547],
       [0.79546832, 0.09633666, 0.02826431, 0.0799307 ],
       [0.82954749, 0.07465708, 0.01348815, 0.08230728],
       ...,
       [0.78585443, 0.05200176, 0.0144256 , 0.14771821],
       [0.82832437, 0.07435275, 0.01291619, 0.08440669],
       [0.61921503, 0.10393014, 0.22043018, 0.05642465]])

In [69]:
# Score the fitted model on the held-out split.
# NOTE(review): the `_lin` suffix is misleading — the SVC cells in this
# notebook all use kernel='rbf'.
accuracy_lin = model.score(X_test, y_test)

In [70]:
# Report the held-out score stored in accuracy_lin.
print(accuracy_lin)

0.7544684854186265


In [73]:
# Persist the trained classifier. The context manager guarantees the file is
# flushed and closed even if pickling raises, instead of leaking the handle.
with open('svm_model_scale.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [74]:
# Persist the fitted scaler so the same standardisation can be re-applied
# later. The context manager guarantees the file is flushed and closed even
# if pickling raises, instead of leaking the handle.
with open('scaler_model.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [16]:
from sklearn.model_selection import GridSearchCV

In [17]:
# Hyper-parameter candidates for the RBF-kernel SVC grid search:
# four values of C crossed with two values of gamma (8 combinations).
param_grid = [
    dict(
        C=[1, 10, 100, 1000],
        gamma=[0.001, 0.0001],
        kernel=['rbf'],
    ),
]

In [18]:
# Exhaustive cross-validated search over param_grid using a default SVC as the
# base estimator, then list which result fields are available.
svc = svm.SVC()
clf = GridSearchCV(estimator=svc, param_grid=param_grid).fit(X_train, y_train)
print(sorted(clf.cv_results_))

['mean_fit_time', 'mean_score_time', 'mean_test_score', 'param_C', 'param_gamma', 'param_kernel', 'params', 'rank_test_score', 'split0_test_score', 'split1_test_score', 'split2_test_score', 'split3_test_score', 'split4_test_score', 'std_fit_time', 'std_score_time', 'std_test_score']


In [19]:
# Dump the full cross-validation results dict (timings, parameters, scores).
# NOTE(review): this output is long; inspecting individual keys such as
# 'mean_test_score' may be easier to read.
print(clf.cv_results_)

{'mean_fit_time': array([0.23055692, 0.1899446 , 0.32420397, 0.29374738, 0.60447135,
       0.64321017, 2.06988363, 2.21738267]), 'std_fit_time': array([0.00321779, 0.00285898, 0.02496816, 0.01132299, 0.05648204,
       0.06554344, 0.15776495, 0.18691547]), 'mean_score_time': array([0.04150128, 0.03798079, 0.04088607, 0.03834782, 0.03923893,
       0.0379705 , 0.03817773, 0.03668981]), 'std_score_time': array([0.00045657, 0.0004288 , 0.00048587, 0.00029329, 0.00025089,
       0.00026492, 0.00027178, 0.00042134]), 'param_C': masked_array(data=[1, 1, 10, 10, 100, 100, 1000, 1000],
             mask=[False, False, False, False, False, False, False, False],
       fill_value='?',
            dtype=object), 'param_gamma': masked_array(data=[0.001, 0.0001, 0.001, 0.0001, 0.001, 0.0001, 0.001,
                   0.0001],
             mask=[False, False, False, False, False, False, False, False],
       fill_value='?',
            dtype=object), 'param_kernel': masked_array(data=['rbf', 'rbf',

In [20]:
# List the parameter combinations in the order the grid search evaluated them;
# positions in this list are the indices used by the lookups that follow.
print(clf.cv_results_['params'])

[{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}, {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}, {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}, {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}, {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}, {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}, {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}, {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}]


In [21]:
# Index (into the parameter list) of the best-scoring candidate on CV split 0.
# NOTE(review): a single split's winner can differ from the overall best;
# 'mean_test_score' / 'rank_test_score' aggregate across all splits.
split0_scores = clf.cv_results_['split0_test_score']
print(split0_scores.argmax())

7


In [22]:
# Index (into the parameter list) of the best-scoring candidate on CV split 1.
# NOTE(review): a single split's winner can differ from the overall best;
# 'mean_test_score' / 'rank_test_score' aggregate across all splits.
split1_scores = clf.cv_results_['split1_test_score']
print(split1_scores.argmax())

6


In [23]:
# Look up the parameter combination at index 5 of the evaluated grid.
# NOTE(review): the per-split argmax cells printed 7 (split 0) and 6 (split 1),
# not 5 — confirm which index was intended here.
print(clf.cv_results_['params'][5])

{'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}


In [24]:
# Look up the parameter combination at index 6 of the evaluated grid
# (the index printed by the split-1 argmax cell).
print(clf.cv_results_['params'][6])

{'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}


In [25]:
# Refit an RBF-kernel SVC with C=1000 and gamma=0.001, this time without
# probability estimates. Rebinds `model`, replacing any earlier classifier.
final_svc_params = dict(
    kernel='rbf',
    decision_function_shape='ovo',
    C=1000,
    gamma=0.001,
)
model = svm.SVC(**final_svc_params).fit(X_train, y_train)

In [26]:
# Predicted class labels for the held-out rows.
# NOTE(review): `linear_pred` is not read again in the visible cells, and the
# name is misleading — the model is fitted with kernel='rbf'.
linear_pred = model.predict(X_test)

In [27]:
# Score the refitted model on the held-out split.
# NOTE(review): the `_lin` suffix is misleading — the model uses kernel='rbf'.
accuracy_lin = model.score(X_test, y_test)

In [28]:
# Display the held-out score stored in accuracy_lin.
accuracy_lin

0.7601895734597156

In [29]:
# Persist the refitted classifier. The context manager guarantees the file is
# flushed and closed even if pickling raises, instead of leaking the handle.
with open('svm_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)