In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

In [2]:
'''DATA LOAD'''
# Read the 'AIN' sheet, separate the 'ocena' target column from the feature
# columns, and hold out 20% of the rows as a test set (fixed seed so the
# split is reproducible).
df = pd.read_excel('Final_data_binary.xlsx', sheet_name='AIN')
X = df.drop(columns='ocena')
y = np.array(df['ocena'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
)

In [3]:
'''SCALING'''
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so no test-set statistics leak into training.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [8]:
'''GridSearch for best parameters'''
# Search the regularisation strength C of a linear-kernel SVC, selecting by
# accuracy under 10-fold cross-validation on the training split (n_jobs=-1
# uses all available cores).
svm = SVC(random_state=0)
parameters = [{
    'C': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100],
    'kernel': ['linear'],
}]
grid_search = GridSearchCV(
    estimator=svm,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the best mean cross-validated score and the parameters achieving it.
best_accuracy, best_parameters = grid_search.best_score_, grid_search.best_params_
print(f'Best accuracy: {best_accuracy*100}')
print(f'Best parameters: {best_parameters}')

Best accuracy: 84.80872677825104
Best parameters: {'C': 1e-05, 'kernel': 'linear'}


In [9]:
'''SVM with linear kernel'''
# Build the final model from the hyper-parameters selected by the grid search
# instead of a hand-copied C value, so this cell cannot drift out of sync with
# the search when the data or the parameter grid changes.
# Requires the grid-search cell to have been run first (reads `grid_search`).
svm = SVC(**grid_search.best_params_, random_state=0)
svm.fit(X_train, y_train)

# Predict labels for the held-out test split; evaluated in the next cell.
predictions = svm.predict(X_test)

In [12]:
'''Confusion Matrix'''
# Rows of `cm` are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, predictions)
ac_s = accuracy_score(y_test, predictions)
print('Confusion matrix:\n', cm)
print('Accuracy score: ', ac_s)

# Plain accuracy is misleading on imbalanced labels: a model that always
# predicts the majority class scores exactly the majority-class share.
# Report per-class recall, balanced accuracy (mean recall over the classes
# that actually occur in y_test) and that majority baseline so a degenerate
# single-class predictor is immediately visible.
support = cm.sum(axis=1)  # number of true samples per class
recall_per_class = np.divide(
    cm.diagonal(),
    support,
    out=np.zeros(len(support)),
    where=support > 0,  # avoid 0/0 for a class that only appears in predictions
)
print('Recall per class: ', recall_per_class)
print('Balanced accuracy: ', recall_per_class[support > 0].mean())
print('Majority-class baseline accuracy: ', support.max() / support.sum())

Confusion matrix:
 [[   0  247]
 [   0 1335]]
Accuracy score:  0.8438685208596713
