Support Vector Machines for Classification

In [None]:
import numpy as np 
import pandas as pd 
import seaborn as sb 
import matplotlib.pyplot as plt 

from sklearn.preprocessing import StandardScaler 
from sklearn.svm import SVC 

from sklearn.model_selection import train_test_split, GridSearchCV #, RandomizedSearchCV   
from sklearn.metrics import accuracy_score, confusion_matrix 

import nbformat 
from IPython import get_ipython 

In [None]:
# Bring `mov_cls_cleaned` into this kernel by reading the preprocessing notebook
# and executing the code cells that mention the movie classification data.
# This is a selective alternative to:
# %run "../Data_Preprocessing/data_preprocess_dtcls.ipynb"

with open("../Data_Preprocessing/data_preprocess_dtcls.ipynb", "r", encoding="utf-8") as f:
    ntb = nbformat.read(f, as_version=4)

ipython = get_ipython()

for cell in ntb.cells:
    if cell.cell_type != "code":
        continue

    # Echo every code cell of the source notebook, whether or not it is executed.
    print(cell.source)

    # "mov_cls" is a substring of "mov_cls_cleaned", so a single membership
    # test selects the cells that touch either name.
    if "mov_cls" in cell.source:
        result = ipython.run_cell(cell.source, silent=True)

        # run_cell() reports failures on the returned result instead of raising;
        # call it out here so a broken upstream cell is not mistaken for success.
        if not result.success:
            print(f"Cell failed to execute: {result.error_before_exec or result.error_in_exec}")

# Confirm the expected variable exists after the upstream cells have run.
try:
    print("Movies Clean Data : ")
    print(mov_cls_cleaned.head())   # type: ignore

except NameError as e:
    print(f"Variable not found: {e}")

In [None]:
# Display the cleaned movie data that the preprocessing cells above are expected to define.
mov_cls_cleaned     # type: ignore 

In [None]:
# Pairwise correlation between the columns of the cleaned data.
# NOTE(review): assumes every column is numeric — confirm against the preprocessing notebook.
mov_cls_cleaned.corr()      # type: ignore 

Feature / target split (X, y):

In [None]:
# Feature matrix: every column except the target 'Start_Tech_Oscar'.
is_feature_col = mov_cls_cleaned.columns != 'Start_Tech_Oscar'      # type: ignore
X = mov_cls_cleaned.loc[:, is_feature_col]      # type: ignore
X

In [None]:
# Show the container type of the feature matrix.
print(type(X)) 

In [None]:
# (rows, columns) of the feature matrix.
X.shape 

In [None]:
# Target vector: the 'Start_Tech_Oscar' column on its own.
y = mov_cls_cleaned.loc[:, 'Start_Tech_Oscar']      # type: ignore
y

In [None]:
# Show the container type of the target.
print(type(y)) 

In [None]:
# Shape of the target; its length should equal the number of rows in X.
y.shape 

Train / Test Split:

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Sizes of the four partitions.
print(f" X_train size : {X_train.shape} \n X_test size : {X_test.shape} \n y_train size : {y_train.shape} \n y_test size : {y_test.shape}")

# Contents of each partition, printed in the order train/test features, then train/test target.
for partition_report in (
    f"X_train :\n{X_train}",
    f"X_test :\n{X_test}",
    f"y_train :\n{y_train}",
    f"y_test :\n{y_test}",
):
    print(partition_report)

In [None]:
# Re-wrap the training features as a DataFrame labelled with X's column names.
feature_names = list(X.columns)
X_train = pd.DataFrame(X_train, columns=feature_names)
X_train

In [None]:
# Re-wrap the training target as a pandas Series.
y_train = pd.Series(y_train) 
y_train 

Standardizing the data:

In [None]:
# Scaler object: statistics are learned from the training features only.
ssc = StandardScaler()
ssc.fit(X_train)
ssc

In [None]:
# Standardize the training features with the scaler fitted on X_train.
X_train_std = ssc.transform(X_train) 
X_train_std 

In [None]:
# Apply the same training-fitted scaler to the test features (the scaler is not refitted here).
X_test_std = ssc.transform(X_test) 
X_test_std 

Support Vector Classifier (Linear)

In [None]:
# Baseline linear-kernel SVC with a fixed C of 0.01, trained on the standardized training set.
svc_l = SVC(kernel='linear', C=0.01) 
svc_l.fit(X_train_std, y_train) 

In [None]:
# Predicted class labels for the standardized test set.
svc_l.predict(X_test_std) 

In [None]:
# Accuracy of the baseline linear SVC on the training data ...
svc_l_pred_trn = svc_l.predict(X_train_std)
acc_sc_trn = accuracy_score(y_train, svc_l_pred_trn)
print(f"Train Accuracy Score : {acc_sc_trn}")

# ... and on the held-out test data.
svc_l_pred_tst = svc_l.predict(X_test_std)
acc_sc_tst = accuracy_score(y_test, svc_l_pred_tst)
print(f"Test Accuracy Score : {acc_sc_tst}")

In [None]:
# Confusion matrix of the baseline linear SVC on the training data ...
svc_l_cm_pred_trn = svc_l.predict(X_train_std)
conf_mtx_trn = confusion_matrix(y_train, svc_l_cm_pred_trn)
print(f"Train Confusion Matrix : \n{conf_mtx_trn}")

# ... and on the held-out test data.
svc_l_cm_pred_tst = svc_l.predict(X_test_std)
conf_mtx_tst = confusion_matrix(y_test, svc_l_cm_pred_tst)
print(f"Test Confusion Matrix : \n{conf_mtx_tst}")

In [None]:
# Summary of the fitted baseline model: support-vector counts and input feature count.
print(f"Number of Support Vectors = {svc_l.n_support_}")      # one count per class: [num_sv_for_0_class, num_sv_for_1_class] (presumably labels 0/1 — verify)
print(f"Number of Features = {svc_l.n_features_in_}") 

In [None]:
# Parameters of the separating hyperplane learned by the linear SVC.
svc_l_intc, svc_l_coef = svc_l.intercept_, svc_l.coef_

print(
    f"Linear Support Vector Classifier intercept = {svc_l_intc} \nLinear Support Vector Classifier coefficient = {svc_l_coef}"
)

Hyperparameter Tuning (with GridSearchCV / RandomizedSearchCV)

In [None]:
# Candidate cost (C) values for the linear SVC.
# `gamma` is intentionally absent from the grid: it is the kernel coefficient of
# the 'rbf', 'poly' and 'sigmoid' kernels and has no effect with kernel='linear'.
# Searching over four gamma values only refitted every C four times with
# identical scores and put a meaningless gamma into best_params_.
# The order of the C values is kept as-is because ties in the CV score are
# resolved in favour of the earliest candidate.
params = { "C": [0.005, 0.001, 0.01, 0.05, 0.1, 0.5, 1, 10, 5, 50, 100, 1000, 500] }

svc_lin = SVC(kernel='linear')

# 10-fold cross-validated exhaustive search, scored by accuracy, run in parallel (n_jobs=-1).
grid_search = GridSearchCV(svc_lin, param_grid=params, cv=10, verbose=1, n_jobs=-1, scoring='accuracy')
grid_search.fit(X_train_std, y_train)

In [None]:
# grid_search.best_params_  
# Parameter combination with the best mean cross-validated score.
print(f"Best parameters : {grid_search.best_params_}") 

In [None]:
# Estimator built with the best parameter combination found by the search.
grid_search.best_estimator_         # Best estimation combo 

In [None]:
# Confusion matrix of the tuned model (grid_search predicts with its best estimator) on the training data ...
tuned_cm_pred_trn = grid_search.predict(X_train_std)
conf_mtx_trn = confusion_matrix(y_train, tuned_cm_pred_trn)
print(f"Train Confusion Matrix : \n{conf_mtx_trn}")

# ... and on the held-out test data.
tuned_cm_pred_tst = grid_search.predict(X_test_std)
conf_mtx_tst = confusion_matrix(y_test, tuned_cm_pred_tst)
print(f"Test Confusion Matrix : \n{conf_mtx_tst}")

In [None]:
# Accuracy of the tuned model on the training data ...
tuned_pred_trn = grid_search.predict(X_train_std)
acc_sc_trn = accuracy_score(y_train, tuned_pred_trn)
print(f"Train Accuracy Score : {acc_sc_trn}")

# ... and on the held-out test data.
tuned_pred_tst = grid_search.predict(X_test_std)
acc_sc_tst = accuracy_score(y_test, tuned_pred_tst)
print(f"Test Accuracy Score : {acc_sc_tst}")