# SVM

In [263]:
#Loading Required libraries
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

from sklearn.svm import SVC
from sklearn import svm
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import accuracy_score
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import classification_report, confusion_matrix
import warnings
import numpy as np

from sklearn.metrics import mean_squared_error
warnings.filterwarnings('ignore')
from sklearn.ensemble import IsolationForest

## Using normal data to create SVM

For consistency across experiments, we use train and test sets that were split beforehand.

In [292]:
# Pre-split "Normal" partitions read from disk: training pair first, then test pair.
X_train = pd.read_csv('Normal/X_train.csv')
y_train = pd.read_csv('Normal/y_train.csv')
X_test = pd.read_csv('Normal/X_test.csv')
y_test = pd.read_csv('Normal/y_test.csv')

Selecting only the features we got through feature selection.

In [265]:
# Columns kept by the feature-selection step; one shared list guarantees the
# training and test frames end up with identical columns in identical order.
selected_features = [
    'TPP', 'TympType', 'OAE1', 'OAE1.4', 'OAE2', 'OAE2.8', 'OAE4',
    'f(408.4789)', 'f(2593.6791)', 'f(2378.4142)', 'f(2310.7054)',
    'f(7127.1897)', 'f(865.5366)', 'f(6727.1713)', 'f(226.0000)',
    'f(458.5020)', 'f(500.0000)', 'f(1029.3022)', 'f(5993.2283)',
    'f(1887.7486)', 'f(1373.9536)', 'f(667.4199)', 'f(2747.9073)',
    'f(1296.8396)', 'f(577.6763)', 'f(1155.3527)', 'f(1090.5077)',
]
X_train = X_train[selected_features]
X_test = X_test[selected_features]

### Scaling

In [266]:


# Learn the per-feature min/max from the training split only, then apply that
# same mapping to both splits (the test split is never used for fitting).
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

### Grid Search

In [267]:
# Candidate hyper-parameters. gamma only applies to the poly/rbf/sigmoid
# kernels, so the linear kernel gets its own grid without it.
svm_linear = {'C': [0.1, 1, 10, 100, 1000],
              'kernel': ['linear']}
svm_others = {'C': [0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 'auto'],
              'kernel': ['poly', 'rbf', 'sigmoid']}

parameters = [svm_linear, svm_others]

# refit=True retrains the best candidate on the full training split once the
# cross-validated search is done.
grid = GridSearchCV(SVC(), param_grid=parameters, refit=True, verbose=3)

# The labels were read as a single-column DataFrame; flatten them to the 1-D
# array that fit() expects instead of relying on a silenced conversion warning.
grid.fit(X_train, y_train.values.ravel())


Fitting 5 folds for each of 95 candidates, totalling 475 fits
[CV 1/5] END ..............C=0.1, kernel=linear;, score=0.923 total time=   0.0s
[CV 2/5] END ..............C=0.1, kernel=linear;, score=1.000 total time=   0.0s
[CV 3/5] END ..............C=0.1, kernel=linear;, score=0.947 total time=   0.0s
[CV 4/5] END ..............C=0.1, kernel=linear;, score=1.000 total time=   0.0s
[CV 5/5] END ..............C=0.1, kernel=linear;, score=0.974 total time=   0.0s
[CV 1/5] END ................C=1, kernel=linear;, score=0.923 total time=   0.0s
[CV 2/5] END ................C=1, kernel=linear;, score=1.000 total time=   0.0s
[CV 3/5] END ................C=1, kernel=linear;, score=0.947 total time=   0.0s
[CV 4/5] END ................C=1, kernel=linear;, score=1.000 total time=   0.0s
[CV 5/5] END ................C=1, kernel=linear;, score=0.974 total time=   0.0s
[CV 1/5] END ...............C=10, kernel=linear;, score=0.974 total time=   0.0s
[CV 2/5] END ...............C=10, kernel=linear

GridSearchCV(estimator=SVC(),
             param_grid=[{'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [0.1, 1, 10, 100, 1000],
                          'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 'auto'],
                          'kernel': ['poly', 'rbf', 'sigmoid']}],
             verbose=3)

In [268]:
# Show the hyper-parameter combination selected by the grid search above.
print(grid.best_params_) 

{'C': 100, 'gamma': 0.1, 'kernel': 'sigmoid'}


In [269]:
# Create the SVM classifier from the grid-search winner instead of hand-copied
# values, so it cannot drift from what the search actually selected.
clf = svm.SVC(**grid.best_params_)

In [270]:
# Train on the scaled training split; the single-column label frame is
# flattened to the 1-D array that fit() expects.
clf.fit(X_train, y_train.values.ravel())

# Predict on the test split and report overall accuracy.
y_pred = clf.predict(X_test)
print(accuracy_score(y_test, y_pred))

0.9583333333333334


In [271]:
# Per-class precision / recall / F1 for the test-split predictions.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.97      0.97      0.97        40
           1       0.88      0.88      0.88         8

    accuracy                           0.96        48
   macro avg       0.93      0.93      0.93        48
weighted avg       0.96      0.96      0.96        48



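Here we can see that the accuracy of the SVM on the test set is 0.96. Note that the minority class (1) has only 8 test samples, with precision and recall of 0.88.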

## Using oversampled data to create SVM

In [338]:

# Pre-split "Oversampling" partitions read from disk: training pair first, then test pair.
X_train_over = pd.read_csv('Oversampling/X_train_over.csv')
y_train_over = pd.read_csv('Oversampling/y_train_over.csv')
X_test_over = pd.read_csv('Oversampling/X_test_over.csv')
y_test_over = pd.read_csv('Oversampling/y_test_over.csv')

In [339]:
# Columns kept by the feature-selection step; one shared list guarantees the
# training and test frames end up with identical columns in identical order.
selected_features = [
    'TPP', 'TympType', 'OAE1', 'OAE1.4', 'OAE2', 'OAE2.8', 'OAE4',
    'f(408.4789)', 'f(2593.6791)', 'f(2378.4142)', 'f(2310.7054)',
    'f(7127.1897)', 'f(865.5366)', 'f(6727.1713)', 'f(226.0000)',
    'f(458.5020)', 'f(500.0000)', 'f(1029.3022)', 'f(5993.2283)',
    'f(1887.7486)', 'f(1373.9536)', 'f(667.4199)', 'f(2747.9073)',
    'f(1296.8396)', 'f(577.6763)', 'f(1155.3527)', 'f(1090.5077)',
]
X_train_over = X_train_over[selected_features]
X_test_over = X_test_over[selected_features]


### Scaling

In [340]:
# Learn the per-feature min/max from the training split only, then apply that
# same mapping to both splits (the test split is never used for fitting).
scaler = MinMaxScaler().fit(X_train_over)
X_train_over = scaler.transform(X_train_over)
X_test_over = scaler.transform(X_test_over)

### Grid Search

In [341]:
# Candidate hyper-parameters. gamma only applies to the poly/rbf/sigmoid
# kernels, so the linear kernel gets its own grid without it.
svm_linear = {'C': [0.1, 1, 10, 100, 1000],
              'kernel': ['linear']}
svm_others = {'C': [0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 'auto'],
              'kernel': ['poly', 'rbf', 'sigmoid']}

parameters = [svm_linear, svm_others]

# refit=True retrains the best candidate on the full training split once the
# cross-validated search is done.
grid = GridSearchCV(SVC(), param_grid=parameters, refit=True, verbose=3)

# The labels were read as a single-column DataFrame; flatten them to the 1-D
# array that fit() expects instead of relying on a silenced conversion warning.
grid.fit(X_train_over, y_train_over.values.ravel())

Fitting 5 folds for each of 95 candidates, totalling 475 fits
[CV 1/5] END ..............C=0.1, kernel=linear;, score=0.942 total time=   0.1s
[CV 2/5] END ..............C=0.1, kernel=linear;, score=0.956 total time=   0.0s
[CV 3/5] END ..............C=0.1, kernel=linear;, score=0.912 total time=   0.0s
[CV 4/5] END ..............C=0.1, kernel=linear;, score=0.868 total time=   0.0s
[CV 5/5] END ..............C=0.1, kernel=linear;, score=0.897 total time=   0.0s
[CV 1/5] END ................C=1, kernel=linear;, score=0.971 total time=   0.0s
[CV 2/5] END ................C=1, kernel=linear;, score=0.985 total time=   0.0s
[CV 3/5] END ................C=1, kernel=linear;, score=0.941 total time=   0.0s
[CV 4/5] END ................C=1, kernel=linear;, score=0.956 total time=   0.0s
[CV 5/5] END ................C=1, kernel=linear;, score=0.971 total time=   0.0s
[CV 1/5] END ...............C=10, kernel=linear;, score=1.000 total time=   0.0s
[CV 2/5] END ...............C=10, kernel=linear

GridSearchCV(estimator=SVC(),
             param_grid=[{'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [0.1, 1, 10, 100, 1000],
                          'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 'auto'],
                          'kernel': ['poly', 'rbf', 'sigmoid']}],
             verbose=3)

In [288]:
# Show the hyper-parameter combination selected by the grid search above.
# NOTE(review): the saved execution count of this cell (288) is lower than that
# of the grid-search cell (341), so the stored output may predate the latest
# search — re-run in order to confirm.
print(grid.best_params_) 

{'C': 100, 'kernel': 'linear'}


In [289]:
# Create the SVM classifier from the grid-search winner instead of hand-copied
# values, so it cannot drift from what the search actually selected.
clf = svm.SVC(**grid.best_params_)

In [290]:
# Train on the scaled training split; the single-column label frame is
# flattened to the 1-D array that fit() expects.
clf.fit(X_train_over, y_train_over.values.ravel())

# Predict the response for the test split.
y_pred_over = clf.predict(X_test_over)



In [291]:
# Per-class precision / recall / F1 for the test-split predictions.
print(classification_report(y_test_over, y_pred_over))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        31
           1       1.00      1.00      1.00        30

    accuracy                           1.00        61
   macro avg       1.00      1.00      1.00        61
weighted avg       1.00      1.00      1.00        61



## Using ADASYN Large data to create SVM

In [343]:

# Pre-split "Adasyn Large" partitions read from disk: training pair first, then test pair.
X_train_adasyn = pd.read_csv('Adasyn Large/X_train_adasyn.csv')
y_train_adasyn = pd.read_csv('Adasyn Large/y_train_adasyn.csv')
X_test_adasyn = pd.read_csv('Adasyn Large/X_test_adasyn.csv')
y_test_adasyn = pd.read_csv('Adasyn Large/y_test_adasyn.csv')

In [345]:
# Columns kept by the feature-selection step; one shared list guarantees the
# training and test frames end up with identical columns in identical order.
selected_features = [
    'TPP', 'TympType', 'OAE1', 'OAE1.4', 'OAE2', 'OAE2.8', 'OAE4',
    'f(408.4789)', 'f(2593.6791)', 'f(2378.4142)', 'f(2310.7054)',
    'f(7127.1897)', 'f(865.5366)', 'f(6727.1713)', 'f(226.0000)',
    'f(458.5020)', 'f(500.0000)', 'f(1029.3022)', 'f(5993.2283)',
    'f(1887.7486)', 'f(1373.9536)', 'f(667.4199)', 'f(2747.9073)',
    'f(1296.8396)', 'f(577.6763)', 'f(1155.3527)', 'f(1090.5077)',
]
X_train_adasyn = X_train_adasyn[selected_features]
X_test_adasyn = X_test_adasyn[selected_features]


In [346]:
# Learn the per-feature min/max from the training split only, then apply that
# same mapping to both splits (the test split is never used for fitting).
scaler = MinMaxScaler().fit(X_train_adasyn)
X_train_adasyn = scaler.transform(X_train_adasyn)
X_test_adasyn = scaler.transform(X_test_adasyn)

In [347]:
# Candidate hyper-parameters. gamma only applies to the poly/rbf/sigmoid
# kernels, so the linear kernel gets its own grid without it.
svm_linear = {'C': [0.1, 1, 10, 100, 1000],
              'kernel': ['linear']}
svm_others = {'C': [0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 'auto'],
              'kernel': ['poly', 'rbf', 'sigmoid']}

parameters = [svm_linear, svm_others]

# refit=True retrains the best candidate on the full training split once the
# cross-validated search is done.
grid = GridSearchCV(SVC(), param_grid=parameters, refit=True, verbose=3)

# The labels were read as a single-column DataFrame; flatten them to the 1-D
# array that fit() expects instead of relying on a silenced conversion warning.
grid.fit(X_train_adasyn, y_train_adasyn.values.ravel())


Fitting 5 folds for each of 95 candidates, totalling 475 fits
[CV 1/5] END ..............C=0.1, kernel=linear;, score=0.979 total time=   0.5s
[CV 2/5] END ..............C=0.1, kernel=linear;, score=0.978 total time=   0.5s
[CV 3/5] END ..............C=0.1, kernel=linear;, score=0.977 total time=   0.5s
[CV 4/5] END ..............C=0.1, kernel=linear;, score=0.975 total time=   0.4s
[CV 5/5] END ..............C=0.1, kernel=linear;, score=0.978 total time=   0.4s
[CV 1/5] END ................C=1, kernel=linear;, score=0.989 total time=   0.2s
[CV 2/5] END ................C=1, kernel=linear;, score=0.991 total time=   0.2s
[CV 3/5] END ................C=1, kernel=linear;, score=0.989 total time=   0.2s
[CV 4/5] END ................C=1, kernel=linear;, score=0.988 total time=   0.2s
[CV 5/5] END ................C=1, kernel=linear;, score=0.994 total time=   0.2s
[CV 1/5] END ...............C=10, kernel=linear;, score=0.999 total time=   0.1s
[CV 2/5] END ...............C=10, kernel=linear

GridSearchCV(estimator=SVC(),
             param_grid=[{'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [0.1, 1, 10, 100, 1000],
                          'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 'auto'],
                          'kernel': ['poly', 'rbf', 'sigmoid']}],
             verbose=3)

In [348]:
# Show the hyper-parameter combination selected by the grid search above.
print(grid.best_params_) 

{'C': 0.1, 'gamma': 1, 'kernel': 'poly'}


In [349]:
# Create the SVM classifier from the grid-search winner instead of hand-copied
# values, so it cannot drift from what the search actually selected.
clf = svm.SVC(**grid.best_params_)

# Train on the scaled training split; the single-column label frame is
# flattened to the 1-D array that fit() expects.
clf.fit(X_train_adasyn, y_train_adasyn.values.ravel())

# Predict the response for the test split.
y_pred_adasyn = clf.predict(X_test_adasyn)


In [350]:
# Per-class precision / recall / F1 for the test-split predictions.
print(classification_report(y_test_adasyn, y_pred_adasyn))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       750
           1       1.00      1.00      1.00       751

    accuracy                           1.00      1501
   macro avg       1.00      1.00      1.00      1501
weighted avg       1.00      1.00      1.00      1501



## Using SMOTE data to create SVM

In [352]:

# Pre-split "Smote" partitions read from disk: training pair first, then test pair.
X_train_smote = pd.read_csv('Smote/X_train_smote.csv')
y_train_smote = pd.read_csv('Smote/y_train_smote.csv')
X_test_smote = pd.read_csv('Smote/X_test_smote.csv')
y_test_smote = pd.read_csv('Smote/y_test_smote.csv')

In [353]:
# Columns kept by the feature-selection step; one shared list guarantees the
# training and test frames end up with identical columns in identical order.
selected_features = [
    'TPP', 'TympType', 'OAE1', 'OAE1.4', 'OAE2', 'OAE2.8', 'OAE4',
    'f(408.4789)', 'f(2593.6791)', 'f(2378.4142)', 'f(2310.7054)',
    'f(7127.1897)', 'f(865.5366)', 'f(6727.1713)', 'f(226.0000)',
    'f(458.5020)', 'f(500.0000)', 'f(1029.3022)', 'f(5993.2283)',
    'f(1887.7486)', 'f(1373.9536)', 'f(667.4199)', 'f(2747.9073)',
    'f(1296.8396)', 'f(577.6763)', 'f(1155.3527)', 'f(1090.5077)',
]
X_train_smote = X_train_smote[selected_features]
X_test_smote = X_test_smote[selected_features]


In [354]:
# Drop the first column of each label frame and keep what follows it
# (presumably the first column is a saved row index — TODO confirm).
# The stripped frames are deliberately NOT written back to 'Smote/*.csv':
# saving with the default index re-adds a leading column, so the write never
# cleaned the files and only overwrote the input data on every run.
y_test_smote = y_test_smote.iloc[:, 1:]
y_train_smote = y_train_smote.iloc[:, 1:]

In [356]:
# Learn the per-feature min/max from the training split only, then apply that
# same mapping to both splits (the test split is never used for fitting).
scaler = MinMaxScaler().fit(X_train_smote)
X_train_smote = scaler.transform(X_train_smote)
X_test_smote = scaler.transform(X_test_smote)

In [359]:
# Candidate hyper-parameters. gamma only applies to the poly/rbf/sigmoid
# kernels, so the linear kernel gets its own grid without it.
svm_linear = {'C': [0.1, 1, 10, 100, 1000],
              'kernel': ['linear']}
svm_others = {'C': [0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 'auto'],
              'kernel': ['poly', 'rbf', 'sigmoid']}

parameters = [svm_linear, svm_others]

# refit=True retrains the best candidate on the full training split once the
# cross-validated search is done.
grid = GridSearchCV(SVC(), param_grid=parameters, refit=True, verbose=3)

# The labels are held as a single-column DataFrame; flatten them to the 1-D
# array that fit() expects instead of relying on a silenced conversion warning.
grid.fit(X_train_smote, y_train_smote.values.ravel())

Fitting 5 folds for each of 95 candidates, totalling 475 fits
[CV 1/5] END ..............C=0.1, kernel=linear;, score=0.957 total time=   0.0s
[CV 2/5] END ..............C=0.1, kernel=linear;, score=0.941 total time=   0.0s
[CV 3/5] END ..............C=0.1, kernel=linear;, score=0.868 total time=   0.0s
[CV 4/5] END ..............C=0.1, kernel=linear;, score=0.912 total time=   0.0s
[CV 5/5] END ..............C=0.1, kernel=linear;, score=0.897 total time=   0.0s
[CV 1/5] END ................C=1, kernel=linear;, score=0.986 total time=   0.0s
[CV 2/5] END ................C=1, kernel=linear;, score=0.985 total time=   0.0s
[CV 3/5] END ................C=1, kernel=linear;, score=0.956 total time=   0.0s
[CV 4/5] END ................C=1, kernel=linear;, score=0.956 total time=   0.0s
[CV 5/5] END ................C=1, kernel=linear;, score=0.926 total time=   0.0s
[CV 1/5] END ...............C=10, kernel=linear;, score=0.971 total time=   0.0s
[CV 2/5] END ...............C=10, kernel=linear

GridSearchCV(estimator=SVC(),
             param_grid=[{'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [0.1, 1, 10, 100, 1000],
                          'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 'auto'],
                          'kernel': ['poly', 'rbf', 'sigmoid']}],
             verbose=3)

In [360]:
# Show the hyper-parameter combination selected by the grid search above.
print(grid.best_params_) 

{'C': 0.1, 'gamma': 1, 'kernel': 'poly'}


In [361]:
# Create the SVM classifier from the grid-search winner instead of hand-copied
# values, so it cannot drift from what the search actually selected.
clf = svm.SVC(**grid.best_params_)

# Train on the scaled training split; the single-column label frame is
# flattened to the 1-D array that fit() expects.
clf.fit(X_train_smote, y_train_smote.values.ravel())

# Predict the response for the test split.
y_pred_smote = clf.predict(X_test_smote)


In [372]:
# Per-class precision / recall / F1 for the test-split predictions.
print(classification_report(y_test_smote, y_pred_smote))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        31
           1       1.00      1.00      1.00        30

    accuracy                           1.00        61
   macro avg       1.00      1.00      1.00        61
weighted avg       1.00      1.00      1.00        61



## Using SMOTE Large data to create SVM

In [365]:
# Pre-split "Smote Large" partitions read from disk: training pair first, then test pair.
X_train_smotelg = pd.read_csv('Smote Large/X_train_smote.csv')
y_train_smotelg = pd.read_csv('Smote Large/y_train_smote.csv')
X_test_smotelg = pd.read_csv('Smote Large/X_test_smote.csv')
y_test_smotelg = pd.read_csv('Smote Large/y_test_smote.csv')

In [366]:
# Columns kept by the feature-selection step; one shared list guarantees the
# training and test frames end up with identical columns in identical order.
selected_features = [
    'TPP', 'TympType', 'OAE1', 'OAE1.4', 'OAE2', 'OAE2.8', 'OAE4',
    'f(408.4789)', 'f(2593.6791)', 'f(2378.4142)', 'f(2310.7054)',
    'f(7127.1897)', 'f(865.5366)', 'f(6727.1713)', 'f(226.0000)',
    'f(458.5020)', 'f(500.0000)', 'f(1029.3022)', 'f(5993.2283)',
    'f(1887.7486)', 'f(1373.9536)', 'f(667.4199)', 'f(2747.9073)',
    'f(1296.8396)', 'f(577.6763)', 'f(1155.3527)', 'f(1090.5077)',
]
X_train_smotelg = X_train_smotelg[selected_features]
X_test_smotelg = X_test_smotelg[selected_features]


In [368]:
# Learn the per-feature min/max from the training split only, then apply that
# same mapping to both splits (the test split is never used for fitting).
scaler = MinMaxScaler().fit(X_train_smotelg)
X_train_smotelg = scaler.transform(X_train_smotelg)
X_test_smotelg = scaler.transform(X_test_smotelg)

In [369]:
# Candidate hyper-parameters. gamma only applies to the poly/rbf/sigmoid
# kernels, so the linear kernel gets its own grid without it.
svm_linear = {'C': [0.1, 1, 10, 100, 1000],
              'kernel': ['linear']}
svm_others = {'C': [0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 'auto'],
              'kernel': ['poly', 'rbf', 'sigmoid']}

parameters = [svm_linear, svm_others]

# refit=True retrains the best candidate on the full training split once the
# cross-validated search is done.
grid = GridSearchCV(SVC(), param_grid=parameters, refit=True, verbose=3)

# The labels were read as a single-column DataFrame; flatten them to the 1-D
# array that fit() expects instead of relying on a silenced conversion warning.
grid.fit(X_train_smotelg, y_train_smotelg.values.ravel())

Fitting 5 folds for each of 95 candidates, totalling 475 fits
[CV 1/5] END ..............C=0.1, kernel=linear;, score=0.995 total time=   0.7s
[CV 2/5] END ..............C=0.1, kernel=linear;, score=0.991 total time=   0.2s
[CV 3/5] END ..............C=0.1, kernel=linear;, score=0.989 total time=   0.2s
[CV 4/5] END ..............C=0.1, kernel=linear;, score=0.994 total time=   0.2s
[CV 5/5] END ..............C=0.1, kernel=linear;, score=0.992 total time=   0.2s
[CV 1/5] END ................C=1, kernel=linear;, score=0.996 total time=   0.1s
[CV 2/5] END ................C=1, kernel=linear;, score=0.995 total time=   0.1s
[CV 3/5] END ................C=1, kernel=linear;, score=0.994 total time=   0.1s
[CV 4/5] END ................C=1, kernel=linear;, score=0.996 total time=   0.1s
[CV 5/5] END ................C=1, kernel=linear;, score=0.995 total time=   0.1s
[CV 1/5] END ...............C=10, kernel=linear;, score=0.999 total time=   0.1s
[CV 2/5] END ...............C=10, kernel=linear

GridSearchCV(estimator=SVC(),
             param_grid=[{'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [0.1, 1, 10, 100, 1000],
                          'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 'auto'],
                          'kernel': ['poly', 'rbf', 'sigmoid']}],
             verbose=3)

In [370]:
# Show the hyper-parameter combination selected by the grid search above.
print(grid.best_params_) 

{'C': 100, 'kernel': 'linear'}


In [371]:
# Create the SVM classifier from the grid-search winner instead of hand-copied
# values, so it cannot drift from what the search actually selected.
clf = svm.SVC(**grid.best_params_)

# Train on the scaled training split; the single-column label frame is
# flattened to the 1-D array that fit() expects.
clf.fit(X_train_smotelg, y_train_smotelg.values.ravel())

# Predict the response for the test split.
y_pred_smotelg = clf.predict(X_test_smotelg)


In [373]:
# Per-class precision / recall / F1 for the test-split predictions.
print(classification_report(y_test_smotelg, y_pred_smotelg))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       750
           1       1.00      1.00      1.00       750

    accuracy                           1.00      1500
   macro avg       1.00      1.00      1.00      1500
weighted avg       1.00      1.00      1.00      1500



## Using SVM-SMOTE Large data to create SVM

In [374]:
# Pre-split "Svm Smote Large" partitions read from disk: training pair first, then test pair.
X_train_smotesvm = pd.read_csv('Svm Smote Large/X_train_svmsmote.csv')
y_train_smotesvm = pd.read_csv('Svm Smote Large/y_train_svmsmote.csv')
X_test_smotesvm = pd.read_csv('Svm Smote Large/X_test_svmsmote.csv')
y_test_smotesvm = pd.read_csv('Svm Smote Large/y_test_svmsmote.csv')

In [375]:
# Columns kept by the feature-selection step; one shared list guarantees the
# training and test frames end up with identical columns in identical order.
selected_features = [
    'TPP', 'TympType', 'OAE1', 'OAE1.4', 'OAE2', 'OAE2.8', 'OAE4',
    'f(408.4789)', 'f(2593.6791)', 'f(2378.4142)', 'f(2310.7054)',
    'f(7127.1897)', 'f(865.5366)', 'f(6727.1713)', 'f(226.0000)',
    'f(458.5020)', 'f(500.0000)', 'f(1029.3022)', 'f(5993.2283)',
    'f(1887.7486)', 'f(1373.9536)', 'f(667.4199)', 'f(2747.9073)',
    'f(1296.8396)', 'f(577.6763)', 'f(1155.3527)', 'f(1090.5077)',
]
X_train_smotesvm = X_train_smotesvm[selected_features]
X_test_smotesvm = X_test_smotesvm[selected_features]


In [376]:
# Drop the first column of each label frame and keep what follows it
# (presumably the first column is a saved row index — TODO confirm).
# The stripped frames are deliberately NOT written back to
# 'Svm Smote Large/*.csv': saving with the default index re-adds a leading
# column, so the write never cleaned the files and only overwrote the input
# data on every run.
y_test_smotesvm = y_test_smotesvm.iloc[:, 1:]
y_train_smotesvm = y_train_smotesvm.iloc[:, 1:]

In [377]:
# Learn the per-feature min/max from the training split only, then apply that
# same mapping to both splits (the test split is never used for fitting).
scaler = MinMaxScaler().fit(X_train_smotesvm)
X_train_smotesvm = scaler.transform(X_train_smotesvm)
X_test_smotesvm = scaler.transform(X_test_smotesvm)

In [378]:
# Candidate hyper-parameters. gamma only applies to the poly/rbf/sigmoid
# kernels, so the linear kernel gets its own grid without it.
svm_linear = {'C': [0.1, 1, 10, 100, 1000],
              'kernel': ['linear']}
svm_others = {'C': [0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 'auto'],
              'kernel': ['poly', 'rbf', 'sigmoid']}

parameters = [svm_linear, svm_others]

# refit=True retrains the best candidate on the full training split once the
# cross-validated search is done.
grid = GridSearchCV(SVC(), param_grid=parameters, refit=True, verbose=3)

# The labels are held as a single-column DataFrame; flatten them to the 1-D
# array that fit() expects instead of relying on a silenced conversion warning.
grid.fit(X_train_smotesvm, y_train_smotesvm.values.ravel())

Fitting 5 folds for each of 95 candidates, totalling 475 fits
[CV 1/5] END ..............C=0.1, kernel=linear;, score=0.956 total time=   0.8s
[CV 2/5] END ..............C=0.1, kernel=linear;, score=0.955 total time=   0.4s
[CV 3/5] END ..............C=0.1, kernel=linear;, score=0.946 total time=   0.4s
[CV 4/5] END ..............C=0.1, kernel=linear;, score=0.956 total time=   0.4s
[CV 5/5] END ..............C=0.1, kernel=linear;, score=0.968 total time=   0.4s
[CV 1/5] END ................C=1, kernel=linear;, score=0.987 total time=   0.2s
[CV 2/5] END ................C=1, kernel=linear;, score=0.996 total time=   0.2s
[CV 3/5] END ................C=1, kernel=linear;, score=0.987 total time=   0.2s
[CV 4/5] END ................C=1, kernel=linear;, score=0.988 total time=   0.2s
[CV 5/5] END ................C=1, kernel=linear;, score=0.992 total time=   0.2s
[CV 1/5] END ...............C=10, kernel=linear;, score=0.992 total time=   0.1s
[CV 2/5] END ...............C=10, kernel=linear

GridSearchCV(estimator=SVC(),
             param_grid=[{'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [0.1, 1, 10, 100, 1000],
                          'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 'auto'],
                          'kernel': ['poly', 'rbf', 'sigmoid']}],
             verbose=3)

In [379]:
# Show the hyper-parameter combination selected by the grid search above.
print(grid.best_params_) 

{'C': 10, 'gamma': 1, 'kernel': 'rbf'}


In [381]:
# Create the SVM classifier from the grid-search winner instead of hand-copied
# values, so it cannot drift from what the search actually selected.
clf = svm.SVC(**grid.best_params_)

# Train on the scaled training split; the single-column label frame is
# flattened to the 1-D array that fit() expects.
clf.fit(X_train_smotesvm, y_train_smotesvm.values.ravel())

# Predict the response for the test split.
y_pred_smotesvm = clf.predict(X_test_smotesvm)

In [382]:
# Per-class precision / recall / F1 for the test-split predictions.
print(classification_report(y_test_smotesvm, y_pred_smotesvm))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       692
           1       1.00      1.00      1.00       750

    accuracy                           1.00      1442
   macro avg       1.00      1.00      1.00      1442
weighted avg       1.00      1.00      1.00      1442

