In [None]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, accuracy_score



In [None]:
# Load the pre-transformed dataset; the 'Class' column holds the label.
data_transformed = pd.read_csv('data_transformed.csv')

# Select the label by name and use every other column as a feature, so the
# feature matrix does not depend on 'Class' being the last column of the CSV
# (a positional slice would leak the label into X if the column order changed).
X = data_transformed.drop(columns='Class')
y = data_transformed['Class']


# Use 75% of the data for training and 25% for testing; stratify on y so both
# splits keep the same class ratio (the classes are imbalanced).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=0
)


# Fit the scaler on the training split only and reuse its statistics for the
# test split, so no information from the test data reaches the model.
sc = StandardScaler()
scaled_X_train = sc.fit_transform(X_train)
scaled_X_test = sc.transform(X_test)


In [None]:
def SVM(scaled_X_train, scaled_X_test, y_train, kernel_type, hyper_parameter=None):
  """Fit a class-balanced SVC on the training data and predict the test data.

  Parameters
  ----------
  scaled_X_train : training features passed to ``clf.fit``.
  scaled_X_test : features passed to ``clf.predict``.
  y_train : training labels passed to ``clf.fit``.
  kernel_type : one of ``'linear'``, ``'poly'``, ``'rbf'`` or ``'sigmoid'``.
  hyper_parameter : optional dict of hyper-parameters for the chosen kernel.
    Recognised keys are ``'C'`` (all kernels), ``'degree'`` and ``'coef0'``
    (``'poly'``), ``'gamma'`` (``'rbf'``, ``'sigmoid'``) and ``'coef0'``
    (``'sigmoid'``). Keys that are missing fall back to the per-kernel
    default; keys that do not apply to the kernel are ignored.

  Returns
  -------
  The predictions returned by ``clf.predict(scaled_X_test)``.

  Raises
  ------
  ValueError
    If ``kernel_type`` is not one of the supported kernels.
  """
  # Per-kernel defaults; the key set also defines which hyper-parameters are
  # forwarded to SVC for that kernel.
  kernel_defaults = {
    'linear': {'C': 1.0},
    'poly': {'C': 1.0, 'degree': 3, 'coef0': 0},
    'rbf': {'C': 1.0, 'gamma': 'scale'},
    'sigmoid': {'C': 1.0, 'gamma': 'scale', 'coef0': 0},
  }

  # Fail early with a clear message instead of reaching clf.fit with no
  # classifier bound (previously an UnboundLocalError).
  if not isinstance(kernel_type, str) or kernel_type not in kernel_defaults:
    raise ValueError(
      "Unsupported kernel_type %r; expected one of %s"
      % (kernel_type, sorted(kernel_defaults))
    )

  # Merge caller-supplied values over the defaults so a partial dict such as
  # {'C': 10} no longer passes None for the keys it leaves out.
  supplied = hyper_parameter or {}
  svc_params = {
    name: supplied.get(name, default)
    for name, default in kernel_defaults[kernel_type].items()
  }

  clf = svm.SVC(kernel=kernel_type, class_weight='balanced', random_state=0, **svc_params)
  clf.fit(scaled_X_train, y_train)
  y_pred = clf.predict(scaled_X_test)

  return y_pred




In [None]:
# Train an RBF-kernel SVM on the scaled training split and predict the
# labels of the scaled test split.
y_prediction = SVM(
    scaled_X_train,
    scaled_X_test,
    y_train,
    'rbf',
    {'C': 1.0, 'gamma': 'scale'},
)


In [None]:
# Compare the held-out labels with the predictions. In the confusion matrix,
# rows are the true classes and columns are the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_prediction)
print(cm)
# Overall accuracy is the displayed result of this cell.
accuracy_score(y_true=y_test, y_pred=y_prediction)

[[70269    98]
 [   33    90]]


0.9981415803660094

In [None]:
# Number of rows per class in the full dataset.
class_counts_all = data_transformed['Class'].value_counts()
class_counts_all

0    281469
1       490
Name: Class, dtype: int64

In [None]:
# Number of rows per class in the held-out test split.
class_counts_test = y_test.value_counts()
class_counts_test

0    70367
1      123
Name: Class, dtype: int64