In [1]:
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, auc, roc_curve
from sklearn.model_selection import train_test_split, GridSearchCV
import numpy
from imblearn.over_sampling import SMOTE

In [2]:
features = numpy.loadtxt('modifiedTraining.csv', delimiter=',')
labels = numpy.loadtxt('labels.csv', delimiter=',')

In [3]:
xTrain, xTest, yTrain, yTest = train_test_split(features, labels, test_size=0.3, random_state=1)

In [4]:
xTrain.shape

(10794, 159)

In [5]:
yTrain.shape

(10794,)

In [6]:
sm = SMOTE(kind='regular')

In [7]:
x,y = sm.fit_sample(xTrain, yTrain)

In [8]:
params = {'C':[0.01, 0.1, 1, 5, 10], 'kernel':['rbf']}

In [9]:
svc = SVC()

In [10]:
clf = GridSearchCV(estimator=svc, param_grid=params)

In [11]:
clf.fit(x, y)

GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [0.01, 0.1, 1, 5, 10], 'kernel': ['rbf']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [26]:
clf.best_estimator_

SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [12]:
yPredicted = clf.predict(xTest)

In [13]:
acc = clf.score(xTest, yTest)

In [14]:
print('Test Accuracy = ', acc)

Test Accuracy =  0.8363597060095115


In [15]:
print('Training Accuracy = ' , clf.score(xTrain, yTrain))

Training Accuracy =  0.9999073559384843


In [16]:
confusion_matrix(yTest, yPredicted)

array([[3802,  515],
       [ 242,   67]])

In [17]:
clf2 = SVC(C=10)

In [18]:
clf2.fit(xTrain, yTrain)

SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [19]:
print('Training Accuracy = ' , clf2.score(xTrain, yTrain))

Training Accuracy =  0.9999073559384843


In [20]:
print('Test Accuracy = ',  clf2.score(xTest, yTest))

Test Accuracy =  0.9092088197146563


In [21]:
yPredicted2 = clf2.predict(xTest)
confusion_matrix(yTest, yPredicted2)

array([[4190,  127],
       [ 293,   16]])

In [22]:
clf3 = SVC(C=0.1)
clf3.fit(xTrain, yTrain)
print('Training Accuracy = ' , clf3.score(xTrain, yTrain))
print('Test Accuracy = ',  clf3.score(xTest, yTest))
yPredicted3 = clf3.predict(xTest)
confusion_matrix(yTest, yPredicted3)

Training Accuracy =  0.9431165462293867
Test Accuracy =  0.9332036316472114


array([[4317,    0],
       [ 309,    0]])

In [23]:
print(sum(yTest[:] == 1))

309


In [24]:
help(GridSearchCV)

Help on class GridSearchCV in module sklearn.model_selection._search:

class GridSearchCV(BaseSearchCV)
 |  Exhaustive search over specified parameter values for an estimator.
 |  
 |  Important members are fit, predict.
 |  
 |  GridSearchCV implements a "fit" and a "score" method.
 |  It also implements "predict", "predict_proba", "decision_function",
 |  "transform" and "inverse_transform" if they are implemented in the
 |  estimator used.
 |  
 |  The parameters of the estimator used to apply these methods are optimized
 |  by cross-validated grid-search over a parameter grid.
 |  
 |  Read more in the :ref:`User Guide <grid_search>`.
 |  
 |  Parameters
 |  ----------
 |  estimator : estimator object.
 |      This is assumed to implement the scikit-learn estimator interface.
 |      Either estimator needs to provide a ``score`` function,
 |      or ``scoring`` must be passed.
 |  
 |  param_grid : dict or list of dictionaries
 |      Dictionary with parameters names (string) as ke

In [25]:
help(clf)

Help on GridSearchCV in module sklearn.model_selection._search object:

class GridSearchCV(BaseSearchCV)
 |  Exhaustive search over specified parameter values for an estimator.
 |  
 |  Important members are fit, predict.
 |  
 |  GridSearchCV implements a "fit" and a "score" method.
 |  It also implements "predict", "predict_proba", "decision_function",
 |  "transform" and "inverse_transform" if they are implemented in the
 |  estimator used.
 |  
 |  The parameters of the estimator used to apply these methods are optimized
 |  by cross-validated grid-search over a parameter grid.
 |  
 |  Read more in the :ref:`User Guide <grid_search>`.
 |  
 |  Parameters
 |  ----------
 |  estimator : estimator object.
 |      This is assumed to implement the scikit-learn estimator interface.
 |      Either estimator needs to provide a ``score`` function,
 |      or ``scoring`` must be passed.
 |  
 |  param_grid : dict or list of dictionaries
 |      Dictionary with parameters names (string) as k