In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import svm, datasets
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.svm import SVC  
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix , accuracy_score
from sklearn.utils import shuffle
from sklearn.cross_validation import KFold



In [2]:
#read dataset
filename = 'classifier-train-data.csv'
data = pd.read_csv(filename)

#specify featuure and target
Feature = data.drop(['class_id', 'Unnamed: 0'], axis=1)
Target  = data['class_id']

Feature.sample(5)

Unnamed: 0,perimeter,area,circularity,ac,mean_intensity,standard_deviation,smoothness,skewness
494,101.656854,640.0,1.284942,498.076991,32611.755427,19157.93232,1.0,164621900000.0
1871,769.88225,8370.0,5.635251,1485.29311,32992.032929,19001.306563,1.0,66684910000.0
2158,232.142135,2013.0,2.130366,944.907865,33978.628289,19017.288017,1.0,-331597800000.0
2098,155.313708,1285.0,1.493849,860.194262,33387.624816,19410.775701,1.0,-146146600000.0
3956,121.899495,887.5,1.332372,666.105127,33182.848101,19013.556755,1.0,-17455810000.0


In [3]:
# shuffle the dataset
X, y = shuffle(Feature, Target, random_state=0)

#standardize data
X = (X - X.mean()) / X.std()

# Split the dataset in two equal parts
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0)


# Set the parameters by cross-validation
parameters = [{'kernel': ['rbf'],
               'gamma': [1e-4, 1e-3, 0.01, 0.1, 0.2, 0.5],
                'C': [1, 10, 100, 1000]},
              {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]

print("# Tuning hyper-parameters")
print()


# Tuning hyper-parameters



In [4]:
#Train algorithm
svclassifier = SVC(kernel='linear')  
svclassifier.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [5]:
#make prediction
y_pred = svclassifier.predict(X_test)
print(confusion_matrix(y_test,y_pred))  
print(classification_report(y_test,y_pred))
print(accuracy_score(y_test, y_pred))

[[2072    5]
 [  15   45]]
             precision    recall  f1-score   support

       -1.0       0.99      1.00      1.00      2077
        1.0       0.90      0.75      0.82        60

avg / total       0.99      0.99      0.99      2137

0.9906410856340665


In [6]:
# Validate with k-fold
def run_kfold(clf):
    kf = KFold(891, n_folds=10)
    outcomes = []
    fold = 0
    for train_index, test_index in kf:
        fold += 1
        X_train, X_test =X.values[train_index], X.values[test_index]
        y_train, y_test = y.values[train_index], y.values[test_index]
        svclassifier.fit(X_train, y_train)
        predictions = svclassifier.predict(X_test)
        accuracy = accuracy_score(y_test, predictions)
        outcomes.append(accuracy)
        print("Fold {0} accuracy: {1}".format(fold, accuracy))     
    mean_outcome = np.mean(outcomes)
    print("Mean Accuracy: {0}".format(mean_outcome)) 

run_kfold(svclassifier)

Fold 1 accuracy: 1.0
Fold 2 accuracy: 1.0
Fold 3 accuracy: 0.9887640449438202
Fold 4 accuracy: 1.0
Fold 5 accuracy: 0.9775280898876404
Fold 6 accuracy: 0.9887640449438202
Fold 7 accuracy: 0.9887640449438202
Fold 8 accuracy: 0.9887640449438202
Fold 9 accuracy: 0.9887640449438202
Fold 10 accuracy: 0.9887640449438202
Mean Accuracy: 0.9910112359550561


In [None]:
clf = GridSearchCV(svm.SVC(decision_function_shape='ovo'), parameters, cv=5)
clf.fit(X_train, y_train)

print("Best parameters set found on development set:")
print()
print(clf.best_params_)
print()
print("Grid scores on training set:")
print()
means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
          % (mean, std * 2, params))
print()