In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.svm import SVC

In [2]:
v_data=pd.read_table("Vertebral.dat", sep="\s+", header=None)
v_data.columns=['pelvic_incidence','pelvic_tilt','lumbar_lordosis_angle','sacral_slope','pelvic_radius','degree_spon','class']
v_data['class'] = v_data['class'].map({'DH': 1, 'SL': 2, 'NO': 3})
v_data.head()

Unnamed: 0,pelvic_incidence,pelvic_tilt,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spon,class
0,63.03,22.55,39.61,40.48,98.67,-0.25,1
1,39.06,10.06,25.02,29.0,114.41,4.56,1
2,68.83,22.22,50.09,46.61,105.99,-3.53,1
3,69.3,24.65,44.31,44.64,101.87,11.21,1
4,49.71,9.65,28.32,40.06,108.17,7.92,1


In [24]:
from sklearn.cross_validation import train_test_split
X, y = v_data.iloc[:, [3,5]].values, v_data['class']
X_train, X_test, y_train, y_test =train_test_split(X, y, test_size=0.3, random_state=10)

In [37]:
svm = SVC(kernel='rbfzz', C=10.0, gamma=0.1, random_state=0)
svm.fit(X_train, y_train)

SVC(C=10.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.1, kernel='linear',
  max_iter=-1, probability=False, random_state=0, shrinking=True,
  tol=0.001, verbose=False)

In [38]:
svm.score(X,y,sample_weight=None)

0.76129032258064511

In [34]:
svm.score(X_test,y_test,sample_weight=None)

0.74193548387096775

In [28]:
#finetuning using GridSearch
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
pipe_svc = Pipeline([('scl', StandardScaler()),('clf', SVC(random_state=1))])
param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
param_grid = [{'clf__C': param_range,'clf__kernel': ['linear']},{'clf__C': param_range,'clf__gamma': param_range,'clf__kernel': ['rbf']}]
gs = GridSearchCV(estimator=pipe_svc,param_grid=param_grid,scoring='accuracy',cv=10,n_jobs=-1)
gs = gs.fit(X_train, y_train)

In [29]:
print(gs.best_score_)

0.7849462365591398


In [30]:
print(gs.best_params_)

{'clf__C': 1.0, 'clf__gamma': 1.0, 'clf__kernel': 'rbf'}


In [31]:
clf = gs.best_estimator_
clf.fit(X_train, y_train)
print('Test accuracy: %.3f' % clf.score(X_test, y_test))

Test accuracy: 0.734
