In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

In [2]:
data = load_digits()
data

{'data': array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ..., 10.,  0.,  0.],
        [ 0.,  0.,  0., ..., 16.,  9.,  0.],
        ...,
        [ 0.,  0.,  1., ...,  6.,  0.,  0.],
        [ 0.,  0.,  2., ..., 12.,  0.,  0.],
        [ 0.,  0., 10., ..., 12.,  1.,  0.]], shape=(1797, 64)),
 'target': array([0, 1, 2, ..., 8, 9, 8], shape=(1797,)),
 'frame': None,
 'feature_names': ['pixel_0_0',
  'pixel_0_1',
  'pixel_0_2',
  'pixel_0_3',
  'pixel_0_4',
  'pixel_0_5',
  'pixel_0_6',
  'pixel_0_7',
  'pixel_1_0',
  'pixel_1_1',
  'pixel_1_2',
  'pixel_1_3',
  'pixel_1_4',
  'pixel_1_5',
  'pixel_1_6',
  'pixel_1_7',
  'pixel_2_0',
  'pixel_2_1',
  'pixel_2_2',
  'pixel_2_3',
  'pixel_2_4',
  'pixel_2_5',
  'pixel_2_6',
  'pixel_2_7',
  'pixel_3_0',
  'pixel_3_1',
  'pixel_3_2',
  'pixel_3_3',
  'pixel_3_4',
  'pixel_3_5',
  'pixel_3_6',
  'pixel_3_7',
  'pixel_4_0',
  'pixel_4_1',
  'pixel_4_2',
  'pixel_4_3',
  'pixel_4_4',
  'pixel_4_5',
  'pixel_4_6',
  'pixel_4_7'

In [3]:
X = pd.DataFrame(data.data,columns=data.feature_names)
X.head()

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_6,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,9.0,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0


In [4]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1797 entries, 0 to 1796
Data columns (total 64 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   pixel_0_0  1797 non-null   float64
 1   pixel_0_1  1797 non-null   float64
 2   pixel_0_2  1797 non-null   float64
 3   pixel_0_3  1797 non-null   float64
 4   pixel_0_4  1797 non-null   float64
 5   pixel_0_5  1797 non-null   float64
 6   pixel_0_6  1797 non-null   float64
 7   pixel_0_7  1797 non-null   float64
 8   pixel_1_0  1797 non-null   float64
 9   pixel_1_1  1797 non-null   float64
 10  pixel_1_2  1797 non-null   float64
 11  pixel_1_3  1797 non-null   float64
 12  pixel_1_4  1797 non-null   float64
 13  pixel_1_5  1797 non-null   float64
 14  pixel_1_6  1797 non-null   float64
 15  pixel_1_7  1797 non-null   float64
 16  pixel_2_0  1797 non-null   float64
 17  pixel_2_1  1797 non-null   float64
 18  pixel_2_2  1797 non-null   float64
 19  pixel_2_3  1797 non-null   float64
 20  pixel_2_

In [5]:
y = pd.Series(data.target)
y.head()

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [6]:
y.info()

<class 'pandas.core.series.Series'>
RangeIndex: 1797 entries, 0 to 1796
Series name: None
Non-Null Count  Dtype
--------------  -----
1797 non-null   int64
dtypes: int64(1)
memory usage: 14.2 KB


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X,y,stratify=y,test_size=0.25,random_state=42)

In [8]:
sc = StandardScaler()
sc.fit_transform(X_train)
sc.transform(X_test)

array([[ 0.        , -0.33202001,  0.60297162, ..., -1.16658906,
        -0.50750528, -0.19071697],
       [ 0.        , -0.33202001, -0.24906381, ...,  0.70391739,
        -0.50750528, -0.19071697],
       [ 0.        , -0.33202001,  0.60297162, ...,  1.38410155,
        -0.26177932, -0.19071697],
       ...,
       [ 0.        , -0.33202001,  0.38996276, ..., -0.14631281,
        -0.50750528, -0.19071697],
       [ 0.        , -0.33202001, -1.10109923, ..., -1.16658906,
        -0.50750528, -0.19071697],
       [ 0.        , -0.33202001, -1.10109923, ..., -0.82649698,
        -0.50750528, -0.19071697]], shape=(450, 64))

In [9]:
model = SVC()

In [10]:
param_grid = [
    # Linear kernel
    {'kernel': ['linear'],
     'C': [0.01, 0.1, 1, 10,100]},

    # RBF kernel
    {'kernel': ['rbf'],
     'C': [0.01, 0.1, 1, 10,100],
     'gamma': ['scale', 'auto', 0.01, 0.1,1]},

    # Polynomial kernel
    {'kernel': ['poly'],
     'C': [0.01, 0.1, 1, 10],
     'gamma': ['scale', 'auto', 0.01, 0.1],
     'degree': [2,3]}
]

In [11]:
from sklearn.model_selection import StratifiedKFold
cv = StratifiedKFold(n_splits=5,shuffle=True)

In [12]:
grid = GridSearchCV(estimator=model,param_grid=param_grid,scoring='accuracy',cv=cv, n_jobs=-1)
grid.fit(X_train,y_train)

0,1,2
,estimator,SVC()
,param_grid,"[{'C': [0.01, 0.1, ...], 'kernel': ['linear']}, {'C': [0.01, 0.1, ...], 'gamma': ['scale', 'auto', ...], 'kernel': ['rbf']}, ...]"
,scoring,'accuracy'
,n_jobs,-1
,refit,True
,cv,StratifiedKFo... shuffle=True)
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,C,1
,kernel,'poly'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [13]:
grid.best_params_

{'C': 1, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'}

In [14]:
grid.best_score_

np.float64(0.9925788241773372)

In [15]:
y_pred = grid.best_estimator_.predict(X_test)
print(f"Accuracy score : {accuracy_score(y_test,y_pred)}")

Accuracy score : 0.9866666666666667
