In [1]:
# Importing libraries
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_digits
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the digits dataset once; later cells read digits.data (features)
# and digits.target (labels) from this object.
digits = load_digits()

In [2]:
# Display the loaded dataset object in full (arrays, feature names, ...).
# NOTE(review): this produces a very long repr; `digits.keys()` or
# `digits.data.shape` would give a more compact overview.
digits

{'data': array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ..., 10.,  0.,  0.],
        [ 0.,  0.,  0., ..., 16.,  9.,  0.],
        ...,
        [ 0.,  0.,  1., ...,  6.,  0.,  0.],
        [ 0.,  0.,  2., ..., 12.,  0.,  0.],
        [ 0.,  0., 10., ..., 12.,  1.,  0.]]),
 'target': array([0, 1, 2, ..., 8, 9, 8]),
 'frame': None,
 'feature_names': ['pixel_0_0',
  'pixel_0_1',
  'pixel_0_2',
  'pixel_0_3',
  'pixel_0_4',
  'pixel_0_5',
  'pixel_0_6',
  'pixel_0_7',
  'pixel_1_0',
  'pixel_1_1',
  'pixel_1_2',
  'pixel_1_3',
  'pixel_1_4',
  'pixel_1_5',
  'pixel_1_6',
  'pixel_1_7',
  'pixel_2_0',
  'pixel_2_1',
  'pixel_2_2',
  'pixel_2_3',
  'pixel_2_4',
  'pixel_2_5',
  'pixel_2_6',
  'pixel_2_7',
  'pixel_3_0',
  'pixel_3_1',
  'pixel_3_2',
  'pixel_3_3',
  'pixel_3_4',
  'pixel_3_5',
  'pixel_3_6',
  'pixel_3_7',
  'pixel_4_0',
  'pixel_4_1',
  'pixel_4_2',
  'pixel_4_3',
  'pixel_4_4',
  'pixel_4_5',
  'pixel_4_6',
  'pixel_4_7',
  'pixel_5_0',
  'pixel_5_1',
 

In [3]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for testing. The partition is drawn at random,
# so the seed is pinned: without it every run yields a different split and the
# single-split accuracies computed from it cannot be reproduced or compared.
# The seed value itself is arbitrary.
X_train , X_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size = 0.3, random_state = 42)

### Logistic Regression

In [4]:
# One-vs-rest logistic regression with the liblinear solver, fitted on the
# training split (fit returns the estimator itself, so `lr` is the fitted model).
lr = LogisticRegression(solver="liblinear", multi_class="ovr").fit(X_train, y_train)
# Score of the fitted model on the held-out test split.
lr.score(X_test, y_test)

0.95

### SVM

In [5]:
# Support vector classifier with gamma="auto", fitted on the training split.
# NOTE(review): the score recorded for this cell is far below the other two
# models. Presumably gamma="auto" (which does not account for the spread of
# the pixel values) is a poor fit for unscaled inputs; confirm by trying
# gamma="scale" or by scaling the features.
svm = SVC(gamma="auto").fit(X_train, y_train)
# Score of the fitted model on the held-out test split.
svm.score(X_test, y_test)

0.32407407407407407

### Random Forest 

In [6]:
# Random forest with 40 trees, fitted on the training split.
rf = RandomForestClassifier(n_estimators=40).fit(X_train, y_train)
# Score of the fitted forest on the held-out test split.
rf.score(X_test, y_test)

0.9777777777777777

## KFold Cross Validation

In [7]:
from sklearn.model_selection import KFold
# Splitter configured for 3 folds; used in the next cell to show which
# indices end up in the train and test parts of each fold.
kf = KFold(n_splits = 3)
# Show the splitter's configuration.
kf

KFold(n_splits=3, random_state=None, shuffle=False)

In [8]:
# Toy sequence of nine items: print, for each fold, the index arrays that
# the splitter assigns to the training part and to the test part.
toy_samples = list(range(1, 10))
for train_index, test_index in kf.split(toy_samples):
    print(train_index, test_index)

[3 4 5 6 7 8] [0 1 2]
[0 1 2 6 7 8] [3 4 5]
[0 1 2 3 4 5] [6 7 8]


### Use KFold for our digits example


In [9]:
def get_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training data and return its score on the test data.

    Parameters
    ----------
    model
        Object exposing ``fit(X, y)`` and ``score(X, y)``. ``fit`` is called
        on the object that is passed in; no copy is made.
    X_train, y_train
        Training inputs and targets, forwarded to ``model.fit``.
    X_test, y_test
        Evaluation inputs and targets, forwarded to ``model.score``.

    Returns
    -------
    Whatever ``model.score(X_test, y_test)`` returns.
    """
    model.fit(X_train, y_train)
    test_score = model.score(X_test, y_test)
    return test_score
    

In [10]:
# Dimensions of the training feature array produced by the split above.
X_train.shape

(1257, 64)

In [11]:
# Dimensions of the test feature array produced by the split above.
X_test.shape

(540, 64)

In [12]:
from sklearn.model_selection import StratifiedKFold
# 3-fold stratified splitter applied to the full digits dataset.
folds = StratifiedKFold(n_splits = 3)

# One score per fold for each model, appended in fold order.
score_logistic = []
score_svm = []
score_rf = []

for train_index, test_index in folds.split(digits.data, digits.target):
    # Use fold-local names. Unpacking into X_train / X_test / y_train / y_test
    # here would overwrite the hold-out split created with train_test_split,
    # so re-running an earlier cell afterwards would silently use the last
    # fold instead of that split.
    X_fold_train, X_fold_test = digits.data[train_index], digits.data[test_index]
    y_fold_train, y_fold_test = digits.target[train_index], digits.target[test_index]
    fold_data = (X_fold_train, X_fold_test, y_fold_train, y_fold_test)

    # A new, unfitted estimator is built for every fold so nothing learned
    # on one fold carries over to the next.
    score_logistic.append(get_score(LogisticRegression(solver = "liblinear", multi_class = "ovr"), *fold_data))

    score_svm.append(get_score(SVC(gamma = 'auto'), *fold_data))

    score_rf.append(get_score(RandomForestClassifier(n_estimators = 40), *fold_data))

In [13]:
# Per-fold scores, one model per line: logistic regression, SVM, random forest.
print(score_logistic, score_svm, score_rf, sep="\n")

[0.8948247078464107, 0.9532554257095158, 0.9098497495826378]
[0.3806343906510851, 0.41068447412353926, 0.5125208681135225]
[0.9282136894824707, 0.9549248747913188, 0.9181969949916527]


In [14]:
from sklearn.model_selection import cross_val_score

In [15]:
# 3-fold cross-validated scores for logistic regression on the full dataset.
cross_val_score(
    LogisticRegression(solver="liblinear", multi_class="ovr"),
    digits.data,
    digits.target,
    cv=3,
)

array([0.89482471, 0.95325543, 0.90984975])

## SVM model performance using cross_val_score

In [16]:
# 3-fold cross-validated scores for the gamma="auto" SVC on the full dataset.
cross_val_score(
    SVC(gamma="auto"),
    digits.data,
    digits.target,
    cv=3,
)

array([0.38063439, 0.41068447, 0.51252087])

## Random Forest performance using cross_val_score

In [17]:
# 3-fold cross-validated scores for a 40-tree random forest on the full dataset.
cross_val_score(
    RandomForestClassifier(n_estimators=40),
    digits.data,
    digits.target,
    cv=3,
)

array([0.91652755, 0.94490818, 0.91986644])

## Parameter tuning using k-fold cross-validation


In [18]:
# 10-fold cross-validation of a 5-tree random forest; show the mean fold score.
scores1 = cross_val_score(
    RandomForestClassifier(n_estimators=5),
    digits.data,
    digits.target,
    cv=10,
)
scores1.mean()

0.8770080695220361

In [19]:
# 10-fold cross-validation of a 20-tree random forest; show the mean fold score.
scores2 = cross_val_score(
    RandomForestClassifier(n_estimators=20),
    digits.data,
    digits.target,
    cv=10,
)
scores2.mean()

0.9382247051520795

In [20]:
# 10-fold cross-validation of a 30-tree random forest; show the mean fold score.
scores3 = cross_val_score(
    RandomForestClassifier(n_estimators=30),
    digits.data,
    digits.target,
    cv=10,
)
scores3.mean()

0.9415673494723773

In [21]:
# 10-fold cross-validation of a 40-tree random forest; show the mean fold score.
scores4 = cross_val_score(
    RandomForestClassifier(n_estimators=40),
    digits.data,
    digits.target,
    cv=10,
)
scores4.mean()

0.9415611421477342