In [30]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import warnings
warnings.filterwarnings("ignore")
from sklearn.datasets import load_digits

digits = load_digits()

In [31]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split (digits.data, digits.target,test_size=0.3)

In [32]:
# use LogisticRegression classifier
lr = LogisticRegression()
lr.fit(x_train,y_train)
lr.score(x_test,y_test)

0.975925925925926

In [33]:
# use SVM
svm = SVC()
svm.fit(x_train,y_train)
svm.score(x_test,y_test)

0.987037037037037

In [34]:
# Randomforest Classifier
rf = RandomForestClassifier(n_estimators=40)
rf.fit(x_train,y_train)
rf.score(x_test,y_test)

0.9777777777777777

In [35]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=3)
kf

# here n_splits=3 it means we are going to select 3 folds

KFold(n_splits=3, random_state=None, shuffle=False)

In [36]:
# using KFold technique on the dataset
for train_index, test_index in kf.split([1,2,3,4,5,6,7,8,9]):
    print (train_index, test_index)

# now our KFold is ready its gonna made 3 splits so here in the argruement you can supply datasets lets supply number 1 to 9
# now in output this will return an iterator that iterator will return train and test index for each of the iteration so it 
# divided this into 3 Folds 3 each and 1st iteration it use 1 fold for testing and remaining 2 folds for training.

[3 4 5 6 7 8] [0 1 2]
[0 1 2 6 7 8] [3 4 5]
[0 1 2 3 4 5] [6 7 8]


In [37]:
# now we are going to use KFold for our digits example 
def get_score(model,x_train, x_test, y_train, y_test):
    model.fit(x_train, y_train)
    return model.score (x_test, y_test)

In [38]:
get_score(LogisticRegression(), x_train, x_test, y_train, y_test)

0.975925925925926

In [39]:
get_score(SVC(), x_train, x_test, y_train, y_test)

0.987037037037037

In [40]:
# use KFold to our digits dataset
from sklearn.model_selection import StratifiedKFold
folds = StratifiedKFold(n_splits=3)

# here stratifiedkfold is similar to KFold but it is little better in a way that when you are seperating out your folds it will
# divide each of the classification categories in a uniform way
# so this method and above KFold is exactly same 

In [41]:
scores_l = []
scores_svm = []
scores_rf = []

for train_index,test_index in kf.split(digits.data):
    x_train, x_test, y_train, y_test = digits.data [train_index], digits.data[test_index],\
                                       digits.target[train_index], digits.target[test_index]
    
# now its a time to measure the performance of three of our models in each iteration so since we have three folds this for 
# loops gonna repeat three time, every time will take this different x_test and  x_train and y_train and y_test and will 
# measure the performance of our model then we will append the scores in these arrays

    scores_l.append(get_score(LogisticRegression(), x_train, x_test, y_train, y_test))
    scores_svm.append(get_score(SVC(), x_train, x_test, y_train, y_test))
    scores_rf.append(get_score(RandomForestClassifier(n_estimators=40), x_train, x_test, y_train, y_test))

In [42]:
scores_l

[0.9232053422370617, 0.9415692821368948, 0.9148580968280468]

In [43]:
scores_svm

[0.9666110183639399, 0.9816360601001669, 0.9549248747913188]

In [44]:
scores_rf

[0.9365609348914858, 0.9549248747913188, 0.9248747913188647]

In [45]:
# In real life when you are solving machine learning problem you dont need to write this much code you can just call a cross
# val score method 
from sklearn.model_selection import cross_val_score

In [46]:
cross_val_score(LogisticRegression(), digits.data, digits.target)

# here your first arguement is Logisticregression and 2nd is digits.data is your x and digits.target is your y
# now this will showing you the similar score basically so internally this method did the same thing as above for loop basically

array([0.92222222, 0.86944444, 0.94150418, 0.93871866, 0.89693593])

In [47]:
cross_val_score(SVC(), digits.data, digits.target)

array([0.96111111, 0.94444444, 0.98328691, 0.98885794, 0.93871866])

In [48]:
cross_val_score(RandomForestClassifier(n_estimators=40), digits.data, digits.target)

array([0.93333333, 0.91388889, 0.95543175, 0.9637883 , 0.91364903])