In [15]:
# import libraries
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

In [16]:
# load the digits dataset
digits = load_digits()

In [17]:
# train test split data
x_train,x_test,y_train,y_test = train_test_split(digits.data,digits.target,test_size=0.3)

In [18]:
# create, train and get the score of the LogisticRegression model
lr = LogisticRegression(max_iter=1000)
lr.fit(x_train,y_train)
print(f"Model's score: {lr.score(x_test,y_test)}")

Model's score: 0.9518518518518518


In [19]:
# create, train and get the score of the SVM model
svm = SVC()
svm.fit(x_train,y_train)
print(f"Model's score: {svm.score(x_test,y_test)}")

Model's score: 0.9888888888888889


In [20]:
# create, train and get the score of the RandomForestClassifier model
rf = RandomForestClassifier()
rf.fit(x_train,y_train)
print(f"Model's score: {rf.score(x_test,y_test)}")

Model's score: 0.9722222222222222


In [26]:
# KFold
from sklearn.model_selection import KFold
kf = KFold(n_splits=3)
print(kf)

KFold(n_splits=3, random_state=None, shuffle=False)


In [27]:
for train_index, test_index in kf.split([1,2,3,4,5,6,7,8,9]):
    print(train_index, test_index)

[3 4 5 6 7 8] [0 1 2]
[0 1 2 6 7 8] [3 4 5]
[0 1 2 3 4 5] [6 7 8]


In [28]:
def get_score(model, x_train, x_test, y_train, y_test):
    model.fit(x_train, y_train)
    return model.score(x_test,y_test)

In [29]:
from sklearn.model_selection import StratifiedKFold
folds = StratifiedKFold(n_splits=3)

In [35]:
scores_lr = []
scores_svm = []
scores_rf = []

for train_index, test_index in kf.split(digits.data):
    x_train, x_test, y_train, y_test = digits.data[train_index], digits.data[test_index], \
                                        digits.target[train_index], digits.target[test_index]
    scores_lr.append(get_score(LogisticRegression(max_iter=1000), x_train, x_test, y_train, y_test))
    scores_svm.append(get_score(SVC(), x_train, x_test, y_train, y_test))
    scores_rf.append(get_score(RandomForestClassifier(n_estimators=40), x_train, x_test, y_train, y_test))

In [36]:
print(scores_lr)

[0.9248747913188647, 0.9432387312186978, 0.9148580968280468]


In [37]:
print(scores_svm)

[0.9666110183639399, 0.9816360601001669, 0.9549248747913188]


In [38]:
print(scores_rf)

[0.9265442404006677, 0.9599332220367279, 0.9282136894824707]


In [39]:
from sklearn.model_selection import cross_val_score

In [43]:
print(cross_val_score(LogisticRegression(max_iter=1000), digits.data, digits.target))

[0.92222222 0.86944444 0.94150418 0.94150418 0.89693593]


In [44]:
print(cross_val_score(SVC(), digits.data, digits.target))

[0.96111111 0.94444444 0.98328691 0.98885794 0.93871866]


In [51]:
print(cross_val_score(RandomForestClassifier(n_estimators=120), digits.data, digits.target))

[0.925      0.9        0.95543175 0.96935933 0.93314763]


In [53]:
print(cross_val_score(RandomForestClassifier(n_estimators=50), digits.data, digits.target))

[0.92777778 0.89722222 0.96100279 0.9637883  0.93871866]
