In [41]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

In [2]:
# Load scikit-learn's bundled digits dataset; later cells read digits.data and digits.target.
digits = load_digits()

In [7]:
def get_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``score(X, y)``
        The estimator to train. It is fitted in place.
    X_train, y_train : training features and labels passed to ``model.fit``.
    X_test, y_test : evaluation features and labels passed to ``model.score``.

    Returns
    -------
    Whatever ``model.score(X_test, y_test)`` returns.
    """
    model.fit(X_train, y_train)
    test_score = model.score(X_test, y_test)
    return test_score

# Base Example

In [8]:
from sklearn.model_selection import train_test_split
# 70/30 hold-out split. random_state is fixed so the same rows land in the
# train and test sets on every run; without it each re-run scores the models
# on a different split.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.3, random_state=42
)

### Logistic Regression

In [17]:
# Logistic regression (liblinear solver) fitted and scored on the hold-out split.
get_score(
    model=LogisticRegression(solver='liblinear'),
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

0.9574074074074074

### SVM

In [22]:
# Support vector classifier with default settings, fitted and scored on the hold-out split.
get_score(
    model=SVC(),
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

0.9814814814814815

### Random Forest

In [28]:
# Random forest fitted and scored on the hold-out split. The forest is
# randomised, so random_state is fixed to make the score repeatable.
get_score(RandomForestClassifier(random_state=42), X_train, X_test, y_train, y_test)

0.9685185185185186

# Stratified K-fold cross-validation

In [38]:
# Declare the splitter and set the number of splits.
folds = StratifiedKFold(n_splits=3)
# Create the requested splits; each item is a (train_index, test_index) pair.
split = folds.split(digits.data, digits.target)

scores_logistic = []
scores_svm = []
scores_rf = []

# Fold-local names (fold_*) are used on purpose: rebinding
# X_train/X_test/y_train/y_test here would overwrite the hold-out split that
# the "Base Example" cells rely on.
for train_index, test_index in split:
    fold_X_train, fold_X_test = digits.data[train_index], digits.data[test_index]
    fold_y_train, fold_y_test = digits.target[train_index], digits.target[test_index]
    fold_data = (fold_X_train, fold_X_test, fold_y_train, fold_y_test)

    # multi_class is left at its default, matching the other LogisticRegression
    # cells: with solver='liblinear' the default already resolves to
    # one-vs-rest, and the parameter is deprecated in recent scikit-learn.
    scores_logistic.append(get_score(LogisticRegression(solver='liblinear'), *fold_data))
    scores_svm.append(get_score(SVC(), *fold_data))
    # Fixed seed so the forest's per-fold scores are repeatable across runs.
    scores_rf.append(
        get_score(RandomForestClassifier(n_estimators=40, random_state=42), *fold_data)
    )

In [39]:
# Per-fold scores, in order: logistic regression, SVM, random forest.
(scores_logistic, scores_svm, scores_rf)

([0.8948247078464107, 0.9532554257095158, 0.9098497495826378],
 [0.9649415692821369, 0.9799666110183639, 0.9649415692821369],
 [0.9348914858096828, 0.9565943238731218, 0.9415692821368948])

In [40]:
# Mean score over the folds, in order: logistic regression, SVM, random forest.
tuple(np.mean(fold_scores) for fold_scores in (scores_logistic, scores_svm, scores_rf))

(0.9193099610461881, 0.9699499165275459, 0.9443516972732331)

# The `cross_val_score` function

### Logistic Regression

In [42]:
# 3-fold cross-validated scores for logistic regression (liblinear) on digits.
cross_val_score(
    estimator=LogisticRegression(solver='liblinear'),
    X=digits.data,
    y=digits.target,
    cv=3,
)

array([0.89482471, 0.95325543, 0.90984975])

### SVM

In [44]:
# 3-fold cross-validated scores for a default SVC on digits.
cross_val_score(
    estimator=SVC(),
    X=digits.data,
    y=digits.target,
    cv=3,
)

array([0.96494157, 0.97996661, 0.96494157])

### Random Forest

In [45]:
# 3-fold cross-validated scores for a 40-tree random forest on digits.
# random_state is fixed because the forest is randomised; without a seed the
# fold scores differ on every run.
cross_val_score(
    RandomForestClassifier(n_estimators=40, random_state=42),
    digits.data,
    digits.target,
    cv=3,
)

array([0.92487479, 0.94991653, 0.92487479])

# The same comparison on the Iris dataset

In [46]:
from sklearn.datasets import load_iris
# Load scikit-learn's bundled iris dataset; the cells below read iris.data and iris.target.
iris = load_iris()

In [48]:
# 3-fold cross-validated scores for logistic regression (liblinear) on iris.
cross_val_score(
    estimator=LogisticRegression(solver='liblinear'),
    X=iris.data,
    y=iris.target,
    cv=3,
)

array([0.96, 0.96, 0.94])

In [49]:
# 3-fold cross-validated scores for a default SVC on iris.
cross_val_score(
    estimator=SVC(),
    X=iris.data,
    y=iris.target,
    cv=3,
)

array([0.96, 0.98, 0.94])

In [50]:
# 3-fold cross-validated scores for a 40-tree random forest on iris.
# random_state is fixed because the forest is randomised; without a seed the
# fold scores differ on every run.
cross_val_score(
    RandomForestClassifier(n_estimators=40, random_state=42),
    iris.data,
    iris.target,
    cv=3,
)

array([0.98, 0.94, 0.96])