In [1]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, cross_val_score
iris = load_iris()

In [17]:
x = iris.data
y = iris.target

In [22]:
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.3)

##### Logistic Regression

In [23]:
lr = LogisticRegression()
lr.fit(X_train, y_train)
lr.score(X_test, y_test)

0.9555555555555556

##### Support Vector Machine

In [24]:
SVM = SVC(gamma='auto')
SVM.fit(X_train, y_train)
SVM.score(X_test, y_test)

0.9777777777777777

##### RandomForest Classifier

In [25]:
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)
rfc.score(X_test, y_test)

0.9777777777777777

##### KFold cross validation

In [26]:
kf = KFold(n_splits=3)
kf

KFold(n_splits=3, random_state=None, shuffle=False)

In [27]:
for train_index, test_index in kf.split([1, 2, 3, 4, 5, 6, 7, 8, 9]):
    print(train_index, test_index)

[3 4 5 6 7 8] [0 1 2]
[0 1 2 6 7 8] [3 4 5]
[0 1 2 3 4 5] [6 7 8]


##### Using Kfold for our digits

In [28]:
def get_score(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)

In [30]:
folds = StratifiedKFold(n_splits=3)

lr_score = []
svm_score = []
rf_score = []

for train_index, test_index in kf.split(x,y):
    X_train, X_test, y_train, y_test = x[train_index], x[test_index], \
                                       y[train_index], y[test_index]
    lr_score.append(get_score(LogisticRegression(), X_train, X_test, y_train, y_test))
    svm_score.append(get_score(SVC(), X_train, X_test, y_train, y_test))
    rf_score.append(get_score(RandomForestClassifier(), X_train, X_test, y_train, y_test))

In [32]:
get_score(LogisticRegression(), X_train, X_test, y_train, y_test)

0.0

In [33]:
get_score(SVC(), X_train, X_test, y_train, y_test)

0.0

##### Cross_val_score

In [34]:
# LogisticRegression
cross_val_score(LogisticRegression(), x, y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


array([0.96666667, 1.        , 0.93333333, 0.96666667, 1.        ])

In [35]:
# SVC
cross_val_score(SVC(), x, y)

array([0.96666667, 0.96666667, 0.96666667, 0.93333333, 1.        ])

In [36]:
# RandomForest
cross_val_score(RandomForestClassifier(), x, y)

array([0.96666667, 0.96666667, 0.93333333, 0.93333333, 1.        ])

##### Average score

In [37]:
cvs_LR = cross_val_score(LogisticRegression(), x, y) 
cvs_SVC = cross_val_score(SVC(), x, y)
cvs_RFC = cross_val_score(RandomForestClassifier(), x, y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [38]:
# Average of LogisticRegression
np.average(cvs_LR)

0.9733333333333334

In [39]:
# Average of SVM
np.average(cvs_SVC)

0.9666666666666666

In [40]:
# Average of RandomForestClassifier
np.average(cvs_RFC)

0.9666666666666668