In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

from sklearn.datasets import load_digits
digits = load_digits()

In [3]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size=0.3)

In [4]:
len(x_train), len(x_test), len(y_train), len(y_test)

(1257, 540, 1257, 540)

In [5]:
lr=LogisticRegression(max_iter = 5000)
lr.fit(x_train, y_train)
lr.score(x_test, y_test)

0.9592592592592593

In [6]:
svm=SVC()
svm.fit(x_train, y_train)
svm.score(x_test, y_test)

0.9851851851851852

In [7]:
rfc=RandomForestClassifier()
rfc.fit(x_train, y_train)
rfc.score(x_test, y_test)

0.9796296296296296

### K Fold Croos validation

In [8]:
from sklearn.model_selection import KFold
kf=KFold(n_splits=3)
kf

KFold(n_splits=3, random_state=None, shuffle=False)

In [9]:
for x_train, x_test in kf.split([1,2,3,4,5]):
    print(x_train, x_test)

[2 3 4] [0 1]
[0 1 4] [2 3]
[0 1 2 3] [4]


In [10]:
def get_score(model, x_train, x_test, y_train, y_test):
    model.fit(x_train, y_train)
    return model.score(x_test, y_test)

In [11]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits = 3)

In [13]:
scores_lr=[]
scores_svm=[]
scores_rf=[]
for x_train, x_test in kf.split(digits.data):
    x_train, x_test, y_train, y_test = digits.data[x_train], digits.data[x_test], digits.target[x_train], digits.target[x_test]
    scores_lr.append(get_score(LogisticRegression(max_iter=5000), x_train, x_test, y_train, y_test))
    scores_svm.append(get_score(SVC(), x_train, x_test, y_train, y_test))
    scores_rf.append(get_score(RandomForestClassifier(), x_train, x_test, y_train, y_test))

In [14]:
print(scores_lr)
print(scores_svm)
print(scores_rf)

[0.9282136894824707, 0.9415692821368948, 0.9165275459098498]
[0.9666110183639399, 0.9816360601001669, 0.9549248747913188]
[0.9415692821368948, 0.9482470784641068, 0.9298831385642737]


In [15]:
get_score(SVC(), x_train, x_test, y_train, y_test)

0.9549248747913188

In [16]:
get_score(RandomForestClassifier(), x_train, x_test, y_train, y_test)

0.9198664440734557

In [17]:
get_score(LogisticRegression(max_iter=5000), x_train, x_test, y_train, y_test)

0.9165275459098498

# Cross Val Score

In [18]:
from sklearn.model_selection import cross_val_score

In [19]:
cross_val_score(LogisticRegression(max_iter = 5000), digits.data, digits.target, cv=3)

array([0.92487479, 0.93823038, 0.92320534])

In [20]:
cross_val_score(SVC(), digits.data, digits.target)

array([0.96111111, 0.94444444, 0.98328691, 0.98885794, 0.93871866])

In [21]:
cross_val_score(RandomForestClassifier(), digits.data, digits.target)

array([0.94166667, 0.89722222, 0.95543175, 0.97214485, 0.93314763])

In [22]:
cross_val_score(RandomForestClassifier(n_estimators=50), digits.data, digits.target)

array([0.93333333, 0.88333333, 0.95543175, 0.96657382, 0.91364903])

## Exercise 

In [93]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

from sklearn.datasets import load_iris
iris = load_iris()

In [94]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size = 0.3)

In [96]:
lr = LogisticRegression()
lr.fit(x_train, y_train)
lr.score(x_test,y_test)

0.9555555555555556

In [97]:
svm = SVC()
svm.fit(x_train, y_train)
svm.score(x_test, y_test)

0.9333333333333333

In [98]:
rf = RandomForestClassifier()
rf.fit(x_train, y_train)
rf.score(x_test, y_test)

0.9111111111111111

# Cross Val Score

In [123]:
from sklearn.model_selection import cross_val_score

###  Logistic Regression

In [129]:
cross_val_score(LogisticRegression(max_iter =5000), iris.data, iris.target, cv=3)

array([0.98, 0.96, 0.98])

### Decision Tree

In [130]:
from sklearn.tree import DecisionTreeClassifier
cross_val_score(DecisionTreeClassifier(), iris.data, iris.target, cv =3)

array([0.98, 0.94, 0.96])

### SVM 

In [131]:
cross_val_score(SVC(), iris.data, iris.target, cv=3)

array([0.96, 0.98, 0.94])

### Random Forest 

In [132]:
cross_val_score(RandomForestClassifier(), iris.data, iris.target, cv=3)

array([0.98, 0.94, 0.96])