In [72]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_digits
from sklearn.datasets import load_iris

In [73]:
digits = load_digits()
iris = load_iris()

In [74]:
def get_val_score(model,x_train,x_test,y_train,y_test):
    model.fit(x_train,y_train)
    return model.score(x_test,y_test)

In [75]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=10)

def get_score(model):
    
    score_model = []
    for train_index,test_index in kf.split(digits.data):
        x_train,x_test,y_train,y_test = digits.data[train_index], digits.data[test_index],\
            digits.target[train_index],digits.target[test_index]
        score_model.append(get_val_score(model,x_train,x_test,y_train,y_test))
    return score_model

In [76]:
lr = LogisticRegression(max_iter=2000,solver='lbfgs')
svc = SVC(kernel='linear')
rforest = RandomForestClassifier(n_estimators=40)

In [77]:
models = [svc,rforest]
model_df = {}
for model in models:
    model_df.setdefault(str(model), np.array(get_score(model)))

In [78]:
model_df

{"SVC(kernel='linear')": array([0.93888889, 0.99444444, 0.93333333, 0.96666667, 0.96111111,
        0.98888889, 0.96666667, 0.98882682, 0.93296089, 0.96648045]),
 'RandomForestClassifier(n_estimators=40)': array([0.92777778, 0.97777778, 0.92222222, 0.94444444, 0.95      ,
        0.95      , 0.96111111, 0.97206704, 0.94972067, 0.95530726])}

In [79]:
import pandas as pd

df = pd.DataFrame(model_df)
df.columns = ['Svc','RandomForest']
df

Unnamed: 0,Svc,RandomForest
0,0.938889,0.927778
1,0.994444,0.977778
2,0.933333,0.922222
3,0.966667,0.944444
4,0.961111,0.95
5,0.988889,0.95
6,0.966667,0.961111
7,0.988827,0.972067
8,0.932961,0.949721
9,0.96648,0.955307


In [80]:
print(df['Svc'].mean())
print(df['RandomForest'].mean())

0.963826815642458
0.9510428305400371


SCORE OD SVM IS BETTER THAN SCORE OF RANDOMFOREST

USING CROSS_VAL_SCORE

In [81]:
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [82]:
from sklearn.model_selection import cross_val_score

cross_val_score(RandomForestClassifier(n_estimators=40),iris.data,iris.target)

array([0.96666667, 0.96666667, 0.93333333, 0.93333333, 1.        ])

In [83]:
cross_val_score(SVC(kernel='linear'),iris.data,iris.target)

array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

In [84]:
cross_val_score(LogisticRegression(max_iter=1000),iris.data,iris.target)

array([0.96666667, 1.        , 0.93333333, 0.96666667, 1.        ])

STRATIFIEDKFOLD

In [85]:
from sklearn import preprocessing
from statistics import mean,stdev
from sklearn.model_selection import StratifiedKFold

x = iris.data
y = iris.target

scaler = preprocessing.MinMaxScaler()
x_scaled = scaler.fit_transform(x)

lr = LogisticRegression()
  
# Create StratifiedKFold object.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
lst_accu_stratified = []
  
for train_index, test_index in skf.split(x, y):
    x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]
    lr.fit(x_train_fold, y_train_fold)
    lst_accu_stratified.append(lr.score(x_test_fold, y_test_fold))
    
print('List of possible accuracy:', lst_accu_stratified)
print('\nMaximum Accuracy That can be obtained from this model is:',
      max(lst_accu_stratified)*100, '%')
print('\nMinimum Accuracy:',
      min(lst_accu_stratified)*100, '%')
print('\nOverall Accuracy:',
      mean(lst_accu_stratified)*100, '%')
print('\nStandard Deviation is:', stdev(lst_accu_stratified))

List of possible accuracy: [0.9333333333333333, 0.9333333333333333, 1.0, 1.0, 1.0, 0.8666666666666667, 0.9333333333333333, 1.0, 0.8, 0.9333333333333333]

Maximum Accuracy That can be obtained from this model is: 100.0 %

Minimum Accuracy: 80.0 %

Overall Accuracy: 94.0 %

Standard Deviation is: 0.06629526173411686
