In [4]:
from sklearn.datasets import load_iris

In [8]:
iris = load_iris()

In [12]:
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [9]:
def get_score(model, x_train, x_test, y_train, y_test):
    model.fit(x_train, y_train)
    return model.score(x_test, y_test)

In [10]:
from sklearn.model_selection import StratifiedKFold
folds = StratifiedKFold(n_splits = 3)

In [11]:
score_logistic_reg = []
score_svm = []
score_random_forest = []

In [26]:
x = iris.data
x.shape

(150, 4)

In [25]:
y = iris.target
y.shape

(150,)

In [28]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

### Logistic regression often performs better with standardized data, so we should scale the data to avoid ConvergenceWarning

In [30]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

In [31]:
for train_index, test_index in folds.split(x_scaled,y):
    x_train, x_test, y_train, y_test = x_scaled[train_index], x_scaled[test_index], y[train_index],y[test_index]
    score_logistic_reg.append(get_score(LogisticRegression(), x_train, x_test, y_train, y_test))
    score_svm.append(get_score(SVC(), x_train, x_test, y_train, y_test))
    score_random_forest.append(get_score(RandomForestClassifier(), x_train, x_test, y_train, y_test))

In [35]:
logit_score = sum(score_logistic_reg)/len(score_logistic_reg)
svm_score = sum(score_svm)/len(score_svm)
random_forest_score = sum(score_random_forest)/len(score_random_forest)

In [36]:
logit_score

0.9714285714285714

In [37]:
svm_score

0.9628571428571429

In [38]:
random_forest_score

0.96

In [41]:
data_scaled = scaler.fit_transform(iris.data)

### We can do the same task using sklearn library

In [85]:
from sklearn.model_selection import cross_val_score
logit = cross_val_score(LogisticRegression(), data_scaled, iris.target)
svm = cross_val_score(SVC(), data_scaled, iris.target)
rand_forest = cross_val_score(RandomForestClassifier(n_estimators = 45), data_scaled, iris.target)

In [86]:
log_score_lib = sum(logit)/len(logit)
log_score_lib

0.9600000000000002

In [87]:
svm_score_lib = sum(svm)/len(svm)
svm_score_lib

0.9666666666666666

In [88]:
rand_forest_score_lib = sum(rand_forest)/len(rand_forest)
rand_forest_score_lib

0.96