In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
from sklearn.linear_model import LogisticRegression

In [3]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [4]:
# Load the iris example dataset through seaborn and preview the first rows.
# NOTE(review): seaborn fetches example datasets from its online repository
# when they are not cached locally, so this cell may need network access.
iris = sns.load_dataset(name="iris")
iris.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


# Extraction of Feature Matrix and Target Vector

In [5]:
# Split the frame by column position: every column except the last is a
# feature, and the last column is the target label.
*feature_names, target_name = iris.columns
X = iris[feature_names]
y = iris[target_name]
print("Feature Matrix X", X.head())
print("Target Vector y", y.head())

Feature Matrix X    sepal_length  sepal_width  petal_length  petal_width
0           5.1          3.5           1.4          0.2
1           4.9          3.0           1.4          0.2
2           4.7          3.2           1.3          0.2
3           4.6          3.1           1.5          0.2
4           5.0          3.6           1.4          0.2
Target Vector y 0    setosa
1    setosa
2    setosa
3    setosa
4    setosa
Name: species, dtype: object


# Evaluate using a train and a test set

In [6]:
from sklearn.model_selection import train_test_split

# Single hold-out evaluation: 70% of the rows train the model, the remaining
# 30% are used only for scoring. The fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=100,
)

# fit() returns the estimator itself, so construction and training chain.
model = LogisticRegression().fit(X_train, Y_train)

# score() reports mean accuracy on the held-out rows (a fraction in [0, 1]).
result = model.score(X_test, Y_test)
print("Accuracy: %.2f%%" % (100.0 * result))

Accuracy: 100.00%


# Evaluate using K-fold Cross-Validation

In [7]:
from sklearn.model_selection import KFold,cross_val_score

# 10-fold cross-validation over consecutive, unshuffled blocks of rows.
#
# `random_state` is deliberately not passed: KFold only consults it when
# shuffle=True. With the default shuffle=False the seed had no effect, and
# scikit-learn >= 0.24 raises a ValueError for that combination, which made
# this cell fail on a fresh kernel. Dropping the inert seed keeps the folds
# (and therefore the reported score) exactly as before.
#
# NOTE(review): the preview of `iris` shows the leading rows all belonging to
# one species, so the data looks class-ordered and each unshuffled test fold
# is likely dominated by a single class. To randomise fold membership use
# KFold(n_splits=10, shuffle=True, random_state=100) -- this changes the score.
kfold = KFold(n_splits=10)
model_kfold = LogisticRegression()

# One accuracy value per fold; the mean is reported as a percentage.
results_kfold = cross_val_score(model_kfold, X, y, cv=kfold)
print("Accuracy: %.2f%%" % (results_kfold.mean()*100.0))

Accuracy: 88.00%


# Stratified K-fold Cross-Validation

In [8]:
from sklearn.model_selection import StratifiedKFold

# 3-fold stratified cross-validation: each fold keeps roughly the same class
# proportions as the full target vector.
#
# `random_state` is deliberately not passed: StratifiedKFold only uses it
# when shuffle=True. With the default shuffle=False the seed had no effect,
# and scikit-learn >= 0.24 raises a ValueError for that combination.
# Dropping the inert seed leaves the folds and the reported score unchanged.
# To shuffle within each class before splitting, pass
# shuffle=True together with random_state=100 (this changes the score).
skfold = StratifiedKFold(n_splits=3)
model_skfold = LogisticRegression()

# One accuracy value per fold; the mean is reported as a percentage.
results_skfold = cross_val_score(model_skfold, X, y, cv=skfold)
print("Accuracy: %.2f%%" % (results_skfold.mean()*100.0))

Accuracy: 94.69%


# Leave One Out Cross-Validation (LOOCV)

In [9]:
from sklearn.model_selection import LeaveOneOut

# Leave-one-out: every row is held out once as a one-element test set, so
# the model is refitted once per row and each score is either 0 or 1.
loocv = LeaveOneOut()
model_loocv = LogisticRegression()
results_loocv = cross_val_score(estimator=model_loocv, X=X, y=y, cv=loocv)

# The mean of the per-row scores is the overall accuracy.
print("Accuracy: %.2f%%" % (100.0 * results_loocv.mean()))

Accuracy: 95.33%


# Repeated Random Test-Train Splits

In [10]:
from sklearn.model_selection import ShuffleSplit

# Ten independent random 70/30 train/test splits; the seed makes the set of
# splits repeatable across runs.
kfold2 = ShuffleSplit(
    n_splits=10,
    test_size=0.30,
    random_state=100,
)
model_shufflecv = LogisticRegression()
results_4 = cross_val_score(estimator=model_shufflecv, X=X, y=y, cv=kfold2)

# Report the mean accuracy and, in parentheses, its spread across splits.
print(
    "Accuracy: %.2f%% (%.2f%%)"
    % (100.0 * results_4.mean(), 100.0 * results_4.std())
)

Accuracy: 96.44% (2.67%)


# All Validation Scores

In [11]:
# Side-by-side recap of every validation strategy evaluated above.
# Each entry pairs a printf-style template with the value(s) it formats.
summary_lines = [
    ("train and a test method: %.2f%%", result * 100.0),
    ("K-fold : %.2f%%", results_kfold.mean() * 100.0),
    ("Stratified K-fold: %.2f%%", results_skfold.mean() * 100.0),
    ("Leave One Out : %.2f%%", results_loocv.mean() * 100.0),
    (
        "Random Test-Train Splits: %.2f%% (%.2f%%)",
        (results_4.mean() * 100.0, results_4.std() * 100.0),
    ),
]
for template, values in summary_lines:
    print(template % values)

train and a test method: 100.00%
K-fold : 88.00%
Stratified K-fold: 94.69%
Leave One Out : 95.33%
Random Test-Train Splits: 96.44% (2.67%)
