In [73]:
import pandas as pd
import numpy as np
from warnings import filterwarnings
filterwarnings("ignore")

In [74]:
df = pd.read_csv("iris.csv")
df.head(5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [75]:
X = df.drop("species", axis=1)
y = df["species"]

In [76]:
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold, LeaveOneOut
from sklearn.linear_model import LogisticRegression

* Simple validation croisée: Permet de croiser avec les parametres par defaut

In [77]:
logreg = LogisticRegression()
sc1 = cross_val_score(logreg, X, y)

In [78]:
def cvs(scor, nom):
    print(f"Model : {nom}\nMoyenne des cross validation scores:{np.round(scor.mean(), 4)}")

In [79]:
kfold = KFold(n_splits = 5)
sc2 = cross_val_score(logreg, X, y, cv=kfold)

In [80]:
stra_kfold = StratifiedKFold(n_splits = 5) # Pour preserver la repartition des classes (Seulement sur les classification)
sc21 = cross_val_score(logreg, X, y, cv= stra_kfold)

In [81]:
loo = LeaveOneOut()
sc3 = cross_val_score(logreg, X, y, cv=loo)
len(sc3)

150

In [82]:
models = {"Cross_val_ simple": sc1,
          "Cross_val avec Kfold": sc2,
          "Cross_val avec stratified kfold": sc21,
          "Cross_val avec LOO": sc3
          }

In [83]:
for model in models.items():
    cvs(model[1], model[0])

Model : Cross_val_ simple
Moyenne des cross validation scores:0.9733
Model : Cross_val avec Kfold
Moyenne des cross validation scores:0.9267
Model : Cross_val avec stratified kfold
Moyenne des cross validation scores:0.9733
Model : Cross_val avec LOO
Moyenne des cross validation scores:0.9667


* Avec shuffle

In [84]:
models_shuffle = {"Cross_val avec Kfold": cross_val_score(logreg, X, y, cv=KFold(n_splits = 5, shuffle = True, random_state= 101)),
          "Cross_val avec stratified kfold": cross_val_score(logreg, X, y, cv=StratifiedKFold(n_splits = 5, shuffle = True, random_state= 101)),
          }

In [85]:
print("Les splits avec shuffle")
for model in models_shuffle.items():
    cvs(model[1], model[0])

Les splits avec shuffle
Model : Cross_val avec Kfold
Moyenne des cross validation scores:0.96
Model : Cross_val avec stratified kfold
Moyenne des cross validation scores:0.9667


In [86]:
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit

In [87]:
sss = StratifiedShuffleSplit(n_splits =10, test_size = .5, train_size = .5, random_state = 101)
ss = ShuffleSplit(n_splits=10, test_size = .5, train_size = .5, random_state=101)
sc4 = cross_val_score(logreg, X, y, cv = ss)
sc5 = cross_val_score(logreg, X, y, cv = sss)

In [88]:
cvs(sc4, "ShuffleSplit")

Model : ShuffleSplit
Moyenne des cross validation scores:0.9533


In [89]:
cvs(sc5, "StratifiedShuffleSplit")

Model : StratifiedShuffleSplit
Moyenne des cross validation scores:0.96


* Validation croisé avec groupe:

In [91]:
from sklearn.model_selection import GroupKFold, LeaveOneGroupOut, LeavePGroupsOut, StratifiedGroupKFold
from sklearn.datasets import make_classification

In [101]:
# Generer les données simulées
X1, y1 = make_classification(n_samples=10000, n_features = 10, random_state = 101)
groups = np.random.randint(0, 10, size = 10000)

In [102]:
gkfold = GroupKFold(n_splits = 5)
sc6 = cross_val_score(logreg, X1, y1, cv = gkfold, groups = groups)

In [106]:
logo = LeaveOneGroupOut()
lpgo = LeavePGroupsOut(n_groups = 2)
sgfold = StratifiedGroupKFold(n_splits = 5)
sc7 = cross_val_score(logreg, X1, y1, cv = logo, groups = groups)
sc8 = cross_val_score(logreg, X1, y1, cv = lpgo, groups = groups)
sc9 = cross_val_score(logreg, X1, y1, cv = sgfold, groups = groups)

In [107]:
modls = {"GroupKFold": sc6,
         "LeaveOneGroupOut": sc7,
         "LeaveOneGroupOut": sc8,
         "StratifiedGroupKFold": sc9
         }

In [109]:
for model in modls.items():
    cvs(model[1], model[0])

Model : GroupKFold
Moyenne des cross validation scores:0.8365
Model : LeaveOneGroupOut
Moyenne des cross validation scores:0.8363
Model : StratifiedGroupKFold
Moyenne des cross validation scores:0.8365
