In [30]:
# import libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import numpy as np
from sklearn.metrics import make_scorer, precision_score, recall_score, f1_score

In [31]:
# Fetch the Iris dataset once, then unpack its feature matrix (X) and
# class labels (y) for use by the cross-validation cells.
iris = load_iris()
X, y = iris.data, iris.target

In [32]:
# Candidate classifiers to compare, as (display name, unfitted estimator) pairs.
# Every estimator that draws random numbers during fitting is given the same
# fixed seed so the comparison is reproducible across kernel restarts.
# Decision trees need this too: scikit-learn randomly permutes the features at
# each split, so an unseeded tree can give different scores from run to run.
models_to_test = [
    ('LogisticRegression', LogisticRegression(max_iter=1000)),
    ('DecisionTreeClassifier(gini)', tree.DecisionTreeClassifier(criterion='gini', random_state=42)),
    ('DecisionTreeClassifier(entropy)', tree.DecisionTreeClassifier(criterion='entropy', random_state=42)),
    ('SVM(Linear)', SVC(C=1, kernel='linear')),
    ('SVM(RBF)', SVC(C=1, kernel='rbf')),
    ('RandomForestClassifier(10)', RandomForestClassifier(n_estimators=10, random_state=42)),
    ('RandomForestClassifier(50)', RandomForestClassifier(n_estimators=50, random_state=42)),
    ('RandomForestClassifier(100)', RandomForestClassifier(n_estimators=100, random_state=42))
]
    

In [33]:
# Scorers evaluated on every cross-validation fold, keyed by the short name
# used to look the results up later ('test_<key>').
# The target has three classes, so precision, recall and F1 use the
# macro-averaged variants rather than the binary defaults.
scoring_metrics = dict(
    accuracy='accuracy',
    precision='precision_macro',
    recall='recall_macro',
    f1='f1_macro',
)

In [34]:
# Shared cross-validation splitter: 5 stratified folds, shuffled with a
# fixed seed so every model is scored on the same train/test partitions.
skf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [36]:
# Cross-validated comparison table: for every candidate model, print the mean
# over the folds of `skf` of each metric declared in `scoring_metrics`
# (accuracy, precision, recall, F1).
#
# The name column is sized to the longest label instead of a fixed 25
# characters, and each score is padded to the same width as its header cell,
# so long names such as 'DecisionTreeClassifier(entropy)' no longer push
# their row out of alignment with the header.
name_width = max([len('Model')] + [len(name) for name, _ in models_to_test])

print(f"{'Model':<{name_width}} | {'Acc':<8} | {'Prec':<8} | {'Recall':<8} | {'F1':<8}")
for name, model in models_to_test:
    results = cross_validate(model, X, y, cv=skf, scoring=scoring_metrics)

    # cross_validate stores one score per fold under 'test_<metric key>';
    # summarise each metric by its mean across folds.
    acc = results['test_accuracy'].mean()
    prec = results['test_precision'].mean()
    rec = results['test_recall'].mean()
    f1 = results['test_f1'].mean()

    print(f"{name:<{name_width}} | {acc:<8.4f} | {prec:<8.4f} | {rec:<8.4f} | {f1:<8.4f}")

Model                     | Acc      | Prec     | Recall   | F1      
LogisticRegression        | 0.9667 | 0.9695 | 0.9667 | 0.9665
DecisionTreeClassifier(gini) | 0.9533 | 0.9572 | 0.9533 | 0.9531
DecisionTreeClassifier(entropy) | 0.9533 | 0.9572 | 0.9533 | 0.9531
SVM(Linear)               | 0.9867 | 0.9889 | 0.9867 | 0.9865
SVM(RBF)                  | 0.9667 | 0.9695 | 0.9667 | 0.9665
RandomForestClassifier(10) | 0.9533 | 0.9572 | 0.9533 | 0.9531
RandomForestClassifier(50) | 0.9533 | 0.9572 | 0.9533 | 0.9531
RandomForestClassifier(100) | 0.9467 | 0.9512 | 0.9467 | 0.9464
