In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

def specificity_score(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    tn = cm.diagonal()
    fp = cm.sum(axis=0) - tn
    specificity_class = tn / (tn + fp)
    return np.mean(specificity_class)

fashion_mnist = fetch_openml('Fashion-MNIST')
X, y = fashion_mnist.data, fashion_mnist.target
X, _, y, _ = train_test_split(X, y, test_size=0.9, random_state=42)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier()
}


cv_results = {}
evaluation_results = {}

for name, model in models.items():
    
    kfold = StratifiedKFold(n_splits=5)
    cv_scores = cross_val_score(model, X_train, y_train, cv=kfold, scoring='accuracy')
    cv_results[name] = cv_scores.mean()

    
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    evaluation_results[name] = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Recall': recall_score(y_test, y_pred, average='macro'),
        'Precision': precision_score(y_test, y_pred, average='macro'),
        'F1 Score': f1_score(y_test, y_pred, average='macro'),
        'Specificity': specificity_score(y_test, y_pred)
    }

cv_df = pd.DataFrame(cv_results, index=[0])
evaluation_df = pd.DataFrame(evaluation_results)

print("5-fold cross-validation:\n", cv_df)
print("\n result:\n", evaluation_df.transpose())


  warn(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative so

5-fold cross-validation:
    Logistic Regression  Decision Tree  Random Forest
0             0.840696       0.791179       0.878536

 result:
                      Accuracy    Recall  Precision  F1 Score  Specificity
Logistic Regression  0.844786  0.844836   0.843027  0.843660     0.843027
Decision Tree        0.796071  0.796012   0.796207  0.796063     0.796207
Random Forest        0.884286  0.884357   0.882962  0.882494     0.882962
