In [1]:
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
from sklearn.base import clone
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Load the digits dataset bundled with scikit-learn and pull out features / labels
digits = load_digits()
X = digits.data
y = digits.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features. The scaler is fit on the training split only, and the
# same fitted transform is then applied to both splits, so no test-set statistics
# leak into training.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize classifiers.
# The estimators that draw random numbers while fitting (Random Forest, MLP, SGD)
# are seeded with the same value used for the train/test split, so the reported
# metrics and the "best model" choice are reproducible across kernel restarts.
classifiers = {
    'KNN': KNeighborsClassifier(n_neighbors=7),
    'SVM': SVC(),
    'Logistic Regression': LogisticRegression(max_iter=200),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'MLP': MLPClassifier(max_iter=300, random_state=42),
    'SGD': SGDClassifier(max_iter=1000, tol=1e-3, random_state=42)
}

# Train and evaluate every classifier on both feature variants.
#
# Each (variant, classifier) pair is fit on its own clone of the template estimator.
# Fitting the shared instance twice would overwrite the unscaled fit with the scaled
# one, leaving no unscaled model to save afterwards.
# NOTE: `classifiers` is therefore left as a dict of unfitted templates; the trained
# estimators live in `fitted_models[scale_type][name]`.
feature_sets = {
    'unscaled': (X_train, X_test),
    'scaled': (X_train_scaled, X_test_scaled),
}

results = {scale_type: {} for scale_type in feature_sets}
fitted_models = {scale_type: {} for scale_type in feature_sets}

for scale_type, (X_tr, X_te) in feature_sets.items():
    for name, template in classifiers.items():
        model = clone(template).fit(X_tr, y_train)
        fitted_models[scale_type][name] = model

        y_pred = model.predict(X_te)
        results[scale_type][name] = {
            'accuracy': accuracy_score(y_test, y_pred),
            'confusion_matrix': confusion_matrix(y_test, y_pred),
            'classification_report': classification_report(y_test, y_pred),
            # Per-class scores averaged with weights proportional to class support
            'recall': recall_score(y_test, y_pred, average='weighted'),
            'precision': precision_score(y_test, y_pred, average='weighted'),
            'f1_score': f1_score(y_test, y_pred, average='weighted')
        }

# Print the results
for scale_type, per_model in results.items():
    print(f"\nResults for {scale_type} data:\n")
    for name, metrics in per_model.items():
        print(f"{name} Results")
        print("Accuracy:", metrics['accuracy'])
        print("Confusion Matrix:\n", metrics['confusion_matrix'])
        print("Classification Report:\n", metrics['classification_report'])
        print("Recall:", metrics['recall'])
        print("Precision:", metrics['precision'])
        print("F1 Score:", metrics['f1_score'])

# Determine the best model for each scaling type (highest test accuracy;
# on a tie, max() keeps the classifier that appears first in the dict)
best_model_unscaled = max(results['unscaled'], key=lambda x: results['unscaled'][x]['accuracy'])
best_model_scaled = max(results['scaled'], key=lambda x: results['scaled'][x]['accuracy'])

print(f"\nBest model for unscaled data: {best_model_unscaled} with accuracy {results['unscaled'][best_model_unscaled]['accuracy']}")
print(f"Best model for scaled data: {best_model_scaled} with accuracy {results['scaled'][best_model_scaled]['accuracy']}")

# Save the models
model_dir = 'models'
os.makedirs(model_dir, exist_ok=True)

# The "scaled" models only make sense on inputs transformed by this exact scaler,
# so persist it next to them.
scaler_path = os.path.join(model_dir, 'scaler.joblib')
joblib.dump(scaler, scaler_path)
print(f"Scaler saved to {scaler_path}")

# Write each fitted estimator under the variant it was actually trained on.
for name in classifiers:
    for scale_type in feature_sets:
        model_path = os.path.join(model_dir, f'{name}_{scale_type}.joblib')
        joblib.dump(fitted_models[scale_type][name], model_path)
        print(f"Model {name} ({scale_type}) saved to {model_path}")



Results for unscaled data:

KNN Results
Accuracy: 0.9888888888888889
Confusion Matrix:
 [[33  0  0  0  0  0  0  0  0  0]
 [ 0 28  0  0  0  0  0  0  0  0]
 [ 0  0 33  0  0  0  0  0  0  0]
 [ 0  0  0 34  0  0  0  0  0  0]
 [ 0  0  0  0 46  0  0  0  0  0]
 [ 0  0  0  0  0 46  1  0  0  0]
 [ 0  0  0  0  0  0 35  0  0  0]
 [ 0  0  0  0  0  0  0 33  0  1]
 [ 0  0  0  0  0  0  0  0 30  0]
 [ 0  0  0  0  1  1  0  0  0 38]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       1.00      1.00      1.00        28
           2       1.00      1.00      1.00        33
           3       1.00      1.00      1.00        34
           4       0.98      1.00      0.99        46
           5       0.98      0.98      0.98        47
           6       0.97      1.00      0.99        35
           7       1.00      0.97      0.99        34
           8       1.00      1.00      1.00        30
           9   