In [None]:
# Importazione delle librerie necessarie
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow.keras.datasets import cifar10
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
import seaborn as sns

In [None]:
# Load CIFAR-10 with its predefined train/test partition.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
# Flatten the label arrays to 1-D, the target layout scikit-learn estimators expect.
y_train = y_train.flatten()
y_test = y_test.flatten()

# Optional subsampling of the training set to keep training times manageable.
# This must happen BEFORE flattening/scaling: otherwise the feature matrix keeps
# every row while the labels are truncated, and the two no longer line up.
sample_size = 10000
X_train = X_train[:sample_size]
y_train = y_train[:sample_size]

# Flatten every image into a single feature row (one row per image).
X_train_flat = X_train.reshape((X_train.shape[0], -1))
X_test_flat = X_test.reshape((X_test.shape[0], -1))

# Standardise the features: statistics are fitted on the training data only and
# then re-used for the test data, so no test information leaks into the fit.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_flat)
X_test_scaled = scaler.transform(X_test_flat)

# ---------------------------------------------------------------------------
# Alternative manual train / validation / test split over the merged data.
# NOTE(review): none of the cells visible in this review consume these arrays;
# they are kept (under names that do not clobber `y_test` or `scaler`) in case
# other cells rely on them.
# ---------------------------------------------------------------------------

# Merge everything into a single dataset.
X = np.concatenate((X_train_flat, X_test_flat), axis=0)
y = np.concatenate((y_train, y_test), axis=0)

# Append the target as the last column of the feature matrix.
dati_completi = np.c_[X, y]

# Split settings: 80% train (of which 25% is held out for validation), 20% test.
train_fraction = 0.8
val_fraction = 0.25

shape_total = dati_completi.shape[0]
shape_train = int(shape_total * train_fraction)
shape_val = int(shape_train * val_fraction)

# Positional split (no shuffling is performed here).
train_set = dati_completi[:shape_train]
test_set = dati_completi[shape_train:]

val_set = train_set[:shape_val]
train_effettivo_set = train_set[shape_val:]

# Separate features (all columns but the last) from the target (last column).
x_train_eff, y_train_eff = train_effettivo_set[:, :-1], train_effettivo_set[:, -1]
x_val, y_val = val_set[:, :-1], val_set[:, -1]
# Stored as `y_test_split` so the official `y_test`, which pairs with
# `X_test_scaled`, is not overwritten.
x_test, y_test_split = test_set[:, :-1], test_set[:, -1]

# Standardisation for the manual split, using its own scaler so that `scaler`
# above stays the one fitted on `X_train_flat`.
scaler_split = StandardScaler()
x_train_std = scaler_split.fit_transform(x_train_eff)
x_val_std = scaler_split.transform(x_val)
x_test_std = scaler_split.transform(x_test)

In [None]:
# Apply PCA to reduce the dimensionality of the standardised features.
# The projection is fitted on the training data only and then applied to the
# test data.
n_components = 100
# `random_state` is fixed because the solver chosen by `svd_solver='auto'` may be
# the randomized one, whose output otherwise changes from run to run.
pca = PCA(n_components=n_components, random_state=42)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Total share of variance retained by the kept components.
explained = np.sum(pca.explained_variance_ratio_)
print(f"PCA con {pca.n_components_} componenti spiega il {explained:.2%} della varianza totale")

PCA con 100 componenti spiega il 89.83% della varianza totale


In [None]:
# importazione librerie necessarie per l'addestramento dei modelli (regressione logistica, k-NN, SVM, decision tree)

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Baseline models with default-ish hyperparameters.
# `risultati` is a dict mapping each model's display name to its test accuracy.
risultati = {}

# Logistic regression (max_iter raised from the default to 1000).
logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train_pca, y_train)
y_pred_lr = logreg.predict(X_test_pca)
risultati['Logistic Regression'] = accuracy_score(y_test, y_pred_lr)

# k-nearest neighbours.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_pca, y_train)
y_pred_knn = knn.predict(X_test_pca)
risultati['k-NN'] = accuracy_score(y_test, y_pred_knn)

# Support vector machine with a linear kernel.
svm = SVC(kernel='linear')
svm.fit(X_train_pca, y_train)
y_pred_svm = svm.predict(X_test_pca)
risultati['SVM'] = accuracy_score(y_test, y_pred_svm)

# Decision tree. The seed is fixed because scikit-learn permutes the candidate
# features at every split, so an unseeded tree can differ between runs.
tree = DecisionTreeClassifier(random_state=42)
tree.fit(X_train_pca, y_train)
y_pred_dt = tree.predict(X_test_pca)
risultati['Decision Tree'] = accuracy_score(y_test, y_pred_dt)

# Print the collected accuracies.
for model_name, acc in risultati.items():
    print(f"{model_name} - Accuracy: {acc:.4f}")

In [None]:
# importazione della libreria per implementare GridSearchCV
from sklearn.model_selection import GridSearchCV

In [None]:
# Hyperparameter tuning with 3-fold cross-validated grid search on the training
# data. Each search keeps its best refitted estimator in `best_*`.

# 1. Logistic Regression
param_logreg = {
    'C': [0.01, 0.1, 1, 10]
}
grid_logreg = GridSearchCV(LogisticRegression(max_iter=1000), param_logreg, cv=3, scoring='accuracy', n_jobs=-1)
grid_logreg.fit(X_train_pca, y_train)
best_logreg = grid_logreg.best_estimator_

# 2. k-NN
param_knn = {
    'n_neighbors': [3, 5, 7]
}
grid_knn = GridSearchCV(KNeighborsClassifier(), param_knn, cv=3, scoring='accuracy', n_jobs=-1)
grid_knn.fit(X_train_pca, y_train)
best_knn = grid_knn.best_estimator_

# 3. SVM
# `gamma` has no effect on the linear kernel, so it is searched only for the RBF
# kernel. A single crossed grid would fit every linear configuration three times
# with identical results.
param_svm = [
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
    {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': ['scale', 0.01, 0.001]},
]
grid_svm = GridSearchCV(SVC(), param_svm, cv=3, scoring='accuracy', n_jobs=-1)
grid_svm.fit(X_train_pca, y_train)
best_svm = grid_svm.best_estimator_

# 4. Decision Tree
param_tree = {
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5, 10]
}
# Fixed seed so the comparison between grid points (and the final tree) is
# reproducible across runs.
grid_tree = GridSearchCV(DecisionTreeClassifier(random_state=42), param_tree, cv=3, scoring='accuracy', n_jobs=-1)
grid_tree.fit(X_train_pca, y_train)
best_tree = grid_tree.best_estimator_

In [None]:
# Evaluate every tuned model on the test set: print accuracy and the per-class
# report, and draw its confusion matrix in one panel of a shared figure.
models = {
    'Logistic Regression': best_logreg,
    'k-NN': best_knn,
    'SVM': best_svm,
    'Decision Tree': best_tree
}

# Class ids used both to build the confusion matrix and to label its axes, so
# the matrix is always 10x10 and aligned with the tick labels.
class_labels = list(range(10))

# One panel per model, laid out on a grid sized from the number of models.
# The figure is created BEFORE plotting so that `figsize` actually applies.
n_cols = 2
n_rows = -(-len(models) // n_cols)  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(7 * n_cols, 6 * n_rows), squeeze=False)
flat_axes = axes.ravel()

for ax, (name, model) in zip(flat_axes, models.items()):
    y_pred = model.predict(X_test_pca)
    acc = accuracy_score(y_test, y_pred)
    print(f"\n{name} - Accuracy: {acc:.4f}")
    print(classification_report(y_test, y_pred))

    # Compute the confusion matrix and draw it on this model's own axes.
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=class_labels, yticklabels=class_labels, ax=ax)
    ax.set_title(f'Confusion Matrix: {name}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')

# Hide any panel left over when the number of models does not fill the grid.
for unused_ax in flat_axes[len(models):]:
    unused_ax.set_visible(False)

fig.tight_layout()
plt.show()



In [None]:

# Pick the tuned model with the highest test accuracy.
# NOTE(review): choosing the winner on the test set makes the reported accuracy
# optimistic; consider ranking by each search's cross-validated `best_score_`
# and using the test set only for the final report.

# `GridSearchCV.predict` delegates to the refitted best estimator.
best_lr_acc = accuracy_score(y_test, grid_logreg.predict(X_test_pca))
best_knn_acc = accuracy_score(y_test, grid_knn.predict(X_test_pca))
best_svm_acc = accuracy_score(y_test, grid_svm.predict(X_test_pca))
best_dt_acc = accuracy_score(y_test, grid_tree.predict(X_test_pca))

# `accuracies`, `model_names` and the estimator list below share the same order.
accuracies = [best_lr_acc, best_knn_acc, best_svm_acc, best_dt_acc]
model_names = ["Logistic Regression", "k-NN", "SVM", "Decision Tree"]
best_index = accuracies.index(max(accuracies))  # first one wins on ties
best_model_name = model_names[best_index]
print(f" Modello migliore: {best_model_name} (accuracy: {accuracies[best_index]:.4f})")

# Confusion matrix of the selected model.
best_model = [best_logreg, best_knn, best_svm, best_tree][best_index]
y_pred = best_model.predict(X_test_pca)

# Explicit class ids keep the matrix 10x10 and aligned with the tick labels.
class_labels = list(range(10))
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_labels, yticklabels=class_labels, ax=ax)
ax.set_title(f'Confusion Matrix: {best_model_name}')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
fig.tight_layout()
plt.show()