## Imports

In [215]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, precision_score
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import StratifiedKFold
import numpy as np

## Data load

In [216]:
# One-hot encoder shared by the three MONK problems. It is re-fitted on each
# problem's training split below, so once this cell has run it holds the
# categories learned from monk3.
encoder = OneHotEncoder(categories='auto', sparse_output=False)


def prepare_monk(train_frame, test_frame):
    """Split one MONK problem into features/labels and one-hot encode the features.

    Column 0 of each frame is taken as the label and columns 1-6 as the
    features. The module-level ``encoder`` is fitted on the training features
    only and then applied unchanged to the test features.

    Args:
        train_frame: DataFrame read from a ``.train`` file.
        test_frame: DataFrame read from the matching ``.test`` file.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test, X_train_encoded,
        X_test_encoded)``.
    """
    X_train = train_frame.iloc[:, 1:7].values  # features
    y_train = train_frame.iloc[:, 0].values    # labels
    X_test = test_frame.iloc[:, 1:7].values
    y_test = test_frame.iloc[:, 0].values

    X_train_encoded = encoder.fit_transform(X_train)  # fit + transform on training data
    X_test_encoded = encoder.transform(X_test)        # transform only on test data
    return X_train, y_train, X_test, y_test, X_train_encoded, X_test_encoded


# Load the training and test files of each dataset from the given path.
# The separator is written as a raw string: '\s' is not a valid Python string
# escape, so the non-raw form triggers an invalid-escape-sequence warning.
monk1_train = pd.read_csv('../Datasets/Monks/monks-1.train', sep=r'\s+', header=None)
monk1_test = pd.read_csv('../Datasets/Monks/monks-1.test', sep=r'\s+', header=None)

monk2_train = pd.read_csv('../Datasets/Monks/monks-2.train', sep=r'\s+', header=None)
monk2_test = pd.read_csv('../Datasets/Monks/monks-2.test', sep=r'\s+', header=None)

monk3_train = pd.read_csv('../Datasets/Monks/monks-3.train', sep=r'\s+', header=None)
monk3_test = pd.read_csv('../Datasets/Monks/monks-3.test', sep=r'\s+', header=None)


# Lists collecting the transformed datasets as (encoded features, labels) pairs
monks_train = []
monks_test = []


# Dataset monk1
(X1_train, y1_train, X1_test, y1_test,
 X1_train_encoded, X1_test_encoded) = prepare_monk(monk1_train, monk1_test)

monks_train.append((X1_train_encoded, y1_train))
monks_test.append((X1_test_encoded, y1_test))

# Dataset monk2
(X2_train, y2_train, X2_test, y2_test,
 X2_train_encoded, X2_test_encoded) = prepare_monk(monk2_train, monk2_test)

monks_train.append((X2_train_encoded, y2_train))
monks_test.append((X2_test_encoded, y2_test))

# Dataset monk3
(X3_train, y3_train, X3_test, y3_test,
 X3_train_encoded, X3_test_encoded) = prepare_monk(monk3_train, monk3_test)

monks_train.append((X3_train_encoded, y3_train))
monks_test.append((X3_test_encoded, y3_test))

## Model creation

In [None]:
def create_SVM(C = 100, type = 'rbf'):
    """Build an unfitted support-vector classifier.

    Args:
        C: Value forwarded to ``SVC`` as its ``C`` argument.
        type: Kernel name forwarded to ``SVC`` as its ``kernel`` argument
            (for example ``'rbf'`` or ``'poly'``).

    Returns:
        A new ``SVC`` instance with ``random_state`` fixed to 42.
    """
    svm_settings = {'kernel': type, 'C': C, 'random_state': 42}
    return SVC(**svm_settings)

## K-fold cross validation

In [None]:
def k_fold_cross_validation(data, labels, params=None):
    """Estimate the accuracy of one SVM configuration with stratified 5-fold CV.

    For every fold a fresh SVM is built with ``create_SVM``, fitted on the
    training part and scored (mean accuracy) on the validation part. After
    the loop a final model with the same configuration is fitted on the whole
    of ``data``.

    Args:
        data: Feature matrix, one row per sample.
        labels: Class label of each row of ``data``.
        params: Optional dict that may contain ``'C'`` and ``'type'`` (the
            kernel name). Keys that are missing, or ``params=None``, fall back
            to the defaults of ``create_SVM``.

    Returns:
        Tuple ``(avg_score, history, model)`` where ``avg_score`` is the mean
        validation accuracy over the folds, ``history`` is a dict whose
        ``'val_accuracy'`` entry lists the per-fold validation accuracies, and
        ``model`` is the final SVM fitted on all of ``data``.
    """
    # Fancy indexing with the fold indices below requires array inputs.
    data = np.asarray(data)
    labels = np.asarray(labels)

    # Forward only the hyper-parameters that were actually supplied so that
    # create_SVM's own defaults apply to everything else.
    params = params or {}
    svm_kwargs = {key: params[key] for key in ('C', 'type') if key in params}

    # Stratified split; the fixed seed makes the folds reproducible.
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    accuracy_per_fold = []
    for train_index, val_index in kfold.split(data, labels):
        X_train, X_val = data[train_index], data[val_index]
        y_train, y_val = labels[train_index], labels[val_index]

        # A new model per fold so nothing learned on one fold leaks into the next.
        fold_model = create_SVM(**svm_kwargs)
        fold_model.fit(X_train, y_train)

        # For a classifier, score() is the mean accuracy on the given samples.
        accuracy_per_fold.append(fold_model.score(X_val, y_val))

    avg_score = np.mean(accuracy_per_fold)

    # Final model: an SVM has no epochs or early stopping, so no validation
    # split is held out and the model is simply fitted on all available data.
    model = create_SVM(**svm_kwargs)
    model.fit(data, labels)

    history = {'val_accuracy': accuracy_per_fold}
    return avg_score, history, model

## Grid search

In [None]:
def greed_search(input_size = 6, param_grid = None, data = None, labels = None):
    """Cross-validate every configuration in ``param_grid`` and keep the 10 best.

    Each configuration is evaluated with ``k_fold_cross_validation``; the
    results are kept sorted by score in descending order and truncated to the
    ten best after every evaluation.

    Args:
        input_size: Not used by this function; kept so existing calls that
            pass it keep working.
        param_grid: Iterable of parameter dicts to evaluate. ``None`` is
            treated as an empty grid.
        data: Feature matrix to cross-validate on. Defaults to the notebook's
            ``X1_train_encoded`` when omitted.
        labels: Labels matching ``data``. Defaults to the notebook's
            ``y1_train`` when omitted.

    Returns:
        Tuple ``(best_scores, best_params_list, best_models, best_histories)``
        of parallel lists, best configuration first, at most 10 entries each.

    Raises:
        ValueError: If only one of ``data`` / ``labels`` is supplied.
    """
    if param_grid is None:
        param_grid = []

    # Either both or neither must be given, otherwise features and labels
    # could silently come from different datasets.
    if (data is None) != (labels is None):
        raise ValueError("data and labels must be provided together")
    if data is None:
        data, labels = X1_train_encoded, y1_train

    # Each entry is (score, params, model, history).
    results = []

    for params in param_grid:
        print("--------------------------------------------------")
        print(f"Testing params: {params}")
        # Arguments are passed to match k_fold_cross_validation(data, labels, params=None).
        score, history, model = k_fold_cross_validation(data, labels, params=params)
        print(f"Score : {score}")

        results.append((score, params, model, history))

        # Sort by score in descending order and keep only the 10 best, so the
        # number of retained models stays bounded while the grid is scanned.
        top_indices = np.argsort([entry[0] for entry in results])[::-1][:10]
        results = [results[i] for i in top_indices]

    best_scores = [entry[0] for entry in results]
    best_params_list = [entry[1] for entry in results]
    best_models = [entry[2] for entry in results]
    best_histories = [entry[3] for entry in results]

    print("--------------------END GREED SEARCH------------------------------")

    # Report the (up to) 10 best results
    print("Top 10 best scores:")
    print(best_scores)
    print("Top 10 best params:")
    print(best_params_list)

    return best_scores, best_params_list, best_models, best_histories

## Model assessment

In [218]:
# Monk-1: RBF-kernel SVM with C=100
svm_model1 = SVC(C=100.0, kernel='rbf', random_state=42)

# Fit on the encoded training split, then predict the encoded test split
# (fit() returns the estimator itself, so the two calls can be chained)
y1_pred = svm_model1.fit(X1_train_encoded, y1_train).predict(X1_test_encoded)

# Report overall accuracy followed by the per-class metrics
print("Accuracy:", accuracy_score(y1_test, y1_pred))
print("\nClassification Report:\n", classification_report(y1_test, y1_pred))

Accuracy: 0.9953703703703703

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      1.00       216
           1       0.99      1.00      1.00       216

    accuracy                           1.00       432
   macro avg       1.00      1.00      1.00       432
weighted avg       1.00      1.00      1.00       432



In [219]:
# Monk-2: polynomial-kernel SVM with C=100
svm_model2 = SVC(C=100.0, kernel='poly', random_state=42)

# Fit on the encoded training split, then predict the encoded test split
# (fit() returns the estimator itself, so the two calls can be chained)
y2_pred = svm_model2.fit(X2_train_encoded, y2_train).predict(X2_test_encoded)

# Report overall accuracy followed by the per-class metrics
print("Accuracy:", accuracy_score(y2_test, y2_pred))
print("\nClassification Report:\n", classification_report(y2_test, y2_pred))

Accuracy: 0.7731481481481481

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.77      0.82       290
           1       0.62      0.77      0.69       142

    accuracy                           0.77       432
   macro avg       0.75      0.77      0.76       432
weighted avg       0.79      0.77      0.78       432



In [220]:
# Monk-3: RBF-kernel SVM with C=1
svm_model3 = SVC(C=1.0, kernel='rbf', random_state=42)

# Fit on the encoded training split, then predict the encoded test split
# (fit() returns the estimator itself, so the two calls can be chained)
y3_pred = svm_model3.fit(X3_train_encoded, y3_train).predict(X3_test_encoded)

# Report overall accuracy followed by the per-class metrics
print("Accuracy:", accuracy_score(y3_test, y3_pred))
print("\nClassification Report:\n", classification_report(y3_test, y3_pred))

Accuracy: 0.9768518518518519

Classification Report:
               precision    recall  f1-score   support

           0       0.95      1.00      0.98       204
           1       1.00      0.96      0.98       228

    accuracy                           0.98       432
   macro avg       0.98      0.98      0.98       432
weighted avg       0.98      0.98      0.98       432

