# Random Forest

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

# Load the histogram features from the CSV file
data = pd.read_csv('histograms_features.csv')

# Separate the features (the histogram bins) from the labels:
# every column except 'Label' is a feature, 'Label' is the target.
X = data.drop(columns=['Label'])
y = data['Label']

# Encode the string labels as integers (some classifiers require numeric targets)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the dataset in half: 50% training, 50% test (test_size=0.5).
# NOTE(review): the split is not stratified, so rare classes may end up
# under-represented (or absent) in the training half. Consider
# stratify=y_encoded once every class is confirmed to have >= 2 samples.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.5, random_state=42)

# Build the Random Forest model. n_jobs=-1 spreads the 5000 trees across all
# available CPU cores; the seed and the hyper-parameters are unchanged.
model = RandomForestClassifier(n_estimators=5000, random_state=42, n_jobs=-1)

# Train the model on the training data
model.fit(X_train, y_train)

# Predict the labels of the test data
y_pred = model.predict(X_test)

# Evaluate the overall accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Per-class classification report. Passing every encoded label keeps classes
# that are missing from the test split in the table; zero_division=0 reports
# 0.0 (without a warning) for classes that were never predicted.
print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=label_encoder.transform(label_encoder.classes_),
                            target_names=label_encoder.classes_, zero_division=0))


Accuracy: 60.24%

Classification Report:
                         precision    recall  f1-score   support

        Aussie Lemonade       0.56      0.62      0.59         8
             Mango Loco       0.71      0.77      0.74        13
         Original green       0.36      0.56      0.43         9
          Pacific Punch       0.17      0.33      0.22         6
    java salted caramel       0.60      0.43      0.50         7
                lo carb       0.75      0.27      0.40        11
     nitro cosmic peach       0.64      0.88      0.74        16
        nitro super dry       0.20      0.33      0.25         3
           tea lemonade       0.00      0.00      0.00         1
            ultra black       0.00      0.00      0.00         3
             ultra blue       1.00      0.50      0.67         2
             ultra gold       0.00      0.00      0.00         4
      ultra peachy keen       1.00      0.57      0.73         7
              ultra red       1.00      0.33    

# SVM

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

# Load the histogram features from the CSV file
data = pd.read_csv('histograms_features.csv')

# Separate the features (the histogram bins) from the labels:
# every column except 'Label' is a feature, 'Label' is the target.
X = data.drop(columns=['Label'])
y = data['Label']

# Encode the string labels as integers (some classifiers require numeric targets)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the dataset in half: 50% training, 50% test (test_size=0.5).
# NOTE(review): the split is not stratified, so rare classes may end up
# under-represented (or absent) in the training half. Consider
# stratify=y_encoded once every class is confirmed to have >= 2 samples.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.5, random_state=42)

# Build the linear-kernel Support Vector Machine classifier.
# NOTE(review): the features are passed in unscaled; SVMs are usually
# sensitive to feature scale, so standardising X may be worth trying.
model = SVC(kernel='linear', random_state=42)

# Train the model on the training data
model.fit(X_train, y_train)

# Predict the labels of the test data
y_pred = model.predict(X_test)

# Evaluate the overall accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Per-class classification report. Passing every encoded label keeps classes
# that are missing from the test split in the table; zero_division=0 reports
# 0.0 for classes that were never predicted instead of emitting an
# UndefinedMetricWarning (the printed numbers are the same).
print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=label_encoder.transform(label_encoder.classes_),
                            target_names=label_encoder.classes_, zero_division=0))


Accuracy: 56.02%

Classification Report:
                         precision    recall  f1-score   support

        Aussie Lemonade       0.50      0.62      0.56         8
             Mango Loco       1.00      0.85      0.92        13
         Original green       0.26      0.78      0.39         9
          Pacific Punch       0.29      0.67      0.40         6
    java salted caramel       0.40      0.57      0.47         7
                lo carb       0.33      0.09      0.14        11
     nitro cosmic peach       0.85      0.69      0.76        16
        nitro super dry       0.00      0.00      0.00         3
           tea lemonade       0.00      0.00      0.00         1
            ultra black       0.00      0.00      0.00         3
             ultra blue       0.50      1.00      0.67         2
             ultra gold       0.25      0.50      0.33         4
      ultra peachy keen       0.67      0.29      0.40         7
              ultra red       0.62      0.83    

# MLP Classifier

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

# Load the histogram features from the CSV file
data = pd.read_csv('histograms_features.csv')

# Separate the features (the histogram bins) from the labels:
# every column except 'Label' is a feature, 'Label' is the target.
X = data.drop(columns=['Label'])
y = data['Label']

# Encode the string labels as integers (some classifiers require numeric targets)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the dataset in half: 50% training, 50% test (test_size=0.5).
# NOTE(review): the split is not stratified, so rare classes may end up
# under-represented (or absent) in the training half. Consider
# stratify=y_encoded once every class is confirmed to have >= 2 samples.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.5, random_state=42)

# Build the multi-layer perceptron: one hidden layer of 1000 units, with a
# generous iteration cap (max_iter=10000) for the optimiser.
# NOTE(review): the features are passed in unscaled; neural networks are
# usually sensitive to feature scale, so standardising X may be worth trying.
model = MLPClassifier(hidden_layer_sizes=(1000,), max_iter=10000, random_state=42)

# Train the model on the training data
model.fit(X_train, y_train)

# Predict the labels of the test data
y_pred = model.predict(X_test)

# Evaluate the overall accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Per-class classification report. Passing every encoded label keeps classes
# that are missing from the test split in the table; zero_division=0 reports
# 0.0 for classes that were never predicted instead of emitting an
# UndefinedMetricWarning (the printed numbers are the same).
print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=label_encoder.transform(label_encoder.classes_),
                            target_names=label_encoder.classes_, zero_division=0))


Accuracy: 62.05%

Classification Report:
                         precision    recall  f1-score   support

        Aussie Lemonade       0.64      0.88      0.74         8
             Mango Loco       1.00      0.62      0.76        13
         Original green       0.30      0.78      0.44         9
          Pacific Punch       0.57      0.67      0.62         6
    java salted caramel       0.67      0.57      0.62         7
                lo carb       1.00      0.09      0.17        11
     nitro cosmic peach       0.75      0.75      0.75        16
        nitro super dry       0.00      0.00      0.00         3
           tea lemonade       0.00      0.00      0.00         1
            ultra black       0.00      0.00      0.00         3
             ultra blue       0.67      1.00      0.80         2
             ultra gold       0.00      0.00      0.00         4
      ultra peachy keen       1.00      0.57      0.73         7
              ultra red       0.60      0.50    