In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Wine dataset and separate features from labels.
data = load_wine()
X, y = data.data, data.target

# Seeds to evaluate; each seed drives both the train/test split and the models.
random_states = [1, 5, 7, 10, 15, 21, 42, 55, 77, 99]

# Header of the per-seed results table.
print("Random State | Decision Tree Accuracy | Random Forest Accuracy (100 trees)")
print("-" * 60)

dt_accuracies, rf_accuracies = [], []

for rs in random_states:
    # 80/20 train/test split, reproducible for this seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=rs
    )

    # Single decision tree with default hyper-parameters.
    modello_dt = DecisionTreeClassifier(random_state=rs)
    y_pred_dt = modello_dt.fit(X_train, y_train).predict(X_test)
    accuracy_dt = accuracy_score(y_test, y_pred_dt)

    # Random forest made of 100 trees.
    modello_rf = RandomForestClassifier(n_estimators=100, random_state=rs)
    y_pred_rf = modello_rf.fit(X_train, y_train).predict(X_test)
    accuracy_rf = accuracy_score(y_test, y_pred_rf)

    # Keep the scores so the averages can be computed after the loop.
    dt_accuracies.append(accuracy_dt)
    rf_accuracies.append(accuracy_rf)

    # One table row per seed.
    print(f"{rs:<12} | {accuracy_dt:.2f}                 | {accuracy_rf:.2f}")

# Average test accuracy of each model across all seeds.
dt_mean, rf_mean = np.mean(dt_accuracies), np.mean(rf_accuracies)

print("\nMedia Accuracy Decision Tree:", round(dt_mean, 3))
print("Media Accuracy Random Forest (100 trees):", round(rf_mean, 3))


Random State | Decision Tree Accuracy | Random Forest Accuracy (100 trees)
------------------------------------------------------------
1            | 0.86                 | 0.97
5            | 0.86                 | 0.97
7            | 0.92                 | 1.00
10           | 0.94                 | 0.94
15           | 0.92                 | 1.00
21           | 0.86                 | 1.00
42           | 0.94                 | 1.00
55           | 0.89                 | 1.00
77           | 0.89                 | 1.00
99           | 0.89                 | 1.00

Media Accuracy Decision Tree: 0.897
Media Accuracy Random Forest (100 trees): 0.989


In [3]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Wine dataset.
data = load_wine()
X = data.data
y = data.target

# Seeds to evaluate; each seed drives both the train/test split and the models.
random_states = [1, 5, 7, 10, 15, 21, 42, 55, 77, 99]

print("Random State | Decision Tree (max_depth=5) | Random Forest (max_depth=5, 100 trees)")
print("-" * 70)

dt_accuracies = []
rf_accuracies = []

for rs in random_states:
    # 80/20 train/test split, reproducible for this seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=rs
    )

    # Hyper-parameters shared by the single tree and the forest.
    shared_params = {"max_depth": 5, "random_state": rs}

    # Decision tree limited to depth 5.
    modello_dt = DecisionTreeClassifier(**shared_params)
    modello_dt.fit(X_train, y_train)
    y_pred_dt = modello_dt.predict(X_test)

    # Random forest of 100 trees, each limited to depth 5.
    modello_rf = RandomForestClassifier(n_estimators=100, **shared_params)
    modello_rf.fit(X_train, y_train)
    y_pred_rf = modello_rf.predict(X_test)

    # Score both models on the held-out set and record the results.
    accuracy_dt = accuracy_score(y_test, y_pred_dt)
    accuracy_rf = accuracy_score(y_test, y_pred_rf)
    dt_accuracies.append(accuracy_dt)
    rf_accuracies.append(accuracy_rf)

    print(f"{rs:<12} | {accuracy_dt:.2f}                      | {accuracy_rf:.2f}")

# Average test accuracy of each model across all seeds.
dt_mean = np.mean(dt_accuracies)
rf_mean = np.mean(rf_accuracies)

print("\nMedia Accuracy Decision Tree (max_depth=5):", round(dt_mean, 3))
print("Media Accuracy Random Forest (max_depth=5, 100 trees):", round(rf_mean, 3))


Random State | Decision Tree (max_depth=5) | Random Forest (max_depth=5, 100 trees)
----------------------------------------------------------------------
1            | 0.86                      | 0.97
5            | 0.86                      | 0.97
7            | 0.89                      | 1.00
10           | 0.94                      | 0.94
15           | 0.92                      | 1.00
21           | 0.86                      | 1.00
42           | 0.94                      | 1.00
55           | 0.92                      | 1.00
77           | 0.89                      | 1.00
99           | 0.89                      | 1.00

Media Accuracy Decision Tree (max_depth=5): 0.897
Media Accuracy Random Forest (max_depth=5, 100 trees): 0.989


In [7]:
# Test on the "digits" dataset with 200 decision trees and no max_depth limit | the digits dataset contains 8x8 images of handwritten digits (0 to 9)

In [9]:
from sklearn.datasets import load_digits

# Load the handwritten-digit image dataset.
data = load_digits()
X, y = data.data, data.target

print("\nTest su dataset più complesso (Digits dataset)")
print("Random State | Decision Tree | Random Forest (200 trees)")
print("-" * 60)

dt_accuracies_digits, rf_accuracies_digits = [], []

# NOTE: `random_states` (and the sklearn/numpy names used below) are not
# defined in this cell; they must already exist in the kernel.
for rs in random_states:
    # 80/20 train/test split, reproducible for this seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=rs
    )

    # Build both models first: a single tree and a 200-tree forest.
    modello_dt = DecisionTreeClassifier(random_state=rs)
    modello_rf = RandomForestClassifier(n_estimators=200, random_state=rs)

    # Train each of them on the same training split.
    for modello in (modello_dt, modello_rf):
        modello.fit(X_train, y_train)

    # Predict on the held-out split.
    y_pred_dt = modello_dt.predict(X_test)
    y_pred_rf = modello_rf.predict(X_test)

    # Score and record.
    accuracy_dt = accuracy_score(y_test, y_pred_dt)
    accuracy_rf = accuracy_score(y_test, y_pred_rf)
    dt_accuracies_digits.append(accuracy_dt)
    rf_accuracies_digits.append(accuracy_rf)

    print(f"{rs:<12} | {accuracy_dt:.2f}         | {accuracy_rf:.2f}")

# Average test accuracy of each model across all seeds.
dt_mean_digits = np.mean(dt_accuracies_digits)
rf_mean_digits = np.mean(rf_accuracies_digits)

print("\nMedia Accuracy Decision Tree (Digits dataset):", round(dt_mean_digits, 3))
print("Media Accuracy Random Forest (Digits dataset, 200 trees):", round(rf_mean_digits, 3))



Test su dataset più complesso (Digits dataset)
Random State | Decision Tree | Random Forest (200 trees)
------------------------------------------------------------
1            | 0.84         | 0.99
5            | 0.87         | 0.96
7            | 0.85         | 0.96
10           | 0.85         | 0.97
15           | 0.84         | 0.97
21           | 0.84         | 0.97
42           | 0.84         | 0.97
55           | 0.86         | 0.98
77           | 0.88         | 0.97
99           | 0.86         | 0.97

Media Accuracy Decision Tree (Digits dataset): 0.853
Media Accuracy Random Forest (Digits dataset, 200 trees): 0.973


In [10]:
# Let's try it with a limited max_depth (max_depth=10 in the next cell) to understand how much tree depth the model actually needs

In [20]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Digits dataset (images of handwritten digits).
data = load_digits()
X = data.data
y = data.target

# Seeds to evaluate; each seed drives both the train/test split and the models.
random_states = [1, 5, 7, 10, 15, 21, 42, 55, 77, 99]

# Experiment settings, defined once so that every printed label reports the
# values the models are actually trained with. The labels used to hard-code
# "200 trees" while the forest was built with n_estimators=30.
MAX_DEPTH = 10
# NOTE(review): the earlier Digits cell uses 200 trees; set this to 200 for a
# like-for-like comparison with that run.
N_ESTIMATORS = 30
# NOTE(review): the earlier cells hold out 0.2 of the data — confirm that 0.1
# is intentional here.
TEST_SIZE = 0.1

# Column titles; the separator and the row padding are derived from them so
# the table stays aligned when the settings above change.
dt_header = f"Decision Tree (max_depth={MAX_DEPTH})"
rf_header = f"Random Forest (max_depth={MAX_DEPTH}, {N_ESTIMATORS} trees)"
header = f"Random State | {dt_header} | {rf_header}"

print(f"\nTest su Digits Dataset con max_depth={MAX_DEPTH}")
print(header)
print("-" * len(header))

dt_accuracies = []
rf_accuracies = []

for rs in random_states:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=rs
    )

    # Decision tree limited to MAX_DEPTH levels.
    modello_dt = DecisionTreeClassifier(max_depth=MAX_DEPTH, random_state=rs)
    modello_dt.fit(X_train, y_train)
    y_pred_dt = modello_dt.predict(X_test)
    accuracy_dt = accuracy_score(y_test, y_pred_dt)
    dt_accuracies.append(accuracy_dt)

    # Random forest of N_ESTIMATORS trees, each limited to MAX_DEPTH levels.
    modello_rf = RandomForestClassifier(
        n_estimators=N_ESTIMATORS, max_depth=MAX_DEPTH, random_state=rs
    )
    modello_rf.fit(X_train, y_train)
    y_pred_rf = modello_rf.predict(X_test)
    accuracy_rf = accuracy_score(y_test, y_pred_rf)
    rf_accuracies.append(accuracy_rf)

    # Pad the Decision Tree score to the width of its column title.
    print(f"{rs:<12} | {accuracy_dt:<{len(dt_header)}.2f} | {accuracy_rf:.2f}")

# Average test accuracy of each model across all seeds.
dt_mean = np.mean(dt_accuracies)
rf_mean = np.mean(rf_accuracies)

print(f"\nMedia Accuracy Decision Tree (max_depth={MAX_DEPTH}):", round(dt_mean, 3))
print(
    f"Media Accuracy Random Forest (max_depth={MAX_DEPTH}, {N_ESTIMATORS} trees):",
    round(rf_mean, 3),
)



Test su Digits Dataset con max_depth=10
Random State | Decision Tree (max_depth=10) | Random Forest (max_depth=10, 200 trees)
--------------------------------------------------------------------------------
1            | 0.90                      | 0.99
5            | 0.83                      | 0.96
7            | 0.86                      | 0.96
10           | 0.86                      | 0.97
15           | 0.84                      | 0.96
21           | 0.88                      | 0.97
42           | 0.86                      | 0.97
55           | 0.89                      | 0.97
77           | 0.88                      | 0.97
99           | 0.86                      | 0.98

Media Accuracy Decision Tree (max_depth=10): 0.866
Media Accuracy Random Forest (max_depth=10, 200 trees): 0.97
