In [2]:
# Setup iniziale con i vari import da sfruttare per i due svolgimenti separati
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

In [3]:
# Caricamento Dataset di Kaggle
# Kaggle "DOM_hourly": columns => ["Datetime", "DOM_MW"]
df = pd.read_csv("Esercizio 2/DOM_hourly.csv", parse_dates=["Datetime"])
# Forziamo che la colonna 'Datetime' sia in formato datetime
df['Datetime'] = pd.to_datetime(df['Datetime'])
df.set_index('Datetime', inplace=True)

In [4]:
# Feature engineering
df["hour"] = df.index.hour
df["dayofweek"] = df.index.dayofweek
df["month"] = df.index.month

In [5]:
from sklearn.model_selection import train_test_split

# Etichetta: 1 se consumo > mediana, altrimenti 0
df["target"] = (df["DOM_MW"] > df["DOM_MW"].median()).astype(int)

# Feature: ora, giorno della settimana, mese
X = df[["hour", "dayofweek", "month"]]
y = df["target"]

# Split
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.15, stratify=y, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.176, stratify=y_temp, random_state=42)

print(f"Train: {X_train.shape}, Validation: {X_val.shape}, Test: {X_test.shape}")

Train: (81378, 3), Validation: (17382, 3), Test: (17429, 3)


In [6]:
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import roc_auc_score, make_scorer

# Feature e target (come prima)
X = df[["hour", "dayofweek", "month"]]
y = df["target"]

# K-Fold stratificato
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Decision Tree
tree = DecisionTreeClassifier(max_depth=5, random_state=42)
auc_tree = cross_val_score(tree, X, y, cv=skf, scoring="roc_auc")

# Neural Network con scaling
mlp_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("mlp", MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=300, random_state=42))
])
auc_mlp = cross_val_score(mlp_pipeline, X, y, cv=skf, scoring="roc_auc")

print(f"Decision Tree AUC: {auc_tree.mean():.3f} ± {auc_tree.std():.3f}")
print(f"Neural Network AUC: {auc_mlp.mean():.3f} ± {auc_mlp.std():.3f}")


Decision Tree AUC: 0.860 ± 0.001
Neural Network AUC: 0.895 ± 0.002
