In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay

In [2]:
train_df = pd.read_csv('./Dataset/training_data_v10.csv', encoding="latin1")
test_df = pd.read_csv('./Dataset/test_data_v10.csv', encoding="latin1")

In [3]:
def finalize_data_for_model(df):
    df = df.copy()
    drop_cols = ['record_date', 'city_name', 'AVERAGE_PRECIPITATION', 'AVERAGE_RAIN']
    df = df.drop(columns=[c for c in drop_cols if c in df.columns])
    return df

data_treino = finalize_data_for_model(train_df)
data_teste = finalize_data_for_model(test_df)

# Tratar valores nulos no target se existirem
if 'AVERAGE_SPEED_DIFF' in data_treino.columns:
   data_treino['AVERAGE_SPEED_DIFF'] = data_treino['AVERAGE_SPEED_DIFF'].fillna(0).astype(int)

# Separar Features e Target
X = data_treino.drop(columns=['AVERAGE_SPEED_DIFF'])
y = data_treino['AVERAGE_SPEED_DIFF']
X_submission = data_teste.copy()

In [4]:
scaler_X = MinMaxScaler(feature_range=(0,1)).fit(X)

# Transformar treino e teste
X_normalize = pd.DataFrame(scaler_X.transform(X), columns=X.columns)
X_submission_normalize = pd.DataFrame(scaler_X.transform(X_submission), columns=X_submission.columns)

In [5]:
# Split de 25% para validação, com stratify (igual ao notebook)
X_train, X_test, y_train, y_test = train_test_split(
    X_normalize, y, 
    test_size=0.25, 
    random_state=2023, 
    stratify=y
)

In [6]:
mlp = MLPClassifier(random_state=2023)

# Parâmetros exatos do notebook da Tarefa 2
param_grid = {
    'hidden_layer_sizes': [(50, 50, 25), (50, 25, 25)],
    'activation': ['tanh'],
    'solver': ['adam'],
    'alpha': [0.001],
    'max_iter': [100000],
    'learning_rate': ['constant', 'invscaling', 'adaptive'],
}

print("A iniciar GridSearchCV para MLP...")
grid_searchMLP = GridSearchCV(mlp, param_grid, cv=5, refit=True, verbose=3, n_jobs=-1)
grid_searchMLP.fit(X_train, y_train)

# Melhor modelo
mlp_best = grid_searchMLP.best_estimator_
print("\nMelhor estimador:", mlp_best)

A iniciar GridSearchCV para MLP...
Fitting 5 folds for each of 6 candidates, totalling 30 fits

Melhor estimador: MLPClassifier(activation='tanh', alpha=0.001, hidden_layer_sizes=(50, 25, 25),
              max_iter=100000, random_state=2023)


In [None]:
grid_predictionMLP = mlp_best.predict(X_test)

print("\nValidation Accuracy: {:.4f}".format(accuracy_score(y_test, grid_predictionMLP)))
print("-" * 30)
print("Classification Report:\n")
print(classification_report(y_test, grid_predictionMLP))

# ==============================================================================
# TAREFA 2: Previsão Final e Submissão
# ==============================================================================
print("\nA gerar previsões para submissão...")

predictionMLP = mlp_best.predict(X_submission_normalize)

# Mapeamento inverso
target_map_reverse = {
    0: 'None', 
    1: 'Low', 
    2: 'Medium', 
    3: 'High', 
    4: 'Very_High'
}

prediction_text = [target_map_reverse.get(p, 'None') for p in predictionMLP]

submission = pd.DataFrame({
    'RowId': range(1, len(prediction_text) + 1),
    'Speed_Diff': prediction_text
})

submission.to_csv('submission_mlp.csv', index=False)
print("Ficheiro criado: submission_mlp.csv")
print(submission.head())


Validation Accuracy: 0.7880
------------------------------
Classification Report:

              precision    recall  f1-score   support

           0       0.83      0.96      0.89       550
           1       0.70      0.62      0.66       355
           2       0.77      0.77      0.77       413
           3       0.78      0.72      0.75       265
           4       0.91      0.72      0.81       120

    accuracy                           0.79      1703
   macro avg       0.80      0.76      0.77      1703
weighted avg       0.79      0.79      0.78      1703


A gerar previsões para submissão...
Ficheiro criado: submission_mlp_task2_style.csv
   RowId Speed_Diff
0      1       None
1      2        Low
2      3       None
3      4     Medium
4      5       None
