In [13]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from xgboost import XGBRegressor

In [None]:
# Load the dataset: a tab-separated file encoded as ISO-8859-1 (Latin-1),
# read from a path relative to the notebook's working directory.
df = pd.read_csv(
    '../data/dados_sinteticos_trem.csv',
    sep='\t',
    encoding='ISO-8859-1',
)
# Bare last expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,#,Horario,TMP,Dia da Semana,Lotacao,Atraso,Estacao,Tipo de Trem
0,0,08:45,25.2,Segunda,39.7,5.6,Barueri,Novo
1,1,00:15,14.3,Segunda,59.0,3.3,Barra Funda,Novo
2,2,01:00,15.3,Segunda,76.2,5.6,Julio Prestes,Novo
3,3,02:30,14.9,Terca,31.5,1.7,Julio Prestes,Novo
4,4,01:00,10.3,Terca,66.4,0.2,Barueri,Antigo
...,...,...,...,...,...,...,...,...
4995,4995,02:15,10.3,Domingo,43.2,4.2,Osasco,Novo
4996,4996,02:00,16.6,Segunda,59.0,4.2,Osasco,Antigo
4997,4997,07:30,12.2,Sabado,45.2,2.7,Osasco,Novo
4998,4998,08:00,20.4,Sabado,77.3,1.0,Barueri,Novo


In [None]:
def horario_para_minutos(hora: str) -> int:
    """Convert a clock time written as "HH:MM" into minutes.

    Args:
        hora: Text with exactly two integer fields separated by a colon,
            e.g. "08:45".

    Returns:
        The hour field multiplied by 60, plus the minute field.

    Raises:
        ValueError: If the text does not split into exactly two fields on
            ":" or if either field is not a valid integer.
    """
    horas, minutos = (int(campo) for campo in hora.split(":"))
    return 60 * horas + minutos

# Derive a numeric column from the "Horario" text column by running every
# value through horario_para_minutos.
df["Horario_min"] = df["Horario"].map(horario_para_minutos)

# Feature matrix (four columns) and regression target ("TMP"), both taken
# from the same frame.
X, y = df[["Horario_min", "Dia da Semana", "Lotacao", "Atraso"]], df["TMP"]

In [None]:
# Hold out 25% of the rows for evaluation (scikit-learn's default split size,
# spelled out here); the fixed seed keeps the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Column groups consumed by the transformer below.
cat_features = ["Dia da Semana"]
num_features = ["Horario_min", "Lotacao", "Atraso"]

# One-hot encode the categorical column and pass the numeric columns through
# unchanged. handle_unknown="ignore" makes categories unseen during fit encode
# as all zeros at transform time instead of raising.
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_features),
        ("num", "passthrough", num_features),
    ]
)

In [None]:
# Modelling pipeline: the ColumnTransformer built above feeds an XGBoost
# regressor. XGBRegressor is imported in the notebook's import cell rather than
# mid-notebook, so every dependency is visible up front.
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", XGBRegressor(
        n_estimators=300,    # number of boosting rounds (trees)
        max_depth=6,         # maximum depth of each tree
        learning_rate=0.1,   # shrinkage applied to each boosting step
        random_state=42,     # fixed seed for reproducibility
        n_jobs=-1,           # parallel threads; -1 requests all available cores
        verbosity=1,         # XGBoost log level 1 = warnings
    ))
])

# Fit on the training split. As the cell's last expression, the returned
# pipeline is also rendered as the cell output.
pipeline.fit(X_train, y_train)


In [None]:
# Predict on the held-out split and score the predictions against the true
# target values.
y_pred = pipeline.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
# RMSE is the square root of the mean squared error, which puts it in the
# same units as the target.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# One call, two lines of output: each metric formatted to two decimals.
print(f'MAE: {mae:.2f}', f'RMSE: {rmse:.2f}', sep='\n')

MAE: 2.64
RMSE: 3.14


In [None]:
# Serialise the whole pipeline object (preprocessing + regressor) to disk in
# the current working directory.
# NOTE(review): the file name says "mlp", but the regressor in this pipeline is
# an XGBRegressor — confirm whether any loader depends on this name before
# renaming it.
# Only unpickle this file from a trusted location: pickle.load can execute
# arbitrary code.
with open("mlp_previsao_trem.pkl", "wb") as model_file:
    pickle.dump(pipeline, model_file)