In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error
import pickle

In [23]:
# Carregar os dados (substitua o caminho com o dataset do trem)
df = pd.read_csv('C:/Users/labsfiap/Documents/CCR/dados.csv', encoding='ISO-8859-1', sep=';')
df

Unnamed: 0,#,Horário,TMP (min),Dia da Semana,Lotação (%),Atraso (min),Estação,Tipo de Trem
0,0,05:00,12.0,Domingo,30.0,1.0,Barra Funda,Novo
1,1,06:30,14.0,Domingo,45.0,2.0,Júlio Prestes,Antigo
2,2,08:00,20.0,Domingo,85.0,6.0,Osasco,Novo
3,3,09:15,18.0,Domingo,75.0,4.0,Presidente Altino,Antigo
4,4,10:45,13.0,Domingo,60.0,3.0,Barueri,Novo
...,...,...,...,...,...,...,...,...
84,84,19:30,16.0,Terça,74.0,4.0,Barra Funda,Novo
85,85,21:00,14.5,Terça,62.0,2.0,Júlio Prestes,Antigo
86,86,22:30,13.0,Terça,51.0,1.0,Osasco,Novo
87,87,23:59,11.0,Terça,39.0,0.0,Presidente Altino,Antigo


In [29]:
# Converte "HH:MM" para minutos
def horario_para_minutos(hora):
    h, m = map(int, hora.split(":"))
    return h * 60 + m

df["Horário_min"] = df["Horário"].apply(horario_para_minutos)

# Seleção de features e target
X = df[["Horário_min", "Dia da Semana", "Lotação (%)", "Atraso (min)"]]
y = df["TMP (min)"]

In [31]:
# ======================
# 3. Dividir dados
# ======================

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [33]:
# ======================
# 4. Pipeline + Modelo
# ======================

cat_features = ["Dia da Semana"]
num_features = ["Horário_min", "Lotação (%)", "Atraso (min)"]

preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat_features),
    ("num", "passthrough", num_features)
])

In [36]:
# MLP Regressor com estrutura semelhante ao MNIST
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", MLPRegressor(hidden_layer_sizes=(50, 20), max_iter=1000, random_state=42, verbose=True))
])

pipeline.fit(X_train, y_train)

Iteration 1, loss = 478.63986955
Iteration 2, loss = 204.19361136
Iteration 3, loss = 131.82183514
Iteration 4, loss = 167.85537776
Iteration 5, loss = 202.26506380
Iteration 6, loss = 196.38327557
Iteration 7, loss = 161.74085896
Iteration 8, loss = 121.06721321
Iteration 9, loss = 93.22592635
Iteration 10, loss = 85.68564804
Iteration 11, loss = 95.40548727
Iteration 12, loss = 111.17956773
Iteration 13, loss = 121.07795704
Iteration 14, loss = 119.29682628
Iteration 15, loss = 107.45108017
Iteration 16, loss = 91.80948293
Iteration 17, loss = 79.13120598
Iteration 18, loss = 73.51063164
Iteration 19, loss = 74.99136114
Iteration 20, loss = 80.19353086
Iteration 21, loss = 84.43834417
Iteration 22, loss = 84.40890014
Iteration 23, loss = 79.60538490
Iteration 24, loss = 72.04989520
Iteration 25, loss = 64.79222827
Iteration 26, loss = 60.25151387
Iteration 27, loss = 59.06547002
Iteration 28, loss = 60.04048392
Iteration 29, loss = 61.01643201
Iteration 30, loss = 60.13351958
Iterati

In [38]:
# ======================
# 5. Avaliação
# ======================

y_pred = pipeline.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"MAE: {mae:.2f} minutos")

MAE: 1.24 minutos


In [39]:

# ======================
# 6. Salvar modelo
# ======================

with open("mlp_previsao_trem.pkl", "wb") as f:
    pickle.dump(pipeline, f)