In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error
import pickle

In [2]:
# Carregar os dados (substitua o caminho com o dataset do trem)
df = pd.read_csv('C:/Users/labsfiap/Documents/CCR/dados.csv', encoding='ISO-8859-1', sep=';')
df

Unnamed: 0,#,Horario,TMP,Dia da Semana,Lotacao,Atraso,Estacao,Tipo de Trem
0,0,05:00,14.6,Segunda,32.8,1.7,JÃºlio Prestes,Antigo
1,1,11:00,18.8,TerÃ§a,58.9,0.4,Presidente Altino,Antigo
2,2,08:00,17.1,SÃ¡bado,34.2,4.2,Barueri,Novo
3,3,08:00,9.3,Sexta,86.8,5.6,Barra Funda,Novo
4,4,10:45,10.5,Quarta,38.6,3.6,JÃºlio Prestes,Novo
...,...,...,...,...,...,...,...,...
9995,9995,10:45,13.9,Segunda,86.3,5.3,Barueri,Antigo
9996,9996,10:45,15.6,Domingo,55.3,2.8,Osasco,Novo
9997,9997,08:00,12.1,Sexta,80.1,4.5,Presidente Altino,Antigo
9998,9998,08:00,12.2,Quinta,84.2,4.3,Barra Funda,Antigo


In [3]:
# Converte "HH:MM" para minutos
def horario_para_minutos(hora):
    h, m = map(int, hora.split(":"))
    return h * 60 + m

df["Horario_min"] = df["Horario"].apply(horario_para_minutos)

# Seleção de features e target
X = df[["Horario_min", "Dia da Semana", "Lotacao", "Atraso"]]
y = df["TMP"]

In [4]:
# ======================
# 3. Dividir dados
# ======================

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [5]:
# ======================
# 4. Pipeline + Modelo
# ======================

cat_features = ["Dia da Semana"]
num_features = ["Horario_min", "Lotacao", "Atraso"]

preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat_features),
    ("num", "passthrough", num_features)
])

In [6]:
# MLP Regressor com estrutura semelhante ao MNIST
from sklearn.tree import DecisionTreeRegressor

pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", DecisionTreeRegressor(random_state=42))
])

pipeline.fit(X_train, y_train)

In [7]:
# ======================
# 5. Avaliação
# ======================

# Faça previsões
y_pred = pipeline.predict(X_test)

# Calcule as métricas
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f'MAE: {mae:.2f}')
print(f'RMSE: {rmse:.2f}')

MAE: 3.70
RMSE: 4.51


In [8]:

# ======================
# 6. Salvar modelo
# ======================

with open("mlp_previsao_trem.pkl", "wb") as f:
    pickle.dump(pipeline, f)