In [57]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import joblib

# --- 1) Column renaming: original English CSV headers -> Spanish names used below ---
mapa = dict(
    Timestamp="marca_tiempo",
    Asset_ID="id_activo",
    Latitude="latitud",
    Longitude="longitud",
    Inventory_Level="nivel_inventario",
    Shipment_Status="estado_envio",
    Temperature="temperatura",
    Humidity="humedad",
    Traffic_Status="estado_trafico",
    Waiting_Time="tiempo_espera",
    User_Transaction_Amount="monto_transaccion_usuario",
    User_Purchase_Frequency="frecuencia_compra_usuario",
    Logistics_Delay_Reason="motivo_retraso_logistico",
    Asset_Utilization="utilizacion_activo",
    Demand_Forecast="pronostico_demanda",
    Logistics_Delay="retraso_logistico",
)

# --- 2) Columns removed up front because they are treated as target leakage ---
# NOTE(review): "retraso_logistico" is kept as a feature; confirm it is known at
# prediction time, otherwise it presumably belongs in this list as well.
excluir_por_leakage = ["estado_envio", "motivo_retraso_logistico"]

# --- Load the CSV and apply every step as one chain (nothing is mutated in place) ---
# Unparseable timestamps become NaT (errors="coerce"); leakage columns that are
# absent from the frame are skipped rather than raising (errors="ignore").
df = (
    pd.read_csv("smart_logistics_dataset.csv")
    .rename(columns=mapa)
    .assign(marca_tiempo=lambda d: pd.to_datetime(d["marca_tiempo"], errors="coerce"))
    .drop(columns=excluir_por_leakage, errors="ignore")
)

# --- 3) Feature columns: every numeric column except the target ---
objetivo = "tiempo_espera"
columnas_numericas = [
    nombre
    for nombre in df.columns
    if nombre != objetivo and pd.api.types.is_numeric_dtype(df[nombre])
]

# --- 4) Build X / y as float64 and clean non-finite values ---
# Rows whose target is NaN or ±inf are dropped from both X and y. In the
# features, ±inf is turned into NaN and those rows are kept.
y = df[objetivo].astype("float64")
mask_finitos = np.isfinite(y)

X = (
    df[columnas_numericas]
    .astype("float64")
    .replace([np.inf, -np.inf], np.nan)
    .loc[mask_finitos]
)
y = y.loc[mask_finitos]

# --- 5) Single model (pipeline): median imputation -> standardization -> linear regression ---
modelo = make_pipeline(
    SimpleImputer(strategy="median"),
    StandardScaler(),  # defaults are with_mean=True, with_std=True
    LinearRegression(),
)

# --- 6) Hold out 20% of the rows; the seed is fixed so the split is repeatable ---
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- 7) Fit on the training rows only, then predict and score the hold-out rows ---
y_pred = modelo.fit(x_train, y_train).predict(x_test)
mse, r2 = (metrica(y_test, y_pred) for metrica in (mean_squared_error, r2_score))

  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_


In [58]:
# Report the shapes of both splits, then the hold-out metrics.
# An R² at or below 0 means the model predicts no better than a constant
# equal to the mean of y_test.
print("Train size:", *(parte.shape for parte in (x_train, y_train)))
print("Test size:", *(parte.shape for parte in (x_test, y_test)))
print("MSE:", mse)
print("R²:", r2)

Train size: (800, 10) (800,)
Test size: (200, 10) (200,)
MSE: 217.10896540683723
R²: -0.00947120333477769


In [62]:
import joblib

# Bundle the fitted pipeline with the feature names (in training order) so that
# whoever loads the file can rebuild the input columns the model expects.
artifacts = dict(
    model=modelo,
    features=columnas_numericas,
)
joblib.dump(value=artifacts, filename="smart_logistics_artifacts.pkl")
print("Artefactos guardados.")

Artefactos guardados.


In [61]:
# Coefficients of the fitted regression, one per entry of `columnas_numericas`.
# The fitted estimator is the last step of the `modelo` pipeline (a Pipeline has
# no `coef_` of its own, and no bare `model` variable exists in this notebook).
# make_pipeline names each step after its lowercased class name. Because a
# StandardScaler precedes the regression, the values are on the standardized
# feature scale.
modelo.named_steps["linearregression"].coef_

array([-2.51535001e-03,  6.35529994e-03, -9.75044120e-04,  1.63220276e-02,
        6.85969301e-02, -1.10977628e-03,  7.72766837e-02,  1.37997619e-02,
       -1.14680796e-02, -2.13139010e+00])

In [None]:
# NOTE(review): unfinished scratch cell that was never executed. No column named
# "" is created anywhere in this notebook, so this presumably raises KeyError
# under Restart & Run All. Fill in a real column name or delete the cell.
df[""]

In [64]:
# Re-read the raw file to inspect it with its original English headers;
# `df` has already been renamed and had columns dropped by this point.
pd.read_csv(filepath_or_buffer="smart_logistics_dataset.csv")

Unnamed: 0,Timestamp,Asset_ID,Latitude,Longitude,Inventory_Level,Shipment_Status,Temperature,Humidity,Traffic_Status,Waiting_Time,User_Transaction_Amount,User_Purchase_Frequency,Logistics_Delay_Reason,Asset_Utilization,Demand_Forecast,Logistics_Delay
0,2024-03-20 00:11:14,Truck_7,-65.7383,11.2497,390,Delayed,27.0,67.8,Detour,38,320,4,,60.1,285,1
1,2024-10-30 07:53:51,Truck_6,22.2748,-131.7086,491,In Transit,22.5,54.3,Heavy,16,439,7,Weather,80.9,174,1
2,2024-07-29 18:42:48,Truck_10,54.9232,79.5455,190,In Transit,25.2,62.2,Detour,34,355,3,,99.2,260,0
3,2024-10-28 00:50:54,Truck_9,42.3900,-1.4788,330,Delivered,25.4,52.3,Heavy,37,227,5,Traffic,97.4,160,1
4,2024-09-27 15:52:58,Truck_7,-65.8477,47.9468,480,Delayed,20.5,57.2,Clear,56,197,6,,71.6,270,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,2024-07-22 16:30:00,Truck_6,89.8701,73.6867,264,Delivered,26.9,70.0,Heavy,32,188,1,Weather,79.2,213,1
996,2024-04-30 04:58:58,Truck_5,-10.4792,-177.1239,479,Delivered,23.7,77.9,Detour,56,276,7,Weather,83.7,272,0
997,2024-10-27 22:09:13,Truck_2,-71.0609,75.3714,347,In Transit,21.0,63.1,Detour,35,382,5,,74.8,275,0
998,2024-04-18 23:06:56,Truck_2,-76.7910,18.3631,276,Delivered,18.0,64.3,Heavy,10,361,5,,88.6,242,1
