In [1]:
from dotenv import load_dotenv
import os

# Cargar automáticamente las variables del archivo .env
load_dotenv()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

DATA_RAW = os.getenv('DATA_RAW')
DATA_PREPROCESSED = os.getenv('DATA_PROCESSED')
MODELS = os.getenv('MODELS')
df = pd.read_csv(os.path.join('..',DATA_RAW, "dataframe_merged.csv"), sep=";")

df.head(5)

df_features = df.copy()
df_features.drop([f'Sabor {i}' for i in range(1,80)], axis = 1, inplace = True)
df_target = df.loc[:, df.columns.str.startswith("Sabor")].copy()
df_target.head()

Unnamed: 0,Sabor 1,Sabor 2,Sabor 3,Sabor 4,Sabor 5,Sabor 6,Sabor 7,Sabor 8,Sabor 9,Sabor 10,...,Sabor 70,Sabor 71,Sabor 72,Sabor 73,Sabor 74,Sabor 75,Sabor 76,Sabor 77,Sabor 78,Sabor 79
0,5.36,12.45,5.47,5.03,,12.41,11.6,5.6,6.22,11.21,...,,,,,,,,,,
1,22.92,,26.5,,,32.5,5.08,5.12,,25.26,...,,1.07,,37.09,43.6,,20.0,50.0,28.03,11.63
2,34.39,,35.99,,,32.64,20.1,20.37,,49.79,...,,3.17,3.29,30.47,37.01,,20.0,50.0,25.95,10.61
3,10.66,,12.38,,,21.97,,10.21,,22.96,...,,,1.02,11.89,11.54,,10.0,10.0,11.3,5.59
4,11.42,,23.53,,,18.89,5.05,5.07,,26.02,...,,1.04,3.06,18.13,25.47,,60.0,70.0,12.66,11.94


In [2]:
df_features = df_features.rename(
        columns={
            'Ajuste Fecha': 'date',
            'EsFeriado':'holiday',
            'Estacion':'season'
            }
    )
df_features = df_features[['date', 'temp', 'humidity','wind_speed','clouds_all','holiday','season']]
df_features['date'] = pd.to_datetime(df_features['date'])
df_features.drop('season', axis = 1, inplace = True)

In [3]:
df_features

Unnamed: 0,date,temp,humidity,wind_speed,clouds_all,holiday
0,2022-01-01,25.950400,64.0,3.270,40.0,True
1,2022-01-02,25.956667,80.0,2.570,0.5,False
2,2022-01-03,28.290400,67.0,2.060,0.0,False
3,2022-01-04,26.944615,69.0,4.955,0.0,False
4,2022-01-05,24.381250,53.5,4.885,0.0,False
...,...,...,...,...,...,...
1091,2024-12-27,22.339167,58.5,2.150,0.0,False
1092,2024-12-28,24.437500,49.0,2.240,0.0,False
1093,2024-12-29,24.166250,52.5,4.470,0.0,False
1094,2024-12-30,25.622917,59.5,4.920,0.0,False


In [4]:
import sys
from pathlib import Path

#Añadír la carpeta src al PYTHONPATH de la sesión 
ROOT = Path.cwd().parent        # .. = carpeta raíz del proyecto
sys.path.append(str(ROOT / "src"))

# Importar mis clases
from features.my_transformers import LagRoller
from features.my_transformers import CyclicalEncoder, InformationOfDateExtractor, SeasonGetter, SeasonOrdinalizer
import joblib
pipeline = joblib.load("../src/features/features_pipeline.joblib")

In [5]:
X = pipeline.fit_transform(df_features)

In [6]:
# Funcion para combinar features y target
def concatFeaturesTarget(X:np.array, y:pd.Series, features_columns = list):
    columns = features_columns
    df_full = pd.DataFrame(X, columns = columns)
    df_full['kg'] = y.fillna(0)
    return df_full

In [7]:
columns = list(pipeline.get_feature_names_out())
target = 'Sabor 1'
concatFeaturesTarget(X, df_target[target], columns)

Unnamed: 0,temp,humidity,wind_speed,clouds_all,holiday,month_sin,month_cos,season_sin,season_cos,day_sin,day_cos,kg
0,0.744146,0.485714,0.21288,0.4,True,0.5,0.866025,0.0,1.0,0.201299,0.97953,5.36
1,0.744365,0.714286,0.150268,0.005,False,0.5,0.866025,0.0,1.0,0.394356,0.918958,22.92
2,0.826019,0.528571,0.104651,0.0,False,0.5,0.866025,0.0,1.0,0.571268,0.820763,34.39
3,0.778932,0.557143,0.363596,0.0,False,0.5,0.866025,0.0,1.0,0.724793,0.688967,10.66
4,0.689244,0.335714,0.357335,0.0,False,0.5,0.866025,0.0,1.0,0.848644,0.528964,11.42
...,...,...,...,...,...,...,...,...,...,...,...,...
1091,0.617795,0.407143,0.112701,0.0,False,-0.0,1.0,0.0,1.0,-0.724793,0.688967,16.74
1092,0.691212,0.271429,0.120751,0.0,False,-0.0,1.0,0.0,1.0,-0.571268,0.820763,5.42
1093,0.681721,0.321429,0.320215,0.0,False,-0.0,1.0,0.0,1.0,-0.394356,0.918958,38.55
1094,0.732688,0.421429,0.360465,0.0,False,-0.0,1.0,0.0,1.0,-0.201299,0.97953,48.28


In [8]:
df = concatFeaturesTarget(X, df_target[target], columns)
lags= 30
horizon= 14
roll_windows=(7,)
lagRoller = LagRoller(lags = lags, horizon = horizon, roll_windows = roll_windows)
lagRoller.fit(df)

In [9]:
lagRoller.get_feature_names_out()

array(['temp', 'humidity', 'wind_speed', 'clouds_all', 'holiday',
       'month_sin', 'month_cos', 'season_sin', 'season_cos', 'day_sin',
       'day_cos', 'kg', 'temp_lag_1', 'temp_lag_2', 'temp_lag_3',
       'temp_lag_4', 'temp_lag_5', 'temp_lag_6', 'temp_lag_7',
       'temp_lag_8', 'temp_lag_9', 'temp_lag_10', 'temp_lag_11',
       'temp_lag_12', 'temp_lag_13', 'temp_lag_14', 'temp_lag_15',
       'temp_lag_16', 'temp_lag_17', 'temp_lag_18', 'temp_lag_19',
       'temp_lag_20', 'temp_lag_21', 'temp_lag_22', 'temp_lag_23',
       'temp_lag_24', 'temp_lag_25', 'temp_lag_26', 'temp_lag_27',
       'temp_lag_28', 'temp_lag_29', 'temp_lag_30', 'temp_roll7_mean',
       'temp_roll7_std', 'humidity_lag_1', 'humidity_lag_2',
       'humidity_lag_3', 'humidity_lag_4', 'humidity_lag_5',
       'humidity_lag_6', 'humidity_lag_7', 'humidity_lag_8',
       'humidity_lag_9', 'humidity_lag_10', 'humidity_lag_11',
       'humidity_lag_12', 'humidity_lag_13', 'humidity_lag_14',
       'humidity_la

In [10]:
df_lag = lagRoller.transform(df)

  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].shift(i)
  df[f"{col}_lag_{i}"] = df[col].s

In [11]:
df_lag

Unnamed: 0,temp,humidity,wind_speed,clouds_all,holiday,month_sin,month_cos,season_sin,season_cos,day_sin,...,kg_t_plus_5,kg_t_plus_6,kg_t_plus_7,kg_t_plus_8,kg_t_plus_9,kg_t_plus_10,kg_t_plus_11,kg_t_plus_12,kg_t_plus_13,kg_t_plus_14
30,0.676021,0.535714,0.311717,0.0,False,0.5,0.866025,0.0,1.0,-0.0,...,16.37,5.53,27.19,5.55,17.14,38.92,11.31,27.66,22.83,22.32
31,0.705878,0.3,0.242397,0.0,False,0.866025,0.5,0.0,1.0,0.201299,...,5.53,27.19,5.55,17.14,38.92,11.31,27.66,22.83,22.32,10.25
32,0.834169,0.421429,0.219589,0.0,False,0.866025,0.5,0.0,1.0,0.394356,...,27.19,5.55,17.14,38.92,11.31,27.66,22.83,22.32,10.25,10.69
33,0.697896,0.742857,0.242397,0.75,False,0.866025,0.5,0.0,1.0,0.571268,...,5.55,17.14,38.92,11.31,27.66,22.83,22.32,10.25,10.69,17.44
34,0.678732,0.771429,0.150268,0.4,False,0.866025,0.5,0.0,1.0,0.724793,...,17.14,38.92,11.31,27.66,22.83,22.32,10.25,10.69,17.44,16.78
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1077,0.69923,0.314286,0.520572,0.0,False,-0.0,1.0,-1.0,-0.0,0.485302,...,10.58,21.70,4.18,29.64,31.33,36.02,56.94,16.31,38.89,16.74
1078,0.687203,0.314286,0.288909,0.0,False,-0.0,1.0,-1.0,-0.0,0.299363,...,21.70,4.18,29.64,31.33,36.02,56.94,16.31,38.89,16.74,5.42
1079,0.672012,0.471429,0.288909,0.0,False,-0.0,1.0,-1.0,-0.0,0.101168,...,4.18,29.64,31.33,36.02,56.94,16.31,38.89,16.74,5.42,38.55
1080,0.592967,0.328571,0.600179,0.0,False,-0.0,1.0,-1.0,-0.0,-0.101168,...,29.64,31.33,36.02,56.94,16.31,38.89,16.74,5.42,38.55,48.28


In [13]:
import pandas as pd, numpy as np
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor

data = df_lag
#X = data.filter(like='lag_').join(data[['roll7_mean', 'roll7_std']])
X = data.filter(like='lag_').join(data.filter(like='roll7_'))
y = data[[f'kg_t_plus_{h}' for h in range(1, horizon+1)]]

# ── 3. Split temporal (sin barajar) ───────────────────────────
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, shuffle=False)

In [20]:
# justo antes de .fit(...)
X_train = X_train.astype('float32')
X_val   = X_val.astype('float32')
X = X.astype('float32')

In [None]:
# ── 4. Modelo XGBoost ────────────────────────────────────────
xgb_params = dict(
    n_estimators=500,
    learning_rate=0.01,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='reg:squarederror',
    random_state=42
)

base_model = XGBRegressor(**xgb_params)
model      = MultiOutputRegressor(base_model, n_jobs=-1)

model.fit(X_train, y_train)

MAE validación (14 días promedio): 8.392


ValueError: DataFrame.dtypes for data must be int, float, bool or category. When categorical type is supplied, the experimental DMatrix parameter`enable_categorical` must be set to `True`.  Invalid columns:temp_lag_1: object, temp_lag_2: object, temp_lag_3: object, temp_lag_4: object, temp_lag_5: object, temp_lag_6: object, temp_lag_7: object, temp_lag_8: object, temp_lag_9: object, temp_lag_10: object, temp_lag_11: object, temp_lag_12: object, temp_lag_13: object, temp_lag_14: object, temp_lag_15: object, temp_lag_16: object, temp_lag_17: object, temp_lag_18: object, temp_lag_19: object, temp_lag_20: object, temp_lag_21: object, temp_lag_22: object, temp_lag_23: object, temp_lag_24: object, temp_lag_25: object, temp_lag_26: object, temp_lag_27: object, temp_lag_28: object, temp_lag_29: object, temp_lag_30: object, humidity_lag_1: object, humidity_lag_2: object, humidity_lag_3: object, humidity_lag_4: object, humidity_lag_5: object, humidity_lag_6: object, humidity_lag_7: object, humidity_lag_8: object, humidity_lag_9: object, humidity_lag_10: object, humidity_lag_11: object, humidity_lag_12: object, humidity_lag_13: object, humidity_lag_14: object, humidity_lag_15: object, humidity_lag_16: object, humidity_lag_17: object, humidity_lag_18: object, humidity_lag_19: object, humidity_lag_20: object, humidity_lag_21: object, humidity_lag_22: object, humidity_lag_23: object, humidity_lag_24: object, humidity_lag_25: object, humidity_lag_26: object, humidity_lag_27: object, humidity_lag_28: object, humidity_lag_29: object, humidity_lag_30: object, wind_speed_lag_1: object, wind_speed_lag_2: object, wind_speed_lag_3: object, wind_speed_lag_4: object, wind_speed_lag_5: object, wind_speed_lag_6: object, wind_speed_lag_7: object, wind_speed_lag_8: object, wind_speed_lag_9: object, wind_speed_lag_10: object, wind_speed_lag_11: object, wind_speed_lag_12: object, wind_speed_lag_13: object, wind_speed_lag_14: object, wind_speed_lag_15: object, wind_speed_lag_16: object, wind_speed_lag_17: object, wind_speed_lag_18: object, wind_speed_lag_19: object, wind_speed_lag_20: object, wind_speed_lag_21: object, wind_speed_lag_22: object, wind_speed_lag_23: object, wind_speed_lag_24: object, wind_speed_lag_25: object, wind_speed_lag_26: object, wind_speed_lag_27: object, wind_speed_lag_28: object, wind_speed_lag_29: object, wind_speed_lag_30: object, clouds_all_lag_1: object, clouds_all_lag_2: object, clouds_all_lag_3: object, clouds_all_lag_4: object, clouds_all_lag_5: object, clouds_all_lag_6: object, clouds_all_lag_7: object, clouds_all_lag_8: object, clouds_all_lag_9: object, clouds_all_lag_10: object, clouds_all_lag_11: object, clouds_all_lag_12: object, clouds_all_lag_13: object, clouds_all_lag_14: object, clouds_all_lag_15: object, clouds_all_lag_16: object, clouds_all_lag_17: object, clouds_all_lag_18: object, clouds_all_lag_19: object, clouds_all_lag_20: object, clouds_all_lag_21: object, clouds_all_lag_22: object, clouds_all_lag_23: object, clouds_all_lag_24: object, clouds_all_lag_25: object, clouds_all_lag_26: object, clouds_all_lag_27: object, clouds_all_lag_28: object, clouds_all_lag_29: object, clouds_all_lag_30: object, holiday_lag_1: object, holiday_lag_2: object, holiday_lag_3: object, holiday_lag_4: object, holiday_lag_5: object, holiday_lag_6: object, holiday_lag_7: object, holiday_lag_8: object, holiday_lag_9: object, holiday_lag_10: object, holiday_lag_11: object, holiday_lag_12: object, holiday_lag_13: object, holiday_lag_14: object, holiday_lag_15: object, holiday_lag_16: object, holiday_lag_17: object, holiday_lag_18: object, holiday_lag_19: object, holiday_lag_20: object, holiday_lag_21: object, holiday_lag_22: object, holiday_lag_23: object, holiday_lag_24: object, holiday_lag_25: object, holiday_lag_26: object, holiday_lag_27: object, holiday_lag_28: object, holiday_lag_29: object, holiday_lag_30: object, month_sin_lag_1: object, month_sin_lag_2: object, month_sin_lag_3: object, month_sin_lag_4: object, month_sin_lag_5: object, month_sin_lag_6: object, month_sin_lag_7: object, month_sin_lag_8: object, month_sin_lag_9: object, month_sin_lag_10: object, month_sin_lag_11: object, month_sin_lag_12: object, month_sin_lag_13: object, month_sin_lag_14: object, month_sin_lag_15: object, month_sin_lag_16: object, month_sin_lag_17: object, month_sin_lag_18: object, month_sin_lag_19: object, month_sin_lag_20: object, month_sin_lag_21: object, month_sin_lag_22: object, month_sin_lag_23: object, month_sin_lag_24: object, month_sin_lag_25: object, month_sin_lag_26: object, month_sin_lag_27: object, month_sin_lag_28: object, month_sin_lag_29: object, month_sin_lag_30: object, month_cos_lag_1: object, month_cos_lag_2: object, month_cos_lag_3: object, month_cos_lag_4: object, month_cos_lag_5: object, month_cos_lag_6: object, month_cos_lag_7: object, month_cos_lag_8: object, month_cos_lag_9: object, month_cos_lag_10: object, month_cos_lag_11: object, month_cos_lag_12: object, month_cos_lag_13: object, month_cos_lag_14: object, month_cos_lag_15: object, month_cos_lag_16: object, month_cos_lag_17: object, month_cos_lag_18: object, month_cos_lag_19: object, month_cos_lag_20: object, month_cos_lag_21: object, month_cos_lag_22: object, month_cos_lag_23: object, month_cos_lag_24: object, month_cos_lag_25: object, month_cos_lag_26: object, month_cos_lag_27: object, month_cos_lag_28: object, month_cos_lag_29: object, month_cos_lag_30: object, season_sin_lag_1: object, season_sin_lag_2: object, season_sin_lag_3: object, season_sin_lag_4: object, season_sin_lag_5: object, season_sin_lag_6: object, season_sin_lag_7: object, season_sin_lag_8: object, season_sin_lag_9: object, season_sin_lag_10: object, season_sin_lag_11: object, season_sin_lag_12: object, season_sin_lag_13: object, season_sin_lag_14: object, season_sin_lag_15: object, season_sin_lag_16: object, season_sin_lag_17: object, season_sin_lag_18: object, season_sin_lag_19: object, season_sin_lag_20: object, season_sin_lag_21: object, season_sin_lag_22: object, season_sin_lag_23: object, season_sin_lag_24: object, season_sin_lag_25: object, season_sin_lag_26: object, season_sin_lag_27: object, season_sin_lag_28: object, season_sin_lag_29: object, season_sin_lag_30: object, season_cos_lag_1: object, season_cos_lag_2: object, season_cos_lag_3: object, season_cos_lag_4: object, season_cos_lag_5: object, season_cos_lag_6: object, season_cos_lag_7: object, season_cos_lag_8: object, season_cos_lag_9: object, season_cos_lag_10: object, season_cos_lag_11: object, season_cos_lag_12: object, season_cos_lag_13: object, season_cos_lag_14: object, season_cos_lag_15: object, season_cos_lag_16: object, season_cos_lag_17: object, season_cos_lag_18: object, season_cos_lag_19: object, season_cos_lag_20: object, season_cos_lag_21: object, season_cos_lag_22: object, season_cos_lag_23: object, season_cos_lag_24: object, season_cos_lag_25: object, season_cos_lag_26: object, season_cos_lag_27: object, season_cos_lag_28: object, season_cos_lag_29: object, season_cos_lag_30: object, day_sin_lag_1: object, day_sin_lag_2: object, day_sin_lag_3: object, day_sin_lag_4: object, day_sin_lag_5: object, day_sin_lag_6: object, day_sin_lag_7: object, day_sin_lag_8: object, day_sin_lag_9: object, day_sin_lag_10: object, day_sin_lag_11: object, day_sin_lag_12: object, day_sin_lag_13: object, day_sin_lag_14: object, day_sin_lag_15: object, day_sin_lag_16: object, day_sin_lag_17: object, day_sin_lag_18: object, day_sin_lag_19: object, day_sin_lag_20: object, day_sin_lag_21: object, day_sin_lag_22: object, day_sin_lag_23: object, day_sin_lag_24: object, day_sin_lag_25: object, day_sin_lag_26: object, day_sin_lag_27: object, day_sin_lag_28: object, day_sin_lag_29: object, day_sin_lag_30: object, day_cos_lag_1: object, day_cos_lag_2: object, day_cos_lag_3: object, day_cos_lag_4: object, day_cos_lag_5: object, day_cos_lag_6: object, day_cos_lag_7: object, day_cos_lag_8: object, day_cos_lag_9: object, day_cos_lag_10: object, day_cos_lag_11: object, day_cos_lag_12: object, day_cos_lag_13: object, day_cos_lag_14: object, day_cos_lag_15: object, day_cos_lag_16: object, day_cos_lag_17: object, day_cos_lag_18: object, day_cos_lag_19: object, day_cos_lag_20: object, day_cos_lag_21: object, day_cos_lag_22: object, day_cos_lag_23: object, day_cos_lag_24: object, day_cos_lag_25: object, day_cos_lag_26: object, day_cos_lag_27: object, day_cos_lag_28: object, day_cos_lag_29: object, day_cos_lag_30: object

In [21]:
# ── 5. Evaluación rápida ─────────────────────────────────────
pred_val = model.predict(X_val)
mae      = mean_absolute_error(y_val, pred_val)
print(f"MAE validación (14 días promedio): {mae:0.3f}")

# ── 6. Pronóstico futuro ─────────────────────────────────────
last_row = X.tail(1)                     # features hasta hoy
forecast_scaled = model.predict(last_row)[0]  # array(14,)
# si habías escalado antes, des-escala aquí.  (robust, min-max, etc.)
print("Pronóstico próximos 14 días:", forecast_scaled)


MAE validación (14 días promedio): 8.392
Pronóstico próximos 14 días: [16.134127 14.364522 19.690939 27.06571  24.530418 22.763205 26.189808
 21.72269  18.283482 25.0611   31.675068 21.597404 19.4157   19.946676]
