In [None]:
import pandas as pd
from prophet import Prophet
from tslearn.clustering import TimeSeriesKMeans
from tslearn.metrics import dtw
from tslearn.utils import to_time_series_dataset
from sklearn.preprocessing import StandardScaler
import numpy as np
import os

# Load the base dataset: "periodo" is parsed as dates and product_id is cast to text
df = pd.read_csv(
    "dataset_base_features.csv",
    parse_dates=["periodo"],
).assign(product_id=lambda frame: frame['product_id'].astype(str))

# ---------- PROPHET FEATURES ----------

def generar_features_prophet(df):
    """Fit one Prophet model per product and export its latest trend/seasonal values.

    Each product's ``periodo``/``tn`` history is fitted with a Prophet model that
    only has yearly seasonality enabled. Products with fewer than 4 rows, or fewer
    than 4 rows with ``tn > 0``, are skipped. If fitting or predicting raises for a
    product, the error is printed and that product is skipped (best effort).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``product_id``, ``periodo`` and ``tn``.

    Returns
    -------
    pandas.DataFrame
        One row per fitted product with ``product_id``, ``trend_prophet`` and
        ``seasonal_prophet``, taken from the last row of the in-sample forecast.
        The same frame is written to ``prophet_features.csv``. The three columns
        are present even when no product could be fitted.
    """
    columnas = ['product_id', 'trend_prophet', 'seasonal_prophet']
    features = []

    # One groupby pass instead of a full boolean scan of df per product;
    # sort=False keeps products in order of first appearance.
    # NOTE(review): if df has several rows per (product_id, periodo), Prophet is
    # fitted on duplicated dates - confirm whether tn should be aggregated first.
    for pid, grupo in df.groupby('product_id', sort=False):
        serie = grupo[['periodo', 'tn']].rename(columns={"periodo": "ds", "tn": "y"})

        if len(serie) < 4 or (serie['y'] > 0).sum() < 4:
            continue

        try:
            m = Prophet(weekly_seasonality=False, daily_seasonality=False, yearly_seasonality=True)
            m.fit(serie)

            # periods=0: the "future" frame only contains the historical dates.
            future = m.make_future_dataframe(periods=0, freq='MS')
            forecast = m.predict(future)

            # Old fbprophet releases exposed the summed seasonality as 'seasonal';
            # current releases name it 'additive_terms'. Indexing 'seasonal'
            # unconditionally raised KeyError for every product, which the
            # except below swallowed, so the output was always empty.
            if 'seasonal' in forecast.columns:
                col_estacional = 'seasonal'
            else:
                col_estacional = 'additive_terms'

            ultimo = forecast.iloc[-1]
            features.append({
                'product_id': pid,
                'trend_prophet': ultimo['trend'],
                'seasonal_prophet': ultimo[col_estacional]
            })

        except Exception as e:
            print(f"⚠️ Error con product_id={pid}: {e}")

    # Passing the column list keeps the schema (and the CSV header) stable even
    # when no product produced features.
    df_prophet = pd.DataFrame(features, columns=columnas)
    df_prophet.to_csv("prophet_features.csv", index=False)
    print("✅ prophet_features.csv generado")
    return df_prophet




In [None]:
# ---------- CLUSTERING CON DTW ----------

def generar_clusters_dtw(df, k=50):
    """Cluster product-level ``tn`` series with DTW k-means and export the labels.

    ``tn`` is summed per (``product_id``, ``periodo``), reshaped to one row per
    product and one column per period (missing periods become 0), and only
    products with at least 4 periods where ``tn > 0`` are kept. Each series is
    then standardised on its own and clustered with ``TimeSeriesKMeans``
    (``metric="dtw"``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``product_id``, ``periodo`` and ``tn``.
    k : int, default 50
        Requested number of clusters. It is lowered to the number of eligible
        series when fewer than ``k`` series are available.

    Returns
    -------
    pandas.DataFrame
        Columns ``product_id`` and ``cluster_dtw``; also written to
        ``dtw_clusters.csv``. Empty (with both columns) when no series is eligible.
    """
    columnas = ['product_id', 'cluster_dtw']

    # Aggregate before reshaping: DataFrame.pivot raises ValueError as soon as a
    # (product_id, periodo) pair appears more than once. With unique pairs the
    # sum leaves the values unchanged.
    pivot = (
        df.groupby(["product_id", "periodo"])["tn"]
        .sum()
        .unstack("periodo", fill_value=0)
    )
    pivot = pivot.loc[(pivot > 0).sum(axis=1) >= 4]

    if pivot.empty:
        # Nothing to cluster; still emit a well-formed (header-only) CSV.
        print("⚠️ Ninguna serie tiene al menos 4 períodos con tn > 0; no se generan clusters")
        df_cluster = pd.DataFrame(columns=columnas)
        df_cluster.to_csv("dtw_clusters.csv", index=False)
        return df_cluster

    # StandardScaler standardises columns. Fitting it on the transposed matrix
    # makes every *series* (row of pivot) zero-mean/unit-variance, instead of
    # rescaling each period across products, which distorts each series' shape.
    scaler = StandardScaler()
    scaled = scaler.fit_transform(pivot.T).T

    ts_data = to_time_series_dataset(scaled)

    # k-means cannot build more clusters than there are series.
    n_clusters = min(k, len(pivot))
    if n_clusters < k:
        print(f"⚠️ Solo hay {len(pivot)} series; se usan {n_clusters} clusters en lugar de {k}")

    km_dtw = TimeSeriesKMeans(n_clusters=n_clusters, metric="dtw", max_iter=10, random_state=42)
    labels = km_dtw.fit_predict(ts_data)

    df_cluster = pd.DataFrame({
        'product_id': pivot.index,
        'cluster_dtw': labels
    })

    df_cluster.to_csv("dtw_clusters.csv", index=False)
    print("✅ dtw_clusters.csv generado")
    return df_cluster

# Run both feature generators on the loaded dataset
if __name__ == "__main__":
    prophet_features = generar_features_prophet(df=df)
    dtw_clusters = generar_clusters_dtw(df=df, k=50)

In [None]:
import pandas as pd 
import numpy as np
import optuna
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# === 1. Load base data ===
df = pd.read_csv(
    "dataset_base_features.csv", parse_dates=['periodo']
).assign(product_id=lambda frame: frame['product_id'].astype(str))

# === 2. Training rows: every period up to and including 2019-10-01 ===
train_df = df.loc[df['periodo'] <= '2019-10-01'].copy()

# === 3. Target and feature matrix ===
# The target plus the identifier/date columns are removed from the features;
# columns from this list that are absent from the data are simply ignored.
exclude = ['tn', 'customer_id', 'periodo', 'product_id']
y = train_df['tn']
X = train_df.drop(columns=exclude, errors='ignore')

# === 4. Train/validation split ===
# NOTE(review): this is a random 80/20 row split, not a split by period -
# confirm that is intended for a forecasting task.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, random_state=42, test_size=0.2
)
# Per-row training weights: log1p(tn + 1)
sample_weight = np.log1p(y_train.add(1))

# === 5. Optuna study persisted in SQLite ===
# load_if_exists=True resumes the stored study with this name when it is already there.
storage = "sqlite:///optuna_tn_studyww.db"
study = optuna.create_study(
    study_name="prediccion_tn1",
    storage=storage,
    direction='minimize',
    load_if_exists=True,
)

# === 6. Definir función objetivo ===
def objective(trial):
    """Optuna objective: validation RMSE of an XGBRegressor with sampled hyperparameters.

    Reads the module-level ``X_train``, ``y_train``, ``sample_weight``, ``X_val``
    and ``y_val``. Returns the root of the mean squared error on the validation set.
    """
    # Search space as (name, low, high); suggestions are drawn in this order.
    int_space = (
        ('n_estimators', 100, 500),
        ('max_depth', 3, 10),
    )
    float_space = (
        ('learning_rate', 0.01, 0.1),
        ('subsample', 0.6, 1.0),
        ('colsample_bytree', 0.6, 1.0),
        ('reg_alpha', 0.0, 5.0),
        ('reg_lambda', 0.0, 5.0),
    )

    hyper = {name: trial.suggest_int(name, low, high) for name, low, high in int_space}
    hyper.update(
        (name, trial.suggest_float(name, low, high)) for name, low, high in float_space
    )

    # Fixed (non-tuned) settings are passed alongside the sampled ones.
    regressor = XGBRegressor(tree_method='hist', random_state=42, n_jobs=-1, **hyper)
    regressor.fit(X_train, y_train, sample_weight=sample_weight)

    val_pred = regressor.predict(X_val)
    return np.sqrt(mean_squared_error(y_val, val_pred))

# === 7. Run the optimisation ===
study.optimize(objective, n_trials=30)

# === 8. Train the final model with the best hyperparameters ===
# The fixed settings are not part of study.best_params, so they are re-added here.
best_params = study.best_params
best_params.update({'tree_method': 'hist', 'random_state': 42})
model_final = XGBRegressor(**best_params)
model_final.fit(X, y, sample_weight=np.log1p(y + 1))

# === 9. Prediction for February 2020 ===
# December 2019 rows are reused as the feature snapshot and relabelled as
# February 2020, keeping one row per product.
febrero = df[df['periodo'] == '2019-12-01'].copy()
febrero['periodo'] = pd.to_datetime('2020-02-01')
febrero = febrero.groupby('product_id', as_index=False).first()

# === 10. Manual lags ===
# lag_1/lag_2/lag_3 = mean tn per product in Dec/Nov/Oct 2019.
for i, mes in zip(range(1, 4), ['2019-12-01', '2019-11-01', '2019-10-01']):
    lag_col = f'lag_{i}'
    lag_df = (
        df[df['periodo'] == mes]
        .groupby('product_id', as_index=False)['tn']
        .mean()
        .rename(columns={'tn': lag_col})
    )
    # Drop any pre-existing column with the same name before merging. Otherwise
    # merge would emit lag_i_x / lag_i_y, the plain lag_i would then count as
    # "missing" below and be silently replaced by zeros.
    febrero = (
        febrero
        .drop(columns=[lag_col], errors='ignore')
        .merge(lag_df, on='product_id', how='left')
    )

# === 11. Predict and export (column safeguard) ===
# Any training feature absent from the prediction frame is filled with 0;
# report which ones so the fallback is not silent.
faltantes = set(X.columns) - set(febrero.columns)
if faltantes:
    print(f"⚠️ Features ausentes rellenadas con 0: {[c for c in X.columns if c in faltantes]}")
for col in faltantes:
    febrero[col] = 0

# Same column order as X
febrero_X = febrero[X.columns]

# Predict
febrero['tn_pred'] = model_final.predict(febrero_X)

# Export
febrero[['product_id', 'tn_pred']].to_csv(
    "prediccion_febrero_xgb.csv", index=False
)
print("✅ Predicción exportada como 'prediccion_febrero_xgb.csv'")
print("🧭 Para ver el dashboard: ejecutá ➜ optuna-dashboard sqlite:///optuna_tn_studyww.db")
