# 0. Environment Setup with pip

This command installs all required libraries in one step for a macOS-based development environment. It includes deep learning (Keras/TensorFlow), machine learning (XGBoost), hyperparameter optimization (Hyperopt, Optuna), experiment tracking (MLflow), and model wrapping (Scikeras).

In [2]:
!pip install -q --disable-pip-version-check \
    keras \
    tensorflow-macos \
    tensorflow-metal \
    xgboost \
    hyperopt \
    mlflow \
    optuna \
    scikeras \
    gdown

[0m

# 1. Import libraries

This section imports all necessary Python libraries and modules required for data handling, model building, evaluation, optimization, and visualization.

In [3]:
# Standard Library
from tqdm import tqdm

# Data Handling
import numpy as np
import pandas as pd

# Preprocessing & Splitting
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, RobustScaler

# Metrics
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score
)

from pathlib import Path

from scikeras.wrappers import KerasRegressor 
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam

# Traditional ML Models
import xgboost as xgb

# Deep Learning (Keras / TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks  import EarlyStopping, ReduceLROnPlateau
# Hyperparameter Optimization
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

# Experiment Tracking with MLflow
import mlflow
import mlflow.sklearn
import mlflow.keras
from mlflow.models.signature import infer_signature

# Model Persistence
import joblib

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns


This snippet ensures the script can access project-level configuration regardless of execution context. It dynamically sets the project root, adjusts the system path for imports, and loads configuration constants needed for forecasting workflows.

In [4]:
import sys
import os

# Get project root one level up from current working directory (for Jupyter use)
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Add project root to Python path if not already included
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Import constants from Streamlit app configuration
import importlib
import app.config as cfg
importlib.reload(cfg)

import model.model_utils as model_utils



def main():
    print("Features:", cfg.FEATURES)
    print("Sequence Length:", cfg.SEQ_LEN)
    print("Target:", cfg.TARGET)
    print("Cutoff Date:", cfg.CUTOFF_DATE)
    print("Forecast End:", cfg.FORECAST_END)
    print("Hyperopt Space:", cfg.HYPEROPT_SPACE)

if __name__ == "__main__":
    main()

Features: ['store_nbr', 'item_nbr', 'onpromotion', 'day_of_week', 'month', 'unit_sales_7d_avg', 'lag_1', 'lag_7', 'rolling_mean_7']
Sequence Length: 90
Target: unit_sales
Cutoff Date: 2013-12-31 00:00:00
Forecast End: 2014-03-31
Hyperopt Space: {'max_depth': [3, 4, 5, 6], 'learning_rate_range': (0.01, 0.1, 0.2, 0.3), 'n_estimators': [20, 50, 100]}


# 2. Import Data

Import data prepared in the first notebook.

In [5]:
df_train = pd.read_csv("../data/preprocessed_data/train_guayas_prepared.csv", parse_dates=['date'])

# 3. Data Preparation

## 3.1 Create feature and set dtypes

This section performs essential preprocessing tasks, such as datetime conversion, label encoding, memory optimization, and feature engineering. It includes lag features and rolling statistics, which are commonly used in time series forecasting models.

In [6]:
# 1. Convert the 'date' column to datetime
df_train['date'] = pd.to_datetime(df_train['date'])

# 2. Label-encode family on train, then apply same encoder to test
le = LabelEncoder()
df_train['family_code'] = le.fit_transform(df_train['family'])
df_train ['family_code'] = le.transform   (df_train ['family']).astype('uint8')

# 3. Downcast integer columns to reduce memory usage
df_train['store_nbr']   = df_train['store_nbr'].astype('uint8')
df_train['item_nbr']    = df_train['item_nbr'].astype('uint32')
df_train['day_of_week'] = df_train['day_of_week'].astype('uint8')
df_train['month']       = df_train['month'].astype('uint8')
df_train['year']        = df_train['year'].astype('uint16')
df_train['onpromotion'] = df_train['onpromotion'].astype('bool')

# 4. Downcast other numeric columns where possible
df_train['unit_sales']       = pd.to_numeric(df_train['unit_sales'],       downcast='float')
df_train['unit_sales_7d_avg'] = pd.to_numeric(df_train['unit_sales_7d_avg'], downcast='float')

# 5. Lag features
df_train['lag_1'] = df_train['unit_sales'].shift(1)
df_train['lag_7'] = df_train['unit_sales'].shift(7)

# 6. Rolling-Window-Features
df_train['rolling_mean_7']     = df_train['unit_sales'].shift(1).rolling(window=7).mean()
df_train['unit_sales_7d_avg']  = df_train['unit_sales'].shift(1).rolling(window=7).mean()

# 7. Drop rows with NaN values
df_train = df_train.dropna(subset=['lag_1', 'lag_7', 'rolling_mean_7']).reset_index(drop=True)

# 8. Drop unnecessary columns
int_cols = df_train.select_dtypes(include='int').columns
df_train[int_cols] = df_train[int_cols].astype("float16")

print(df_train.filter(['date','unit_sales','lag_1','lag_7','rolling_mean_7']).head(10))

        date  unit_sales  lag_1  lag_7  rolling_mean_7
0 2013-01-09         2.0    0.0    0.0        0.000000
1 2013-01-10         0.0    2.0    0.0        0.285714
2 2013-01-11         0.0    0.0    0.0        0.285714
3 2013-01-12         2.0    0.0    0.0        0.285714
4 2013-01-13         0.0    2.0    0.0        0.571429
5 2013-01-14         0.0    0.0    0.0        0.571429
6 2013-01-15         0.0    0.0    0.0        0.571429
7 2013-01-16         1.0    0.0    2.0        0.571429
8 2013-01-17         2.0    1.0    0.0        0.428571
9 2013-01-18         0.0    2.0    0.0        0.714286


## 3.2 Save the dataset

After the dataset is prepared and further features were included, the dataset has to be saved.

In [7]:
df_train.to_csv("../data/preprocessed_data/train_guayas_model_ready.csv", index=False)

## 3.3 Filter for best Store-Item-combination

This section filters the dataset to include only data from 2013 and computes basic statistics per store and item combination. It selects only combinations with sufficient data coverage and calculates a custom score based on mean and standard deviation of sales. The best-performing combination is selected to serve as the focus for subsequent model training.

In [8]:
# Filter 2013 data only
df_2013 = df_train[df_train["date"].dt.year == 2013]

# Group by store and item to compute statistics
stats = df_2013.groupby(["store_nbr", "item_nbr"]).agg(
    count_days=("unit_sales", "count"),
    mean_sales=("unit_sales", "mean"),
    std_sales=("unit_sales", "std"),
    positive_days=("unit_sales", lambda x: (x > 0).sum())
).reset_index()

# Filter only combinations with sufficient data
stats = stats[stats["count_days"] >= 300]

# Compute a custom score: mean / std
stats["score"] = stats["mean_sales"] / (stats["std_sales"] + 1e-5)

# Get the best combination
best_combo = stats.sort_values("score", ascending=False).iloc[0]
store_id = int(best_combo["store_nbr"])
item_id = int(best_combo["item_nbr"])

# Filter data for selected combination
print(f"Training model for store {store_id} and item {item_id}")
df = df_train[(df_train["store_nbr"] == store_id) & (df_train["item_nbr"] == item_id)].copy()
df_train = df.sort_values("date")

Training model for store 24 and item 220435


## 3.4 Train/Test-Split

This section splits the dataset into training and test sets based on a predefined `CUTOFF_DATE`

In [9]:
# Train/Test Split with Cutoff from config
train_df = df_train.loc[df_train["date"] <= cfg.CUTOFF_DATE].copy()
test_df  = df_train.loc[df_train["date"] >  cfg.CUTOFF_DATE].copy()

non_numeric_cols = df_train.select_dtypes(include=["datetime64[ns]", "datetime64", "object"]).columns

X_train = train_df.drop(columns=non_numeric_cols).drop(columns=[cfg.TARGET])
y_train = np.log1p(train_df[cfg.TARGET])
X_test  = test_df.drop(columns=non_numeric_cols).drop(columns=[cfg.TARGET])
y_test  = np.log1p(test_df[cfg.TARGET])

# 4. XGBoost

## 4.1 Hyperparameter-Optimized XGBoost Model (Hyperopt)

This code performs **automated hyperparameter tuning** and training of an XGBoost model to forecast `unit_sales` for a given store and item using **Hyperopt** and **MLflow**.

### Purpose
To find the best-performing XGBoost model configuration for each store/item combination through hyperparameter optimization. The goal is to minimize forecast error using a data-driven, repeatable search strategy.

### What the code does
- Defines a **hyperparameter search space** for key XGBoost settings (e.g., `max_depth`, `learning_rate`, `n_estimators`)
- Uses **Hyperopt** to explore this space with a loss function based on `MAE`
- Trains 25 model variations (`max_evals=25`) on the training set
- Selects the best model and evaluates it on the test set
- Logs the selected model, parameters, and metrics (`MAE`, `R²`) to **MLflow**
- Saves the final model to disk for reuse or deployment

This setup supports consistent, scalable model tuning and tracking, enabling per-store/item optimization for time series forecasting tasks.

In [10]:
# --- HYPEROPT XGBOOST TRAINING ---
# End any active MLflow run
if mlflow.active_run():
    mlflow.end_run()

mlruns_path = os.path.join(project_root, "mlruns", "xgb_hyperopt")
if not os.path.exists(mlruns_path):
    # Create the directory if it doesn't exist      
        os.makedirs(mlruns_path, exist_ok=True)

# Set MLflow tracking URI to the local directory
mlflow.set_tracking_uri(f"file://{mlruns_path}")
# Set the experiment name
mlflow.set_experiment("store_item_sales_forecast_hyperopt")

# Define the search space
space = {
    'max_depth': hp.choice('max_depth', range(3, 10)),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.3),
    'n_estimators': hp.choice('n_estimators', range(20, 301, 10)),
    'subsample': hp.uniform('subsample', 0.5, 1.0),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1.0)
}

# Objective function for Hyperopt
def objective(params):
    model = xgb.XGBRegressor(random_state=42, **params)
    model.fit(X_train, y_train)
    y_pred = np.expm1(model.predict(X_test))
    score = mean_absolute_error(np.expm1(y_test), y_pred)
    return {'loss': score, 'status': STATUS_OK, 'model': model}

# Run Hyperopt
trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=25, trials=trials)

# Retrieve best model
best_model = trials.best_trial['result']['model']
y_pred_log = best_model.predict(X_test)
y_pred = np.expm1(y_pred_log)

# Evaluate final model
mae = mean_absolute_error(np.expm1(y_test), y_pred)
r2 = r2_score(np.expm1(y_test), y_pred)
with mlflow.start_run():
    mlflow.log_param("store_id", store_id)
    mlflow.log_param("item_id", item_id)
    mlflow.log_params(best)
    mlflow.log_metric("mae", mae)
    mlflow.log_metric("r2_score", r2)
    print(f"Best Hyperopt MAE: {mae:.4f}")
    print(f"Best Hyperopt R2 Score: {r2:.4f}")
    signature = infer_signature(X_train, best_model.predict(X_train))
    mlflow.sklearn.log_model(
        best_model,
        "model",
        signature=signature,
        input_example=X_train.head(3)
    )

    
# Finally, persist the trained model
os.makedirs("../model/xgb/archive/", exist_ok=True)
model_path = f"../model/xgb/archive/xgb_hyperopt_store{store_id}_item{item_id}.pkl"
joblib.dump(best_model, model_path)
mlflow.log_artifact(model_path, artifact_path="final_model")

# End any active MLflow run
if mlflow.active_run():
    mlflow.end_run()

100%|██████████| 25/25 [00:01<00:00, 14.68trial/s, best loss: 8.11827278137207] 
Best Hyperopt MAE: 8.1183
Best Hyperopt R2 Score: -0.0425




and following the plot to visualize the forecast of the XGBoost Hyperopt Model.

In [11]:
def plot_forecast(test_df, y_test, y_pred, store_id, item_id, mae, r2, mlflow_artifact_path):
    test_df = test_df.copy()
    test_df["forecast"] = y_pred
    test_df["actual"] = np.expm1(y_test.values)

    plt.figure(figsize=(12, 6))
    plt.plot(test_df["date"], test_df["actual"], label="Actual", linewidth=2)
    plt.plot(test_df["date"], test_df["forecast"], 'o--', color='red', label="Forecast", linewidth=2)
    plt.title(f"XGBoost Hyperopt Forecast vs. Actual\nStore {store_id}, Item {item_id}")
    plt.xlabel("Date")
    plt.ylabel("Unit Sales")

    metrics_text = f"MAE: {mae:.2f}\nR²: {r2:.2f}"
    plt.gca().text(0.01, 0.95, metrics_text,
                   transform=plt.gca().transAxes,
                   fontsize=12,
                   verticalalignment='top',
                   bbox=dict(boxstyle="round", facecolor="#f9f9f9", alpha=0.8))

    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    plot_path = f"xgb_hyperopt_forecast_plot_store{store_id}_item{item_id}.png"
    plt.savefig(plot_path)
    mlflow.log_artifact(plot_path, artifact_path=mlflow_artifact_path)
    plt.close()
    plt.show()

# --- LOAD FINAL MODEL AND PREDICT ---
store_id = 24
item_id = 220435
model_path = f"../model/xgb/archive/xgb_hyperopt_store{store_id}_item{item_id}.pkl"

# load Model
loaded_model = joblib.load(model_path)

# create forecast
y_pred_log = loaded_model.predict(X_test)
y_pred = np.expm1(y_pred_log)  # Annahme: Training war auf log1p-Daten

# calculate metrics
mae = mean_absolute_error(np.expm1(y_test), y_pred)
r2 = r2_score(np.expm1(y_test), y_pred)

# Forecast-Plot
plot_forecast(
    test_df=test_df,  # sollte eine Kopie von X_test mit Datumsspalte sein
    y_test=y_test,
    y_pred=y_pred,
    store_id=store_id,
    item_id=item_id,
    mae=mae,
    r2=r2,
    mlflow_artifact_path="final_model"
)
plt.show()

## 4.2 XGBoost Regression Model (Global)

The following Code is the XGBoost regression model for predicting unit sales using engineered features, evaluate its performance, and log everything with MLflow (including metrics, model, and visualizations).

### Purpose  
The purpose of the following code is to train a **global XGBoost model** that predicts unit sales based on multiple engineered features. It aims to capture general sales patterns across all stores and items and evaluate the model’s performance using robust metrics. The full pipeline is tracked using **MLflow** for transparency and reproducibility.

### What the function does
- Loads and filters historical sales data up to a defined `CUTOFF_DATE`
- Selects relevant features and applies a log transformation to `unit_sales`
- Splits the dataset into training and testing subsets
- Trains an XGBoost regression model on the processed data
- Predicts test values and applies inverse transformation using `expm1`
- Evaluates model performance using `MAE` and `R²`
- Logs model parameters, evaluation metrics, and plots using **MLflow**
- Saves the trained model and feature metadata to disk
- Generates and logs:
  - A time series plot of unit sales over time
  - A forecast vs. actual plot to visualize prediction accuracy

This pipeline supports fast experimentation and ensures results are reproducible, interpretable, and ready for deployment.


In [12]:
if mlflow.active_run():             # if the code interrupted here at the beginnig, pls. try again and go ahead (MLFLOW issue)
    mlflow.end_run()                # if there is an active run, end it



# Feature columns
FEATURE_COLS = [
    'store_nbr','item_nbr','onpromotion',
    'day_of_week','month','year','unit_sales_7d_avg',
    'family_code','lag_1','lag_7','rolling_mean_7'
]

# Load & filter data
df = pd.read_csv(os.path.join(project_root, cfg.PREPARED_DATA_FILE), parse_dates=["date"])
df = df[df['unit_sales'] >= 0]
df = df[df['date'] <= cfg.CUTOFF_DATE]

# Define features and target
X = df[FEATURE_COLS].astype("float64")
y = np.log1p(df['unit_sales'])  # log1p target

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train model
model = xgb.XGBRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict and reverse log
y_pred_log = model.predict(X_test)
y_pred = np.expm1(y_pred_log)
y_true = np.expm1(y_test)

# Evaluation metrics
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

# MLflow tracking
# 7) Start MLflow tracking
mlflow_tracking_dir_xgb_global = os.path.join(project_root, "mlruns", "xgb_global")
if not os.path.exists(mlflow_tracking_dir_xgb_global):
    # Create the directory if it doesn't exist
    os.makedirs(mlflow_tracking_dir_xgb_global, exist_ok=True)

# Set MLflow tracking URI to the local directory
mlflow.set_tracking_uri(f"file://{mlflow_tracking_dir_xgb_global}")
# Set the experiment name
mlflow.set_experiment("global_xgb_sales_forecast")

with mlflow.start_run():
    # Log parameters and metrics
    mlflow.log_param("model_type", "xgb_global")
    mlflow.log_param("features", FEATURE_COLS)
    mlflow.log_metric("mae", mae)
    mlflow.log_metric("r2_score", r2)

    # Save model
    out_path = Path(project_root, cfg.XGB_ARCHIVE_DIR) / cfg.GLOBAL_XGB_MODEL
    out_path.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump({"model": model, "features": FEATURE_COLS}, out_path)
    mlflow.log_artifact(str(out_path), artifact_path="model")

    print(f"✅ Global model saved to {out_path}")
    print(f"📊 MAE: {mae:.4f} | R2: {r2:.4f}")

    # 📈 Plot 1: Time series of unit_sales
    plt.figure(figsize=(10, 4))
    df_sorted = df.sort_values("date")
    plt.plot(df_sorted["date"], df_sorted["unit_sales"], label="unit_sales", alpha=0.8)
    plt.title("Unit Sales over Time")
    plt.xlabel("Date")
    plt.ylabel("Unit Sales")
    plt.grid(True)
    plt.tight_layout()
    time_series_path = "unit_sales_over_time.png"
    plt.savefig(time_series_path)
    plt.close()
    mlflow.log_artifact(time_series_path)

    # 📊 Plot 2: Forecast vs actuals (sample)
    plt.figure(figsize=(10, 4))
    sample_size = min(300, len(y_true))
    plt.plot(y_true[:sample_size], label="Actual", linewidth=2)
    plt.plot(y_pred[:sample_size], label="Forecast", linestyle="--")
    plt.title("Forecast vs. Actual (Test Set)")
    plt.xlabel("Sample")
    plt.ylabel("Unit Sales")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    forecast_path = "forecast_vs_actual.png"
    plt.savefig(forecast_path)
    plt.close()
    mlflow.log_artifact(forecast_path)

# Ensure clean MLflow exit
if mlflow.active_run():
    mlflow.end_run()


✅ Global model saved to /Users/jennypetschke/VS_Code_Projects/retail_demand_analysis/model/xgb/archive/xgb_global.pkl
📊 MAE: 1.0521 | R2: 0.6061


# 5. LSTM

## 5.1 LSTM Sequence-to-Sequence Forecasting Model (Global)

This function trains a **global LSTM Seq2Seq model** for multi-step time series forecasting, such as predicting future `unit_sales`.



### Purpose
The purpose of the following Code is to learn general sales patterns from historical data and forecast multiple future time steps in a single forward pass. The model is trained on multiple features, like aggregated data, but also on store_nbr and item_nbr.

### What the function does
- Loads and filters time series data up to a defined `CUTOFF_DATE`
- Scales `unit_sales` using MinMaxScaler
- Prepares input/output sequences using a sliding window
- Builds a Sequence-to-Sequence LSTM model (Encoder–Decoder)
- Trains the model with callbacks (`EarlyStopping`, `ReduceLROnPlateau`)
- Logs all key parameters, metrics, and artifacts using **MLflow**
- Saves both the trained model and the scaler for future use

This setup ensures that the entire training process is tracked, reproducible, and ready for deployment or experimentation.


In [13]:
'''# Imports
import os
import joblib
import mlflow
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import RobustScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import sys
import os

# Get project root one level up from current working directory (for Jupyter use)
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Add project root to Python path if not already included
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Import constants from Streamlit app configuration
import importlib
import app.config as cfg
importlib.reload(cfg)

import model.model_utils as model_utils
import app.config as cfg

seq_len = int(cfg.SEQ_LEN/30)

# --- Imports ---
# --- Projektstruktur und Pfade ---
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(project_root, cfg.PREPARED_DATA_FILE)

# --- MLflow vorbereiten ---
mlflow_tracking_dir = os.path.join(project_root, cfg.MLFLOW_LSTM_SEQ)
os.makedirs(mlflow_tracking_dir, exist_ok=True)
mlflow.set_tracking_uri(f"file://{mlflow_tracking_dir}")
mlflow.set_experiment("lstm_global_seq2seq_forecast")
if mlflow.active_run():
    mlflow.end_run()

# --- Daten laden ---
df = pd.read_csv(data_path, parse_dates=["date"])
df = df[df['unit_sales'] >= 0].sort_values('date').reset_index(drop=True)

# --- Skalierung vorbereiten ---
train_df = df[df['date'] <= cfg.CUTOFF_DATE]
scaler = RobustScaler()
scaler.fit(train_df[['unit_sales']])
df['unit_sales_scaled'] = scaler.transform(df[['unit_sales']])

# --- Sequenzen erstellen ---
def make_seq2seq(data, dates, n_in, n_out):
    X, y, d = [], [], []
    for i in range(len(data) - n_in - n_out + 1):
        X.append(data[i:i+n_in, 0])
        y.append(data[i+n_in:i+n_in+n_out, 0])
        d.append(dates[i+n_in - 1])
    return np.array(X), np.array(y), np.array(d)

X_all, y_all, date_all = make_seq2seq(
    df[['unit_sales_scaled']].values,
    df['date'].values,
    cfg.SEQ_LEN, cfg.SEQ_LEN
)

# --- Train/Test-Split nach Datum ---
cutoff = np.datetime64(cfg.CUTOFF_DATE)
train_mask = date_all <= cutoff
test_mask  = date_all > cutoff

X_train, y_train = X_all[train_mask], y_all[train_mask]
X_test,  y_test  = X_all[test_mask],  y_all[test_mask]

# --- Reshape für LSTM ---
def reshape_seq(X, y, seq_len):
    X_enc = X.reshape(-1, seq_len, 1)
    X_dec = np.zeros((len(X), seq_len, 1))
    X_dec[:, 1:, :] = y[:, :-1].reshape(len(y), seq_len - 1, 1)
    y_seq = y.reshape(len(y), seq_len, 1)
    return X_enc, X_dec, y_seq

X_enc_train, X_dec_train, y_seq_train = reshape_seq(X_train, y_train, cfg.SEQ_LEN)
X_enc_test,  X_dec_test,  y_seq_test  = reshape_seq(X_test,  y_test, cfg.SEQ_LEN)

# --- Modell definieren ---
enc_inputs = Input(shape=(cfg.SEQ_LEN, 1), name="encoder_input")
enc_outputs, state_h, state_c = LSTM(16, return_state=True, name="encoder_lstm")(enc_inputs)
dec_inputs = Input(shape=(cfg.SEQ_LEN, 1), name="decoder_input")
dec_lstm = LSTM(16, return_sequences=True, name="decoder_lstm")(dec_inputs, initial_state=[state_h, state_c])
outputs = Dense(1, activation="relu", name="output_dense")(dec_lstm)

model = Model([enc_inputs, dec_inputs], outputs)
model.compile(optimizer=Adam(0.01), loss="mae")

# --- Modellübersicht speichern ---
summary_path = os.path.join(project_root, cfg.LSTM_ARCHIVE_DIR, "model_summary_pretraining.txt")
os.makedirs(os.path.dirname(summary_path), exist_ok=True)
with open(summary_path, "w") as f:
    model.summary(print_fn=lambda x: f.write(x + "\n"))

# --- MLflow Logging ---
with mlflow.start_run():
    mlflow.log_params({
        "model_type": "LSTM Seq2Seq",
        "seq_len": cfg.SEQ_LEN,
        "units": 16,
        "optimizer": "adam",
        "loss": "mae",
        "activation": "relu",
        "batch_size": 256,
        "epochs": 10,
        "cutoff_date": cfg.CUTOFF_DATE.strftime('%Y-%m-%d'),
        "target": cfg.TARGET,
        "scaler": "RobustScaler"
    })

    # --- Training ---
    early_stop = EarlyStopping(monitor="loss", patience=3, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor="loss", factor=0.5, patience=2)

    history = model.fit(
        [X_enc_train, X_dec_train],
        y_seq_train,
        epochs=10,
        batch_size=256,
        callbacks=[early_stop, reduce_lr],
        verbose=1
    )

    mlflow.log_metric("final_loss", float(history.history["loss"][-1]))

    # --- Rolling Forecast für 90 Tage in 30er Chunks ---
    def rolling_forecast(model, start_sequence, forecast_days, chunk_size):
        n_chunks = forecast_days // chunk_size
        current_input = start_sequence.reshape(1, chunk_size, 1)
        forecast_result = []
        for _ in range(n_chunks):
            decoder_input = np.zeros((1, chunk_size, 1))
            pred = model.predict([current_input, decoder_input], verbose=0)
            forecast_result.append(pred.flatten())
            current_input = np.roll(current_input, -chunk_size, axis=1)
            current_input[0, -chunk_size:, 0] = pred.flatten()
        return np.concatenate(forecast_result)

    # --- Letzte Sequenz aus Trainingsdaten als Startpunkt ---
    last_seq = df[df['date'] <= cfg.CUTOFF_DATE]['unit_sales_scaled'].values[-cfg.SEQ_LEN:]
    forecast_scaled = rolling_forecast(model, last_seq, forecast_days=90, chunk_size=cfg.SEQ_LEN)
    forecast = scaler.inverse_transform(forecast_scaled.reshape(-1, 1)).flatten()

    # --- Plot Forecast ---
    plt.figure(figsize=(12, 4))
    plt.plot(range(1, 91), forecast, label="Forecast (90 Tage)", linewidth=2)
    plt.title("90-Tage Rolling LSTM Forecast")
    plt.xlabel("Tag")
    plt.ylabel("Unit Sales")
    plt.grid(True)
    plt.legend()
    plt.tight_layout()

    plot_path = os.path.join(project_root, "forecast_rolling_90d.png")
    plt.savefig(plot_path)
    mlflow.log_artifact(plot_path)

    # --- Modell + Scaler speichern ---
    model_path = Path(project_root, cfg.LSTM_ARCHIVE_DIR) / cfg.LSTM_GLOBAL_SEQ2SEQ_MODEL
    scaler_path = Path(project_root, cfg.SCALER_ARCHIVE_DIR) / cfg.LSTM_GLOBAL_SEQ2SEQ_SCALER
    model.save(model_path)
    joblib.dump(scaler, scaler_path)

    mlflow.log_artifact(str(model_path), artifact_path="model")
    mlflow.log_artifact(str(scaler_path), artifact_path="scaler")
    mlflow.log_artifact(summary_path)

    print("✅ Modell gespeichert.")
    print(f"   MAE: {mean_absolute_error(y_seq_test.flatten(), model.predict([X_enc_test, X_dec_test]).flatten()):.4f}")'''

'# Imports\nimport os\nimport joblib\nimport mlflow\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom pathlib import Path\nfrom sklearn.metrics import mean_absolute_error, r2_score\nfrom sklearn.preprocessing import RobustScaler\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.layers import Input, Dense, LSTM\nfrom tensorflow.keras.optimizers import Adam\nfrom tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau\nimport sys\nimport os\n\n# Get project root one level up from current working directory (for Jupyter use)\nproject_root = os.path.abspath(os.path.join(os.getcwd(), \'..\'))\n\n# Add project root to Python path if not already included\nif project_root not in sys.path:\n    sys.path.insert(0, project_root)\n\n# Import constants from Streamlit app configuration\nimport importlib\nimport app.config as cfg\nimportlib.reload(cfg)\n\nimport model.model_utils as model_utils\nimport app.config as cfg\n\nseq_len = int(cfg.SEQ_

In [14]:
'''import os
import numpy as np
import pandas as pd
import joblib
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import app.config as cfg

# --- Parameter ---
SEQ_LEN = 90
LSTM_UNITS = 8
EPOCHS = 10
BATCH_SIZE = 16
MAX_SAMPLES = 800
DTYPE = "float16"
FEATURES = ['unit_sales', 'onpromotion', 'lag_1']  # exakt 3 Features

# --- Daten laden ---
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(project_root, cfg.PREPARED_DATA_FILE)
df = pd.read_csv(data_path, usecols=FEATURES + ['date'], parse_dates=["date"])
df = df[(df["unit_sales"] >= 0) & (df["date"] <= cfg.CUTOFF_DATE)].sort_values("date").reset_index(drop=True)

# --- Encoder-Daten skalieren (nur unit_sales!)
scaler = MinMaxScaler()
scaled_target = scaler.fit_transform(df[['unit_sales']]).astype(DTYPE)

# --- Full Feature Matrix für den Encoder ---
X_features = df[FEATURES].astype(DTYPE).values

# --- Sequenzdatengenerator ---
def make_seq2seq(X_feat, y_target, n_in, n_out):
    X_enc, X_dec_in, y_out = [], [], []
    for i in range(len(X_feat) - n_in - n_out + 1):
        # Encoder input mit allen Features
        X_enc.append(X_feat[i : i + n_in])
        # Decoder input: nur scaled target
        dec_seq = np.zeros((n_out, 1), dtype=DTYPE)
        dec_seq[1:, 0] = y_target[i + n_in : i + n_in + n_out - 1, 0]

        X_dec_in.append(dec_seq)
        # Decoder output: nur target
        y_out.append(y_target[i + n_in : i + n_in + n_out])
    return np.array(X_enc), np.array(X_dec_in), np.array(y_out)

X_enc, X_dec, y_seq = make_seq2seq(X_features, scaled_target, SEQ_LEN, SEQ_LEN)

# --- Trimmen für Speicherbegrenzung ---
if len(X_enc) > MAX_SAMPLES:
    X_enc = X_enc[-MAX_SAMPLES:]
    X_dec = X_dec[-MAX_SAMPLES:]
    y_seq = y_seq[-MAX_SAMPLES:]

# --- Modell erstellen ---
enc_inputs = Input(shape=(SEQ_LEN, len(FEATURES)), name="encoder_input")
enc_outputs, state_h, state_c = LSTM(LSTM_UNITS, return_state=True)(enc_inputs)

dec_inputs = Input(shape=(SEQ_LEN, 1), name="decoder_input")
dec_lstm = LSTM(LSTM_UNITS, return_sequences=True)
dec_outputs = dec_lstm(dec_inputs, initial_state=[state_h, state_c])
outputs = Dense(1, activation="relu")(dec_outputs)

model = Model([enc_inputs, dec_inputs], outputs)
model.compile(optimizer=Adam(learning_rate=0.01), loss="mae")

# --- Trainieren ---
early_stop = EarlyStopping(monitor="loss", patience=3, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor="loss", factor=0.5, patience=2)

history = model.fit(
    [X_enc, X_dec], y_seq,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[early_stop, reduce_lr],
    verbose=1
)

# --- Vorhersage auf letzte Sequenz ---
# --- Vorhersage: autoregressiv ---
X_pred_enc = X_features[-SEQ_LEN:].reshape(1, SEQ_LEN, len(FEATURES))

# Initialisiere Decoder-Input mit 0
decoder_input = np.zeros((1, SEQ_LEN, 1), dtype=DTYPE)

# Starte mit letztem bekannten echten Wert (skaliert)
decoder_input[0, 0, 0] = scaled_target[-1, 0]

# Schrittweises Vorhersagen
for t in range(1, SEQ_LEN):
    pred_scaled = model.predict([X_pred_enc, decoder_input])[0]  # (1, SEQ_LEN, 1)
    decoder_input[:, t, 0] = pred_scaled[t - 1, 0]  # Autoregressiv: vorhergesagter Wert an nächste Stelle

# Finales Vorhersageergebnis
final_pred_scaled = model.predict([X_pred_enc, decoder_input])[0]
forecast = scaler.inverse_transform(final_pred_scaled)[:, 0]
'''
# --- Vorhersage auf letzte Sequenz ---
X_pred_enc = X_features[-SEQ_LEN:].reshape(1, SEQ_LEN, len(FEATURES))
X_pred_dec = np.zeros((1, SEQ_LEN, 1), dtype=DTYPE)
X_pred_dec[:, 0, 0] = scaled_target[-1, 0]  # 👈 Seed den ersten Decoder-Schritt mit letztem bekannten Target

# Vorhersage (skaliert)
pred_scaled = model.predict([X_pred_enc, X_pred_dec])[0]
forecast = scaler.inverse_transform(pred_scaled)[:, 0]
'''

# --- Forecast-Daten ---
future_dates = pd.date_range(df["date"].max() + pd.Timedelta(days=1), periods=SEQ_LEN)
actuals_df = df.groupby("date", as_index=True)["unit_sales"].mean()
actuals = actuals_df.reindex(future_dates).values


# --- Plot ---
plt.figure(figsize=(10, 4))
plt.plot(df["date"][-SEQ_LEN:], df["unit_sales"][-SEQ_LEN:], label="Last Seen", linewidth=2)
plt.plot(future_dates, forecast, "r--", label="Forecast", linewidth=2)
plt.title("LSTM Seq2Seq Forecast (90 Days)")
plt.xlabel("Date")
plt.ylabel("Unit Sales")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# --- Modell speichern ---
model_path = Path(project_root) / cfg.LSTM_ARCHIVE_DIR / cfg.LSTM_GLOBAL_MODEL
scaler_path = Path(project_root) / cfg.SCALER_ARCHIVE_DIR / cfg.LSTM_GLOBAL_SCALER
model_path.parent.mkdir(parents=True, exist_ok=True)
scaler_path.parent.mkdir(parents=True, exist_ok=True)
model.save(model_path)
joblib.dump(scaler, scaler_path)

print("✅ Model und Scaler gespeichert.")
'''

NameError: name 'X_features' is not defined

In [25]:
import os
import numpy as np
import pandas as pd
import joblib
import mlflow
from pathlib import Path
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import importlib
import app.config as cfg
importlib.reload(cfg)


# --- Parameter ---
SEQ_LEN = 90
LSTM_UNITS = 8
learning_rate = 0.001
EPOCHS = 100
BATCH_SIZE = 16  # reduziert wegen langer Sequenz
MAX_SAMPLES = 800  # Limit der Trainingssequenzen für Performance
DTYPE = "float16"
FEATURES = ['unit_sales', 'onpromotion', 'lag_1'
            ]#, 'rolling_mean_7', 'store_nbr', 'item_nbr'

# --- Pfade ---
import app.config as cfg
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(project_root, cfg.PREPARED_DATA_FILE)
mlflow_tracking_dir_lstm = os.path.join(project_root, "mlruns/lstm_global")

# --- Daten laden ---
df = pd.read_csv(data_path, parse_dates=["date"])
use_cols = FEATURES + ['date']  # 'date' wird zum Parsen benötigt
df = pd.read_csv(data_path, usecols=use_cols, parse_dates=["date"])
df = df[(df["unit_sales"] >= 0) & (df["date"] <= cfg.CUTOFF_DATE)].sort_values("date").reset_index(drop=True)

# --- Skalierung ---
scaler = MinMaxScaler()
scaled = scaler.fit_transform(df[FEATURES]).astype(dtype=DTYPE)

# --- Sequenzdaten erzeugen ---
def make_seq2seq(data, n_in, n_out):
    X, y = [], []
    for i in range(len(data) - n_in - n_out + 1):
        X.append(data[i : i + n_in, 0])
        y.append(data[i + n_in : i + n_in + n_out, 0])
    return np.array(X), np.array(y)

X_raw, y_raw = make_seq2seq(scaled, SEQ_LEN, SEQ_LEN)
n_features = len(FEATURES)
# --- Datenformate ---
X_enc = X_raw.reshape(-1, SEQ_LEN, n_features).astype(dtype=DTYPE)
X_dec = np.zeros((len(X_raw), SEQ_LEN, n_features), dtype=DTYPE)
X_dec[:, 1:, :] = y_raw[:, :-1].reshape(len(y_raw), SEQ_LEN - 1, 1)
y_seq = y_raw.reshape(len(y_raw), SEQ_LEN, 1).astype(dtype=DTYPE)

# --- Begrenze Datensatzgröße ---
if len(X_enc) > MAX_SAMPLES:
    X_enc = X_enc[-MAX_SAMPLES:]
    X_dec = X_dec[-MAX_SAMPLES:]
    y_seq = y_seq[-MAX_SAMPLES:]

# --- LSTM-Modell ---


enc_inputs = Input(shape=(SEQ_LEN, n_features), name="encoder_input")
enc_outputs, state_h, state_c = LSTM(LSTM_UNITS, return_state=True, name="encoder_lstm")(enc_inputs)
dec_inputs = Input(shape=(SEQ_LEN, n_features), name="decoder_input")
dec_outputs = LSTM(LSTM_UNITS, return_sequences=True, name="decoder_lstm")(dec_inputs, initial_state=[state_h, state_c])
outputs = Dense(1, activation="relu")(dec_outputs)

model = Model([enc_inputs, dec_inputs], outputs)
model.compile(optimizer=Adam(0.1), loss="mae")

# --- MLflow ---
mlflow.set_tracking_uri(f"file://{mlflow_tracking_dir_lstm}")
mlflow.set_experiment("global_lstm_seq2seq_forecast")

if mlflow.active_run():
    mlflow.end_run()

with mlflow.start_run():
    mlflow.log_params({
        "model_type": "lstm_seq2seq",
        "seq_len": SEQ_LEN,
        "lstm_units": LSTM_UNITS,
        "epochs": EPOCHS,
        "batch_size": BATCH_SIZE,
        "sample_size": len(X_enc)
    })

    # --- Training ---
    early_stop = EarlyStopping(monitor="loss", patience=3, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor="loss", factor=0.5, patience=2)
    history = model.fit(
        [X_enc, X_dec], y_seq,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[early_stop, reduce_lr],
        verbose=1
    )

    final_loss = history.history["loss"][-1]
    mlflow.log_metric("final_loss", final_loss)

    # --- Vorhersage auf letzte Sequenz ---
    X_pred_enc = scaled[-SEQ_LEN:].reshape(1, SEQ_LEN, n_features)
    X_pred_dec = np.zeros((1, SEQ_LEN, n_features), dtype="float32")
    pred_scaled = model.predict([X_pred_enc, X_pred_dec])[0]
    # Dummy mit Nullen – exakt gleiche Struktur wie beim Scaling (3 Features)
    dummy = np.zeros((cfg.SEQ_LEN, len(FEATURES)), dtype=DTYPE)
    # Nur unit_sales (Index 0) mit den Vorhersagewerten befüllen
    dummy[:, 0] = pred_scaled[:, 0]
    # Rücktransformieren und nur unit_sales extrahieren
    forecast = scaler.inverse_transform(dummy)[:, 0]



    # --- Plot ---
    future_dates = pd.date_range(df["date"].max() + pd.Timedelta(days=1), periods=SEQ_LEN)
    plt.figure(figsize=(10, 4))
    plt.plot(df["date"][-SEQ_LEN:], df["unit_sales"][-SEQ_LEN:], label="Last Seen", linewidth=2)
    plt.plot(future_dates, forecast, "r--", label="Forecast", linewidth=2)
    plt.title("LSTM Seq2Seq Forecast (90 Days)")
    plt.xlabel("Date")
    plt.ylabel("Unit Sales")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    plot_path = "lstm_forecast_plot.png"
    plt.savefig(plot_path)
    mlflow.log_artifact(plot_path)
    plt.close()

    # --- Modell speichern ---
    project_root = Path(os.path.abspath(os.path.join(os.getcwd(), '..')))
    model_path = Path(project_root) / cfg.LSTM_ARCHIVE_DIR / cfg.LSTM_GLOBAL_MODEL
    scaler_path = Path(project_root) / cfg.SCALER_ARCHIVE_DIR / cfg.LSTM_GLOBAL_SCALER


    # Ordner anlegen
    model_path.parent.mkdir(parents=True, exist_ok=True)
    scaler_path.parent.mkdir(parents=True, exist_ok=True)


    assert str(model_path).endswith(".keras"), f"❌ Model path must end with .keras, got {model_path}"

    # Jetzt funktioniert das:
    model.save(model_path)
    joblib.dump(scaler, scaler_path)

    # MLflow
    mlflow.log_artifact(str(model_path))
    mlflow.log_artifact(str(scaler_path))

    print(f"✅ LSTM (SEQ_LEN={SEQ_LEN}) gespeichert & in MLflow geloggt.")

Epoch 1/100


2025-05-27 15:26:22.080348: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - loss: 1.0905e-07 - learning_rate: 0.1000
Epoch 2/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0000e+00 - learning_rate: 0.1000
Epoch 3/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 0.0000e+00 - learning_rate: 0.1000
Epoch 4/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0000e+00 - learning_rate: 0.0500
Epoch 5/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0000e+00 - learning_rate: 0.0500
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
✅ LSTM (SEQ_LEN=90) gespeichert & in MLflow geloggt.


In [None]:
# -- NEW --
import os
import numpy as np
import pandas as pd
import joblib
import mlflow
from pathlib import Path
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import importlib
import app.config as cfg
importlib.reload(cfg)


# --- Parameter ---
SEQ_LEN = 90
LSTM_UNITS = 8
learning_rate = 0.1
EPOCHS = 10
BATCH_SIZE = 16  # reduziert wegen langer Sequenz
MAX_SAMPLES = 800  # Limit der Trainingssequenzen für Performance
dtype = "float16"  # für geringeren Speicherverbrauch

FEATURES = ['unit_sales', 'lag_1', 'rolling_mean_7'
            # ,'store_nbr', 'item_nbr', 'onpromotion', 
            ]

# --- Pfade ---
import app.config as cfg
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(project_root, cfg.PREPARED_DATA_FILE)
mlflow_tracking_dir_lstm = os.path.join(project_root, "mlruns/lstm_global")

# --- Daten laden ---
df = pd.read_csv(data_path, parse_dates=["date"])
use_cols = FEATURES + ['date']  # 'date' wird zum Parsen benötigt
df = pd.read_csv(data_path, usecols=use_cols, parse_dates=["date"])
df = df[(df["unit_sales"] >= 0) & (df["date"] <= cfg.CUTOFF_DATE)].sort_values("date").reset_index(drop=True)

# --- Skalierung ---
scaler = MinMaxScaler()
scaled = scaler.fit_transform(df[FEATURES]).astype(dtype)

# --- Sequenzdaten erzeugen ---
def make_seq2seq(data, n_in, n_out):
    X, y = [], []
    for i in range(len(data) - n_in - n_out + 1):
        X.append(data[i : i + n_in, :])                # alle Features
        y.append(data[i + n_in : i + n_in + n_out, 0]) # nur Target (unit_sales)
    return np.array(X), np.array(y)


X_raw, y_raw = make_seq2seq(scaled, SEQ_LEN, SEQ_LEN)

X_enc = X_raw.astype(dtype)  # shape: (samples, SEQ_LEN, n_features)
X_dec = np.zeros_like(X_enc)     # shape: wie X_enc, wird später für Teacher Forcing genutzt
X_dec[:, 1:, :] = X_enc[:, :-1, :]  # nur wenn du ein autoregressives Decoder-Setup möchtest

y_seq = y_raw.reshape(len(y_raw), SEQ_LEN, 1).astype(dtype)


# --- Begrenze Datensatzgröße ---
if len(X_enc) > MAX_SAMPLES:
    X_enc = X_enc[-MAX_SAMPLES:]
    X_dec = X_dec[-MAX_SAMPLES:]
    y_seq = y_seq[-MAX_SAMPLES:]

# --- LSTM-Modell ---
n_features = len(FEATURES)

enc_inputs = Input(shape=(SEQ_LEN, n_features), name="encoder_input")
enc_outputs, state_h, state_c = LSTM(LSTM_UNITS, return_state=True, name="encoder_lstm")(enc_inputs)
dec_inputs = Input(shape=(SEQ_LEN, n_features), name="decoder_input")
dec_outputs = LSTM(LSTM_UNITS, return_sequences=True, name="decoder_lstm")(dec_inputs, initial_state=[state_h, state_c])
outputs = Dense(1, activation="relu")(dec_outputs)

model = Model([enc_inputs, dec_inputs], outputs)
model.compile(optimizer=Adam(0.1), loss="mae")

# --- MLflow ---
mlflow.set_tracking_uri(f"file://{mlflow_tracking_dir_lstm}")
mlflow.set_experiment("global_lstm_seq2seq_forecast")

if mlflow.active_run():
    mlflow.end_run()

with mlflow.start_run():
    mlflow.log_params({
        "model_type": "lstm_seq2seq",
        "seq_len": SEQ_LEN,
        "lstm_units": LSTM_UNITS,
        "epochs": EPOCHS,
        "batch_size": BATCH_SIZE,
        "sample_size": len(X_enc)
    })

    # --- Training ---
    early_stop = EarlyStopping(monitor="loss", patience=3, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor="loss", factor=0.5, patience=2)
    history = model.fit(
        [X_enc, X_dec], y_seq,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[early_stop, reduce_lr],
        verbose=1
    )

    final_loss = history.history["loss"][-1]
    mlflow.log_metric("final_loss", final_loss)

    # --- Vorhersage auf letzte Sequenz ---
    X_pred_enc = scaled[-SEQ_LEN:].reshape(1, SEQ_LEN, n_features)
    X_pred_dec = np.zeros((1, SEQ_LEN, n_features), dtype=dtype)
    X_pred_dec[:, 0, :] = X_pred_enc[:, -1, :]  # 👈 Seed initialisieren mit letztem echten Schritt


    # Vorhersage (skaliert)
    pred_scaled = model.predict([X_pred_enc, X_pred_dec])[0]  # shape = (90, 1)

    # Dummy-Matrix bauen: alle anderen Features = 0, unit_sales = pred_scaled
    dummy = np.zeros((SEQ_LEN, n_features), dtype=dtype)
    dummy[:, 0] = pred_scaled[:, 0]  # Annahme: unit_sales ist an Index 0

    # Rückskalierung: nur unit_sales extrahieren
    forecast = scaler.inverse_transform(dummy)[:, 0]


    # --- Plot ---
    future_dates = pd.date_range(df["date"].max() + pd.Timedelta(days=1), periods=SEQ_LEN)
    plt.figure(figsize=(10, 4))
    plt.plot(df["date"][-SEQ_LEN:], df["unit_sales"][-SEQ_LEN:], label="Last Seen", linewidth=2)
    plt.plot(future_dates, forecast, "r--", label="Forecast", linewidth=2)
    plt.title("LSTM Seq2Seq Forecast (90 Days)")
    plt.xlabel("Date")
    plt.ylabel("Unit Sales")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    plot_path = "lstm_forecast_plot.png"
    plt.savefig(plot_path)
    mlflow.log_artifact(plot_path)
    plt.close()

    # --- Modell speichern ---
    project_root = Path(os.path.abspath(os.path.join(os.getcwd(), '..')))
    model_path = Path(project_root) / cfg.LSTM_ARCHIVE_DIR / cfg.LSTM_GLOBAL_MODEL
    scaler_path = Path(project_root) / cfg.SCALER_ARCHIVE_DIR / cfg.LSTM_GLOBAL_SCALER


    # Ordner anlegen
    model_path.parent.mkdir(parents=True, exist_ok=True)
    scaler_path.parent.mkdir(parents=True, exist_ok=True)


    assert str(model_path).endswith(".keras"), f"❌ Model path must end with .keras, got {model_path}"

    # Jetzt funktioniert das:
    model.save(model_path)
    joblib.dump(scaler, scaler_path)

    # MLflow
    mlflow.log_artifact(str(model_path))
    mlflow.log_artifact(str(scaler_path))

    print(f"✅ LSTM (SEQ_LEN={SEQ_LEN}) gespeichert & in MLflow geloggt.")


Epoch 1/10


2025-05-27 13:41:29.911866: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 25ms/step - loss: 7.7279e-07 - learning_rate: 0.1000
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.0000e+00 - learning_rate: 0.1000
Epoch 3/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 50ms/step - loss: 0.0000e+00 - learning_rate: 0.1000
Epoch 4/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 34ms/step - loss: 0.0000e+00 - learning_rate: 0.0500
Epoch 5/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 0.0000e+00 - learning_rate: 0.0500
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 625ms/step
✅ LSTM (SEQ_LEN=90) gespeichert & in MLflow geloggt.


In [None]:
# --- Imports ---
import os
import sys
import importlib
import numpy as np
import pandas as pd
import joblib
import mlflow
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# --- Config laden ---
import app.config as cfg
importlib.reload(cfg)

# --- Parameter ---
SEQ_LEN = cfg.SEQ_LEN
LSTM_UNITS = 8
EPOCHS = 20
BATCH_SIZE = 16
MAX_SAMPLES = 800

FEATURES = ['unit_sales', 'onpromotion', 'lag_1', 'rolling_mean_7', 'store_nbr', 'item_nbr']

# --- Pfade ---
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(project_root, cfg.PREPARED_DATA_FILE)
mlflow_tracking_dir = os.path.join(project_root, cfg.MLFLOW_LSTM_SEQ)
model_path = Path(project_root) / cfg.LSTM_ARCHIVE_DIR / cfg.LSTM_GLOBAL_MODEL
scaler_path = Path(project_root) / cfg.SCALER_ARCHIVE_DIR / cfg.LSTM_GLOBAL_SCALER

# --- Daten laden ---
df = pd.read_csv(data_path, usecols=FEATURES + ['date'], parse_dates=['date'])
df = df[(df["unit_sales"] >= 0) & (df["date"] <= cfg.CUTOFF_DATE)].sort_values("date").reset_index(drop=True)

# --- Target-Transformation (optional log1p) ---
df["target"] = np.log1p(df["unit_sales"])  # verwendet log(1 + x) gegen Null-Inflation

# --- Skalierung ---
scalers = {f: MinMaxScaler() for f in FEATURES}
scaled_features = np.stack([scalers[f].fit_transform(df[[f]]).flatten() for f in FEATURES], axis=-1).astype("float32")
target_scaler = MinMaxScaler()
scaled_target = target_scaler.fit_transform(df[["target"]]).astype("float32")

# --- Sequenzen erstellen ---
def make_seq2seq(X_data, y_data, n_in, n_out):
    X, y = [], []
    for i in range(len(X_data) - n_in - n_out + 1):
        X.append(X_data[i : i + n_in])
        y.append(y_data[i + n_in : i + n_in + n_out, 0])
    return np.array(X), np.array(y)

X_raw, y_raw = make_seq2seq(scaled_features, scaled_target, SEQ_LEN, SEQ_LEN)

# --- Reshape für LSTM ---
X_enc = X_raw.astype("float32")
X_dec = np.zeros((len(X_raw), SEQ_LEN, 1), dtype="float32")
X_dec[:, 1:, :] = y_raw[:, :-1].reshape(len(y_raw), SEQ_LEN - 1, 1)
X_dec[:, 0, 0] = X_enc[:, -1, 0]  # letzte bekannte Info als Startwert
y_seq = y_raw.reshape(len(y_raw), SEQ_LEN, 1).astype("float32")

# --- Sample-Größe begrenzen ---
if len(X_enc) > MAX_SAMPLES:
    X_enc = X_enc[-MAX_SAMPLES:]
    X_dec = X_dec[-MAX_SAMPLES:]
    y_seq = y_seq[-MAX_SAMPLES:]

# --- Modell ---
input_dim = X_enc.shape[2]
enc_inputs = Input(shape=(SEQ_LEN, input_dim), name="encoder_input")
_, state_h, state_c = LSTM(LSTM_UNITS, return_state=True, name="encoder_lstm")(enc_inputs)
dec_inputs = Input(shape=(SEQ_LEN, 1), name="decoder_input")
dec_outputs = LSTM(LSTM_UNITS, return_sequences=True, name="decoder_lstm")(dec_inputs, initial_state=[state_h, state_c])
outputs = Dense(1, activation=None)(dec_outputs)  # keine Aktivierung für Regression

model = Model([enc_inputs, dec_inputs], outputs)
model.compile(optimizer=Adam(learning_rate=0.001), loss="mae")

# --- MLflow ---
mlflow.set_tracking_uri(f"file://{mlflow_tracking_dir}")
mlflow.set_experiment("lstm_seq2seq_multivariate")

if mlflow.active_run():
    mlflow.end_run()

with mlflow.start_run():
    mlflow.log_params({
        "model_type": "lstm_seq2seq",
        "seq_len": SEQ_LEN,
        "lstm_units": LSTM_UNITS,
        "epochs": EPOCHS,
        "batch_size": BATCH_SIZE,
        "input_dim": input_dim,
        "features": FEATURES,
        "target": "log1p(unit_sales)"
    })

    history = model.fit(
        [X_enc, X_dec],
        y_seq,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[
            EarlyStopping(monitor="loss", patience=3, restore_best_weights=True),
            ReduceLROnPlateau(monitor="loss", factor=0.5, patience=2)
        ],
        verbose=1
    )

    mlflow.log_metric("final_loss", float(history.history["loss"][-1]))

    # --- Forecast ---
    X_pred_enc = X_enc[-1:]
    X_pred_dec = np.zeros((1, SEQ_LEN, 1), dtype="float32")
    X_pred_dec[:, 0, 0] = X_pred_enc[:, -1, 0]
    pred_scaled = model.predict([X_pred_enc, X_pred_dec])[0]
    pred_log1p = target_scaler.inverse_transform(pred_scaled).flatten()
    forecast = np.expm1(pred_log1p)  # rücktransformieren

    # --- Plot ---
    future_dates = pd.date_range(df["date"].max() + pd.Timedelta(days=1), periods=SEQ_LEN)
    plt.figure(figsize=(10, 4))
    plt.plot(future_dates, forecast, "r--", label="Forecast", linewidth=2)
    plt.title("Forecast (Next 90 Days)")
    plt.xlabel("Date")
    plt.ylabel("Unit Sales")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plot_path = "forecast_plot.png"
    plt.savefig(plot_path)
    mlflow.log_artifact(plot_path)
    plt.close()

    # --- Modell & Scaler speichern ---
    model_path.parent.mkdir(parents=True, exist_ok=True)
    scaler_path.parent.mkdir(parents=True, exist_ok=True)
    model.save(model_path)
    joblib.dump(target_scaler, scaler_path)
    mlflow.log_artifact(str(model_path))
    mlflow.log_artifact(str(scaler_path))

    print("✅ Training abgeschlossen. Modell gespeichert und Forecast erzeugt.")


In [None]:
import os
import numpy as np
import pandas as pd
import joblib
import mlflow
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import importlib
import app.config as cfg
importlib.reload(cfg)

# --- Parameter ---
SEQ_LEN = cfg.SEQ_LEN
FEATURES = ['unit_sales'] + cfg.FEATURE_COLS
MAX_SAMPLES = 1000

# --- Pfade ---
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(project_root, cfg.PREPARED_DATA_FILE)
mlflow_tracking_dir_lstm = os.path.join(project_root, "mlruns/lstm_global")

# --- Daten laden ---
df = pd.read_csv(data_path, parse_dates=["date"])
df = df[(df["unit_sales"] >= 0) & (df["date"] <= cfg.CUTOFF_DATE)].sort_values("date").reset_index(drop=True)
available_features = [f for f in FEATURES if f in df.columns]
missing_features = [f for f in FEATURES if f not in df.columns]

if missing_features:
    print(f"⚠️ Warnung: Diese Features fehlen in den Daten und werden ignoriert: {missing_features}")

# Skalierung nur auf existierende Spalten anwenden
scalers = {f: MinMaxScaler() for f in available_features}
scaled_features = np.stack(
    [scalers[f].fit_transform(df[[f]]).flatten() for f in available_features],
    axis=-1
).astype("float32")


# --- Sequenzdaten erzeugen ---
def make_multivariate_seq2seq(data, n_in, n_out):
    X, y = [], []
    for i in range(len(data) - n_in - n_out + 1):
        X.append(data[i : i + n_in])
        y.append(data[i + n_in : i + n_in + n_out, 0])  # unit_sales als Target
    return np.array(X), np.array(y)

X_raw, y_raw = make_multivariate_seq2seq(scaled_features, SEQ_LEN, SEQ_LEN)

# --- Daten vorbereiten ---
X_enc = X_raw.astype("float32")
X_dec = np.zeros((len(X_raw), SEQ_LEN, 1), dtype="float32")
X_dec[:, 1:, :] = y_raw[:, :-1].reshape(len(y_raw), SEQ_LEN - 1, 1)
y_seq = y_raw.reshape(len(y_raw), SEQ_LEN, 1).astype("float32")

# --- Begrenze Datensatzgröße ---
if len(X_enc) > MAX_SAMPLES:
    X_enc = X_enc[-MAX_SAMPLES:]
    X_dec = X_dec[-MAX_SAMPLES:]
    y_seq = y_seq[-MAX_SAMPLES:]

# --- Modell ---
input_dim = X_enc.shape[2]
enc_inputs = Input(shape=(SEQ_LEN, input_dim), name="encoder_input")
_, state_h, state_c = LSTM(cfg.LSTM_PARAMS["lstm_units"], return_state=True, name="encoder_lstm")(enc_inputs)

dec_inputs = Input(shape=(SEQ_LEN, 1), name="decoder_input")
dec_lstm = LSTM(cfg.LSTM_PARAMS["lstm_units"], return_sequences=True, name="decoder_lstm")(
    dec_inputs, initial_state=[state_h, state_c]
)
outputs = Dense(1, activation="relu")(dec_lstm)

model = Model([enc_inputs, dec_inputs], outputs)
model.compile(optimizer=Adam(1e-3), loss="mae")

# --- MLflow Logging ---
mlflow.set_tracking_uri(f"file://{mlflow_tracking_dir_lstm}")
mlflow.set_experiment("global_lstm_seq2seq_forecast")

if mlflow.active_run():
    mlflow.end_run()

with mlflow.start_run():
    mlflow.log_params({
        "model_type": "lstm_seq2seq_multivariate",
        "seq_len": SEQ_LEN,
        "lstm_units": cfg.LSTM_PARAMS["lstm_units"],
        "epochs": cfg.LSTM_PARAMS["epochs"],
        "batch_size": cfg.LSTM_PARAMS["batch_size"],
        "input_dim": input_dim,
        "sample_size": len(X_enc)
    })

    # Training
    history = model.fit(
        [X_enc, X_dec], y_seq,
        epochs=cfg.LSTM_PARAMS["epochs"],
        batch_size=cfg.LSTM_PARAMS["batch_size"],
        callbacks=[
            EarlyStopping(monitor="loss", patience=cfg.LSTM_PARAMS["patience"], restore_best_weights=True),
            ReduceLROnPlateau(monitor="loss", factor=0.5, patience=2)
        ],
        verbose=1
    )

    final_loss = history.history["loss"][-1]
    mlflow.log_metric("final_loss", final_loss)

    # Forecast erzeugen
    X_pred_enc = X_enc[-1:].copy()
    X_pred_dec = np.zeros((1, SEQ_LEN, 1), dtype="float32")

    pred_scaled = model.predict([X_pred_enc, X_pred_dec])[0]
    pred_unit_sales = scalers["unit_sales"].inverse_transform(pred_scaled).flatten()

    future_dates = pd.date_range(df["date"].max() + pd.Timedelta(days=1), periods=SEQ_LEN)
    plt.figure(figsize=(10, 4))
    plt.plot(df["date"][-SEQ_LEN:], df["unit_sales"][-SEQ_LEN:], label="Last Seen", linewidth=2)
    plt.plot(future_dates, pred_unit_sales, "r--", label="Forecast", linewidth=2)
    plt.title("Multivariate LSTM Seq2Seq Forecast")
    plt.xlabel("Date")
    plt.ylabel("Unit Sales")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    plot_path = "lstm_multivariate_forecast_plot.png"
    plt.savefig(plot_path)
    mlflow.log_artifact(plot_path)
    plt.close()

    # Speichern
    model_path = project_root / cfg.LSTM_ARCHIVE_DIR / cfg.LSTM_GLOBAL_MODEL
    scaler_path = project_root / cfg.SCALER_ARCHIVE_DIR / cfg.LSTM_GLOBAL_SCALER
    model_path.parent.mkdir(parents=True, exist_ok=True)
    scaler_path.parent.mkdir(parents=True, exist_ok=True)
    model.save(model_path)
    joblib.dump(scalers["unit_sales"], scaler_path)

    mlflow.log_artifact(str(model_path))
    mlflow.log_artifact(str(scaler_path))

    print(f"✅ Multivariate LSTM (SEQ_LEN={SEQ_LEN}) gespeichert & in MLflow geloggt.")


: 

In [None]:
# Parameters
n_input  = cfg.SEQ_LEN   # input sequence length
n_output = cfg.SEQ_LEN   # forecast horizon

# fixed store/item
print(f"📌 Training model for store {store_id} and item {item_id}")
subset = (
    df_train
    .loc[(df_train.store_nbr==store_id)&(df_train.item_nbr==item_id)]
    .sort_values("date")
    .set_index("date")[["unit_sales"]]
    .astype(float)
)
scaler = MinMaxScaler()
subset["scaled"] = scaler.fit_transform(subset[["unit_sales"]])

# Build seq2seq sequences
def make_seq2seq_data(series, n_in, n_out):
    X, y = [], []
    for i in range(len(series) - n_in - n_out):
        X.append(series[i : i + n_in])
        y.append(series[i + n_in : i + n_in + n_out])
    return np.array(X), np.array(y)

series = subset["scaled"].values
X_all, y_all = make_seq2seq_data(series, n_input, n_output)
dates_all = subset.index[n_input + n_output :]

# Split train/test
cutoff = pd.to_datetime(cfg.CUTOFF_DATE)
split_idx = np.sum(dates_all <= cutoff)
X_train, X_test = X_all[:split_idx], X_all[split_idx:]
y_train, y_test = y_all[:split_idx], y_all[split_idx:]

# Reshape and prepare decoder inputs (Teacher Forcing)
X_train = X_train[..., np.newaxis]
X_test  = X_test[...,  np.newaxis]
dec_in_train = np.zeros((len(X_train), n_output, 1))
dec_in_train[:,1:,:] = y_train[:,:-1][...,None]
dec_in_test  = np.zeros((len(X_test), n_output, 1))

# Build a deeper Seq2Seq model
def build_seq2seq_model(n_in, n_out):
    # Encoder
    enc_inputs = Input(shape=(n_in,1), name="encoder_input")
    e = LSTM(64, return_sequences=True)(enc_inputs)
    _, state_h, state_c = LSTM(64, return_state=True)(e)
    # Decoder
    dec_inputs = Input(shape=(n_out,1), name="decoder_input")
    d = LSTM(64, return_sequences=True)(dec_inputs, initial_state=[state_h, state_c])
    d = LSTM(64, return_sequences=True)(d)
    outputs = Dense(1, activation="relu")(d)
    model = Model([enc_inputs, dec_inputs], outputs)
    model.compile(optimizer=Adam(1e-3), loss="mae")
    return model

model = build_seq2seq_model(n_input, n_output)

# Setup callbacks
early     = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3)

# Train & log with MLflow
mlflow.set_tracking_uri("../mlruns/xgb_seq2seq")

if mlflow.active_run(): mlflow.end_run()
mlflow.set_experiment("lstm_seq2seq_forecast")
with mlflow.start_run():
    # Log params
    mlflow.log_param("store_id", store_id)
    mlflow.log_param("item_id", item_id)
    mlflow.log_param("n_input", n_input)
    mlflow.log_param("n_output", n_output)
    # Train
    model.fit(
        [X_train, dec_in_train],
        y_train[...,None],
        validation_split=0.1,
        epochs=100,
        batch_size=32,
        callbacks=[early, reduce_lr],
        verbose=1
    )
    # Predict
    y_pred = model.predict([X_test, dec_in_test], verbose=0)
    y_pred = np.maximum(y_pred, 0)
    # Inverse transform
    y_pred_inv = scaler.inverse_transform(y_pred[:,:,0])
    y_test_inv = scaler.inverse_transform(y_test)
    # Metrics
    mae = mean_absolute_error(y_test_inv, y_pred_inv)
    r2  = r2_score(y_test_inv.flatten(), y_pred_inv.flatten())
    mlflow.log_metric("mae", mae)
    mlflow.log_metric("r2_score", r2)
    # Signature & Input-Example
    n_ex = min(5, len(X_test))
    input_example = {
        "encoder_input": X_test[:n_ex],
        "decoder_input": dec_in_test[:n_ex]
    }
    preds_example = y_pred[:n_ex]
    signature = infer_signature(input_example, preds_example)
    mlflow.keras.log_model(
        model,
        artifact_path="lstm_seq2seq_model",
        signature=signature
        # , input_example=input_example
    )
    print(f"✅ Seq2Seq LSTM trained | MAE: {mae:.2f}, R²: {r2:.3f}")

# Save artifacts
os.makedirs("../model/lstm/archive", exist_ok=True)
model.save(f"../model/lstm/archive/lstm_seq2seq_store{store_id}_item{item_id}.keras")

os.makedirs("../model/scaler/archive", exist_ok=True)
joblib.dump(scaler, f"../model/scaler/archive/scaler_seq2seq_store{store_id}_item{item_id}.pkl")


📌 Training model for store 24 and item 220435


2025-05-26 12:15:42.628014: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2025-05-26 12:15:42.628257: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2025-05-26 12:15:42.628276: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2025-05-26 12:15:42.628597: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-05-26 12:15:42.629077: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/100


2025-05-26 12:15:44.230204: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
2025-05-26 12:15:44.237729: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 168ms/step - loss: 0.5207 - val_loss: 0.5018 - learning_rate: 0.0010
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - loss: 0.5199 - val_loss: 0.4984 - learning_rate: 0.0010
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 0.4614 - val_loss: 0.1657 - learning_rate: 0.0010
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - loss: 0.1685 - val_loss: 0.1310 - learning_rate: 0.0010
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - loss: 0.1317 - val_loss: 0.1294 - learning_rate: 0.0010
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - loss: 0.1263 - val_loss: 0.1300 - learning_rate: 0.0010
Epoch 7/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 0.1185 - val_loss: 0.1123 - learning_rate: 0.0010
Epoch 8/100
[1m6

['../model/scaler/archive/scaler_seq2seq_store24_item220435.pkl']

In [None]:
import os
import joblib
import mlflow
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import app.config as cfg

# --- Konfiguration und Pfade ---
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(project_root, cfg.PREPARED_DATA_FILE)
model_dir = Path(project_root, cfg.LSTM_ARCHIVE_DIR)
scaler_dir = Path(project_root, cfg.SCALER_ARCHIVE_DIR)
mlflow_dir = os.path.join(project_root, cfg.MLFLOW_LSTM_SEQ)



# --- Setup für MLflow ---
os.makedirs(mlflow_dir, exist_ok=True)
mlflow.set_tracking_uri(f"file://{mlflow_dir}")
mlflow.set_experiment("lstm_seq2seq_forecast_global")

for store_id, item_id in store_item_pairs:
    print(f"📦 Training for Store: {store_id} | Item: {item_id}")
    
    # --- Daten vorbereiten ---
    df = pd.read_csv(data_path, parse_dates=["date"])
    df = df[df['unit_sales'] >= 0]
    df = df[(df['store_nbr'] == store_id) & (df['item_nbr'] == item_id)].sort_values('date')

    if len(df) < 3 * cfg.SEQ_LEN:
        print(f"❌ Not enough data for Store {store_id} Item {item_id}")
        continue

    cutoff = pd.to_datetime(cfg.CUTOFF_DATE)
    train_df = df[df['date'] <= cutoff]
    scaler = MinMaxScaler()
    df['unit_sales_scaled'] = scaler.fit_transform(df[['unit_sales']])

    # --- Sequenz-Erstellung ---
    def make_seq2seq_data(series, n_in, n_out):
        X, y = [], []
        for i in range(len(series) - n_in - n_out):
            X.append(series[i:i + n_in])
            y.append(series[i + n_in:i + n_in + n_out])
        return np.array(X), np.array(y)

    series = df['unit_sales_scaled'].values
    X_all, y_all = make_seq2seq_data(series, cfg.SEQ_LEN, cfg.SEQ_LEN)
    dates_all = df['date'].iloc[cfg.SEQ_LEN * 2:].values

    # --- Train/Test Split ---
    split_idx = np.sum(pd.to_datetime(dates_all) <= cutoff)
    X_train, X_test = X_all[:split_idx], X_all[split_idx:]
    y_train, y_test = y_all[:split_idx], y_all[split_idx:]

    # --- Decoder Inputs ---
    X_train = X_train[..., np.newaxis]
    X_test = X_test[..., np.newaxis]
    dec_in_train = np.zeros((len(X_train), cfg.SEQ_LEN, 1))
    dec_in_train[:, 1:, :] = y_train[:, :-1][..., None]
    dec_in_test = np.zeros((len(X_test), cfg.SEQ_LEN, 1))

    # --- Modell ---
    def build_seq2seq_model(n_in, n_out):
        enc_inputs = Input(shape=(n_in, 1), name="encoder_input")
        e = LSTM(64, return_sequences=True)(enc_inputs)
        _, state_h, state_c = LSTM(64, return_state=True)(e)
        dec_inputs = Input(shape=(n_out, 1), name="decoder_input")
        d = LSTM(64, return_sequences=True)(dec_inputs, initial_state=[state_h, state_c])
        d = LSTM(64, return_sequences=True)(d)
        outputs = Dense(1, activation="relu")(d)
        model = Model([enc_inputs, dec_inputs], outputs)
        model.compile(optimizer=Adam(1e-3), loss="mae")
        return model

    model = build_seq2seq_model(cfg.SEQ_LEN, cfg.SEQ_LEN)

    early = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3)

    if mlflow.active_run():
        mlflow.end_run()

    with mlflow.start_run():
        mlflow.log_params({
            "store_id": store_id,
            "item_id": item_id,
            "seq_len": cfg.SEQ_LEN,
            "lstm_units": 64,
            "batch_size": 32,
            "epochs": 100,
            "cutoff_date": cfg.CUTOFF_DATE.strftime('%Y-%m-%d'),
            "model_type": "lstm_seq2seq"
        })

        # --- Training ---
        model.fit(
            [X_train, dec_in_train],
            y_train[..., None],
            validation_split=0.1,
            epochs=100,
            batch_size=32,
            callbacks=[early, reduce_lr],
            verbose=1
        )

        # --- Evaluation ---
        y_pred = model.predict([X_test, dec_in_test], verbose=0)
        y_pred = np.maximum(y_pred, 0)
        y_pred_inv = scaler.inverse_transform(y_pred[:, :, 0])
        y_test_inv = scaler.inverse_transform(y_test)

        mae = mean_absolute_error(y_test_inv, y_pred_inv)
        r2 = r2_score(y_test_inv.flatten(), y_pred_inv.flatten())

        mlflow.log_metrics({
            "mae": float(mae),
            "r2_score": float(r2)
        })

        # --- Artefakte speichern ---
        model_name = f"lstm_seq2seq_store{store_id}_item{item_id}.keras"
        scaler_name = f"scaler_seq2seq_store{store_id}_item{item_id}.pkl"
        model_path = model_dir / model_name
        scaler_path = scaler_dir / scaler_name

        model_dir.mkdir(parents=True, exist_ok=True)
        scaler_dir.mkdir(parents=True, exist_ok=True)

        model.save(model_path)
        joblib.dump(scaler, scaler_path)

        mlflow.log_artifact(str(model_path))
        mlflow.log_artifact(str(scaler_path))

        # --- Plot Forecast vs Actuals ---
        plt.figure(figsize=(10, 4))
        sample_size = min(300, len(y_test_inv))
        plt.plot(y_test_inv[:sample_size], label="Actual", linewidth=2)
        plt.plot(y_pred_inv[:sample_size], label="Forecast", linestyle="--")
        plt.title(f"Forecast vs. Actual for Store {store_id} Item {item_id}")
        plt.xlabel("Time Step")
        plt.ylabel("Unit Sales")
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plot_file = f"forecast_store{store_id}_item{item_id}.png"
        plt.savefig(plot_file)
        mlflow.log_artifact(plot_file)
        plt.close()

        print(f"✅ Finished: Store {store_id} | Item {item_id} | MAE: {mae:.2f} | R²: {r2:.3f}")

if mlflow.active_run():
    mlflow.end_run()


NameError: name 'store_item_pairs' is not defined

In [None]:
# --- Imports ---
import os
import pandas as pd
import numpy as np
import joblib
import mlflow
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from mlflow.models.signature import infer_signature
import app.config as cfg

# --- Parameter ---
n_steps = cfg.SEQ_LEN
EPOCHS = 6
BATCH_SIZE = 16
DROPOUT = 0.2
UNITS = 8
LEARNING_RATE = 3e-2

# --- Daten laden ---
# --- Konfiguration und Pfade ---
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(project_root, cfg.PREPARED_DATA_FILE)
model_dir = Path(project_root, cfg.LSTM_ARCHIVE_DIR)
scaler_dir = Path(project_root, cfg.SCALER_ARCHIVE_DIR)
mlflow_dir = os.path.join(project_root, cfg.MLFLOW_LSTM_SEQ)
df_train = pd.read_csv(data_path, parse_dates=["date"])
df_train = df_train[df_train["unit_sales"] >= 0]

# --- Cutoff ---
cutoff_date = cfg.CUTOFF_DATE

# --- Statistische Auswahl der besten Kombination ---
df_2013 = df_train[df_train['date'].dt.year == 2013]
stats = (
    df_2013.groupby(['store_nbr', 'item_nbr'])['unit_sales']
    .agg(count_days='count', mean_sales='mean', std_sales='std', positive_days=lambda x: (x>0).sum())
    .reset_index()
)
stats = stats[stats['count_days'] >= 300]
stats['score'] = stats['mean_sales'] / (stats['std_sales'] + 1e-5)

# Beste Kombination wählen
best_combo = stats.sort_values('score', ascending=False).iloc[0]
store_id = int(best_combo['store_nbr'])
item_id = int(best_combo['item_nbr'])

print(f"📌 Training model for store {store_id} and item {item_id}")

# --- Store-Item-Subset vorbereiten ---
df = (
    df_train
    .loc[(df_train.store_nbr == store_id) & (df_train.item_nbr == item_id)]
    .sort_values("date")
    .set_index("date")[["unit_sales"]]
    .astype(float)
)

# --- Skalierung ---
scaler = MinMaxScaler()
df["unit_sales_scaled"] = scaler.fit_transform(df[["unit_sales"]])
series = df["unit_sales_scaled"].values

# --- Sequenzen ---
def create_sequences(data, n_steps):
    X, y = [], []
    for i in range(n_steps, len(data)):
        X.append(data[i-n_steps:i])
        y.append(data[i])
    return np.array(X), np.array(y)

X, y = create_sequences(series, n_steps)
dates = df.index[n_steps:]
split_point = np.sum(dates <= cutoff_date)

X_train, X_test = X[:split_point], X[split_point:]
y_train, y_test = y[:split_point], y[split_point:]

X_train = X_train.reshape((-1, n_steps, 1))
X_test  = X_test.reshape((-1, n_steps, 1))

# --- Modell definieren ---
model = Sequential([
    Input(shape=(n_steps, 1)),
    LSTM(UNITS, activation="relu"),
    Dropout(DROPOUT),
    Dense(1),
])
model.compile(optimizer=Adam(learning_rate=LEARNING_RATE), loss="mae")

# --- MLflow Setup ---
mlflow.set_tracking_uri(f"file://{os.path.join(project_root, cfg.MLFLOW_LSTM_SEQ)}")
mlflow.set_experiment("lstm_unit_sales_forecast")

if mlflow.active_run():
    mlflow.end_run()

with mlflow.start_run():
    # Parameter
    mlflow.log_params({
        "store_id": store_id,
        "item_id": item_id,
        "n_steps": n_steps,
        "lstm_units": UNITS,
        "dropout_rate": DROPOUT,
        "optimizer": "adam",
        "learning_rate": LEARNING_RATE
    })

    # --- Training ---
    history = model.fit(
        X_train, y_train,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_split=0.1,
        verbose=0
    )

    # --- Evaluation ---
    y_pred = model.predict(X_test, verbose=0)
    y_pred_inv = scaler.inverse_transform(y_pred)
    y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))

    mae = mean_absolute_error(y_test_inv, y_pred_inv)
    r2 = r2_score(y_test_inv, y_pred_inv)

    mlflow.log_metric("mae", mae)
    mlflow.log_metric("r2_score", r2)

    # --- Signature ---
    X_ex = X_train[:3]
    y_ex = model.predict(X_ex)
    sig = infer_signature(X_ex, y_ex)

    mlflow.keras.log_model(model, artifact_path="lstm_model", signature=sig)

    print(f"✅ Model trained | MAE: {mae:.2f}, R²: {r2:.3f}")

# --- Speichern ---
model_dir = Path(project_root) / cfg.LSTM_ARCHIVE_DIR
scaler_dir = Path(project_root) / cfg.SCALER_ARCHIVE_DIR
model_dir.mkdir(parents=True, exist_ok=True)
scaler_dir.mkdir(parents=True, exist_ok=True)

model_path = model_dir / f"lstm_default_store{store_id}_item{item_id}.keras"
scaler_path = scaler_dir / f"lstm_default_scaler_store{store_id}_item{item_id}.pkl"

model.save(model_path)
joblib.dump(scaler, scaler_path)

print(f"✅ Saved model to {model_path}")
print(f"✅ Saved scaler to {scaler_path}")


📌 Training model for store 24 and item 220435


2025-05-26 13:38:22.254124: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.
2025-05-26 13:40:22.213693: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.
2025-05-26 13:50:17.309179: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 664ms/step
✅ Model trained | MAE: 8.68, R²: -0.104
✅ Saved model to /Users/jennypetschke/VS_Code_Projects/retail_demand_analysis/model/lstm/archive/lstm_default_store24_item220435.keras
✅ Saved scaler to /Users/jennypetschke/VS_Code_Projects/retail_demand_analysis/model/scaler/archive/lstm_default_scaler_store24_item220435.pkl


In [None]:
import matplotlib.pyplot as plt

# --- Plot erstellen ---
plt.figure(figsize=(12, 4))
plt.plot(dates[split_point:], y_test_inv.flatten(), label='Actual', linewidth=2)
plt.plot(dates[split_point:], y_pred_inv.flatten(), 'r--', label='Forecast', linewidth=2)
plt.title(f"Forecast vs Actual – Store {store_id}, Item {item_id}")
plt.xlabel("Date")
plt.ylabel("Unit Sales")
plt.legend()
plt.grid(True)
plt.tight_layout()

# Plot speichern
plot_path = model_dir / f"forecast_plot_store{store_id}_item{item_id}.png"
plt.savefig(plot_path)
plt.close()

# Plot auch in MLflow loggen
mlflow.log_artifact(str(plot_path))

print(f"📈 Forecast-Plot gespeichert unter: {plot_path}")


📈 Forecast-Plot gespeichert unter: /Users/jennypetschke/VS_Code_Projects/retail_demand_analysis/model/lstm/archive/forecast_plot_store24_item220435.png


In [None]:
import numpy as np
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.wrappers.scikit_learn import KerasRegressor
from keras.optimizers import Adam
import app.config as cfg

# --- Parameter ---
SEQ_LEN = cfg.SEQ_LEN

# --- Beispielhafte Annahme: Du hast bereits X_raw und y_raw vorbereitet ---
# X_raw shape: (samples, timesteps=SEQ_LEN, features)
# y_raw shape: (samples,) oder (samples, 1)

# --- Skalierung ---
scaler = MinMaxScaler()
X_2D = X_raw.reshape(-1, X_raw.shape[-1])  # für Skalierung in 2D
X_scaled_2D = scaler.fit_transform(X_2D)
X_scaled = X_scaled_2D.reshape(X_raw.shape)

# --- Split in Train/Test ---
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_raw, test_size=0.2, random_state=42
)

# --- Modellfunktion ---
def create_model(units=50, dropout_rate=0.2, learning_rate=0.001):
    model = Sequential()
    model.add(LSTM(units, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    return model

# --- Wrapper für Scikit-Learn ---
model = KerasRegressor(build_fn=create_model, verbose=0)

# --- Hyperparameter-Raum ---
param_grid = {
    'units': [50, 100, 150],
    'dropout_rate': [0.1, 0.2, 0.3],
    'batch_size': [16, 32, 64],
    'epochs': [50, 100],
    'learning_rate': [0.001, 0.01],
}

# --- Randomisierte Suche ---
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=10,
    cv=3,
    verbose=2,
    scoring='neg_mean_squared_error',
    random_state=42
)

# --- Training ---
random_search.fit(X_train, y_train)

# --- Beste Parameter ---
best_params_lstm = random_search.best_params_
print("✅ Best Parameters for LSTM:", best_params_lstm)

model = KerasRegressor(
    model=create_model,
    optimizer=Adam,
    verbose=0
)



ModuleNotFoundError: No module named 'keras.wrappers.scikit_learn'

In [None]:
import os
import numpy as np
import pandas as pd
import joblib
import mlflow
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import app.config as cfg

# --- Setup ---
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(project_root, cfg.PREPARED_DATA_FILE)
model_dir = Path(project_root, cfg.LSTM_ARCHIVE_DIR)
scaler_dir = Path(project_root, cfg.SCALER_ARCHIVE_DIR)
mlflow_dir = os.path.join(project_root, cfg.MLFLOW_LSTM_SEQ)

df = pd.read_csv(data_path, parse_dates=["date"])
df = df[df["unit_sales"] >= 0]

# --- MLflow Setup ---
mlflow.set_tracking_uri(f"file://{mlflow_dir}")
mlflow.set_experiment("LSTM_per_Store")

stores = df["store_nbr"].unique()
SEQ_LEN = cfg.SEQ_LEN
BATCH_SIZE = 64

for store_id in stores:
    print(f"🔄 Training Store: {store_id}")
    store_df = df[df["store_nbr"] == store_id].sort_values("date").copy()

    if len(store_df) < SEQ_LEN * 2:
        print(f"⚠️  Store {store_id} übersprungen (zu wenig Daten)")
        continue

    X_raw = store_df[cfg.FEATURE_COLS].values.astype(np.float32)
    y_raw = store_df[cfg.TARGET].values.astype(np.float32)

    # Skaliere nur die Features
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X_raw)

    # Sequenzen
    X_seq, y_seq = [], []
    for i in range(len(X_scaled) - SEQ_LEN):
        X_seq.append(X_scaled[i:i + SEQ_LEN])
        y_seq.append(y_raw[i + SEQ_LEN])
    X_seq = np.array(X_seq)
    y_seq = np.array(y_seq)

    # Split
    X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

    # Modell
    model = Sequential()
    model.add(LSTM(32, activation='relu', input_shape=(SEQ_LEN, X_seq.shape[2])))
    model.add(Dropout(0.2))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

    # Training
    with mlflow.start_run(run_name=f"store_{store_id}"):
        early = EarlyStopping(monitor="loss", patience=3, restore_best_weights=True)
        history = model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=10, callbacks=[early], verbose=0)

        y_pred = model.predict(X_test).flatten()
        mse = mean_squared_error(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        # Log
        mlflow.log_params({
            "store_nbr": store_id,
            "units": 32,
            "dropout": 0.2,
            "seq_len": SEQ_LEN,
            "batch_size": BATCH_SIZE
        })
        mlflow.log_metrics({"mse": mse, "mae": mae, "r2": r2})

        # Artefakte
        plot_file = f"forecast_store{store_id}.png"
        plt.figure(figsize=(10, 4))
        plt.plot(y_test[:100], label="Actual")
        plt.plot(y_pred[:100], label="Forecast", linestyle="--")
        plt.title(f"Store {store_id}: Forecast vs Actual")
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.savefig(plot_file)
        mlflow.log_artifact(plot_file)
        plt.close()

        # Speicherorte
        model_path = model_dir / f"lstm_optimized_store{store_id}.keras"
        scaler_path = scaler_dir / f"scaler_optimized_store{store_id}.pkl"
        model.save(model_path)
        joblib.dump(scaler, scaler_path)
        mlflow.log_artifact(str(model_path))
        mlflow.log_artifact(str(scaler_path))

        print(f"✅ Store {store_id} fertig | MSE: {mse:.4f} | R²: {r2:.3f}")

if mlflow.active_run():
    mlflow.end_run()


2025/05/26 22:28:09 INFO mlflow.tracking.fluent: Experiment with name 'LSTM_per_Store' does not exist. Creating a new experiment.


🔄 Training Store: 24


2025-05-26 22:28:24.601866: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2025-05-26 22:28:24.603628: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2025-05-26 22:28:24.604082: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2025-05-26 22:28:24.605776: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-05-26 22:28:24.606790: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
  super().__init__(**kwargs)
2025-05-26 22:28:40.601625: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
202

KeyboardInterrupt: 

In [None]:
''''import os
import time
import numpy as np
import pandas as pd
import joblib
import mlflow
import mlflow.keras
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.utils import Sequence
import app.config as cfg

# --- Setup ---
start_time = time.time()

project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(project_root, cfg.PREPARED_DATA_FILE)
model_dir = Path(project_root, cfg.LSTM_ARCHIVE_DIR)
scaler_dir = Path(project_root, cfg.SCALER_ARCHIVE_DIR)
mlflow_dir = os.path.join(project_root, cfg.MLFLOW_LSTM_SEQ)

# --- Daten laden ---
df = pd.read_csv(data_path, parse_dates=["date"])
df = df[df["unit_sales"] >= 0].copy()
df = df.tail(500_000)  # Speicheroptimierung für große Daten

X_all = df[cfg.FEATURE_COLS].values
y_all = df[cfg.TARGET].values

# --- Skalieren ---
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X_all)

# --- Parameter ---
SEQ_LEN = cfg.SEQ_LEN
BATCH_SIZE = 32

# --- Generator Klasse ---
class TimeseriesGenerator(Sequence):
    def __init__(self, X, y, seq_len, batch_size):
        self.X = X
        self.y = y
        self.seq_len = seq_len
        self.batch_size = batch_size

    def __len__(self):
        return (len(self.X) - self.seq_len - 1) // self.batch_size

    def __getitem__(self, idx):
        X_batch = []
        y_batch = []
        start = idx * self.batch_size
        if start + self.batch_size + self.seq_len >= len(self.X):
            start = len(self.X) - self.batch_size - self.seq_len - 1
        for i in range(start, start + self.batch_size):
            X_batch.append(self.X[i:i + self.seq_len])
            y_batch.append(self.y[i + self.seq_len])
        return np.array(X_batch), np.array(y_batch)

# --- Generator Instanzen ---
split_idx = int(len(X_scaled) * 0.8)
train_gen = TimeseriesGenerator(X_scaled[:split_idx], y_all[:split_idx], SEQ_LEN, BATCH_SIZE)
test_gen = TimeseriesGenerator(X_scaled[split_idx:], y_all[split_idx:], SEQ_LEN, BATCH_SIZE)

# --- Modell ---
def create_model(units=32, dropout_rate=0.2):
    model = Sequential()
    model.add(LSTM(units, activation='relu', input_shape=(SEQ_LEN, X_scaled.shape[1])))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

model = create_model()

# --- Callbacks ---
model_ckpt = 'temp_lstm_model.keras'
callbacks = [
    EarlyStopping(monitor='loss', patience=3, restore_best_weights=True),
    ModelCheckpoint(model_ckpt, monitor='loss', save_best_only=True)
]

# --- MLflow Setup ---
os.makedirs(mlflow_dir, exist_ok=True)
mlflow.set_tracking_uri(f"file://{mlflow_dir}")
mlflow.set_experiment("LSTM_Global_Optimized")

if mlflow.active_run():
    mlflow.end_run()

with mlflow.start_run():
    history = model.fit(train_gen, epochs=5, callbacks=callbacks, verbose=1, shuffle=False)

    # Evaluate
    y_preds = model.predict(test_gen).flatten()
    y_true = np.array([y_all[split_idx + SEQ_LEN + i] for i in range(len(y_preds))])

    mse = mean_squared_error(y_true, y_preds)
    mae = mean_absolute_error(y_true, y_preds)
    r2 = r2_score(y_true, y_preds)
    duration = time.time() - start_time

    mlflow.log_metrics({"mse": mse, "mae": mae, "r2_score": r2, "training_time_sec": duration})
    mlflow.log_params({"units": 32, "dropout_rate": 0.2, "seq_len": SEQ_LEN, "batch_size": BATCH_SIZE})

    # Plot
    plt.figure(figsize=(10, 4))
    plt.plot(y_true[:100], label="Actual")
    plt.plot(y_preds[:100], label="Forecast", linestyle="--")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plot_path = "lstm_forecast_plot.png"
    plt.savefig(plot_path)
    mlflow.log_artifact(plot_path)

    # Speichern
    final_model_path = model_dir / cfg.LSTM_OPTIMAL_MODEL
    final_scaler_path = scaler_dir / cfg.LSTM_OPTIMAL_SCALER

    model_dir.mkdir(parents=True, exist_ok=True)
    scaler_dir.mkdir(parents=True, exist_ok=True)

    model.save(final_model_path)
    joblib.dump(scaler, final_scaler_path)

    mlflow.log_artifact(str(final_model_path))
    mlflow.log_artifact(str(final_scaler_path))

    print(f"\u2705 Saved model to {final_model_path}")
    print(f"\u2705 Saved scaler to {final_scaler_path}")
    print(f"\ud83d\udcc9 MSE: {mse:.4f} | MAE: {mae:.2f} | R²: {r2:.4f}")

if mlflow.active_run():
    mlflow.end_run()''''

Epoch 1/5


  super().__init__(**kwargs)
  self._warn_if_super_not_called()
2025-05-26 21:16:26.782420: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[1m  414/12497[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m13:43:05[0m 4s/step - loss: 3495715072.0000

KeyboardInterrupt: 

In [None]:
''''import os
import time
import numpy as np
import pandas as pd
import joblib
import mlflow
import mlflow.keras
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
import app.config as cfg

del X_scaled, X_all, y_all
import gc
gc.collect()

# --- Setup ---
start_time = time.time()
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(project_root, cfg.PREPARED_DATA_FILE)
model_dir = Path(project_root, cfg.LSTM_ARCHIVE_DIR)
scaler_dir = Path(project_root, cfg.SCALER_ARCHIVE_DIR)
mlflow_dir = os.path.join(project_root, cfg.MLFLOW_LSTM_SEQ)

# --- Load Data ---
df = pd.read_csv(data_path, parse_dates=["date"])
df = df[df["unit_sales"] >= 0].copy()

X_all = df[cfg.FEATURE_COLS].values.astype(np.float32)
y_all = df[cfg.TARGET].values.astype(np.float32)

# --- Scaling ---
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X_all)

# --- Parameters ---
SEQ_LEN = cfg.SEQ_LEN
BATCH_SIZE = 256
UNITS = 32
DROPOUT = 0.2
EPOCHS = 5

# --- Train/Test Split ---
split_idx = int(len(X_scaled) * 0.8)
train_gen = TimeseriesGenerator(X_scaled[:split_idx], y_all[:split_idx], length=SEQ_LEN, batch_size=BATCH_SIZE)
test_gen = TimeseriesGenerator(X_scaled[split_idx:], y_all[split_idx:], length=SEQ_LEN, batch_size=BATCH_SIZE)

# --- Model Definition ---
def create_model():
    model = Sequential()
    model.add(LSTM(UNITS, activation='relu', input_shape=(SEQ_LEN, X_scaled.shape[1])))
    model.add(Dropout(DROPOUT))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

model = create_model()

# --- Callbacks ---
callbacks = [
    EarlyStopping(monitor='loss', patience=2, restore_best_weights=True)
]

# --- MLflow Setup ---
os.makedirs(mlflow_dir, exist_ok=True)
mlflow.set_tracking_uri(f"file://{mlflow_dir}")
mlflow.set_experiment("LSTM_Global_Optimized")

if mlflow.active_run():
    mlflow.end_run()

with mlflow.start_run():
    history = model.fit(train_gen, epochs=EPOCHS, callbacks=callbacks, verbose=1)

    y_preds = model.predict(test_gen).flatten()
    y_true = np.array([y_all[split_idx + SEQ_LEN + i] for i in range(len(y_preds))])

    mse = mean_squared_error(y_true, y_preds)
    mae = mean_absolute_error(y_true, y_preds)
    r2 = r2_score(y_true, y_preds)
    duration = time.time() - start_time

    mlflow.log_metrics({"mse": mse, "mae": mae, "r2_score": r2, "training_time_sec": duration})
    mlflow.log_params({"units": UNITS, "dropout_rate": DROPOUT, "seq_len": SEQ_LEN, "batch_size": BATCH_SIZE})

    # --- Plot ---
    plt.figure(figsize=(10, 4))
    plt.plot(y_true[:100], label="Actual")
    plt.plot(y_preds[:100], label="Forecast", linestyle="--")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plot_path = "lstm_forecast_plot.png"
    plt.savefig(plot_path)
    mlflow.log_artifact(plot_path)
    plt.close()

    # --- Save model & scaler ---
    model_path = model_dir / cfg.LSTM_OPTIMAL_MODEL
    scaler_path = scaler_dir / cfg.LSTM_OPTIMAL_SCALER
    model_dir.mkdir(parents=True, exist_ok=True)
    scaler_dir.mkdir(parents=True, exist_ok=True)
    model.save(model_path)
    joblib.dump(scaler, scaler_path)
    mlflow.log_artifact(str(model_path))
    mlflow.log_artifact(str(scaler_path))

    print(f"\n✅ Model saved: {model_path}")
    print(f"✅ Scaler saved: {scaler_path}")
    print(f"📉 MSE: {mse:.4f} | MAE: {mae:.2f} | R²: {r2:.4f}")

    del X_scaled, X_all, y_all
    import gc
    gc.collect()

if mlflow.active_run():
    mlflow.end_run()
''''

2025-05-26 22:09:06.319567: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2025-05-26 22:09:06.320075: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2025-05-26 22:09:06.320471: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2025-05-26 22:09:06.320546: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-05-26 22:09:06.320639: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
  super().__init__(**kwargs)


Epoch 1/5


  self._warn_if_super_not_called()
2025-05-26 22:09:07.384323: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
2025-05-26 22:09:07.389995: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[1m   13/29493[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m64:21:54[0m 8s/step - loss: 42.0595

KeyboardInterrupt: 

In [None]:
import os
import time
import numpy as np
import pandas as pd
import joblib
import mlflow
import mlflow.keras
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
import app.config as cfg

# --- Setup ---
start_time = time.time()
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(project_root, cfg.PREPARED_DATA_FILE)
model_dir = Path(project_root, cfg.LSTM_ARCHIVE_DIR)
scaler_dir = Path(project_root, cfg.SCALER_ARCHIVE_DIR)
mlflow_dir = os.path.join(project_root, cfg.MLFLOW_LSTM_SEQ)

os.makedirs(mlflow_dir, exist_ok=True)
mlflow.set_tracking_uri(f"file://{mlflow_dir}")
mlflow.set_experiment("LSTM_Global_Reduzierte_Features")

# --- Daten laden ---
df = pd.read_csv(data_path, parse_dates=["date"])
df = df[df["unit_sales"] >= 0]

# --- Feature-Auswahl ---
FEATURES = [
    'store_nbr', 'item_nbr', 'onpromotion', 'month',
    'unit_sales_7d_avg', 'family_code',
    'lag_1', 'lag_7', 'rolling_mean_7'
]
TARGET = cfg.TARGET
SEQ_LEN = cfg.SEQ_LEN
BATCH_SIZE = 32
EPOCHS = 5

# --- Features & Zielwerte ---
X_all = df[FEATURES].astype(np.float32).values
y_all = df[TARGET].astype(np.float32).values

# --- Skalierung ---
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X_all)

# --- Sequenzbildung ---
X_seq, y_seq = [], []
for i in range(len(X_scaled) - SEQ_LEN):
    X_seq.append(X_scaled[i:i + SEQ_LEN])
    y_seq.append(y_all[i + SEQ_LEN])
X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

# --- Train/Test Split ---
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

# --- Modell bauen ---
def build_model(input_shape, units=64, dropout=0.2):
    model = Sequential()
    model.add(LSTM(units, activation='relu', input_shape=input_shape))
    model.add(Dropout(dropout))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

model = build_model((SEQ_LEN, X_seq.shape[2]))

# --- Callbacks ---
callbacks = [
    EarlyStopping(monitor='loss', patience=2, restore_best_weights=True),
    ModelCheckpoint("temp_global_model.keras", monitor='loss', save_best_only=True)
]

# --- MLflow Run ---
if mlflow.active_run():
    mlflow.end_run()

with mlflow.start_run():
    model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=1, callbacks=callbacks)

    y_pred = model.predict(X_test).flatten()
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    duration = time.time() - start_time

    mlflow.log_params({
        "model_type": "global_lstm",
        "units": 32,
        "dropout": 0.2,
        "seq_len": SEQ_LEN,
        "batch_size": BATCH_SIZE,
        "epochs": EPOCHS
    })
    mlflow.log_metrics({
        "mse": mse,
        "mae": mae,
        "r2": r2,
        "train_time_sec": duration
    })

    # --- Plot speichern ---
    plt.figure(figsize=(10, 4))
    plt.plot(y_test[:100], label="Actual")
    plt.plot(y_pred[:100], label="Forecast", linestyle="--")
    plt.title("Forecast vs. Actual (Global LSTM)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plot_path = "forecast_lstm_global.png"
    plt.savefig(plot_path)
    mlflow.log_artifact(plot_path)
    plt.close()

    # --- Modell und Scaler speichern ---
    model_path = model_dir / cfg.LSTM_GLOBAL_MODEL
    scaler_path = scaler_dir / cfg.LSTM_GLOBAL_SCALER
    model_dir.mkdir(parents=True, exist_ok=True)
    scaler_dir.mkdir(parents=True, exist_ok=True)

    model.save(model_path)
    joblib.dump(scaler, scaler_path)

    mlflow.log_artifact(str(model_path))
    mlflow.log_artifact(str(scaler_path))

    print(f"\n✅ Modell gespeichert: {model_path}")
    print(f"✅ Scaler gespeichert: {scaler_path}")
    print(f"📉 MSE: {mse:.4f} | MAE: {mae:.2f} | R²: {r2:.4f}")

if mlflow.active_run():
    mlflow.end_run()


Neu´

In [None]:
import pandas as pd
df = pd.read_csv("data/preprocessed_data/train_guayas_model_ready.csv")
df.to_parquet("data/preprocessed_data/train_guayas_model_ready.parquet")