# Rossmann Store Sales — Phase 5 




------------------------------------------------------------------------

> **How to use**
>
> 1.  Run each cell sequentially. Do not skip cells. Output and models
>     are saved to `/kaggle/working/models_phase5`.
> 2.  If you run on Kaggle, the competition dataset path is
>     `/kaggle/input/competitions/rossmann-store-sales/`.
> 3.  If you run locally, change `DATA_BASE` constant to point to your
>     `dataset/` folder.

------------------------------------------------------------------------

In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import numpy as np
base_path = "/kaggle/input/competitions/rossmann-store-sales/"

# Load datasets 
train = pd.read_csv(f'{base_path}train.csv', parse_dates=['Date'], low_memory=False)
store = pd.read_csv(f'{base_path}store.csv')
test = pd.read_csv(f'{base_path}test.csv')

df = pd.merge(train, store, on='Store', how='left')

print(df.info())
print(df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1017209 entries, 0 to 1017208
Data columns (total 18 columns):
 #   Column                     Non-Null Count    Dtype         
---  ------                     --------------    -----         
 0   Store                      1017209 non-null  int64         
 1   DayOfWeek                  1017209 non-null  int64         
 2   Date                       1017209 non-null  datetime64[ns]
 3   Sales                      1017209 non-null  int64         
 4   Customers                  1017209 non-null  int64         
 5   Open                       1017209 non-null  int64         
 6   Promo                      1017209 non-null  int64         
 7   StateHoliday               1017209 non-null  object        
 8   SchoolHoliday              1017209 non-null  int64         
 9   StoreType                  1017209 non-null  object        
 10  Assortment                 1017209 non-null  object        
 11  CompetitionDistance        1014567 no

## T

In [8]:

df['Date'] = pd.to_datetime(df['Date'])

df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day
df['DayOfWeek'] = df['Date'].dt.dayofweek
df['WeekOfYear'] = df['Date'].dt.isocalendar().week.astype(int)

df['StateHoliday'] = df['StateHoliday'].map({'0': 0, 0: 0, 'a': 1, 'b': 1, 'c': 1})
df['SchoolHoliday'] = df['SchoolHoliday'].astype(int)

df = df.sort_values(['Store', 'Date'])
df['SalesMovingAverage7'] = df.groupby('Store')['Sales'].transform(lambda x: x.rolling(window=7).mean())
df['SalesMovingAverage30'] = df.groupby('Store')['Sales'].transform(lambda x: x.rolling(window=30).mean())

df['SalesMovingAverage7'] = df['SalesMovingAverage7'].fillna(0)
df['SalesMovingAverage30'] = df['SalesMovingAverage30'].fillna(0)

df.head()

Unnamed: 0,Store,DayOfWeek,Date,Sales,Customers,Open,Promo,StateHoliday,SchoolHoliday,StoreType,...,Promo2,Promo2SinceWeek,Promo2SinceYear,PromoInterval,Year,Month,Day,WeekOfYear,SalesMovingAverage7,SalesMovingAverage30
1016095,1,1,2013-01-01,0,0,0,0,1,1,c,...,0,,,,2013,1,1,1,0.0,0.0
1014980,1,2,2013-01-02,5530,668,1,0,0,1,c,...,0,,,,2013,1,2,1,0.0,0.0
1013865,1,3,2013-01-03,4327,578,1,0,0,1,c,...,0,,,,2013,1,3,1,0.0,0.0
1012750,1,4,2013-01-04,4486,619,1,0,0,1,c,...,0,,,,2013,1,4,1,0.0,0.0
1011635,1,5,2013-01-05,4997,635,1,0,0,1,c,...,0,,,,2013,1,5,1,0.0,0.0


In [9]:
df = df.sort_values(['Store', 'Date'])

df['Sales_Lag1'] = df.groupby('Store')['Sales'].shift(1)

df['Sales_Mean7'] = df.groupby('Store')['Sales'].transform(lambda x: x.rolling(window=7).mean())

df['Sales_Std7'] = df.groupby('Store')['Sales'].transform(lambda x: x.rolling(window=7).std())

df['Sales_Lag1'] = df['Sales_Lag1'].fillna(0)
df['Sales_Mean7'] = df['Sales_Mean7'].fillna(0)
df['Sales_Std7'] = df['Sales_Std7'].fillna(0)

df[['Store', 'Date', 'Sales', 'Sales_Lag1', 'Sales_Mean7', 'Sales_Std7']].head(10)

Unnamed: 0,Store,Date,Sales,Sales_Lag1,Sales_Mean7,Sales_Std7
1016095,1,2013-01-01,0,0.0,0.0,0.0
1014980,1,2013-01-02,5530,0.0,0.0,0.0
1013865,1,2013-01-03,4327,5530.0,0.0,0.0
1012750,1,2013-01-04,4486,4327.0,0.0,0.0
1011635,1,2013-01-05,4997,4486.0,0.0,0.0
1010520,1,2013-01-06,0,4997.0,0.0,0.0
1009405,1,2013-01-07,7176,0.0,3788.0,2752.283961
1008290,1,2013-01-08,5580,7176.0,4585.142857,2231.018633
1007175,1,2013-01-09,5471,5580.0,4576.714286,2226.961885
1006060,1,2013-01-10,4892,5471.0,4657.428571,2226.641706


In [10]:
import numpy as np

df = df.sort_values(['Store', 'Date'])

df['Sales_Lag1'] = df.groupby('Store')['Sales'].shift(1)
df['Sales_Mean7'] = df.groupby('Store')['Sales'].transform(lambda x: x.rolling(window=7).mean())
df['Sales_Std7'] = df.groupby('Store')['Sales'].transform(lambda x: x.rolling(window=7).std())

df['sin_month'] = np.sin(2 * np.pi * df['Month'] / 12)
df['cos_month'] = np.cos(2 * np.pi * df['Month'] / 12)

df['IsHolidayNextDay'] = df['StateHoliday'].shift(-1).fillna(0).astype(int)
df['IsHolidayYesterday'] = df['StateHoliday'].shift(1).fillna(0).astype(int)

df['Sales_Lag1'] = df['Sales_Lag1'].fillna(0)
df['Sales_Mean7'] = df['Sales_Mean7'].fillna(0)
df['Sales_Std7'] = df['Sales_Std7'].fillna(0)

df[['Store', 'Date', 'Sales', 'Sales_Lag1', 'Sales_Mean7', 'Sales_Std7', 'sin_month', 'cos_month', 'IsHolidayNextDay']].head(10)

Unnamed: 0,Store,Date,Sales,Sales_Lag1,Sales_Mean7,Sales_Std7,sin_month,cos_month,IsHolidayNextDay
1016095,1,2013-01-01,0,0.0,0.0,0.0,0.5,0.866025,0
1014980,1,2013-01-02,5530,0.0,0.0,0.0,0.5,0.866025,0
1013865,1,2013-01-03,4327,5530.0,0.0,0.0,0.5,0.866025,0
1012750,1,2013-01-04,4486,4327.0,0.0,0.0,0.5,0.866025,0
1011635,1,2013-01-05,4997,4486.0,0.0,0.0,0.5,0.866025,0
1010520,1,2013-01-06,0,4997.0,0.0,0.0,0.5,0.866025,0
1009405,1,2013-01-07,7176,0.0,3788.0,2752.283961,0.5,0.866025,0
1008290,1,2013-01-08,5580,7176.0,4585.142857,2231.018633,0.5,0.866025,0
1007175,1,2013-01-09,5471,5580.0,4576.714286,2226.961885,0.5,0.866025,0
1006060,1,2013-01-10,4892,5471.0,4657.428571,2226.641706,0.5,0.866025,0


In [11]:
from sklearn.preprocessing import MinMaxScaler

df['CompetitionDistance'] = df['CompetitionDistance'].fillna(df['CompetitionDistance'].max())
df = df.fillna(0)

columns_to_scale = ['Sales', 'CompetitionDistance', 'Sales_Lag1', 'Sales_Mean7', 'Sales_Std7']

scaler = MinMaxScaler()
df[columns_to_scale] = scaler.fit_transform(df[columns_to_scale])

df[columns_to_scale].head()

Unnamed: 0,Sales,CompetitionDistance,Sales_Lag1,Sales_Mean7,Sales_Std7
1016095,0.0,0.016482,0.0,0.0,0.0
1014980,0.133089,0.016482,0.0,0.0,0.0
1013865,0.104137,0.016482,0.133089,0.0,0.0
1012750,0.107964,0.016482,0.104137,0.0,0.0
1011635,0.120262,0.016482,0.107964,0.0,0.0


In [12]:
df = df.sort_values('Date')

train_size = int(len(df) * 0.7)
val_size = int(len(df) * 0.15)

train_df = df.iloc[:train_size]
val_df = df.iloc[train_size : train_size + val_size]
test_df = df.iloc[train_size + val_size:]

print(f"Train dates: {train_df['Date'].min()} to {train_df['Date'].max()}")
print(f"Val dates: {val_df['Date'].min()} to {val_df['Date'].max()}")
print(f"Test dates: {test_df['Date'].min()} to {test_df['Date'].max()}")

Train dates: 2013-01-01 00:00:00 to 2014-10-19 00:00:00
Val dates: 2014-10-19 00:00:00 to 2015-03-17 00:00:00
Test dates: 2015-03-17 00:00:00 to 2015-07-31 00:00:00


In [4]:
import os
import gc
import math
from datetime import datetime, timedelta
from tqdm import tqdm

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, backend as K
import optuna
import shap
import matplotlib.pyplot as plt


2026-02-20 23:17:07.529149: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1771629427.778479      55 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1771629427.858630      55 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1771629428.467900      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1771629428.467951      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1771629428.467955      55 computation_placer.cc:177] computation placer alr

## Model Hyperparameters & Constants

SEQ_LEN = 56 and HORIZON = 7:  The model will look at exactly 56 days (8 weeks) of historical data to predict the next 7 days of sales.

N_ENSEMBLE = 3 and MC_DROPOUT_FORWARD_PASSES = 50: Instead of training one model, it trains an ensemble of 3 distinct models. During prediction, it will run the data through the network 50 times with "dropout" turned on to generate a  different possible outcomes (Monte Carlo Dropout).

QUANTILES = [0.1, 0.5, 0.9]: The model is configured to output the 10th percentile (pessimistic), 50th percentile (median/expected), and 90th percentile (optimistic) sales forecasts.





In [8]:
SEQ_LEN = 56               # context window length (days)
HORIZON = 7                # forecast horizon (days)
BATCH_SIZE = 256
EPOCHS = 30
PATIENCE = 5
N_ENSEMBLE = 3             # ensemble size for uncertainty
MC_DROPOUT_FORWARD_PASSES = 50
QUANTILES = [0.1, 0.5, 0.9]  # predicted quantiles
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)


## Data Loading & Preprocessing Functions

read_data: loads the train.csv, test.csv, and store.csv files into Pandas DataFrames.

preprocess_base:

Merges the store-specific characteristics (like assortment type or competition distance) into the daily sales records.

- It addresses missing data by filling any missing CompetitionDistance with the maximum known distance, and fills all other NaNs with 0.

add_time_features: Extracts standard calendar features (Day, Month, Year, Week) from the Date column.

expand_promo_interval: Checks if the current row's month matches any of the store's designated promo months and creates a binary IsPromoIntervalMonth flag.

In [26]:
def read_data(base_folder="dataset"):
    train = pd.read_csv(f'{base_path}train.csv', parse_dates=['Date'], low_memory=False)
    store = pd.read_csv(f'{base_path}store.csv')
    test = pd.read_csv(f'{base_path}test.csv')
    # ensure Date dtypes
    return train, test, store

def preprocess_base(train, test, store):
    train = train.merge(store, how="left", on="Store")
    test = test.merge(store, how="left", on="Store")
    def promo_months_to_list(x):
        if pd.isna(x): return []
        return [m.strip() for m in x.split(',')]
    store['PromoInterval_list'] = store['PromoInterval'].apply(promo_months_to_list)

    max_comp = train['CompetitionDistance'].max()
    train['CompetitionDistance'] = train['CompetitionDistance'].fillna(max_comp)
    test['CompetitionDistance'] = test['CompetitionDistance'].fillna(max_comp)
    train = train.fillna(0)
    test = test.fillna(0)
    return train, test

def add_time_features(df):
    df['Day'] = df['Date'].dt.day
    df['Month'] = df['Date'].dt.month
    df['Year'] = df['Date'].dt.year
    df['WeekOfYear'] = df['Date'].dt.isocalendar().week.astype(int)
    # cyclical month
    df['sin_month'] = np.sin(2 * np.pi * df['Month'] / 12)
    df['cos_month'] = np.cos(2 * np.pi * df['Month'] / 12)
    return df

def expand_promo_interval(df, store_df):

    promo_map = store_df.set_index('Store')['PromoInterval'].to_dict()
    def is_in_promo_interval(row):
        pi = promo_map.get(row['Store'], "")
        if not pi or pi != pi: return 0
        months = [m.strip() for m in pi.split(',')]
        monname = row['Date'].strftime('%b')
        return 1 if monname in months else 0
    df['IsPromoIntervalMonth'] = df.apply(is_in_promo_interval, axis=1)
    return df


## Feature Engineering 
- sort the dataframe by Store and Date 

- Sales_Lag1: Creates a column representing the exact sales from the previous day (shift(1)).

- Sales_Mean7, Sales_Std7, Sales_Mean28: Calculates the moving average and volatility (standard deviation) over the last 7 days and 28 days.

- CompetitionOpenMonths: Calculates exactly how many months a competing store has been open by subtracting the CompetitionOpenSince date from the current row's Year and Month.

In [17]:
def add_lag_features(df):
    df = df.sort_values(['Store', 'Date']).copy()
    
    df['Sales_Lag1'] = df.groupby('Store')['Sales'].shift(1).fillna(0)
    df['Sales_Mean7'] = df.groupby('Store')['Sales'].transform(lambda x: x.rolling(7, min_periods=1).mean()).fillna(0)
    df['Sales_Std7'] = df.groupby('Store')['Sales'].transform(lambda x: x.rolling(7, min_periods=1).std()).fillna(0)

    df['Sales_Mean28'] = df.groupby('Store')['Sales'].transform(lambda x: x.rolling(28, min_periods=1).mean()).fillna(0)

    df['CompetitionOpenSinceYear'] = df['CompetitionOpenSinceYear'].fillna(0)
    df['CompetitionOpenMonths'] = (df['Year'] - df['CompetitionOpenSinceYear']) * 12 + df['CompetitionOpenSinceMonth'].fillna(0)
    df['CompetitionOpenMonths'] = df['CompetitionOpenMonths'].clip(lower=0)
    return df

## create_sequences_multi_store

The model will look at the data inside this frame, and try to predict the next 7 days (horizon). Once that sequence is recorded, the frame slides forward by exactly one day, and the process repeats.


- The code uses groupby('Store') to isolate each store's timeline.

-  if a store is brand new and doesn't even have 63 days of data, we skip it entirely so it doesn't crash the sliding window.

-  The for start in range(...) loop is the actual sliding window. It carves out X (the 56 days of features) and y (the 7 days of target sales).

- If the store is closed  the loop skips creating that sequence because predicting seven zeros isn't useful for training.

In [18]:
def create_sequences_multi_store(df, features, target_col='Sales', seq_len=SEQ_LEN, horizon=HORIZON,
                                 stores=None, min_open_days=1):

    X_list = []
    y_list = []
    idx_info = []
    if stores is None:
        stores = df['Store'].unique()

    grouped = df.sort_values(['Store', 'Date']).groupby('Store')
    for s in tqdm(stores, desc="create_sequences"):
        g = grouped.get_group(s).reset_index(drop=True)

        if len(g) < seq_len + horizon: 
            continue

        feat_arr = g[features].values
        sales_arr = g[target_col].values
        open_arr = g['Open'].values if 'Open' in g.columns else np.ones_like(sales_arr)
        for start in range(0, len(g) - seq_len - horizon + 1):
            end = start + seq_len
            target_end = end + horizon

            if open_arr[end:target_end].sum() < min_open_days:
                continue
            X_list.append(feat_arr[start:end])

            y_list.append(sales_arr[end:target_end])
            idx_info.append((s, g.loc[end, 'Date']))  # store, prediction start
    X = np.array(X_list, dtype=np.float32)
    y = np.array(y_list, dtype=np.float32)
    return X, y, idx_info


## 2. scaleing
Neural networks perform much better when all input features are on a similar scale (usually between 0 and 1). 


Flattening (reshape(-1, n_features)): It temporarily collapses the 3D data down to 2D by ignoring the concept of "time sequences." It just stacks every single day on top of each other.

Fitting: The scaler looks at this massive 2D list to find the global minimum and maximum for each feature (e.g., the absolute highest temperature ever recorded across all sequences).

Restoring: Once the scaling math is applied, it reshapes the data back into its original 3D sequence format (n_samples, seq_len, n_features).

In [19]:
def fit_feature_scaler(X_train, feature_names):

    n_features = X_train.shape[2]
    flat = X_train.reshape(-1, n_features)
    scaler = MinMaxScaler()
    scaler.fit(flat)
    return scaler

def scale_X(X, scaler):
    n_samples, seq_len, n_features = X.shape
    flat = X.reshape(-1, n_features)
    flat_scaled = scaler.transform(flat)
    return flat_scaled.reshape(n_samples, seq_len, n_features)

def scale_y(y, scaler_y=None):
    if scaler_y is None:
        flat = y.reshape(-1, 1)
        scaler_y = MinMaxScaler()
        scaler_y.fit(flat)
    y_scaled = scaler_y.transform(y.reshape(-1,1)).reshape(y.shape)
    return y_scaled, scaler_y
