In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OrdinalEncoder
import tensorflow as tf
from tensorflow.keras import layers, Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import SimpleRNN, BatchNormalization,LayerNormalization, Dropout, TimeDistributed, Dense
from keras.models import Sequential
from keras.layers import LSTM,GRU, BatchNormalization, Dropout, TimeDistributed, Dense
from tensorflow.keras.optimizers import Adam
from keras.regularizers import l2
from keras.layers import Bidirectional
from keras.models import Model
from keras.layers import Input, Dense, LSTM, Bidirectional, Dropout, BatchNormalization, TimeDistributed, Attention

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# All competition tables live in one Kaggle input directory.
DATA_DIR = "/kaggle/input/store-sales-time-series-forecasting"


def _read_table(name, dated=True):
    """Read `<DATA_DIR>/<name>.csv`; parse the `date` column unless `dated` is False."""
    options = {"parse_dates": ['date']} if dated else {}
    return pd.read_csv(f"{DATA_DIR}/{name}.csv", **options)


train = _read_table("train")
test = _read_table("test")
oil = _read_table("oil")
holidays_events = _read_table("holidays_events")
transactions = _read_table("transactions")
# stores.csv is read without date parsing.
stores = _read_table("stores", dated=False)

In [4]:
# Fill gaps in the oil price series: carry the last known price forward, then
# back-fill whatever is still missing at the start of the series.
# Series.ffill()/bfill() replace fillna(method=...), which is deprecated in
# recent pandas releases.
oil['dcoilwtico'] = oil['dcoilwtico'].ffill().bfill()

# Left joins keep every train/test row; dates with no row in `oil` end up with
# NaN in `dcoilwtico`.
train = pd.merge(train, oil, on='date', how='left')
test = pd.merge(test, oil, on='date', how='left')

In [5]:
# Keep national events only, and at most one row per calendar date.
# Without the de-duplication, a date that carries several national events would
# match several right-hand rows in the left joins below and every train/test
# row for that date would be copied once per event.
holidays_events = (
    holidays_events[holidays_events['locale'] == 'National']
    .drop_duplicates(subset='date', keep='first')
)

# validate='many_to_one' makes pandas raise if `date` is not unique on the
# holiday side, so a row fan-out can never slip through silently.
train = pd.merge(train, holidays_events, on='date', how='left',
                 suffixes=('', '_holidays'), validate='many_to_one')
test = pd.merge(test, holidays_events, on='date', how='left',
                suffixes=('', '_holidays'), validate='many_to_one')

In [6]:
# Attach the store attributes to every row via its store number (left join).
train = train.merge(stores, how='left', on='store_nbr')
test = test.merge(stores, how='left', on='store_nbr')

In [7]:
# Transactions are joined per (date, store); rows without a match get 0.
transaction_keys = ['date', 'store_nbr']
train = train.merge(transactions, how='left', on=transaction_keys)
test = test.merge(transactions, how='left', on=transaction_keys)

for frame in (train, test):
    frame['transactions'] = frame['transactions'].fillna(0)

In [8]:
# Derive the same calendar columns from `date` on both frames.
for frame in (train, test):
    stamps = frame['date'].dt
    frame['year'] = stamps.year
    frame['month'] = stamps.month
    frame['day'] = stamps.day
    frame['day_of_week'] = stamps.dayofweek
    frame['week_of_year'] = stamps.isocalendar().week

In [9]:
# Modelling copies without the promotion column; `train` / `test` stay untouched.
train_data = train.drop(columns=['onpromotion'])
test_data = test.drop(columns=['onpromotion'])

In [10]:
# Encode product families as integer codes; families not seen during fit map to -1.
ordinal_encoder = OrdinalEncoder(dtype=int, handle_unknown='use_encoded_value', unknown_value=-1)
train_data[['family']] = ordinal_encoder.fit_transform(train_data[['family']])
test_data[['family']] = ordinal_encoder.transform(test_data[['family']])

# Keep exactly one row per (date, store, family).
# Repeated keys at this point are copies produced by the earlier left joins
# (a date can match more than one holiday row), and every copy carries the same
# `sales` value. Summing them would multiply that day's sales by the number of
# copies, so the first copy is kept as-is instead.
# NOTE(review): assumes train.csv itself has one row per key - confirm.
train_data = train_data.drop_duplicates(subset=['date', 'store_nbr', 'family'])

# Wide matrix: one row per date, one column per (store_nbr, family) series.
pivoted_train = (
    train_data
    .pivot(index='date', columns=['store_nbr', 'family'], values='sales')
    .sort_index()
)

# Split train and validation data chronologically: first 95% of days / last 5%.
n_o_days_train = len(pivoted_train)
train_samples = int(n_o_days_train * 0.95)
train_samples_df = pivoted_train.iloc[:train_samples]
valid_samples_df = pivoted_train.iloc[train_samples:]

# Scale data. The scaler is fitted on the training days only, so the validation
# days do not influence the per-column min/max.
minmax_scaler = MinMaxScaler()
minmax_scaler.fit(train_samples_df)

scaled_train_samples = minmax_scaler.transform(train_samples_df)
scaled_validation_samples = minmax_scaler.transform(valid_samples_df)

# Function to split time series into samples
def split_series(series, n_past, n_future):
    """Cut a (time, features) series into sliding (past, future) window pairs.

    Window ``i`` uses rows ``[i, i + n_past)`` as input and the following
    ``n_future`` rows as target; consecutive windows are shifted by one row.

    Args:
        series: 2-D array-like indexed as ``[time, feature]``.
        n_past: number of rows in each input window.
        n_future: number of rows in each target window.

    Returns:
        ``(X, y)`` with shapes ``(n_windows, n_past, n_features)`` and
        ``(n_windows, n_future, n_features)``. When the series is too short to
        hold a single window, both arrays are returned empty but keep their
        trailing dimensions (``n_windows == 0``) instead of collapsing to
        shape ``(0,)``.
    """
    series = np.asarray(series)
    span = n_past + n_future
    n_windows = len(series) - span + 1

    if n_windows <= 0:
        # Preserve rank and feature width so downstream shape checks stay meaningful.
        feature_dims = series.shape[1:]
        return (
            np.empty((0, n_past) + feature_dims, dtype=series.dtype),
            np.empty((0, n_future) + feature_dims, dtype=series.dtype),
        )

    X = np.stack([series[start:start + n_past, :] for start in range(n_windows)])
    y = np.stack([series[start + n_past:start + span, :] for start in range(n_windows)])
    return X, y

# Define time steps and features
n_past = 16    # rows (days) in each input window
# Rows (days) in each target window. timemodel() emits one output step per
# input step, so this has to stay equal to n_past.
n_future = 16
# One feature per column of the pivoted matrix. Taking the width from the
# matrix itself keeps the later reshapes valid even if some store/family
# combination is absent, which a "stores x families" product would not.
n_features = pivoted_train.shape[1]

X_train, y_train = split_series(scaled_train_samples, n_past, n_future)
X_val, y_val = split_series(scaled_validation_samples, n_past, n_future)

def attention_mechanism(inputs):
    """Gate `inputs` with learned softmax weights.

    A Dense layer with as many units as the last axis of `inputs` and a softmax
    activation produces the weights; the result is the element-wise product of
    `inputs` and those weights, so the output has the same shape as `inputs`.
    """
    n_channels = inputs.shape[-1]
    weights = Dense(n_channels, activation='softmax')(inputs)
    return inputs * weights
    
def timemodel():
    """Build and compile the sequence-to-sequence GRU forecaster.

    Architecture, in order: two bidirectional GRU(128) stages, a GRU(64) stage,
    the softmax gating from `attention_mechanism`, another GRU(64) stage and a
    time-distributed Dense head with `n_features` units. Every recurrent stage
    returns full sequences and is followed by LayerNormalization and
    Dropout(0.1); all kernels carry an L2 penalty of 0.0005.

    Reads the module-level `n_past` and `n_features` for the input shape.

    Returns:
        A compiled Keras `Model` (MAE loss, Adam with learning rate 1e-4,
        MAE metric).
    """
    def normalize_and_drop(tensor):
        # Shared tail of every recurrent stage.
        tensor = LayerNormalization()(tensor)
        return Dropout(0.1)(tensor)

    inputs = Input(shape=(n_past, n_features))
    x = inputs

    # Stages 1-2: bidirectional GRUs.
    for _ in range(2):
        x = Bidirectional(GRU(128, return_sequences=True, kernel_regularizer=l2(0.0005)))(x)
        x = normalize_and_drop(x)

    # Stage 3: unidirectional GRU whose output is gated by the attention block.
    x = GRU(64, return_sequences=True, kernel_regularizer=l2(0.0005))(x)
    x = normalize_and_drop(x)
    x = attention_mechanism(x)

    # Stage 4: unidirectional GRU on the gated sequence.
    x = GRU(64, return_sequences=True, kernel_regularizer=l2(0.0005))(x)
    x = normalize_and_drop(x)

    # Head: one Dense projection to n_features per time step.
    outputs = TimeDistributed(Dense(n_features, kernel_regularizer=l2(0.0005)))(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="mae",
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
        metrics=['mae'],
    )
    return model

from tensorflow.keras.callbacks import ReduceLROnPlateau

# Seed Python, NumPy and TensorFlow before the model is built so that weight
# initialisation, dropout masks and batch shuffling are repeatable between runs.
# NOTE(review): some GPU kernels may still be non-deterministic.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

model = timemodel()

# Train the model
# Stop once val_mae has not improved by at least 1e-4 for 100 epochs and roll
# back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_mae', min_delta=0.0001, patience=100, restore_best_weights=True)

# Halve the learning rate after 10 epochs without val_loss improvement (floor 1e-6).
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=10, min_lr=1e-6, verbose=1
)
EPOCHS = 500
model_history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    callbacks=[early_stopping, lr_scheduler],
    batch_size=128,
    shuffle=True
)

# Make predictions
# Input is the last n_past rows of the validation block; the model returns the
# n_future steps that follow them.
x_test_pred = scaled_validation_samples[-n_past:, :].reshape((1, n_past, n_features))
scaled_y_predict = model.predict(x_test_pred)

# Inverse transform predictions
y_predict = pd.DataFrame(minmax_scaler.inverse_transform(scaled_y_predict.reshape((n_future, n_features))),
                         columns=valid_samples_df.columns)

# Display predictions
print(y_predict.head())

Epoch 1/500
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 428ms/step - loss: 1.3100 - mae: 0.1629 - val_loss: 1.3053 - val_mae: 0.1947 - learning_rate: 1.0000e-04
Epoch 2/500
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 310ms/step - loss: 1.2445 - mae: 0.1459 - val_loss: 1.2469 - val_mae: 0.1855 - learning_rate: 1.0000e-04
Epoch 3/500
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 316ms/step - loss: 1.1876 - mae: 0.1380 - val_loss: 1.1873 - val_mae: 0.1742 - learning_rate: 1.0000e-04
Epoch 4/500
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 314ms/step - loss: 1.1304 - mae: 0.1288 - val_loss: 1.1231 - val_mae: 0.1566 - learning_rate: 1.0000e-04
Epoch 5/500
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 354ms/step - loss: 1.0731 - mae: 0.1177 - val_loss: 1.0566 - val_mae: 0.1349 - learning_rate: 1.0000e-04
Epoch 6/500
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 314ms/step - loss:

In [11]:
# Inspect the columns of `test` after all merges. `type_x` / `type_y` carry
# pandas' default merge suffixes, which are added when both sides of a merge
# have a `type` column (presumably the holiday table and the store table).
print(test.columns)

Index(['id', 'date', 'store_nbr', 'family', 'onpromotion', 'dcoilwtico',
       'type_x', 'locale', 'locale_name', 'description', 'transferred', 'city',
       'state', 'type_y', 'cluster', 'transactions', 'year', 'month', 'day',
       'day_of_week', 'week_of_year'],
      dtype='object')


In [12]:
# Load test data for predictions
# Encode the test keys the same way as the columns of the training matrix.
test_data = test.copy().drop(['onpromotion'], axis=1)
test_data[['family']] = ordinal_encoder.transform(test_data[['family']])

# The model emits n_future steps, one per forecast date.
forecast_dates = pd.DatetimeIndex(test_data['date'].unique()).sort_values()
if len(forecast_dates) != n_future:
    raise ValueError(
        f"test set spans {len(forecast_dates)} dates but the model forecasts {n_future} steps"
    )

# Condition the forecast on the last n_past days of observed sales.
# The test set has no sales, so a placeholder matrix of zeros would give the
# model an input with no information about recent demand.
# NOTE(review): assumes the test dates directly follow the last training date - confirm.
history = pivoted_train.iloc[-n_past:]
x_test_pred = minmax_scaler.transform(history).reshape((1, n_past, n_features))

# Make predictions
scaled_y_predict = model.predict(x_test_pred)

# Inverse transform predictions to original scale.
# Columns follow the scaler's feature order, i.e. the training matrix columns.
feature_columns = pivoted_train.columns
y_predict = pd.DataFrame(
    minmax_scaler.inverse_transform(scaled_y_predict.reshape((n_future, n_features))),
    index=forecast_dates,
    columns=feature_columns,
)

# Long format: one row per (date, store_nbr, family). ravel() walks the matrix
# row by row, which matches repeat() on dates and tile() on the column keys.
predictions_long = pd.DataFrame({
    'date': np.repeat(forecast_dates.values, n_features),
    'store_nbr': np.tile(feature_columns.get_level_values(0).to_numpy(), n_future).astype('int64'),
    'family': np.tile(feature_columns.get_level_values(1).to_numpy(), n_future).astype('int64'),
    'sales': y_predict.to_numpy().ravel(),
})

# Create submission file
# Join on the keys instead of relying on row position, so each id receives the
# forecast of its own date/store/family whatever the row order of test.csv is.
submission_keys = (
    test_data[['id', 'date', 'store_nbr', 'family']]
    .drop_duplicates(subset='id')
    .astype({'store_nbr': 'int64', 'family': 'int64'})
)
submission = submission_keys.merge(
    predictions_long, on=['date', 'store_nbr', 'family'], how='left'
)[['id', 'sales']]

# Ids without a matching series (e.g. a family encoded as -1) get 0; negative
# forecasts are clipped to 0.
submission['sales'] = submission['sales'].fillna(0.0).clip(lower=0.0)

# Save to CSV
submission.to_csv('submission.csv', index=False)
print("submission.csv file generated!")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
submission.csv file generated!
