In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from datetime import datetime

# Task 2.1: Preprocessing
# Read the three raw CSV files into DataFrames.
store_df = pd.read_csv("../data/rossmann_store_sales/store.csv")
# low_memory=False makes pandas parse the file in one pass, so each column's
# dtype is inferred from the whole file instead of chunk by chunk.
train_df = pd.read_csv(
    "../data/rossmann_store_sales/train.csv",
    low_memory=False,
)
sample_submission_df = pd.read_csv(
    "../data/rossmann_store_sales/sample_submission.csv"
)

In [None]:
# Merge store data with train data.
# A left join keeps every training row and attaches the matching store columns.
# NOTE(review): this rebinds train_df, so running the cell a second time merges
# the store columns in again (pandas will suffix the duplicates).
train_df = train_df.merge(store_df, on='Store', how='left')

# Prepare time series data for LSTM model: total sales per calendar day.
# The CSV is loaded without date parsing, so 'Date' holds strings; resample()
# only works on a datetime-like index, hence the explicit pd.to_datetime.
# assign() works on the two-column selection, leaving train_df['Date'] untouched.
ts_data = (
    train_df[['Date', 'Sales']]
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
    .resample('D')
    .sum()  # days with no rows already sum to 0, so no fillna is required
)

# Scale the daily totals into [0, 1] (MinMaxScaler default range).
# NOTE(review): the scaler is fitted on the whole series, i.e. before any
# train/test split, so its min/max also reflect the later observations.
scaler_ts = MinMaxScaler()
ts_data['Sales'] = scaler_ts.fit_transform(ts_data[['Sales']])


In [None]:
# Create supervised data for LSTM model
# Number of consecutive time steps in each input window; the value that
# immediately follows a window is used as that window's prediction target.
window_size = 30

def create_supervised_data(data, window_size):
    """Turn a sequence into sliding-window (input, target) pairs.

    Sample ``i`` pairs the window ``data[i:i + window_size]`` with the value
    that immediately follows it, ``data[i + window_size]``.

    Args:
        data: Array-like sequence ordered along its first axis. It is
            converted with ``np.asarray``, so indexing is purely positional.
        window_size: Number of consecutive elements per input window; must
            be at least 1.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding ``len(data) - window_size``
        samples. ``X`` has shape ``(n_samples, window_size, ...)`` and ``y``
        has shape ``(n_samples, ...)``, where ``...`` are any trailing
        dimensions of ``data``. If ``data`` has ``window_size`` or fewer
        elements, both arrays are empty, but ``X`` still has ``window_size``
        as its second dimension.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    values = np.asarray(data)
    n_samples = max(len(values) - window_size, 0)

    # Row i of `positions` is [i, i + 1, ..., i + window_size - 1]; indexing
    # with it gathers every window at once and yields a fresh array.
    positions = np.arange(n_samples)[:, np.newaxis] + np.arange(window_size)
    X = values[positions]

    # Targets are simply the series shifted by one window; copy so the result
    # does not alias the caller's array.
    y = values[window_size:].copy()
    return X, y

# Build (window, next value) samples from the scaled daily sales series.
X_ts, y_ts = create_supervised_data(ts_data['Sales'].values, window_size)

# Split data for LSTM model
# shuffle=False keeps the samples in chronological order: the earliest ~80%
# become the training set and the latest ~20% the test set. Consecutive windows
# share all but one time step, so a shuffled split would place near-duplicates
# of test windows (and values from the future) into the training set.
X_ts_train, X_ts_test, y_ts_train, y_ts_test = train_test_split(
    X_ts, y_ts, test_size=0.2, shuffle=False
)

# Reshape for LSTM model: append a trailing axis of length 1 so each array
# becomes (samples, window_size, 1).
X_ts_train = X_ts_train[..., np.newaxis]
X_ts_test = X_ts_test[..., np.newaxis]