In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint , EarlyStopping
from tensorflow.keras import layers
from tensorflow.keras.optimizers.schedules import ExponentialDecay

from pandas.plotting import scatter_matrix

import warnings
warnings.filterwarnings("ignore")

In [None]:
# ---------------------------------------------------------------------------
# Load the raw tables, attach the per-store attributes and derive calendar
# features from the Date column.
# ---------------------------------------------------------------------------
# StateHoliday is forced to text (object) on load.
# NOTE(review): presumably because the column mixes numeric and letter codes — confirm.
train_df = pd.read_csv("train.csv", dtype={'StateHoliday': object})
test_df = pd.read_csv("test.csv")
store_df = pd.read_csv("store.csv")

# Left-join so every sales row keeps its store metadata, even if it is sparse.
train_df = pd.merge(train_df, store_df, how='left', on='Store')
test_df = pd.merge(test_df, store_df, how='left', on='Store')

# Keep the submission ids (in the original file order) and remove them from the
# feature frame. NOTE: test_df is re-sorted right below, so `ID` no longer lines
# up row-by-row with test_df after this cell.
ID = test_df['Id']
test_df.drop('Id', inplace=True, axis=1)

train_df.sort_values(["Store", "Date"], ignore_index=True, inplace=True)
test_df.sort_values(["Store", "Date"], ignore_index=True, inplace=True)

for dataset in (train_df, test_df):
    dataset['Date'] = pd.to_datetime(dataset['Date'])
    dataset['Day'] = dataset.Date.dt.day
    dataset['Month'] = dataset.Date.dt.month
    dataset['Year'] = dataset.Date.dt.year
    dataset['DayOfYear'] = dataset.Date.dt.dayofyear
    # `Series.dt.weekofyear` was removed in pandas 2.0; `isocalendar().week` is
    # the supported replacement. It returns a nullable UInt32 column, so cast
    # back to a plain integer dtype for the scaler used later.
    dataset['WeekOfYear'] = dataset.Date.dt.isocalendar().week.astype('int64')
    dataset.set_index('Date', inplace=True)


# ---------------------------------------------------------------------------
# Per-store aggregate features, computed on the training data and broadcast
# to every row of both frames through the Store id.
# ---------------------------------------------------------------------------
store_groups = train_df.groupby('Store')

store_data_sales = store_groups['Sales'].sum()
store_data_customers = store_groups['Customers'].sum()
store_data_avg_sales = store_groups['Sales'].mean()
store_data_avg_customers = store_groups['Customers'].mean()
# NOTE(review): count() counts rows with a non-null Open value, not the days on
# which Open == 1 — confirm this is the intended "per day" denominator.
store_data_open = store_groups['Open'].count()

store_data_sales_per_day = store_data_sales / store_data_open
store_data_customers_per_day = store_data_customers / store_data_open
store_data_avg_sales_per_customer = store_data_avg_sales / store_data_avg_customers
store_data_sales_per_customer_per_day = store_data_sales_per_day / store_data_customers_per_day

sales_per_day_dict = dict(store_data_sales_per_day)
customers_per_day_dict = dict(store_data_customers_per_day)
avg_sales_per_customer_dict = dict(store_data_avg_sales_per_customer)
sales_per_customers_per_day_dict = dict(store_data_sales_per_customer_per_day)

# One shared column-name -> lookup table, applied to BOTH frames so train and
# test always end up with identically named feature columns. (Previously the
# train frame got 'SalesPerDay' while the test frame got 'Sales_per_day', which
# makes the fitted scaler reject or misalign the test features.)
store_feature_maps = {
    'Sales_per_day': sales_per_day_dict,
    'Customers_per_day': customers_per_day_dict,
    'Avg_Sales_per_Customer': avg_sales_per_customer_dict,
    'Sales_Per_Customers_Per_Day': sales_per_customers_per_day_dict,
}

for dataset in (train_df, test_df):
    for column, per_store_value in store_feature_maps.items():
        dataset[column] = dataset['Store'].map(per_store_value)


# ---------------------------------------------------------------------------
# Spectral features: for every Year value and every Month value, take the FFT
# of the Sales column of the matching training rows and store the frequency and
# magnitude of the 2nd and 3rd strongest non-intercept bins as constant columns.
# The same per-value numbers are then mapped onto the test frame.
# NOTE(review): these features are derived from the target over the whole
# training frame, including rows later held out for evaluation — confirm this
# is acceptable.
# ---------------------------------------------------------------------------
def _strongest_fft_components(values, n_components=3):
    """Return the strongest FFT bins of ``values`` after discarding the largest one.

    The largest-magnitude bin (treated as the intercept) is dropped first; the
    next ``n_components`` bins are then returned in decreasing magnitude order.
    Ties resolve to the lowest index, matching repeated ``np.argmax`` calls.

    Parameters
    ----------
    values : array-like
        1-D signal.
    n_components : int
        How many bins to return after the intercept has been removed.

    Returns
    -------
    list of (frequency, magnitude) tuples, strongest first.

    Raises
    ------
    ValueError
        If ``values`` has fewer than ``n_components + 1`` samples.
    """
    if len(values) < n_components + 1:
        raise ValueError(
            "need at least {} samples for the FFT features, got {}".format(
                n_components + 1, len(values)))

    magnitudes = np.abs(np.fft.fft(values))
    frequencies = np.fft.fftfreq(len(magnitudes), 1)

    picked = []
    for _ in range(n_components + 1):
        peak = np.argmax(magnitudes)
        picked.append((frequencies[peak], magnitudes[peak]))
        magnitudes = np.delete(magnitudes, peak)
        frequencies = np.delete(frequencies, peak)
    return picked[1:]  # picked[0] is the discarded intercept bin


freq2_dict_no_log = dict()
freq3_dict_no_log = dict()

amp2_dict_no_log = dict()
amp3_dict_no_log = dict()

for feat_1 in ('Year', 'Month'):
    pieces = []
    # Iterate over the values that actually occur, so a gap in the range
    # (e.g. a missing month) no longer produces an empty slice and a crash.
    for i in sorted(int(v) for v in train_df[feat_1].unique()):
        # Explicit copy: the new columns are written to an independent frame,
        # not to a slice of train_df.
        a = train_df.loc[train_df[feat_1] == i].copy()

        _, (freq_2, amplitude_2), (freq_3, amplitude_3) = \
            _strongest_fft_components(a['Sales'].values)

        a[f'Frequency_2_{feat_1}_Sales'] = freq_2
        a[f'Frequency_3_{feat_1}_Sales'] = freq_3

        a[f'Amplitude_2_{feat_1}_Sales'] = amplitude_2
        a[f'Amplitude_3_{feat_1}_Sales'] = amplitude_3

        freq2_dict_no_log[i] = freq_2
        freq3_dict_no_log[i] = freq_3

        amp2_dict_no_log[i] = amplitude_2
        amp3_dict_no_log[i] = amplitude_3

        pieces.append(a)

    # A single concat instead of growing the frame inside the loop. As before,
    # the training rows end up grouped by ascending `feat_1` value.
    train_df = pd.concat(pieces)

    test_df[f'Frequency_2_{feat_1}_Sales'] = test_df[feat_1].map(freq2_dict_no_log)
    test_df[f'Frequency_3_{feat_1}_Sales'] = test_df[feat_1].map(freq3_dict_no_log)
    test_df[f'Amplitude_2_{feat_1}_Sales'] = test_df[feat_1].map(amp2_dict_no_log)
    test_df[f'Amplitude_3_{feat_1}_Sales'] = test_df[feat_1].map(amp3_dict_no_log)

    # Start the next feature with empty lookup tables.
    freq2_dict_no_log = dict()
    freq3_dict_no_log = dict()
    amp2_dict_no_log = dict()
    amp3_dict_no_log = dict()


In [None]:
# Replace missing competition-opening month/year with the most frequent value
# observed in the training frame (the same fill value is used for the test frame).
feats = ['CompetitionOpenSinceMonth','CompetitionOpenSinceYear']
modes = train_df[feats].mode()

for column in feats:
    most_frequent = modes[column].iloc[0]
    for frame in (train_df, test_df):
        frame[column] = frame[column].fillna(most_frequent)

def convertCompetitionOpen(df):
    """Return the first day of the month in which the competitor opened.

    Parameters
    ----------
    df : mapping or pandas row
        Must provide 'CompetitionOpenSinceYear' and 'CompetitionOpenSinceMonth'.

    Returns
    -------
    pandas.Timestamp for a valid year/month pair, otherwise ``np.nan`` (missing,
    non-numeric or out-of-range values).

    Raises
    ------
    KeyError
        If either key is absent; a missing column is a programming error and is
        no longer silently turned into NaN.
    """
    year_raw = df['CompetitionOpenSinceYear']
    month_raw = df['CompetitionOpenSinceMonth']
    try:
        # Zero-padded ISO date plus an explicit format: parsing is strict and
        # does not depend on heuristic date guessing.
        date = '{:04d}-{:02d}-01'.format(int(year_raw), int(month_raw))
        return pd.to_datetime(date, format='%Y-%m-%d')
    except (TypeError, ValueError, OverflowError):
        # int(NaN) / int(None) / int(inf) or an impossible calendar date.
        return np.nan

# Encode the competitor opening date as an int64 (the datetime's integer
# representation) so it can be fed to the scaler as a numeric feature.
for frame in (train_df, test_df):
    opened_at = frame.apply(convertCompetitionOpen, axis=1)
    frame['CompetitionOpenInt'] = opened_at.astype(np.int64)

In [None]:
# Customers is dropped because it is not in the test set;
# StateHoliday is dropped because it reduces the performance.
train_df.drop(columns=['Customers', 'StateHoliday'], inplace=True)
test_df.drop(columns='StateHoliday', inplace=True)

# Order each frame by store first, then by calendar date, newest rows first.
for frame in (train_df, test_df):
    frame.sort_values(["Store"], ignore_index=True, inplace=True)
    frame.sort_values(["Year","Month","Day"], ascending=False, ignore_index=True, inplace=True)

# Fill the remaining gaps in the promo/competition columns with the most
# frequent training value; the test frame reuses the training fill value.
feats = ['Promo2SinceYear','Promo2SinceWeek','CompetitionDistance', 'PromoInterval']
modes = train_df[feats].mode()

for column in feats:
    most_frequent = modes[column].iloc[0]
    train_df[column] = train_df[column].fillna(most_frequent)
    test_df[column] = test_df[column].fillna(most_frequent)

# A missing Open flag is replaced with 0.
train_df['Open'] = train_df['Open'].fillna(0)
test_df['Open'] = test_df['Open'].fillna(0)

# Integer-encode the categorical columns.
# The code table is learned once from the training frame and then applied to the
# test frame, so a given category gets the SAME integer in both frames.
# (Factorizing each frame independently numbers categories by order of first
# appearance in that frame, which can differ between train and test.)
attributes = ['StoreType','Assortment','PromoInterval']
for column in attributes:
    codes, categories = pd.factorize(train_df[column])
    category_to_code = {category: code for code, category in enumerate(categories)}

    train_df[column] = codes
    # Categories never seen in training (or missing values) get -1, the same
    # sentinel pd.factorize uses for missing values.
    test_df[column] = (
        test_df[column].map(category_to_code).fillna(-1).astype(codes.dtype)
    )

In [None]:
# Train only on rows where the store was open and recorded positive sales.
is_open = train_df['Open'] == 1
has_sales = train_df['Sales'] > 0.0
train_df = train_df[is_open & has_sales]

In [None]:
# ---------------------------------------------------------------------------
# Hold out the chronologically last rows, then build a scaled train/validation
# split from the remainder.
# ---------------------------------------------------------------------------
HOLDOUT_ROWS = 47000  # number of most recent rows kept aside as `test`

temp = train_df.sort_values(["Year", "Month", "Day"], ignore_index=True).copy()

train = temp[:-HOLDOUT_ROWS].copy()
test = temp[-HOLDOUT_ROWS:].copy()

train.sort_values(["Store"], ignore_index=True, inplace=True)
test.sort_values(["Store"], ignore_index=True, inplace=True)
train.sort_values(["Year", "Month", "Day"], ascending=False, ignore_index=True, inplace=True)
test.sort_values(["Year", "Month", "Day"], ascending=False, ignore_index=True, inplace=True)

X = train.drop('Sales', axis=1)
y = train['Sales']

# Split BEFORE fitting the scalers so the validation rows do not influence the
# min/max statistics (fitting on all rows first leaks validation information).
# The same random_state/test_size select the same rows as splitting the arrays.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler_X = MinMaxScaler().fit(X_train_raw)
scaler_y = MinMaxScaler().fit(y_train_raw.values.reshape(-1, 1))

X_train = scaler_X.transform(X_train_raw)
X_test = scaler_X.transform(X_test_raw)
y_train = scaler_y.transform(y_train_raw.values.reshape(-1, 1)).flatten()
y_test = scaler_y.transform(y_test_raw.values.reshape(-1, 1)).flatten()

# Scaled versions of the full (pre-split) arrays, kept under their original names.
X_normalized = scaler_X.transform(X)
y_normalized = scaler_y.transform(y.values.reshape(-1, 1)).flatten()

In [None]:
# Show the min-max scaled training targets as the cell output.
y_train

In [None]:
# Append a trailing axis of length 1: (samples, features) -> (samples, features, 1),
# the 3-D layout the sequence layers below are built for.
X_train = X_train.reshape(*X_train.shape[:2], 1)
X_test = X_test.reshape(*X_test.shape[:2], 1)

In [None]:
def rmspe_loss(y_true, y_pred):
    """Root mean squared percentage error between targets and predictions.

    Both inputs are cast to float32. The relative error is
    ``(y_true - y_pred) / y_true``; positions where ``y_true`` is zero
    contribute 0 (``divide_no_nan``) instead of inf/NaN.

    Returns a scalar tensor: sqrt(mean(relative_error ** 2)).

    NOTE(review): in this notebook the targets passed in are min-max scaled, so
    the percentage is relative to the scaled value, not to raw Sales — confirm
    that is intended.
    """
    targets = tf.cast(y_true, dtype=tf.float32)
    predictions = tf.cast(y_pred, dtype=tf.float32)

    relative_error = tf.math.divide_no_nan(targets - predictions, targets)
    return tf.sqrt(tf.reduce_mean(tf.square(relative_error)))

### Model 1

In [None]:
# ---------------------------------------------------------------------------
# Model 1: LSTM -> Conv1D -> multi-head self-attention -> pooled dense head.
# Trained on MSE, with RMSPE reported as a metric.
# ---------------------------------------------------------------------------
# Hyper-parameters
lstm_units = 64
cnn_filters = 32
cnn_kernel_size = 3
attention_units = 64
dense_units = 128

# Network graph (functional API)
input_layer = layers.Input(shape=(X_train.shape[1], X_train.shape[2]))

lstm_output = layers.LSTM(
    units=lstm_units,
    activation='tanh',
    return_sequences=True,  # keep the full sequence for the Conv1D layer
)(input_layer)

cnn_output = layers.Conv1D(
    filters=cnn_filters,
    kernel_size=cnn_kernel_size,
    padding='same',
    activation='relu',
)(lstm_output)

# Self-attention: the convolution output is used as both query and value.
attention_output = layers.MultiHeadAttention(
    num_heads=2,
    key_dim=attention_units,
)(cnn_output, cnn_output)

global_avg_pooling = layers.GlobalAveragePooling1D()(attention_output)
dense_output = layers.Dense(units=dense_units, activation='relu')(global_avg_pooling)
output_layer = layers.Dense(units=1, activation='linear')(dense_output)

model_1 = models.Model(inputs=input_layer, outputs=output_layer)
model_1.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss="mse",
    metrics=[rmspe_loss],
)
model_1.summary()

# Persist the weights of the epoch with the lowest validation loss.
checkpoint_callback = ModelCheckpoint(
    filepath='lstm_cnn_trans.h5',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    mode='min',
)

# Stop after 5 epochs without val_loss improvement and roll back to the best weights.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

history_1 = model_1.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[checkpoint_callback, early_stopping_callback],
)

In [None]:
# Score model_1 on (X_test, y_test). These are the same arrays that were passed
# as validation_data during fit (and drove early stopping), so this repeats the
# validation measurement rather than scoring unseen data.
model_1.evaluate(X_test,y_test)

In [None]:
# Learning curves for model 1: training vs. validation loss per epoch.
loss = history_1.history['loss']
val_loss = history_1.history['val_loss']
epochs = range(1, len(loss) + 1)

fig, ax = plt.subplots()
for curve, curve_label in ((loss, 'Training Loss'), (val_loss, 'Validation Loss')):
    ax.plot(epochs, curve, label=curve_label)
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

### Model 2

In [None]:
class CustomMultiHeadAttention(layers.MultiHeadAttention):
    """MultiHeadAttention configured as self-attention over a single input.

    The parent layer expects separate query and value tensors; this wrapper
    feeds the one incoming tensor as both, which lets the layer be used inside
    a ``Sequential`` model.
    """

    def call(self, inputs, **kwargs):
        # query = value = inputs; any extra kwargs are forwarded unchanged.
        return super().call(inputs, value=inputs, **kwargs)


# --- Training configuration -------------------------------------------------
validation_split = 0.2
batch_size = 64
epochs = 100

# Derive the number of optimizer steps from the data actually used for fitting
# instead of a hard-coded row count: `fit` holds out the last
# `validation_split` fraction of X_train, so only the remainder is trained on.
total_training_samples = int(len(X_train) * (1 - validation_split))
steps_per_epoch = int(np.ceil(total_training_samples / batch_size))
total_steps = steps_per_epoch * epochs

# Decay the learning rate by 10% after every tenth of the planned steps.
decay_steps = int(0.1 * total_steps)

initial_learning_rate = 3e-4
lr_schedule = ExponentialDecay(
    initial_learning_rate,
    decay_steps=decay_steps,
    decay_rate=0.9,
    staircase=True)

# --- Model 2: LSTM -> self-attention -> pooled dense head -------------------
model_2 = tf.keras.Sequential([
    layers.Input(shape=(X_train.shape[1], 1)),
    # layers.LSTM with default arguments selects the cuDNN kernel automatically
    # when a GPU is available and still runs on CPU, unlike the GPU-only
    # tf.compat.v1 CuDNNLSTM layer.
    layers.LSTM(64, return_sequences=True),
    CustomMultiHeadAttention(num_heads=2, key_dim=2),
    layers.GlobalAveragePooling1D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(1, activation='linear')
])

# RMSPE is both the optimisation objective and the reported metric here.
model_2.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule), loss=rmspe_loss, metrics=[rmspe_loss])
history_2 = model_2.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=validation_split,
)


In [None]:
# Score model_2 on (X_test, y_test). model_2 was fit with validation_split on
# X_train only, so these arrays were not passed to its fit call. The loss and
# the metric are the same function (rmspe_loss) for this model.
model_2.evaluate(X_test,y_test)

In [None]:
# Learning curves for model 2: training vs. validation loss per epoch.
loss = history_2.history['loss']
val_loss = history_2.history['val_loss']
epochs = range(1, len(loss) + 1)

fig, ax = plt.subplots()
for curve, curve_label in ((loss, 'Training Loss'), (val_loss, 'Validation Loss')):
    ax.plot(epochs, curve, label=curve_label)
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

### Ensemble

In [None]:
# Evaluate both models on the held-out `test` rows, scaled with the scalers
# that were fitted earlier.
df_test = test.copy()
X_new_testing = df_test.drop(columns=["Sales"])
y_new_testing = df_test["Sales"]

# Features: scale, then append the trailing axis -> (rows, features, 1).
x_new_testing_normalized = np.expand_dims(scaler_X.transform(X_new_testing), axis=-1)
# Targets: scale as a single column -> (rows, 1).
y_new_testing_normalized = scaler_y.transform(y_new_testing.to_numpy().reshape(-1, 1))

holdout_scores = tuple(
    model.evaluate(x_new_testing_normalized, y_new_testing_normalized)
    for model in (model_1, model_2)
)
holdout_scores

In [None]:
# ---------------------------------------------------------------------------
# Ensemble prediction on the competition test frame and submission table.
# ---------------------------------------------------------------------------
test_df_normalized = scaler_X.transform(test_df)
# Same (rows, features, 1) layout the models were trained on; made explicit
# here to match how the hold-out inputs are prepared.
test_inputs = test_df_normalized.reshape(
    test_df_normalized.shape[0], test_df_normalized.shape[1], 1
)

y1 = model_1.predict(test_inputs)
y2 = model_2.predict(test_inputs)

# Simple average of the two models, mapped back to the original Sales scale.
y_preds = (y1 + y2) / 2
y_preds_denorm = scaler_y.inverse_transform(y_preds)

# test_df has been re-sorted since the Id column was removed, so row position
# no longer equals Id. Recover each row's Id from the raw file by joining on
# (Store, calendar date).
id_lookup = pd.read_csv("test.csv", usecols=['Id', 'Store', 'Date'], parse_dates=['Date'])
id_lookup['Year'] = id_lookup['Date'].dt.year
id_lookup['Month'] = id_lookup['Date'].dt.month
id_lookup['Day'] = id_lookup['Date'].dt.day
id_lookup = id_lookup.drop(columns='Date')

key_columns = ['Store', 'Year', 'Month', 'Day']
row_keys = test_df[key_columns].reset_index(drop=True)
# A left merge keeps the row order of `row_keys` (i.e. of the predictions);
# `validate` fails loudly if a (Store, date) pair is not unique on either side.
matched = row_keys.merge(id_lookup, on=key_columns, how='left', validate='one_to_one')
if matched['Id'].isna().any():
    raise ValueError("some test rows could not be matched back to an Id in test.csv")

submission_1 = pd.DataFrame({
    'Id': matched['Id'].astype('int64').to_numpy(),
    'Sales': y_preds_denorm.ravel(),
})
submission_1 = submission_1.sort_values('Id', ignore_index=True)


In [None]:
# Write the submission table to ensemble.csv, omitting the DataFrame index.
submission_1.to_csv('ensemble.csv', index=False)