## File descriptions
* sales_train.csv - the training set. Daily historical data from January 2013 to October 2015.
* test.csv - the test set. You need to forecast the sales for these shops and products for November 2015.
* sample_submission.csv - a sample submission file in the correct format.
* items.csv - supplemental information about the items/products.
* item_categories.csv - supplemental information about the item categories.
* shops.csv - supplemental information about the shops.
## Data fields
* ID - an Id that represents a (Shop, Item) tuple within the test set
* shop_id - unique identifier of a shop
* item_id - unique identifier of a product
* item_category_id - unique identifier of item category
* item_cnt_day - number of products sold. You are predicting a monthly amount of this measure
* item_price - current price of an item
* date - date in format dd/mm/yyyy
* date_block_num - a consecutive month number, used for convenience. January 2013 is 0, February 2013 is 1,..., October 2015 is 33
* item_name - name of item
* shop_name - name of shop
* item_category_name - name of item category

# <center> Import libs

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
# Setting parameters for matplotlib
%matplotlib inline
plt.rcParams["figure.figsize"] = (15, 7)
plt.rcParams["font.size"] = 14

import seaborn as sns
# Customizing seaborn color palette
# NOTE(review): the colormap returned by this call is discarded, so the line
# does not appear to change any plotting defaults - confirm the intent
# (e.g. whether sns.set_palette(...) was meant).
sns.light_palette("seagreen", as_cmap=True)

import warnings
warnings.filterwarnings('ignore')

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import tensorflow as tf

# <center> Import data

In [None]:
# Load every competition table from the Kaggle input directory.
sales_train = pd.read_csv('../input/competitive-data-science-predict-future-sales/sales_train.csv')
sales_test = pd.read_csv('../input/competitive-data-science-predict-future-sales/test.csv')
items = pd.read_csv('../input/competitive-data-science-predict-future-sales/items.csv')
item_categories = pd.read_csv('../input/competitive-data-science-predict-future-sales/item_categories.csv')
shops = pd.read_csv('../input/competitive-data-science-predict-future-sales/shops.csv')

# Report the (rows, columns) of each table so a partial or failed load is
# visible immediately. A single print keeps helper names out of the namespace.
print('\n'.join(
    f'{label} shape is {table.shape}'
    for label, table in (
        ('item_categories', item_categories),
        ('items', items),
        ('sales_train', sales_train),
        ('shops', shops),
        ('sales_test', sales_test),
    )
))

# <center> Datasets check

### Item category

In [None]:
# Preview the first 10 rows of the item category table.
item_categories.head(10)

In [None]:
# Column dtypes as inferred by read_csv, before any casting.
item_categories.dtypes

### Items

In [None]:
# Preview the first 10 rows of the items table.
items.head(10)

In [None]:
# Column dtypes as inferred by read_csv, before any casting.
items.dtypes

### Shops

In [None]:
# Preview the first 10 rows of the shops table.
shops.head(10)

In [None]:
# Column dtypes as inferred by read_csv, before any casting.
shops.dtypes

### Sales train

In [None]:
# Preview the first rows of the daily training sales.
sales_train.head()

In [None]:
# Column dtypes as inferred by read_csv, before any casting.
sales_train.dtypes

### Sales test

In [None]:
# Preview the first rows of the test set.
sales_test.head()

In [None]:
# Column dtypes as inferred by read_csv, before any casting.
sales_test.dtypes

# <center> Dtype validation

In [None]:
# Identifier columns are labels, not quantities, so they are stored as
# `object` to keep them out of numeric summaries and arithmetic.
item_categories['item_category_id'] = item_categories['item_category_id'].astype(object)

items['item_id'] = items['item_id'].astype(object)
items['item_category_id'] = items['item_category_id'].astype(object)

shops['shop_id'] = shops['shop_id'].astype(object)

# `date` is documented as day-first (dd/mm/yyyy). pandas parses ambiguous
# strings month-first unless told otherwise, which would turn e.g. the 2nd of
# January into the 1st of February, so the day-first order is stated explicitly.
sales_train['date'] = pd.to_datetime(sales_train['date'], dayfirst=True)
sales_train['date_block_num'] = sales_train['date_block_num'].astype(object)
sales_train['shop_id'] = sales_train['shop_id'].astype(object)
sales_train['item_id'] = sales_train['item_id'].astype(object)
# Daily counts are whole units; cast them to an integer dtype.
sales_train['item_cnt_day'] = sales_train['item_cnt_day'].astype('int64')

sales_test['shop_id'] = sales_test['shop_id'].astype(object)
sales_test['item_id'] = sales_test['item_id'].astype(object)

# <center> Creating new features

In [None]:
# Build a "<shop_id>|<item_id>" key (concatenation of both ids) so that every
# shop/item pair can be addressed as a single time series in train and test.
sales_train['series'] = sales_train['shop_id'].astype('str').str.cat(
    sales_train['item_id'].astype('str'),
    sep='|',
)
sales_test['series'] = sales_test['shop_id'].astype('str').str.cat(
    sales_test['item_id'].astype('str'),
    sep='|',
)

sales_test.head()

# <center> Group data

In [None]:
# Collapse the daily rows into one row per (series, date_block_num) pair whose
# `item_cnt_day` holds the total number of items sold in that month.
sales_train = (
    sales_train
    .groupby(['series', 'date_block_num'])[['item_cnt_day']]
    .sum()
    .reset_index()
)
sales_train.head()

# <center> Pivot Table creation

In [None]:
# Reshape to wide form: one row per series, one column per month, with 0 for
# months in which the series had no sales. Because `values` is passed as a
# list, the column labels are two-level: ('item_cnt_day', <month number>).
train_data_pivot = sales_train.pivot_table(
    index=['series'],
    columns=['date_block_num'],
    values=['item_cnt_day'],
    aggfunc='sum',
    fill_value=0,
)
train_data_pivot.head()

# <center> Merging with test data

In [None]:
# Build the prediction input: one row per test (shop, item) pair, in the same
# order as sales_test, holding that pair's monthly sales history. Pairs that
# never appear in the training data get an all-zero history.

# The pivot's column labels are two-level ('item_cnt_day', month) while
# sales_test has ordinary single-level columns. Recent pandas refuses to merge
# frames with a different number of column levels (MergeError) and older
# versions only emit a warning, so the extra level is dropped first.
monthly_history = train_data_pivot
if isinstance(train_data_pivot.columns, pd.MultiIndex):
    monthly_history = train_data_pivot.droplevel(0, axis=1)

# A left merge from the `series` column keeps every test row, in test order;
# only the key column is taken from sales_test, so no id columns need dropping.
merged_data = sales_test[['series']].merge(
    monthly_history,
    left_on='series',
    right_index=True,
    how='left',
).fillna(0).set_index('series')

merged_data.head()

# <center> Dividing train data into X_train, y_train

In [None]:
# Target: the last month column of the pivot.
y_train = train_data_pivot.iloc[:, -1]
# Features: every month except the last one, with a trailing axis of size 1
# added so the array is 3-D (series, months, 1) for the LSTM.
X_train = train_data_pivot.iloc[:, :-1].to_numpy()[:, :, np.newaxis]

# <center> Modeling

In [None]:
# Single-layer LSTM regressor: reads one series' monthly history and outputs
# one value, the predicted count for the following month.
model = Sequential([
    LSTM(
        units=64,
        # (timesteps, features) is taken from the training tensor rather than
        # hard-coded, so the model keeps working if the number of history
        # months changes. For the current data this is (33, 1).
        input_shape=X_train.shape[1:],
        return_sequences=False,
    ),
    # ReLU on the output keeps predictions non-negative.
    Dense(
        units=1,
        activation='relu',
    ),
])

# Train on MAE while tracking RMSE as a metric.
model.compile(
    optimizer='adam',
    loss='mae',
    metrics=[
        tf.keras.metrics.RootMeanSquaredError()  # Using the same metrics as Kaggle validator
    ],
)

In [None]:
# Fit for 10 epochs with large batches, holding out 40% of the series for
# validation; the returned History object feeds the training-curve plots.
# NOTE(review): Keras documents validation_split as taking the hold-out from
# the end of the arrays before shuffling; rows here follow the pivot's series
# ordering, so confirm the hold-out is representative.
history = model.fit(
    X_train,
    y_train,
    batch_size=4056,
    epochs=10,
    validation_split=0.4,
)

In [None]:
# Training curves, side by side: validation RMSE per epoch on the left,
# training loss per epoch on the right.
fig, ax = plt.subplots(nrows=1, ncols=2)

ax[0].plot(history.history['val_root_mean_squared_error'], color='orange')
ax[1].plot(history.history['loss'], color='green')

ax[0].set(title='RMSE', xlabel='Epochs', ylabel='Error')
ax[1].set(title='Loss', xlabel='Epochs', ylabel='Loss')

ax[0].grid(alpha=0.5)
ax[1].grid(alpha=0.5)

# <center> Test data processing

In [None]:
# Model input for the test pairs: skip the first month column of merged_data
# so the history window slides forward by one month, then add the trailing
# feature axis the LSTM expects.
test_data = merged_data.iloc[:, 1:].to_numpy()[:, :, np.newaxis]

# One predicted value per test row.
predictions = model.predict(test_data)

In [None]:
# Assemble the submission DataFrame: the test IDs next to the flattened
# predictions (row order follows sales_test), then write it without the index.
# NOTE(review): the competition reportedly scores against targets clipped to
# [0, 20]; consider clipping predictions accordingly - confirm against the
# competition rules.
submission = pd.DataFrame({'ID': sales_test['ID']})
submission['item_cnt_month'] = predictions.ravel()
submission.to_csv('./submission.csv', index=False)