# Session Based Recommendations with RNN


In [None]:
!pip install recbole
!pip install torch-lr-finder

In [None]:
import pandas as pd
import gc
import numpy as np
import os

## 1. Creating input files for GRU-based RNN model

In [None]:
# Load the article metadata (article_id is read as a string so it is not
# parsed as a number) and discard the free-text / name columns right away;
# only the coded columns and product_group_name are kept.
df = pd.read_csv(
    r"/kaggle/input/h-and-m-personalized-fashion-recommendations/articles.csv",
    dtype={'article_id': 'str'},
).drop(
    columns=[
        'product_type_name',
        'graphical_appearance_name',
        'colour_group_name',
        'perceived_colour_value_name',
        'perceived_colour_master_name',
        'index_name',
        'index_group_name',
        'section_name',
        'garment_group_name',
        'prod_name',
        'department_name',
        'detail_desc',
    ]
)

# Rename every remaining column to a "<field>:<type>" header, the form the
# item file written from `temp` uses (RecBole atomic-file convention).
temp = df.rename(
    columns={
        'article_id': 'item_id:token',
        'product_code': 'product_code:token',
        'product_type_no': 'product_type_no:float',
        'product_group_name': 'product_group_name:token_seq',
        'graphical_appearance_no': 'graphical_appearance_no:token',
        'colour_group_code': 'colour_group_code:token',
        'perceived_colour_value_id': 'perceived_colour_value_id:token',
        'perceived_colour_master_id': 'perceived_colour_master_id:token',
        'department_no': 'department_no:token',
        'index_code': 'index_code:token',
        'index_group_no': 'index_group_no:token',
        'section_no': 'section_no:token',
        'garment_group_no': 'garment_group_no:token',
    }
)

In [None]:
# Create the dataset folder from Python instead of the IPython-only `!mkdir`
# shell escape: `exist_ok=True` makes the cell safe to re-run, and the line
# also works when the notebook is exported to a plain script.
os.makedirs('/kaggle/working/recbox_data', exist_ok=True)

# Tab-separated item feature table; the folder and file stem match the
# `dataset='recbox_data'` name passed to the RecBole Config further down.
temp.to_csv(r'/kaggle/working/recbox_data/recbox_data.item', index=False, sep='\t')

In [None]:
# Build the interaction file from the most recent part of the transaction log.
# article_id is read as a string so it is not parsed as a number.
df = pd.read_csv(
    r"/kaggle/input/h-and-m-personalized-fashion-recommendations/transactions_train.csv",
    dtype={'article_id': 'str'},
)
df['t_dat'] = pd.to_datetime(df['t_dat'], format="%Y-%m-%d")
# Integer view of the datetimes floor-divided by 1e9, i.e. epoch seconds
# (assumes the column has nanosecond resolution -- TODO confirm on the
# installed pandas version).
df['timestamp'] = df.t_dat.values.astype(np.int64) // 10 ** 9

_ = gc.collect()

# 1585620000 is 2020-03-31 02:00 UTC; t_dat holds whole days, so this keeps
# transactions dated 2020-04-01 or later.
temp = df.loc[
    df['timestamp'] > 1585620000,
    ['customer_id', 'article_id', 'timestamp'],
].rename(
    columns={
        'customer_id': 'user_id:token',
        'article_id': 'item_id:token',
        'timestamp': 'timestamp:float',
    }
)

temp.to_csv('/kaggle/working/recbox_data/recbox_data.inter', index=False, sep='\t')

## 2. Constructing and Training the RNN Model

In [None]:
import logging
from logging import getLogger
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.sequential_recommender import GRU4Rec
from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger

In [None]:
# RecBole configuration for GRU4Rec on the files written under
# /kaggle/working/recbox_data (data_path + dataset name).
parameter_dict = {
    'data_path': '/kaggle/working',
    'USER_ID_FIELD': 'user_id',
    'ITEM_ID_FIELD': 'item_id',
    'TIME_FIELD': 'timestamp',
    # Keep only users and items with at least 40 interactions.
    'user_inter_num_interval': "[40,inf)",
    'item_inter_num_interval': "[40,inf)",
    'load_col': {'inter': ['user_id', 'item_id', 'timestamp'],
                 'item': ['item_id', 'product_code', 'product_type_no', 'product_group_name', 'graphical_appearance_no',
                      'colour_group_code', 'perceived_colour_value_id', 'perceived_colour_master_id',
                      'department_no', 'index_code', 'index_group_no', 'section_no', 'garment_group_no']
             },
    'selected_features': ['product_code', 'product_type_no', 'product_group_name', 'graphical_appearance_no',
                          'colour_group_code', 'perceived_colour_value_id', 'perceived_colour_master_id',
                          'department_no', 'index_code', 'index_group_no', 'section_no', 'garment_group_no'],
    # Disable negative sampling for training. `neg_sampling` is the legacy
    # key; newer RecBole releases read `train_neg_sample_args` instead, and
    # the package is installed unpinned, so both spellings are set.
    # NOTE(review): confirm against the RecBole version actually installed.
    'neg_sampling': None,
    'train_neg_sample_args': None,
    'epochs': 70,
    'eval_args': {
        # 9:0:1 ratio split -> no validation part, 10% of each user's
        # time-ordered interactions held out for test.
        'split': {'RS': [9, 0, 1]},
        'group_by': 'user',
        'order': 'TO',
        'mode': 'full'}
}

config = Config(model='GRU4Rec', dataset='recbox_data', config_dict=parameter_dict)

# init random seed
init_seed(config['seed'], config['reproducibility'])

# logger initialization
init_logger(config)
logger = getLogger()
# Attach an extra INFO-level stream handler to the root logger.
c_handler = logging.StreamHandler()
c_handler.setLevel(logging.INFO)
logger.addHandler(c_handler)

# write config info into log
logger.info(config)

In [None]:
# Build the RecBole dataset described by `config` and log its summary.
_ = gc.collect()
dataset = create_dataset(config)
logger.info(dataset)

# dataset splitting: produces the train / validation / test data objects
# according to the `eval_args` set in the config.
_ = gc.collect()
train_data, valid_data, test_data = data_preparation(config, dataset)

In [None]:
# model loading and initialization
_ = gc.collect()

# Instantiate GRU4Rec on the training dataset and move it to the configured device.
model = GRU4Rec(config, train_data.dataset).to(config['device'])
logger.info(model)

# trainer loading and initialization
trainer = Trainer(config, model)

# model training
# Only the training data is passed -- no validation data is given to fit().
# NOTE(review): the returned best_valid_* values are presumably placeholders
# in that case; confirm before reporting them.
best_valid_score, best_valid_result = trainer.fit(train_data)

## 3. Getting recommendation results from the trained model

In [None]:
_ = gc.collect()

from recbole.utils.case_study import full_sort_topk
# Map every internal user index back to its original customer id.
# The first element in the array is 'PAD' (RecBole's default), so it is removed.
external_user_ids = dataset.id2token(
    dataset.uid_field, list(range(dataset.user_num)))[1:]

In [None]:
# Collect the 12 highest-scoring items for every real user, one user per call.
# Internal id 0 is skipped, matching the [1:] slice used for external_user_ids.
topk_items = []
for uid in range(1, dataset.user_num):
    _, ranked_iids = full_sort_topk([uid], model, test_data, k=12, device=config['device'])
    # Take the last row of the returned item-id matrix and translate the
    # internal item ids back to the original article ids.
    user_row = ranked_iids[-1]
    topk_items.append(dataset.id2token(dataset.iid_field, user_row.cpu()).tolist())
print(len(topk_items))

In [None]:
# One space-separated string of article ids per user, in the same order as
# external_user_ids.
external_item_str = list(map(' '.join, topk_items))
result = pd.DataFrame(external_user_ids, columns=['customer_id']).assign(
    prediction=external_item_str
)
result.head()

In [None]:
# Persist the predictions (customer_id, prediction) without the index column.
result.to_csv('result.csv', index = False)

## 4. Evaluation with MAP@12

In [None]:
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from skimage import io
import cv2
import gc
import pandas as pd
import numpy as np


def apk(actual, predicted, k=10):
    """Return the average precision at k for a single user.

    Args:
        actual: Collection of relevant items (e.g. purchased article ids).
        predicted: Ranked list of recommended items, best first.
        k: Maximum number of predictions that are scored.

    Returns:
        Average precision at k as a float; 0.0 when ``actual`` is empty.
    """
    # Guard first: nothing can be relevant, and the score would divide by zero.
    if not actual:
        return 0.0
    if len(predicted) > k:
        predicted = predicted[:k]
    score = 0.0
    num_hits = 0.0
    for i, p in enumerate(predicted):
        # A hit is a prediction that appears anywhere in `actual` (whole-item
        # membership, not a substring test against the last purchase) and has
        # not already been counted at an earlier rank.
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i + 1.0)
    return score / min(len(actual), k)

def mapk(actual, predicted, k=12):
    """Return the mean of `apk` over all users that have a non-empty ground truth.

    `actual` and `predicted` are iterated in lockstep; pairs whose ground
    truth is falsy (empty / null) are left out of the mean.
    """
    per_user_scores = []
    for truth, ranked in zip(actual, predicted):
        if not truth:
            continue
        per_user_scores.append(apk(truth, ranked, k))
    return np.mean(per_user_scores)

In [None]:
# Build the per-customer ground truth used for the MAP@12 evaluation.
_ = gc.collect()
transactions = pd.read_csv(r"/kaggle/input/h-and-m-personalized-fashion-recommendations/transactions_train.csv", \
                           dtype={'article_id': str}, parse_dates=['t_dat'])
# NOTE(review): this window starts before the cutoff used for the .inter file,
# so the ground truth overlaps the interactions the model was trained on --
# confirm this is intended.
val_start_date = '2020-01-01'
_ = gc.collect()
valid_df = transactions.query(f"t_dat >= '{val_start_date}'").reset_index(drop=True)

_ = gc.collect()
# Sort once, chronologically within each customer. The earlier descending
# sort was overwritten by this one and has been dropped.
valid_df = valid_df.sort_values(['customer_id', 't_dat'], ascending=[True, True])
# One row per customer with the time-ordered list of purchased article ids.
valid_cust = (
    valid_df.groupby('customer_id')['article_id']
    .apply(list)
    .reset_index()
    .rename(columns={'article_id': 'valid_true'})
)
del valid_df

In [None]:
# Join the saved predictions onto the ground truth; customers without a
# prediction get NaN from the left join and are removed by dropna.
result = pd.read_csv('./result.csv')
actual = valid_cust.merge(result, on='customer_id', how='left').dropna(axis=0)
# Turn each space-separated prediction string back into a list of article ids.
actual['prediction'] = [pred.split() for pred in actual['prediction']]
actual.head()

In [None]:
# MAP@12 of the saved predictions against each customer's purchase list.
mapk(actual['valid_true'], actual['prediction'], k=12)

## 5. Example Visualization 

In [None]:
_ = gc.collect()
# Root folder of the article images; the plotting helpers below read
# "<path>/<first 3 chars of article id>/<article id>.jpg".
path = "../input/h-and-m-personalized-fashion-recommendations/images"

def uu_plot_prev(index):
    """Plot up to six distinct purchased items of one customer.

    Args:
        index: Positional row index into the module-level ``actual`` frame.

    Images are read from ``<path>/<first 3 chars of id>/<id>.jpg``. Articles
    whose image file cannot be read are left as an empty panel.
    """
    # dict.fromkeys de-duplicates while keeping the stored purchase order, so
    # the same items are shown on every run (set() order is not stable).
    prev_items = list(dict.fromkeys(actual.iloc[index]["valid_true"]))
    fig = plt.figure(figsize=(35, 15))
    plt.title("Purchased items")
    for item, i in zip(prev_items, range(1, 7)):
        sub = item[:3]
        image = cv2.imread(path + "/" + sub + "/" + item + ".jpg")
        fig.add_subplot(1, 6, i)
        plt.xticks(())
        plt.yticks(())
        # cv2.imread returns None when the file is missing or unreadable.
        if image is None:
            continue
        # OpenCV loads images as BGR while matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        
def uu_plot_rcmd(index):
    """Plot the items recommended to one customer.

    Args:
        index: Positional row index into the module-level ``actual`` frame.

    Reads the ``prediction`` column (the ranked list of recommended article
    ids), not the purchase history. Images are read from
    ``<path>/<first 3 chars of id>/<id>.jpg``. Articles whose image file
    cannot be read are left as an empty panel.
    """
    # Keep the ranking order of the recommendations.
    rcmd_items = list(actual.iloc[index]["prediction"])
    fig = plt.figure(figsize=(45, 15))
    plt.title("Recommend items")
    # NOTE(review): range(1, 12) draws at most 11 panels although 12 items are
    # predicted per user; widen the grid if all of them should be shown.
    for item, i in zip(rcmd_items, range(1, 12)):
        sub = item[:3]
        image = cv2.imread(path + "/" + sub + "/" + item + ".jpg")
        fig.add_subplot(1, 11, i)
        plt.xticks(())
        plt.yticks(())
        # cv2.imread returns None when the file is missing or unreadable.
        if image is None:
            continue
        # OpenCV loads images as BGR while matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    
def uu_plot(index):
    """Draw the purchased-items figure, then the recommended-items figure, for row `index` of `actual`."""
    for draw in (uu_plot_prev, uu_plot_rcmd):
        draw(index)

In [None]:
# Example: purchases and recommendations for the customer at row 333 of `actual`.
uu_plot(333)

In [None]:
# Example: purchases and recommendations for the customer at row 210 of `actual`.
uu_plot(210)