### Load data and build matrices

In [1]:
# Move the kernel's working directory four levels up (the recorded output shows
# it ends at the repository root), so the relative "src/", "data/" and "dump/"
# paths used later resolve.
# NOTE(review): the path is relative, so re-running this cell moves up again.
cd ../../../../

/Users/archnnj/Development/recsys/recsys_polimi_challenge_2018/repo


In [2]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell runs, so edits to project .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pyplot
%matplotlib inline
import scipy.sparse as sps
from scipy.stats import iqr
import seaborn as sns
sns.set(style="white", color_codes=True)
sns.set_context(rc={"font.family":'sans',"font.size":12,"axes.titlesize":12,"axes.labelsize":12})

import src.utils.build_icm as build_icm
from src.utils.data_splitter import train_test_holdout, train_test_user_holdout, train_test_row_holdout

In [4]:
import sys
# Extend the module search path so the packages under the bundled course
# library can be imported (paths are relative to the working directory).
sys.path.append("src/libs/RecSys_Course_2018/") # course library root
sys.path.append("src/libs/RecSys_Course_2018/SequenceAware/sars_tutorial_master/") # sequence-aware tutorial package root

In [5]:
# Sequence-database helpers from the sequence-aware tutorial package
# (this import was previously stated twice; once is enough).
from SequenceAware.sars_tutorial_master.util.data_utils import create_seq_db_filter_top_k, sequences_to_spfm_format
from SequenceAware.sars_tutorial_master.util.split import last_session_out_split
from SequenceAware.sars_tutorial_master.util.metrics import precision, recall, mrr
from SequenceAware.sars_tutorial_master.util import evaluation
from SequenceAware.sars_tutorial_master.recommenders.MixedMarkovRecommender import MixedMarkovChainRecommender

#### Global vars

In [6]:
# Selects which set of relative CSV paths the loading cell uses:
# True  -> "../../../data/..." (labelled "Jupyter" there),
# False -> "data/..."          (labelled "PyCharm" there).
JUPYTER = False

#### Load data

In [7]:
# Choose the CSV locations according to the JUPYTER flag.
if not JUPYTER:
    # PyCharm
    tracks_csv_file = "data/tracks.csv"
    interactions_csv_file = "data/train.csv"
    playlist_id_csv_file = "data/target_playlists.csv"
    sequential_csv_file = "data/train_sequential.csv"
else:
    # Jupyter
    tracks_csv_file = "../../../data/tracks.csv"
    interactions_csv_file = "../../../data/train.csv"
    playlist_id_csv_file = "../../../data/target_playlists.csv"
    sequential_csv_file = "../../../data/train_sequential.csv"

# Read the four input tables, in the same order as the paths are listed.
tracks_df, interactions_df, playlist_id_df, train_sequential_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        tracks_csv_file,
        interactions_csv_file,
        playlist_id_csv_file,
        sequential_csv_file,
    )
)

# Column views over the interaction table (one playlist/track pair per row).
userList = interactions_df["playlist_id"]
itemList = interactions_df["track_id"]
# One rating of 1.0 per interaction row.
ratingList = np.ones(interactions_df.shape[0])
targetsList = playlist_id_df["playlist_id"]
# Split the target playlists at position 5000.
# NOTE(review): presumably the first 5000 targets are the sequentially ordered
# playlists and the rest are unordered — confirm against the dataset description.
targetsListOrdered = targetsList[:5000].tolist()
targetsListCasual = targetsList[5000:].tolist()

# Distinct playlists seen in the interactions, and the track ids from tracks.csv.
userList_unique = pd.unique(userList)
itemList_unique = tracks_df["track_id"]
numUsers = len(userList_unique)
numItems = len(itemList_unique)
# Number of interactions = number of rows. DataFrame.size would return
# rows * columns and therefore over-count.
numberInteractions = interactions_df.shape[0]

In [8]:
# Build the user-rating matrix in COO form from (values, (rows, cols)); the
# shape is inferred from the largest playlist and track ids.
URM_all = sps.coo_matrix((ratingList, (userList, itemList)))
# CSR copy of the same matrix.
URM_all_csr = sps.csr_matrix(URM_all)

In [9]:
# Per-item popularity: count of positive entries in each column of the URM,
# flattened from the 1 x n_items matrix returned by the sparse sum.
itemPopularity_unsorted = np.array((URM_all > 0).sum(axis=0)).squeeze()
# np.sort returns a sorted copy, so the unsorted array above is left intact.
itemPopularity = np.sort(itemPopularity_unsorted)

#### Prepare ICM and URM with splits

In [10]:
import pickle

# Restore the pre-computed train/test URM split from the dump/ directory.
# NOTE: unpickling can execute arbitrary code — only load dump files that you
# produced yourself or otherwise trust.
with open('dump/dump_URM_train_rowholdout0802', mode='rb') as train_dump:
    URM_train = pickle.load(train_dump)

with open('dump/dump_URM_test_rowholdout0802', mode='rb') as test_dump:
    URM_test = pickle.load(test_dump)

#### Fitting the recommender

In [11]:
# Configure the recommender with Markov chain orders from min_order to max_order.
# Using max_order=2 or higher takes a long time to fit because of slow heuristic computations.
recommender = MixedMarkovChainRecommender(URM_train, train_sequential_df, targetsListOrdered,
                                          min_order=1, 
                                          max_order=2)

In [None]:
# Long-running step: in the run logged below, fitting took about 7.5 hours
# (02:45 -> 10:19), most of it after the order-1 graph had been built.
recommender.fit()

2019-01-10 02:45:22,755 - INFO - Building Markov Chain model with k = 1
2019-01-10 02:45:22,757 - INFO - Adding nodes
2019-01-10 02:45:28,643 - INFO - Adding edges
2019-01-10 03:01:26,668 - INFO - Applying skipping
2019-01-10 03:01:31,999 - INFO - Applying clustering
2019-01-10 03:01:32,001 - INFO - 12540 states in the graph
2019-01-10 10:18:28,692 - INFO - Building Markov Chain model with k = 2
2019-01-10 10:18:28,694 - INFO - Adding nodes
2019-01-10 10:18:39,576 - INFO - Adding edges
2019-01-10 10:18:59,258 - INFO - Applying skipping
2019-01-10 10:19:16,165 - INFO - Applying clustering
2019-01-10 10:19:16,186 - INFO - 817026 states in the graph


In [None]:
# Compute the Markov scores for the ordered target playlists and show them
# once, as the cell's last expression (the extra print() duplicated the output).
est_rat = recommender.compute_markov_score(targetsListOrdered)
est_rat

In [None]:
# Top-10 recommendations for the ordered target playlists.
# TODO(review): the original author's note here read "tree is good, there must
# be something wrong here" — the output of this step is suspected to be incorrect.
recommendations = recommender.recommend(targetsListOrdered, cutoff=10)
recommendations

#### Saving and loading mechanism

##### Saving

In [None]:
# Persist the fitted model into dump/ under the given file name.
# NOTE(review): judging by the recorded load output, per-order sub-models are
# presumably stored in additional files that use this name as a prefix — confirm.
recommender.saveModel("dump/", "dump_MixedMarkovChainRecommender_ord_1_2")

##### Loading

In [16]:
# Fresh, unfitted recommender that serves as the container for the saved model.
# NOTE(review): max_order=1 here, while the model loaded next covers orders 1
# and 2 — presumably loadModel overrides this setting; confirm.
recommender_load = MixedMarkovChainRecommender(URM_train, train_sequential_df, targetsListOrdered,
                                          min_order=1, 
                                          max_order=1)

In [17]:
# Load the model saved in the "Saving" step.
# NOTE(review): the recorded output below mentions 'dump/test_save_...' files,
# i.e. it comes from a run with a different file name than the one passed here.
recommender_load.loadModel("dump/", "dump_MixedMarkovChainRecommender_ord_1_2")

MixedMarkovChainRecommender: Loading model from file 'dump/test_save_MixedMarkovChainRecommender_ord_1_2'
MixedMarkovChainRecommender: Loading complete
MarkovChainRecommender: Loading model from file 'dump/test_save_MixedMarkovChainRecommender_ord_1_2_MarkovChainRecommender_order_1'
MarkovChainRecommender: Loading complete
MarkovChainRecommender: Loading graph model from file 'dump/test_save_MixedMarkovChainRecommender_ord_1_2_MarkovChainRecommender_order_1_G'
MarkovChainRecommender: Loading of graph model complete
MarkovChainRecommender: Loading model from file 'dump/test_save_MixedMarkovChainRecommender_ord_1_2_MarkovChainRecommender_order_2'
MarkovChainRecommender: Loading complete
MarkovChainRecommender: Loading graph model from file 'dump/test_save_MixedMarkovChainRecommender_ord_1_2_MarkovChainRecommender_order_2_G'
MarkovChainRecommender: Loading of graph model complete


##### Test loaded model

In [21]:
# Top-10 recommendations from the reloaded model, as a check of save/load.
# TODO(review): the original author's note here read "tree is good, there must
# be something wrong here"; the recorded output consists mostly of runs of
# consecutive track ids, so the result should be investigated.
recommendations = recommender_load.recommend(targetsListOrdered, cutoff=10)
recommendations

[[20634, 6857, 6879, 6880, 6881, 6882, 6883, 6884, 6885, 6886],
 [20634, 6853, 6875, 6876, 6877, 6878, 6879, 6880, 6881, 6882],
 [20634, 6854, 6876, 6877, 6878, 6879, 6880, 6881, 6882, 6883],
 [20634, 6851, 6873, 6874, 6875, 6876, 6877, 6878, 6879, 6880],
 [20634, 6854, 6876, 6877, 6878, 6879, 6880, 6881, 6882, 6883],
 [20634, 6852, 6874, 6875, 6876, 6877, 6878, 6879, 6880, 6881],
 [20634, 6853, 6875, 6876, 6877, 6878, 6879, 6880, 6881, 6882],
 [19926, 14796, 14732, 603, 18804, 11886, 15935, 6862, 6863, 6879],
 [20634, 6854, 6876, 6877, 6878, 6879, 6880, 6881, 6882, 6883],
 [20634, 6852, 6874, 6875, 6876, 6877, 6878, 6879, 6880, 6881]]