### Load data and build matrices

In [1]:
cd ../../../../

/Users/archnnj/Development/recsys/recsys_polimi_challenge_2018/repo


In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pyplot
%matplotlib inline
import scipy.sparse as sps
from scipy.stats import iqr
import seaborn as sns
sns.set(style="white", color_codes=True)
sns.set_context(rc={"font.family":'sans',"font.size":12,"axes.titlesize":12,"axes.labelsize":12})

import src.utils.build_icm as build_icm
from src.utils.data_splitter import train_test_holdout, train_test_user_holdout, train_test_row_holdout

In [4]:
import sys
sys.path.append("src/libs/RecSys_Course_2018/") # go to parent dir
sys.path.append("src/libs/RecSys_Course_2018/SequenceAware/sars_tutorial_master/") # go to parent dir

In [5]:
from SequenceAware.sars_tutorial_master.util.data_utils import create_seq_db_filter_top_k, sequences_to_spfm_format
from SequenceAware.sars_tutorial_master.util.data_utils import create_seq_db_filter_top_k, sequences_to_spfm_format
from SequenceAware.sars_tutorial_master.util.split import last_session_out_split
from SequenceAware.sars_tutorial_master.util.metrics import precision, recall, mrr
from SequenceAware.sars_tutorial_master.util import evaluation
from SequenceAware.sars_tutorial_master.recommenders.MixedMarkovRecommender import MixedMarkovChainRecommender

#### Global vars

In [6]:
JUPYTER = False

#### Load data

In [7]:
if JUPYTER:
    # Jupyter
    tracks_csv_file = "../../../data/tracks.csv"
    interactions_csv_file = "../../../data/train.csv"
    playlist_id_csv_file = "../../../data/target_playlists.csv"
    sequential_csv_file = "../../../data/train_sequential.csv"
else:
    # PyCharm
    tracks_csv_file = "data/tracks.csv"
    interactions_csv_file = "data/train.csv"
    playlist_id_csv_file = "data/target_playlists.csv"
    sequential_csv_file = "data/train_sequential.csv"

tracks_df = pd.read_csv(tracks_csv_file)
interactions_df = pd.read_csv(interactions_csv_file)
playlist_id_df = pd.read_csv(playlist_id_csv_file)
train_sequential_df = pd.read_csv(sequential_csv_file)

userList = interactions_df["playlist_id"]
itemList = interactions_df["track_id"]
ratingList = np.ones(interactions_df.shape[0])
targetsList = playlist_id_df["playlist_id"]
targetsListOrdered = targetsList[:5000].tolist()
targetsListCasual = targetsList[5000:].tolist()

userList_unique = pd.unique(userList)
itemList_unique = tracks_df["track_id"]
numUsers = len(userList_unique)
numItems = len(itemList_unique)
numberInteractions = interactions_df.size

In [8]:
URM_all = sps.coo_matrix((ratingList, (userList, itemList)))
URM_all_csr = URM_all.tocsr()

In [9]:
itemPopularity = (URM_all>0).sum(axis=0)
itemPopularity = np.array(itemPopularity).squeeze()
itemPopularity_unsorted = itemPopularity
itemPopularity = np.sort(itemPopularity)

#### Prepare ICM and URM with splits

In [10]:
import pickle

with open('dump/dump_URM_train_rowholdout0802', 'rb') as dump_file:
    URM_train = pickle.load(dump_file)
with open('dump/dump_URM_test_rowholdout0802', 'rb') as dump_file:
    URM_test = pickle.load(dump_file)

#### 3. Fitting the recommender

In [12]:
# You can try with max_order=2 or higher too, but it will take some time to complete though due to slow heristic computations
recommender = MixedMarkovChainRecommender(URM_train, train_sequential_df, targetsListOrdered,
                                          min_order=1, 
                                          max_order=1)

In [13]:
recommender.fit()

2019-01-10 02:39:39,770 - INFO - Building Markov Chain model with k = 1
2019-01-10 02:39:39,774 - INFO - Adding nodes
2019-01-10 02:39:51,672 - INFO - Adding edges
2019-01-10 02:56:53,536 - INFO - Applying skipping
2019-01-10 02:56:58,981 - INFO - Applying clustering
2019-01-10 02:56:58,983 - INFO - 12540 states in the graph


In [19]:
recommender.recommenders[1].G.

<networkx.classes.digraph.DiGraph at 0x111614e10>

In [25]:
est_rat = recommender.compute_markov_score(targetsListOrdered)
print(est_rat)
est_rat




<50446x20635 sparse matrix of type '<class 'numpy.float64'>'
	with 0 stored elements in Compressed Sparse Row format>

In [None]:
recommendations = recommender.recommend(targetsListOrdered, cutoff=10)
recommendations

#### Saving and loading mechanism

##### Saving

In [16]:
recommender.saveModel("dump/", "dump_MixedMarkovChainRecommender_ord_1_1")

MixedMarkovChainRecommender: Saving model in file 'dump/dump_MixedMarkovChainRecommender_ord_1_1'
{}: Saving complete
MarkovChainRecommender: Saving model in file 'dump/dump_MixedMarkovChainRecommender_ord_1_1_MarkovChainRecommender_order_1'
MarkovChainRecommender: Saving graph model in file 'dump/dump_MixedMarkovChainRecommender_ord_1_1_MarkovChainRecommender_order_1_G'
{}: Saving complete


##### Loading

In [11]:
recommender_load = MixedMarkovChainRecommender(URM_train, train_sequential_df, targetsListOrdered,
                                          min_order=1, 
                                          max_order=1)

In [12]:
recommender_load.loadModel("dump/", "dump_MixedMarkovChainRecommender_ord_1_1")

MixedMarkovChainRecommender: Loading model from file 'dump/dump_MixedMarkovChainRecommender_ord_1_1'
MixedMarkovChainRecommender: Loading complete
MarkovChainRecommender: Loading model from file 'dump/dump_MixedMarkovChainRecommender_ord_1_1_MarkovChainRecommender_order_1'
MarkovChainRecommender: Loading complete
MarkovChainRecommender: Loading graph model from file 'dump/dump_MixedMarkovChainRecommender_ord_1_1_MarkovChainRecommender_order_1_G'
MarkovChainRecommender: Loading of graph model complete


##### Test loaded model

In [13]:
est_rat = recommender_load.compute_markov_score(targetsListOrdered)
print(est_rat)
est_rat

  (7, 215)	0.0009088337542277413
  (7, 229)	0.00028401054819616914
  (7, 315)	0.0009088337542277413
  (7, 358)	1.3867702548641072e-08
  (7, 414)	0.00022720843855693533
  (7, 436)	0.0009088337542277413
  (7, 541)	1.1094162038912858e-07
  (7, 573)	0.00022720843855693533
  (7, 583)	0.003635335016910965
  (7, 603)	0.0009088337542277413
  (7, 648)	0.00011365969008866223
  (7, 849)	0.003635335016910965
  (7, 868)	0.003635335016910965
  (7, 961)	1.1094162038912858e-07
  (7, 1006)	0.00022720843855693533
  (7, 1023)	0.0018176675084554826
  (7, 1063)	0.0018744696180947164
  (7, 1093)	7.100263704904229e-06
  (7, 1231)	0.00727067003382193
  (7, 1373)	2.8404521745254074e-05
  (7, 1398)	2.2188324077825715e-07
  (7, 1427)	0.00045441687711387065
  (7, 1442)	2.2188324077825715e-07
  (7, 1461)	0.003635335016910965
  (7, 1741)	0.00727067003382193
  :	:
  (50431, 4693)	0.003734854508450838
  (50431, 5252)	0.04855310860986089
  (50431, 5674)	0.014939418033803352
  (50431, 5778)	0.029878836067606704
  (5043

<50446x20635 sparse matrix of type '<class 'numpy.float64'>'
	with 924641 stored elements in Compressed Sparse Row format>

In [17]:
recommendations = recommender_load.recommend(targetsListOrdered, cutoff=10)
recommendations[0:10]

[[17154, 20592, 14453, 12599, 13283, 5668, 15558, 11421, 15926, 1741],
 [17088, 12030, 6843, 9635, 1900, 2986, 3238, 6472, 5105, 1220],
 [16708, 5679, 4109, 7197, 13803, 5925, 14504, 4199, 16205, 9003],
 [9643, 9230, 14751, 6639, 6873, 6874, 6875, 6876, 6877, 6878],
 [5765, 3399, 6762, 9508, 4267, 8229, 16198, 17066, 3885, 17379],
 [19054, 12303, 3544, 9696, 2736, 16686, 12539, 11989, 10048, 5018],
 [18040, 6701, 12066, 4111, 12038, 4916, 12075, 8996, 7197, 13247],
 [8928, 6971, 20532, 7820, 8437, 14277, 19033, 6884, 6624, 4467],
 [362, 10100, 1909, 16115, 12164, 4574, 2122, 13626, 15775, 17226],
 [865, 2869, 11654, 18026, 7910, 3919, 12226, 4476, 6861, 10257]]