In [1]:
import time, sys
import warnings

import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, csc_matrix
from scipy.sparse.linalg import norm
from numpy.linalg import svd

from sklearn.linear_model import ElasticNet
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

import matplotlib as mpl
import matplotlib.pyplot as plt


from polara.datasets.movielens import get_movielens_data
from polara.tools.preprocessing import filter_sessions_by_length
from polara import RecommenderData

from SLIM import SLIM, SLIMatrix

warnings.filterwarnings("ignore")
%matplotlib inline

In [2]:
#Load row Data file

DATA_NAME = 'ml-1m'
DATA_FILE = '/home/albert/Recommendations/{}.zip'.format(DATA_NAME)

ml_data = get_movielens_data(local_file=DATA_FILE, get_genres=False)

In [3]:
#Make rating matrix from 3 columns of data

ratingMtx = ml_data.pivot(index='userid', columns='movieid', values='rating').fillna(0.0)

#Change non-zero values on 1.0 as a sign of interaction
#ratingMtx[ratingMtx[:] > 0.0] = 1.0

In [4]:
ratingMtx.head()

movieid,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
ratingMtx.shape

(6040, 3706)

In [6]:
#Eliminate value from data and put the index of it into test

def train_test_split(data):
    test = []   
    for i in data.index:
        item = data.loc[i,:].sort_values(ascending=False)
        maxim = item.iloc[0]
        item = item[item == maxim]
        c = np.random.choice(item.index)
        test.append(c)
        data.loc[i, c:c] = 0.0        
    test = np.array(test) 
    return test

In [7]:
#Make train, validation, test sets

#valid = train_test_split(ratingMtx)
test = train_test_split(ratingMtx)

In [8]:
ratingMtx[ratingMtx[:] > 0.0] = 1.0

In [9]:
u, s, vh = np.linalg.svd(ratingMtx, full_matrices=True)

In [10]:
u.shape, s.shape, vh.shape

((6040, 6040), (3706,), (3706, 3706))

In [11]:
rank = 50
v = vh.T[:,:rank]

In [12]:
m = v@v.T

In [13]:
d = ratingMtx@m

In [14]:
d.columns = ratingMtx.columns

In [15]:
#Make P matrix that our final recommendation model

def rec_walk_model(W, R, alp=0.005):#0.001):
    normW = norm(W, np.inf) # Count infinite matrix norm of our Model
    ones = np.ones(W.shape[0])
    W = W / normW
    diagVec = ones - W.dot(ones)
    row = range(0,diagVec.shape[0])
    col = range(0,diagVec.shape[0])
    Diag = csr_matrix((diagVec, (row, col)), shape=(diagVec.shape[0], diagVec.shape[0]))
    W = W + Diag

    M = csr_matrix(np.diag(np.ones(R.shape[0] + R.shape[1])), dtype='float64') # One of two Mtx in RecWalkModel
    M[M.shape[0]-W.shape[0]:, M.shape[0]-W.shape[0]:] = W

    H = csr_matrix(np.diag(np.zeros(R.shape[0] + R.shape[1])), dtype='float64')
    H[: R.shape[0], H.shape[1] - R.shape[1]:] = R
    H[H.shape[0] - R.shape[1]:, : R.shape[0]] = R.transpose()
    k = H.dot(np.ones(R.shape[0] + R.shape[1]))
    k = 1 / k
    H = csr_matrix((H.transpose().dot(np.diag(k).transpose())).transpose())

    P = alp*H + (1 - alp)*M
    return P

In [16]:
def hit_rate(recMtx, ratingMtx, testVec, topN = 10):
    sum = 0.0
    for i in range(1, testVec.size + 1):
        r = ratingMtx.loc[i: i, :].unstack().droplevel("userid") < 1.0
        l = recMtx.loc[i:i,:].unstack().droplevel("userid")[r]
        m = l[l.index!=testVec[i - 1]]
        c = np.append(np.random.choice(m.index.values, 999),(testVec[i - 1]))
        if (testVec[i - 1] in 
            recMtx.loc[i:i,:].unstack().droplevel("userid")[r][c].sort_values(ascending=False)[:topN].index.values):
            sum += 1.0
    return sum / testVec.size   

In [17]:
#Count Hit Rate of Recommendation for every user using test set:

HRPureSVD = hit_rate(d,ratingMtx, test)
print("Hit Rate PureSVD({}): {};\n".format(rank,HRPureSVD))

Hit Rate PureSVD(50): 0.5428807947019868;

