In [1]:
import time, sys
import warnings

import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, csc_matrix
from scipy.sparse import bmat, diags, identity
from scipy.sparse.linalg import norm

from sklearn.linear_model import ElasticNet
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split as sk_train_test_split

import matplotlib as mpl
import matplotlib.pyplot as plt


from polara.datasets.movielens import get_movielens_data
from polara.tools.preprocessing import filter_sessions_by_length
from polara import RecommenderData

from SLIM import SLIM, SLIMatrix

warnings.filterwarnings("ignore")
%matplotlib inline

In [2]:
#Build the RecWalk transition matrix P, which is our final recommendation model

def rec_walk_model(W, R, alp=0.005):#0.001):
    """Build the RecWalk transition matrix ``P = alp*H + (1 - alp)*M``.

    All blocks are assembled as sparse matrices; nothing of size
    ``(n_users + n_items)**2`` is materialised densely.

    Parameters
    ----------
    W : scipy sparse matrix, square
        Item-item model. It is divided by its infinity norm and the amount
        each row falls short of 1 is placed on the diagonal, so every row of
        the resulting block sums to one.
    R : scipy sparse matrix or array-like, shape (n_users, n_items)
        User-item interaction matrix.
    alp : float
        Weight of the user-item walk ``H``; ``1 - alp`` is the weight of ``M``.

    Returns
    -------
    scipy.sparse.csr_matrix
        Square matrix with ``n_users + n_items`` rows; user nodes come first.

    Raises
    ------
    ValueError
        If ``W`` has more rows than ``n_users + n_items``.
    """
    R = csr_matrix(R, dtype='float64')
    n_users, n_items = R.shape
    n_nodes = n_users + n_items

    # --- item-item block: scale by the infinity norm, then top up the diagonal.
    normW = norm(W, np.inf)
    if normW > 0:  # an all-zero W is left as is instead of dividing by zero
        W = W / normW
    row_sums = np.asarray(W.sum(axis=1)).ravel()
    W = csr_matrix(W + diags(1.0 - row_sums), dtype='float64')

    # --- M: identity everywhere except the lower-right corner, which holds W.
    pad = n_nodes - W.shape[0]
    if pad < 0:
        raise ValueError(
            "W has {} rows but R only provides {} nodes".format(W.shape[0], n_nodes))
    if pad:
        M = bmat([[identity(pad, dtype='float64', format='csr'), None],
                  [None, W]], format='csr')
    else:
        M = W

    # --- H: bipartite user-item adjacency with every row scaled to sum to one.
    H = bmat([[None, R], [R.T, None]], format='csr')
    degree = np.asarray(H.sum(axis=1)).ravel()
    inv_degree = np.zeros_like(degree)
    # Nodes without any interaction keep an all-zero row instead of producing 1/0.
    np.divide(1.0, degree, out=inv_degree, where=degree != 0)
    H = diags(inv_degree).dot(H)

    P = alp * H + (1 - alp) * M
    return P.tocsr()

In [4]:
#Zero out one top-rated item per row of data and collect its column label as the test set

def train_test_split(data):
    """Hold out one highest-valued entry per row of ``data``.

    For every row label, the columns holding the row maximum are collected,
    one of them is drawn with ``np.random.choice`` and that cell is set to
    0.0 directly in ``data`` (the frame is modified in place).

    Note: this definition reuses the name of the ``train_test_split``
    imported from scikit-learn at the top of the notebook and replaces it
    from this cell onwards.

    Returns
    -------
    numpy.ndarray
        The chosen column label for each row, in ``data.index`` order.
    """
    held_out = []
    for row_label in data.index:
        # Sort descending so the first element is the row maximum.
        ranked = data.loc[row_label, :].sort_values(ascending=False)
        best = ranked.iloc[0]
        top_items = ranked[ranked == best].index
        chosen = np.random.choice(top_items)
        held_out.append(chosen)
        # Label slice chosen:chosen addresses exactly the selected column.
        data.loc[row_label, chosen:chosen] = 0.0
    return np.array(held_out)

In [5]:
def hit_rate(recMtx, ratingMtx, testVec, topN = 10, n_negatives = 999):
    """Sampled hit rate of the held-out items.

    For each user the held-out item is ranked, by its score in ``recMtx``,
    against ``n_negatives`` items drawn from that user's items whose value in
    ``ratingMtx`` is below 1.0. A hit is counted when the held-out item is
    among the ``topN`` highest-scored candidates.

    Parameters
    ----------
    recMtx : pandas.DataFrame
        Scores, rows labelled by user and columns by item.
    ratingMtx : pandas.DataFrame
        Interaction matrix with the same labels; its row order defines which
        entry of ``testVec`` belongs to which user.
    testVec : array-like
        Held-out item label per user, in ``ratingMtx.index`` order.
    topN : int
        Length of the recommendation list.
    n_negatives : int
        Number of negative items drawn per user.

    Returns
    -------
    float
        Hits divided by ``testVec.size``; 0.0 for an empty ``testVec``.
    """
    testVec = np.asarray(testVec)
    if testVec.size == 0:
        return 0.0

    hits = 0.0
    # Pair users with their held-out item by position, so user labels need not
    # be 1..N and the index does not have to carry a particular name.
    for user, held_out in zip(ratingMtx.index, testVec):
        unseen = ratingMtx.loc[user] < 1.0
        scores = recMtx.loc[user][unseen]
        pool = scores.index[scores.index != held_out].values
        if pool.size and n_negatives > 0:
            # NOTE(review): np.random.choice draws with replacement by default,
            # so the candidate list can contain the same item several times;
            # kept as is to leave previously reported numbers comparable.
            sampled = np.random.choice(pool, n_negatives)
        else:
            sampled = pool[:0]
        candidates = np.append(sampled, held_out)
        top = scores.loc[candidates].sort_values(ascending=False).iloc[:topN]
        if held_out in top.index.values:
            hits += 1.0
    return hits / testVec.size

In [6]:
#Load the raw data file:
#NOTE(review): DATA_FILE is an absolute path on one particular machine;
#confirm or adjust it before running the notebook anywhere else.

DATA_NAME = 'ml-1m'
DATA_FILE = '/home/albert/Recommendations/{}.zip'.format(DATA_NAME)

ml_data = get_movielens_data(local_file=DATA_FILE, get_genres=False)

In [35]:
#SANDBOX:) SLIM model selection on a random 90/10 split of the raw ratings.
#The notebook defines its own one-argument train_test_split(data), which
#replaces the scikit-learn function of the same name, so the scikit-learn
#splitter is called through its alias here.
X_train, X_test = sk_train_test_split(ml_data, test_size=0.1, random_state=42)

trainmat = SLIMatrix(X_train)
testmat = SLIMatrix(X_test, trainmat)

params = { 'algo':'cd', 
          'nthreads':4, 
          'l1r':1., 
          'l2r':1.,
          'optTol':1e-7,
          'niters':100,
          'nnbrs':150
          }

#Candidate l1 / l2 regularisation values handed to mselect:
l1s = [28,30]
l2s = [28,30]

model = SLIM()
model.mselect(params, trainmat, testmat, l1s, l2s, nrcmds=10)

"\nX_train, X_test = train_test_split(ml_data, test_size=0.1, random_state=42)\n\ntrainmat = SLIMatrix(X_train)\ntestmat = SLIMatrix(X_test, trainmat)\n\nparams = { 'algo':'cd', \n          'nthreads':4, \n          'l1r':1., \n          'l2r':1.,\n          'optTol':1e-7,\n          'niters':100,\n          'nnbrs':150\n          }\n\nl1s = [28,30]\nl2s = [28,30]\n\nmodel = SLIM()\nmodel.mselect(params, trainmat, testmat, l1s, l2s, nrcmds=10)\n"

In [7]:
#Build the user x item rating matrix from the userid, movieid and rating
#columns; user/item pairs without a rating are filled with 0.0:

ratingMtx = ml_data.pivot(index='userid', columns='movieid', values='rating').fillna(0.0)

#Binarisation (every non-zero value -> 1.0 as a sign of interaction) is left
#disabled here; the same statement is executed in a later cell, after the split:
#ratingMtx[ratingMtx[:] > 0.0] = 1.0

In [8]:
#Preview the first rows of the user x item matrix:
ratingMtx.head()

movieid,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
#Make train, test sets:
#train_test_split (defined in this notebook) zeroes one top-rated item per user
#inside ratingMtx in place and returns the held-out item labels, one per user.
#NumPy's global RNG is seeded first so that the random hold-out, and the
#negative sampling in hit_rate which draws from the same RNG, are reproducible.

np.random.seed(42)
test = train_test_split(ratingMtx)

In [10]:
#Binarise in place: every positive rating becomes 1.0 (a sign of interaction).
#This runs after the hold-out split, which picks items by their rating value.
ratingMtx[ratingMtx[:] > 0.0] = 1.0

In [11]:
#Make the data sparse: convert the binarised training matrix to CSR format.

sparseRating = csr_matrix(ratingMtx.to_numpy())

In [12]:
#How dense is the training interaction matrix (after the hold-out)?

a, b = ratingMtx.index.size, ratingMtx.columns.size   # number of users, number of items
c = a * b                                              # total number of cells

print(
    "Users: {};\nItems: {};\nEntries all: {};\nEntries non zero: {};\nDensity of A: {};\n".format(
        a, b, c, sparseRating.size, sparseRating.size / c
    )
)

Users: 6040;
Items: 3706;
Entries all: 22384240;
Entries non zero: 994169;
Density of A: 0.04441379291858915;



In [13]:
#Nearest-neighbour parameters:

num = 0.1                                # fraction of the item catalogue
C = int(num * ratingMtx.columns.size)    # neighbour count, later passed to SLIM as 'nnbrs'
initHR = 0.0                             # best hit rate seen so far

In [14]:
# Weight of the user-item walk; passed to rec_walk_model and echoed in the report,
# so the printed value always matches the one actually used.
alp = 0.005
# User nodes occupy the first rows/columns of the RecWalk matrix, items the rest.
n_users, n_items = sparseRating.shape

for i1 in [10]:
    for i2 in [10]:

        #Train Matrix W:
        trainmat = SLIMatrix(sparseRating)
        params = {'algo':'cd',
                  'nthreads':4,
                  'l1r':i1,
                  'l2r':i2,
                  'nnbrs': C
                 }
        model = SLIM()
        model.train(params, trainmat)

        #Got Matrix W:
        WSlim = model.to_csr()

        #RecWalk P matrix with W SLIM based (sparse form plus the dense copy):
        PSparseSlim = rec_walk_model(WSlim, sparseRating, alp=alp)
        PMtxSlim = PSparseSlim.toarray()

        #Take a few more steps of the walk to capture interactions between items.
        #After iteration k the matrix equals P to the power k + 2.
        kMtxSlim = PMtxSlim
        for k in range(20):
            # The right-hand factor stays sparse; the product is a dense array.
            kMtxSlim = kMtxSlim @ PSparseSlim

            #Extract the user -> item block of scores from the walk matrix:
            RecSlim = pd.DataFrame(kMtxSlim[:n_users, n_users:],
                                   index=ratingMtx.index,
                                   columns=ratingMtx.columns,
                                   dtype='float64')

            #Compute the hit rate of the recommendations for every user using the test set:
            HRSlim = hit_rate(RecSlim, ratingMtx, test)
            if initHR < HRSlim:
                print("Hit Rate RecWalk[M] K-step with W based on SLIM: {};\n".format(HRSlim))
                print("Parameters: C = {}; y1 = {}; y2 = {}; k = {}; alp = {};\n".format(C, i1, i2, k, alp))

                #Density of the rating matrix and of W, negative and diagonal entries of W:
                print("Density of Rating matrix: {};\n".format(sparseRating.size / (n_users * n_items)))
                print("Density of W: {};\n".format(WSlim.size / WSlim.shape[0]**2))
                # Only stored entries can be negative, so W does not need to be densified.
                print("Number of elements less than 0 in W matrix: {} \n".format(int((WSlim.data < 0).sum())))
                print("Maximal diagonal element of the W matrix: {}\n".format(WSlim.diagonal().max()))
                initHR = HRSlim

Learning takes 11.772 secs.
Hit Rate RecWalk[M] K-step with W based on SLIM: 0.5450331125827814;

Parameters: C = 370; y1 = 10; y2 = 10; k = 0; alp = 0.005;

Density of Rating matrix: 0.04441379291858915;

Density of W: 0.008234120425476518;

Number of elements less than 0 in W matrix: 0 

Maximal diagonal element of the W matrix: 0.0

