## Imports

In [1]:
import io
import os
import gzip
import json
import scipy
import random
import warnings
import numpy as np
import numba as nb
import pandas as pd
import datetime as dt
import seaborn as sns
from tqdm import tqdm
import scipy.stats as st
import scipy.sparse as sparse
from scipy.linalg import orth
import matplotlib.pyplot as plt
from numpy import linalg as lin
warnings.filterwarnings('ignore')
from collections import OrderedDict
from scipy.sparse import coo_matrix
from scipy.sparse.linalg import svds
from datetime import datetime as dt_dt
from scipy.sparse.linalg import spsolve
from numpy.linalg import qr as QR_decomp
from scipy.sparse import csr_matrix, find
from pandas.api.types import CategoricalDtype

## Functions

In [2]:
# Put /content/ on the module search path so the project-local helper modules imported in the
# next cells (a_DataHelpers, b_AlgoFunctions, c_PredEval, d_CorrScores) can be found.
# NOTE(review): hard-coded absolute path — presumably the Colab working directory; adjust when
# running anywhere else.
import sys
sys.path.append('/content/')

In [3]:
import a_DataHelpers
import b_AlgoFunctions
import c_PredEval
import d_CorrScores

In [4]:
from a_DataHelpers import getPivotMonths,Time_DataSplit,TestTrain_DataSplit,SingleRatingMatrix
from a_DataHelpers import AllRatingMatrices,All_SingleStepRatMat,psiStep_RatMat,psiAllStep_RatMat
from a_DataHelpers import Find_NewUsersItems,adjustedPSI_DF,adjustedAllDF,get_NEWHoldout,ADJUST_mainDF

from b_AlgoFunctions import integrator,getStartingValues,integratorOnMat,last_psiTrainMat
from b_AlgoFunctions import Updt_RowMatrix,getRow_Mat,row_update, Updt_ColMatrix,getCol_Mat,colunm_update
from b_AlgoFunctions import UsersItems_RatPair,getRowCol_psiupdt,ITEMS_defferredStatus,USERS_defferredStatus
from b_AlgoFunctions import getV_listUpdate, SingleStep_UPDATE,ALLSTEPs_UPDATE

from c_PredEval import TopNPred,TQDMgetALLTopNPred, Hitrate_Eval,getAll_HitRate
from c_PredEval import TQDMgetALLTopNPred_ALLUSERS,getALLTopNPred_ALLUSERS
from c_PredEval import get_ALLRandPred,getAll_RandomHitRate,getMOSTPOP_Pred,getAll_MostPOPHitRate,getAll_MOSTPOP_Pred
from c_PredEval import topN_Index,get_ALLRandPred_2,getAll_RandomHitRate_2,getAll_MostPOPHitRate2

from d_CorrScores import no_copy_csr_matrix,build_rank_weights_matrix,rank_weighted_jaccard_index
from d_CorrScores import Updt_getAll_AvgCorr, updtCorr_4AllRanks

## Apply Functions on *MovieLens*

### Import Cleaned Data

In [5]:
# Load the pre-cleaned MovieLens interactions.
# NOTE(review): absolute Colab-style path; adjust when the CSV lives elsewhere.
MLDF  = pd.read_csv('/content/usedMovieLen.csv')
# Convert the timestamp column to datetime64 (confirmed by the dtypes printed below).
MLDF['timestamp'] = pd.to_datetime(MLDF['timestamp']) 
print(MLDF.shape)
print(MLDF.dtypes)
print()
MLDF

(575281, 6)
userId_int              int64
movieId_int             int64
rating                  int64
timestamp      datetime64[ns]
userId                  int64
productId               int64
dtype: object



Unnamed: 0,userId_int,movieId_int,rating,timestamp,userId,productId
0,6040,858,1,2000-04-25 23:05:32,0,0
1,6040,2384,1,2000-04-25 23:05:54,0,1
2,6040,593,1,2000-04-25 23:05:54,0,2
3,6040,1961,1,2000-04-25 23:06:17,0,3
4,6040,2019,1,2000-04-25 23:06:17,0,4
...,...,...,...,...,...,...
575276,4958,2453,1,2003-02-28 17:44:20,1081,1838
575277,4958,3489,1,2003-02-28 17:45:20,1081,632
575278,4958,1407,1,2003-02-28 17:47:23,1081,677
575279,4958,3264,1,2003-02-28 17:49:08,1081,1018


In [6]:
# Working frame: the re-indexed id columns plus rating and timestamp.
MovieLen_df = MLDF[['userId','productId','rating','timestamp']]
print(MovieLen_df.shape)
# For each id column print the number of distinct ids followed by the largest id.
for heading, id_col in (("For users: ", 'userId'), ("For items: ", 'productId')):
    print(heading)
    print(MovieLen_df[id_col].nunique(), MovieLen_df[id_col].max())
# Time span covered by the ratings (earliest, then latest).
print("Timestamp")
print(MovieLen_df['timestamp'].min(), MovieLen_df['timestamp'].max(), sep='\n')
MovieLen_df

(575281, 4)
For users: 
6038 6037
For items: 
3533 3532
Timestamp
2000-04-25 23:05:32
2003-02-28 17:49:50


Unnamed: 0,userId,productId,rating,timestamp
0,0,0,1,2000-04-25 23:05:32
1,0,1,1,2000-04-25 23:05:54
2,0,2,1,2000-04-25 23:05:54
3,0,3,1,2000-04-25 23:06:17
4,0,4,1,2000-04-25 23:06:17
...,...,...,...,...
575276,1081,1838,1,2003-02-28 17:44:20
575277,1081,632,1,2003-02-28 17:45:20
575278,1081,677,1,2003-02-28 17:47:23
575279,1081,1018,1,2003-02-28 17:49:08


### Step 1: Data Splitting

In [7]:
# Pivot timestamps used to cut the data into monthly steps. With N_TMonths=18 the result is a
# list of 18 timestamps, newest first, each one month earlier than the previous (see output).
pivotMonths_list  = getPivotMonths(MovieLen_df,'timestamp',N_TMonths=18)  
pivotMonths_list

[Timestamp('2003-01-28 17:49:50'),
 Timestamp('2002-12-28 17:49:50'),
 Timestamp('2002-11-28 17:49:50'),
 Timestamp('2002-10-28 17:49:50'),
 Timestamp('2002-09-28 17:49:50'),
 Timestamp('2002-08-28 17:49:50'),
 Timestamp('2002-07-28 17:49:50'),
 Timestamp('2002-06-28 17:49:50'),
 Timestamp('2002-05-28 17:49:50'),
 Timestamp('2002-04-28 17:49:50'),
 Timestamp('2002-03-28 17:49:50'),
 Timestamp('2002-02-28 17:49:50'),
 Timestamp('2002-01-28 17:49:50'),
 Timestamp('2001-12-28 17:49:50'),
 Timestamp('2001-11-28 17:49:50'),
 Timestamp('2001-10-28 17:49:50'),
 Timestamp('2001-09-28 17:49:50'),
 Timestamp('2001-08-28 17:49:50')]

In [8]:
# Split the ratings into the base frame A0_df and two lists of increment frames
# (ΔA_train, ΔA_test), presumably cut at the pivot timestamps — see a_DataHelpers.Time_DataSplit.
# With n_train=10 the training list holds 10 frames (printed below).
A0_df,ΔA_train,ΔA_test = Time_DataSplit(MovieLen_df,'timestamp',pivotMonths_list,N_TMonths= 18,n_train=10)
print('Last date in A0_df :',A0_df['timestamp'].max())
print('Num ΔA_train :', len(ΔA_train))

Last date in A0_df : 2001-08-28 17:32:06
Num ΔA_train : 10


In [9]:
# Build the per-test-step frames used for evaluation. Four parallel lists are returned; the
# first one has 8 entries (printed below).
# NOTE(review): the exact content of each list is defined in a_DataHelpers.TestTrain_DataSplit —
# the names suggest cumulative frames, test increments, holdout rows and user-item frames.
AllDF_list, PSITest_list, HOLDOUT_list, UserItemDF_list = TestTrain_DataSplit(MovieLen_df,'userId','timestamp',pivotMonths_list,ΔA_test)
print('Num AllDF_list :', len(AllDF_list))

Num AllDF_list : 8


In [None]:
# Distinct users / items in every increment: training steps first, then test steps.
for step_df in [*ΔA_train, *ΔA_test]:
    print(step_df['userId'].nunique(),step_df['productId'].nunique())

### Step 2: Find new users and items

In [10]:
# Everything rated up to (and including) the last timestamp of the final training increment.
Tstps = MovieLen_df['timestamp']
tr_ts = ΔA_train[-1]['timestamp'].max()
print("ΔA_train_last max date: ",tr_ts)
AllDF_start = MovieLen_df.loc[Tstps <= tr_ts]

# Time span of the starting frame, then of the first frame in AllDF_list, for comparison.
start_ts = AllDF_start['timestamp']
print(start_ts.min(), start_ts.max(), sep='\n')
print()
first_step_ts = AllDF_list[0]['timestamp']
print(first_step_ts.min(), first_step_ts.max(), sep='\n')

ΔA_train_last max date:  2002-06-28 17:28:28
2000-04-25 23:05:32
2002-06-28 17:28:28

2000-04-25 23:05:32
2002-07-28 15:04:47


In [11]:
# Two parallel lists (one entry per test step, N_steps=8) with the ids of users / items that
# are new relative to what came before.
# NOTE(review): whether "new" is per step or cumulative is defined in
# a_DataHelpers.Find_NewUsersItems — confirm there.
New_usersList,New_itemsList = Find_NewUsersItems(AllDF_start,AllDF_list,'userId','productId',N_steps=8)
New_usersList[0]   # new-user ids for the first test step (an empty array here)

array([], dtype=int64)

In [12]:
New_itemsList[0]   

array([3530])

In [13]:
# Per test step: number of new users, number of new items, then a blank separator line.
for new_users, new_items in zip(New_usersList, New_itemsList):
    print(new_users.size, new_items.size, '', sep='\n')

0
1

0
0

1
1

0
0

1
1

0
0

0
0

0
0



### Get Updated Rating Matrices

In [14]:
print(AllDF_start['userId'].nunique(),AllDF_start['productId'].nunique())   #

6036 3530


In [15]:
# Matrix dimensions = number of distinct users / items present in AllDF_start.
# NOTE(review): using nunique() as a dimension only works when ids are contiguous from 0; that
# holds for items here (3530 distinct ids, largest id 3529) — confirm the same for users.
rows_i = AllDF_start['userId'].nunique()
cols_i = AllDF_start['productId'].nunique()
print(A0_df.shape)
# Sparse rating matrix of the base frame A0_df, sized rows_i x cols_i (6036 x 3530 in the output).
A0_RatMat_updt = SingleRatingMatrix(A0_df,'userId', 'productId',rows_i,cols_i)
A0_RatMat_updt

(554234, 4)


<6036x3530 sparse matrix of type '<class 'numpy.float64'>'
	with 554234 stored elements in Compressed Sparse Row format>

In [16]:
# One sparse rating matrix per training increment, each with the same rows_i x cols_i shape as
# A0_RatMat_updt (see the reprs below).
PSI_TrainMat_updt = AllRatingMatrices(ΔA_train,'userId','productId',rows_i ,cols_i)
PSI_TrainMat_updt

[<6036x3530 sparse matrix of type '<class 'numpy.float64'>'
 	with 1706 stored elements in Compressed Sparse Row format>,
 <6036x3530 sparse matrix of type '<class 'numpy.float64'>'
 	with 1189 stored elements in Compressed Sparse Row format>,
 <6036x3530 sparse matrix of type '<class 'numpy.float64'>'
 	with 1601 stored elements in Compressed Sparse Row format>,
 <6036x3530 sparse matrix of type '<class 'numpy.float64'>'
 	with 1521 stored elements in Compressed Sparse Row format>,
 <6036x3530 sparse matrix of type '<class 'numpy.float64'>'
 	with 2262 stored elements in Compressed Sparse Row format>,
 <6036x3530 sparse matrix of type '<class 'numpy.float64'>'
 	with 1357 stored elements in Compressed Sparse Row format>,
 <6036x3530 sparse matrix of type '<class 'numpy.float64'>'
 	with 1025 stored elements in Compressed Sparse Row format>,
 <6036x3530 sparse matrix of type '<class 'numpy.float64'>'
 	with 1509 stored elements in Compressed Sparse Row format>,
 <6036x3530 sparse matri

In [17]:
print(AllDF_start['userId'].nunique(),AllDF_start['productId'].nunique()) #

6036 3530


### Allsteps Update

In [None]:
# rows_i = AllDF_start['userId'].nunique()
# cols_i = AllDF_start['productId'].nunique()
# print(AllDF_start.shape)
# AllDF_startRatMat = SingleRatingMatrix(AllDF_start,'userId', 'productId',rows_i,cols_i)

# U_start,S_start,V_start = svds(AllDF_startRatMat,k=50)
# V_start = V_start.T
# print()
# print("U_start shape: ",U_start.shape)
# print("S_start shape: ",S_start.shape)
# print("V_start shape: ",V_start.shape)

In [25]:
# Rank-50 starting factors obtained from the base matrix and the training increments.
U_start,S_start,V_start = last_psiTrainMat(A0_RatMat_updt,PSI_TrainMat_updt,k=50)
print()
# Report the shape of each factor under its own name.
for factor_name, factor in (("U_start", U_start), ("S_start", S_start), ("V_start", V_start)):
    print(factor_name + " shape: ", factor.shape)

100%|██████████| 10/10 [00:00<00:00, 24.01it/s]



U_start shape:  (6036, 50)
S_start shape:  (50, 50)
V_start shape:  (3530, 50)


In [26]:
# Per-step factor histories, each seeded with the starting factor as its only element.
U_list,S_list,V_list = [U_start],[S_start],[V_start]
print(len(U_list),len(V_list))

1 1


In [27]:
# Run the update over all test steps (Nsteps=8, rank k=50), starting from the factors held in
# U_list / S_list / V_list. Returned, in order: deferred items and users, the in-domain user and
# item id lists, the id -> updated-id dictionaries for users and items, the per-step updated
# user and item id lists, and the factor lists.
# NOTE(review): U_list, S_list and V_list are both inputs and outputs of this call, so re-running
# this cell without re-running the initialisation cell above would presumably start from lists
# that already contain the step results.
DItems_, DUsers_,In_DomainUSERS,In_DomainITEMS,userID_dict,itemID_dict,AllUpdtUSERS_,AllUpdtITEMS_,U_list,S_list,V_list = ALLSTEPs_UPDATE(AllDF_start,
                                            UserItemDF_list,New_itemsList,New_usersList,U_list,S_list,V_list,'userId','productId',Nsteps=8,k=50,Forced_Orth=False)

100%|██████████| 7/7 [00:00<00:00, 57.46it/s]


#### Output Check

In [30]:
AllDF_start['productId'].max() 

3529

In [33]:
# Ids that were appended to the in-domain lists during the update steps, i.e. everything past
# the users/items already present in AllDF_start. The offsets are derived from AllDF_start
# rather than hard-coded (they evaluate to 3530 items / 6036 users for this data set).
n_items_start = AllDF_start['productId'].nunique()
n_users_start = AllDF_start['userId'].nunique()

added_items = In_DomainITEMS[n_items_start:]
print(len(added_items))
print(added_items)
print()
added_users = In_DomainUSERS[n_users_start:]
print(len(added_users))
print(added_users)

3
[3530, 3531, 3532]

2
[6036, 6037]


In [34]:
# Deferred items, then deferred users: count on one line, the ids on the next.
print(len(DItems_), DItems_, '', sep='\n')
print(len(DUsers_), DUsers_, sep='\n')

0
[]

0
[]


In [35]:
# Show the id mapping of the items added after the starting frame
# (dict key = original id, value = updated id). The offset is derived from AllDF_start
# instead of the hard-coded 3530 so it follows the data.
n_items_start = AllDF_start['productId'].nunique()
for item_id in list(itemID_dict)[n_items_start:]:
    print ("Id: {}, UpdtId: {} ".format(item_id,  itemID_dict[item_id]))

Id: 3530, UpdtId: 3530 
Id: 3531, UpdtId: 3531 
Id: 3532, UpdtId: 3532 


In [36]:
# Show the id mapping of the users added after the starting frame
# (dict key = original id, value = updated id). The offset is derived from AllDF_start
# instead of the hard-coded 6036 so it follows the data.
n_users_start = AllDF_start['userId'].nunique()
for user_id in list(userID_dict)[n_users_start:]:
    print ("Id: {}, UpdtId: {} ".format(user_id,  userID_dict[user_id]))

Id: 6036, UpdtId: 6036 
Id: 6037, UpdtId: 6037 


In [37]:
# Number of stored factors, the shapes of the starting pair, a blank line, then the shapes of
# every later U/V pair.
print(len(U_list),len(V_list))
print(U_list[0].shape,V_list[0].shape, end='\n\n')
for u_step, v_step in zip(U_list[1:], V_list[1:]):
    print(u_step.shape, v_step.shape)

9 9
(6036, 50) (3530, 50)

(6036, 50) (3531, 50)
(6036, 50) (3531, 50)
(6037, 50) (3532, 50)
(6037, 50) (3532, 50)
(6038, 50) (3532, 50)
(6038, 50) (3533, 50)
(6038, 50) (3533, 50)
(6038, 50) (3533, 50)


In [38]:
for i,j in zip(AllUpdtUSERS_,AllUpdtITEMS_):
    print(len(i),len(j))

6036 3531
6036 3531
6037 3532
6037 3532
6038 3532
6038 3533
6038 3533
6038 3533


In [39]:
print(len(In_DomainUSERS),len(In_DomainITEMS))

6038 3533


Data Adjustments

#### Data Adjustment

In [40]:
# Holdout frames adjusted to the updated ids (later cells read their 'Updated_UserID' /
# 'Updated_ItemID' columns); one frame per test step (n=8).
newHOLDOUT_LIST = get_NEWHoldout(HOLDOUT_list,userID_dict,itemID_dict,AllUpdtUSERS_,AllUpdtITEMS_,'userId','productId',n=8)
print(len(newHOLDOUT_LIST))
print()
# Row count of each original holdout frame next to its adjusted version; in the recorded output
# they differ only at the fifth step (98 -> 97).
for old,new in zip(HOLDOUT_list,newHOLDOUT_LIST):
    print(old.shape[0],new.shape[0])

100%|██████████| 8/8 [00:00<00:00, 219.62it/s]

8

126 126
114 114
98 98
96 96
98 97
105 105
114 114
88 88





In [41]:
# User-item frames adjusted to the updated ids, one per test step (n=8).
newUserItem_list = adjustedAllDF(UserItemDF_list,userID_dict,itemID_dict,AllUpdtUSERS_,AllUpdtITEMS_,'userId','productId',n=8)
print()
# Distinct updated user / item ids per step; the counts grow as new users and items appear.
for df in newUserItem_list:
    print(df['Updated_UserID'].nunique(),df['Updated_ItemID'].nunique())


6036 3531
6036 3531
6037 3532
6037 3532
6038 3532
6038 3533
6038 3533
6038 3533


In [42]:
# Test-increment frames adjusted to the updated ids, one per test step (n=8).
new_PSIDFlist = adjustedPSI_DF(PSITest_list,userID_dict,itemID_dict,AllUpdtUSERS_,AllUpdtITEMS_,'userId','productId',n=8)
# Sanity check: row count before vs. after the adjustment (identical at every step in the output).
for old,new in zip(PSITest_list,new_PSIDFlist):
    print(old.shape[0],new.shape[0])

828 828
866 866
550 550
472 472
957 957
524 524
839 839
903 903


#### Get UserItem MAT

In [43]:
# One sparse user-item matrix per test step, keyed by the updated ids. No explicit shape is
# passed, and the resulting shapes grow from 6036x3531 to 6038x3533 across the steps (see output).
UserItem_MatUpdt = All_SingleStepRatMat(newUserItem_list,'Updated_UserID','Updated_ItemID')
print(len(UserItem_MatUpdt))
UserItem_MatUpdt

8


[<6036x3531 sparse matrix of type '<class 'numpy.float64'>'
 	with 569331 stored elements in Compressed Sparse Row format>,
 <6036x3531 sparse matrix of type '<class 'numpy.float64'>'
 	with 570323 stored elements in Compressed Sparse Row format>,
 <6037x3532 sparse matrix of type '<class 'numpy.float64'>'
 	with 570987 stored elements in Compressed Sparse Row format>,
 <6037x3532 sparse matrix of type '<class 'numpy.float64'>'
 	with 571557 stored elements in Compressed Sparse Row format>,
 <6038x3532 sparse matrix of type '<class 'numpy.float64'>'
 	with 572610 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 573232 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 574176 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 575193 stored elements in Compressed Sparse Row format>]

In [44]:
print(len(U_list),len(V_list))
for u,v in zip(U_list,V_list):
    print(u.shape[0],v.shape[0])   

9 9
6036 3530
6036 3531
6036 3531
6037 3532
6037 3532
6038 3532
6038 3533
6038 3533
6038 3533


In [45]:
print(AllDF_start['userId'].nunique(),AllDF_start['productId'].nunique())
print()
for i,j in zip(AllUpdtUSERS_,AllUpdtITEMS_):
    print(len(i),len(j))

6036 3530

6036 3531
6036 3531
6037 3532
6037 3532
6038 3532
6038 3533
6038 3533
6038 3533


#### Prediction

In [46]:
print(len(UserItem_MatUpdt),len(newHOLDOUT_LIST))

8 8


In [47]:
# Drop the first entry of V_list (the starting factor V_start that seeded the list) so the
# remaining 8 factors line up one-to-one with the 8 test steps.
Vpsi_listUpdt = V_list[1:]
for v in Vpsi_listUpdt:
    print(v.shape)

(3531, 50)
(3531, 50)
(3532, 50)
(3532, 50)
(3532, 50)
(3533, 50)
(3533, 50)
(3533, 50)


In [48]:
V_1 = Vpsi_listUpdt[0]
V_1.shape

(3531, 50)

In [49]:
# Top-10 recommendations for the holdout users of the first test step, using that step's
# user-item matrix and V factor.
Top_NPred = TopNPred(
    UserItem_MatUpdt[0],
    newHOLDOUT_LIST[0],
    V_1,
    'Updated_UserID',
    N=10,
)
Top_NPred

array([[ 639,  206,  342, ..., 1335,   59,  950],
       [  22,  556,   41, ...,   40,  131,   73],
       [ 328,  955,  581, ...,  908,  135,  651],
       ...,
       [ 955,  127,  271, ..., 1117,  549,  649],
       [ 574,  563,   73, ...,  576,  112,  630],
       [ 223,  970,  334, ...,  608,  521,  905]])

In [50]:
Hitrate_Eval(newHOLDOUT_LIST[0],Top_NPred,'Updated_UserID','Updated_ItemID')

Number of hits:  8
Total Num of users:  126
Recommendation HitRate:  0.06349206349206349


0.06349206349206349

In [51]:
# Top-10 predictions for every test step: each step's user-item matrix, holdout frame and V factor
# are passed as parallel lists. Returns one prediction array per step (8 here).
AllUpdt_pred =  TQDMgetALLTopNPred(UserItem_MatUpdt,newHOLDOUT_LIST,Vpsi_listUpdt,'Updated_UserID',N = 10)
print(len(AllUpdt_pred))
AllUpdt_pred[:2]

8it [00:00, 52.78it/s]

8





[array([[ 639,  206,  342, ..., 1335,   59,  950],
        [  22,  556,   41, ...,   40,  131,   73],
        [ 328,  955,  581, ...,  908,  135,  651],
        ...,
        [ 955,  127,  271, ..., 1117,  549,  649],
        [ 574,  563,   73, ...,  576,  112,  630],
        [ 223,  970,  334, ...,  608,  521,  905]]),
 array([[ 100,   71,   13, ...,  570,  635,  845],
        [ 485,   10,   79, ...,  328,  480,  360],
        [ 463, 1159,  465, ..., 1688,  204,   89],
        ...,
        [2933,  127,  539, ...,  514,  585, 1226],
        [   2,  719,  254, ...,  133,   18,  153],
        [ 970,  334,  312, ...,   57,  608,  709]])]

In [52]:
AllSteps_HitR_updt, LowerBand_updt, Avg_HitR_updt, UpperBand_updt   = getAll_HitRate(newHOLDOUT_LIST,AllUpdt_pred,'Updated_UserID','Updated_ItemID')

Number of hits:  8
Total Num of users:  126
Recommendation HitRate:  0.06349206349206349
Number of hits:  6
Total Num of users:  114
Recommendation HitRate:  0.05263157894736842
Number of hits:  5
Total Num of users:  98
Recommendation HitRate:  0.05102040816326531
Number of hits:  4
Total Num of users:  96
Recommendation HitRate:  0.041666666666666664
Number of hits:  6
Total Num of users:  97
Recommendation HitRate:  0.061855670103092786
Number of hits:  10
Total Num of users:  105
Recommendation HitRate:  0.09523809523809523
Number of hits:  4
Total Num of users:  114
Recommendation HitRate:  0.03508771929824561
Number of hits:  5
Total Num of users:  88
Recommendation HitRate:  0.056818181818181816
Average HitRate for All Recommendations:  0.05722629796587242


### Regular PSI 

#### Get Rating Matrices

In [54]:
# Full data set with the updated ids attached: the result carries the original four columns plus
# 'Updated_UserID' and 'Updated_ItemID' (6 columns in the printed shape).
new_MLDF = ADJUST_mainDF(MovieLen_df,userID_dict,itemID_dict,AllUpdtUSERS_,AllUpdtITEMS_,'userId','productId')
print(new_MLDF.shape)
# Largest id and number of distinct ids, first for the original id columns ...
print(new_MLDF['userId'].max(),new_MLDF['productId'].max())
print(new_MLDF['userId'].nunique(),new_MLDF['productId'].nunique())
print()
# ... then for the updated id columns (identical in the recorded output).
print(new_MLDF['Updated_UserID'].max(),new_MLDF['Updated_ItemID'].max())
print(new_MLDF['Updated_UserID'].nunique(),new_MLDF['Updated_ItemID'].nunique())
new_MLDF.head()

(575281, 6)
6037 3532
6038 3533

6037 3532
6038 3533


Unnamed: 0,userId,productId,rating,timestamp,Updated_UserID,Updated_ItemID
0,0,0,1,2000-04-25 23:05:32,0,0
1,0,1,1,2000-04-25 23:05:54,0,1
2,0,2,1,2000-04-25 23:05:54,0,2
3,0,3,1,2000-04-25 23:06:17,0,3
4,0,4,1,2000-04-25 23:06:17,0,4


In [55]:
# Final matrix dimensions: distinct updated user / item ids over the whole data set (6038 x 3533).
# NOTE(review): this rebinds rows_i / cols_i, which held 6036 / 3530 earlier in the notebook; any
# earlier cell that reads them gives different shapes if re-run after this point.
rows_i = new_MLDF['Updated_UserID'].nunique()
cols_i = new_MLDF['Updated_ItemID'].nunique()
print(A0_df.shape)
# Base matrix built from A0_df's original id columns, padded to the final shape
# (original author note: changes start within steps).
A0_Rating_matrix = SingleRatingMatrix(A0_df,'userId', 'productId',rows_i,cols_i)
A0_Rating_matrix

(554234, 4)


<6038x3533 sparse matrix of type '<class 'numpy.float64'>'
	with 554234 stored elements in Compressed Sparse Row format>

In [56]:
for df in new_PSIDFlist:
    print(df['Updated_UserID'].max(),df['Updated_ItemID'].max())

6035 3530
6035 3507
6036 3531
6033 3400
6037 3521
6033 3484
6033 3526
6033 3521


In [57]:
print("Length of PSI_Train :",len(ΔA_train))
print("Length of PSI_Test  :",len(new_PSIDFlist))

Length of PSI_Train : 10
Length of PSI_Test  : 8


In [58]:
# Training increments as sparse matrices, all sized to the final rows_i x cols_i shape
# (6038 x 3533 in the output).
PSI_train_matrix = AllRatingMatrices(ΔA_train,'userId','productId',rows_i ,cols_i)
PSI_train_matrix   

[<6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 1706 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 1189 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 1601 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 1521 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 2262 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 1357 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 1025 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 1509 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matri

In [59]:
# Test increments (keyed by the updated ids) as sparse matrices of the final rows_i x cols_i shape;
# one matrix per test step (8 here).
PSI_test_matrix = AllRatingMatrices(new_PSIDFlist,'Updated_UserID','Updated_ItemID',rows_i ,cols_i)                                                                      
print(len(PSI_test_matrix))
PSI_test_matrix


8


[<6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 828 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 866 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 550 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 472 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 957 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 524 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 839 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 903 stored elements in Compressed Sparse Row format>]

In [60]:
# Per-step user-item matrices for the regular PSI run: same frames as UserItem_MatUpdt, but every
# matrix is sized to the final rows_i x cols_i shape instead of growing step by step.
UserItemMAT_reglist = AllRatingMatrices(newUserItem_list,'Updated_UserID','Updated_ItemID',rows_i ,cols_i) 
UserItemMAT_reglist  

[<6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 569331 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 570323 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 570987 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 571557 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 572610 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 573232 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 574176 stored elements in Compressed Sparse Row format>,
 <6038x3533 sparse matrix of type '<class 'numpy.float64'>'
 	with 575193 stored elements in Compressed Sparse Row format>]

In [62]:
print(AllDF_list[-1]['userId'].nunique(),AllDF_list[-1]['productId'].nunique())
print(MovieLen_df['userId'].nunique(),MovieLen_df['productId'].nunique())   #new_MLDF

6038 3533
6038 3533


#### PSI Prediction

In [63]:
                                                                                   #integratorOnMat(A0,ΔA_train_matrix,ΔA_test_matrix,k):
# Regular PSI (projector splitting integrator) run on the full-size matrices with rank k=50.
# The result is a list with one entry per test step (8 here); each entry is later passed to
# TopNPred as that step's V factor.
Vpsi_listReg =  integratorOnMat(A0_Rating_matrix,PSI_train_matrix,PSI_test_matrix,k=50)
print(len(Vpsi_listReg))  

8


In [64]:
# Top-10 recommendations of the regular PSI for the holdout users of the first test step;
# the result has one row per holdout user (126 x 10 in the output).
Top10Pred_psi = TopNPred(UserItemMAT_reglist[0], newHOLDOUT_LIST[0],Vpsi_listReg[0],'Updated_UserID',N=10)
print(Top10Pred_psi.shape)
Top10Pred_psi

(126, 10)


array([[ 639,  342,  206, ..., 1335,   59,  950],
       [  22,  556,   41, ...,  131,   40,   73],
       [ 328,  955,  144, ...,  908,  135,  651],
       ...,
       [ 127,  955,  271, ..., 1117,  549,  649],
       [ 574,  563,   73, ...,  576,  112,  630],
       [ 223,  970,  334, ...,  608,  521,  905]])

In [65]:
Hitrate_Eval(newHOLDOUT_LIST[0],Top10Pred_psi,'Updated_UserID','Updated_ItemID')

Number of hits:  8
Total Num of users:  126
Recommendation HitRate:  0.06349206349206349


0.06349206349206349

In [66]:
# Regular-PSI top-10 predictions for every test step (parallel lists of matrices, holdout frames
# and V factors); one prediction array per step (8 here).
All_PSI_PRED =  TQDMgetALLTopNPred(UserItemMAT_reglist,newHOLDOUT_LIST,Vpsi_listReg,'Updated_UserID',N = 10)
print(len(All_PSI_PRED))
All_PSI_PRED[:2]

8it [00:00, 53.51it/s]

8





[array([[ 639,  342,  206, ..., 1335,   59,  950],
        [  22,  556,   41, ...,  131,   40,   73],
        [ 328,  955,  144, ...,  908,  135,  651],
        ...,
        [ 127,  955,  271, ..., 1117,  549,  649],
        [ 574,  563,   73, ...,  576,  112,  630],
        [ 223,  970,  334, ...,  608,  521,  905]]),
 array([[ 100,   71,   13, ...,  570,  635,  845],
        [ 485,   10,   79, ...,  360,  328,  480],
        [ 463, 1159,  465, ..., 1688,   89,  502],
        ...,
        [2933,  127,  209, ...,  514, 1226,  585],
        [   2,  719,  254, ...,  133,   18,  153],
        [ 970,  334,  312, ...,   57,  608,   62]])]

In [67]:
# Hit rate of the regular-PSI recommendations at every test step, plus the summary values
# returned with it. All four results carry the `_psi` suffix so that later evaluations which
# assign the unsuffixed names cannot overwrite them.
AllSteps_Hitrate_psi, LowerBand_psi, Avg_HitRate_psi, UpperBand_psi = getAll_HitRate(newHOLDOUT_LIST,All_PSI_PRED,'Updated_UserID','Updated_ItemID')

Number of hits:  8
Total Num of users:  126
Recommendation HitRate:  0.06349206349206349
Number of hits:  6
Total Num of users:  114
Recommendation HitRate:  0.05263157894736842
Number of hits:  5
Total Num of users:  98
Recommendation HitRate:  0.05102040816326531
Number of hits:  4
Total Num of users:  96
Recommendation HitRate:  0.041666666666666664
Number of hits:  6
Total Num of users:  97
Recommendation HitRate:  0.061855670103092786
Number of hits:  10
Total Num of users:  105
Recommendation HitRate:  0.09523809523809523
Number of hits:  4
Total Num of users:  114
Recommendation HitRate:  0.03508771929824561
Number of hits:  5
Total Num of users:  88
Recommendation HitRate:  0.056818181818181816
Average HitRate for All Recommendations:  0.05722629796587242


In [68]:
for df in newUserItem_list:
    print(df['Updated_UserID'].nunique(),df['Updated_ItemID'].nunique())

6036 3531
6036 3531
6037 3532
6037 3532
6038 3532
6038 3533
6038 3533
6038 3533


### Random Recommendation Baseline (RandRec): Test

In [69]:
print(len(UserItem_MatUpdt),len(newHOLDOUT_LIST))

8 8


In [70]:
# Seed the global random generators so the random baseline gives the same predictions
# on every Restart & Run All.
# NOTE(review): assumes get_ALLRandPred draws from `random` and/or NumPy's global generator —
# confirm in c_PredEval; if it builds its own generator the seed must be passed there instead.
RAND_BASELINE_SEED = 42
random.seed(RAND_BASELINE_SEED)
np.random.seed(RAND_BASELINE_SEED)

# Random top-10 recommendations for the holdout users of every test step.
All_RandPred = get_ALLRandPred(UserItem_MatUpdt,newHOLDOUT_LIST,'Updated_UserID',N=10)
All_RandPred[:2]

8it [00:00, 66.18it/s]


[array([[  79, 3109, 2110, ..., 2651, 3009,  437],
        [ 386, 3420, 1917, ..., 2282,  643, 2326],
        [2237, 3487, 2789, ..., 2811,  409, 2276],
        ...,
        [1069, 1977, 1391, ..., 1504, 1667,   77],
        [ 217, 2451, 1069, ...,  299,  479,  228],
        [1271,  877, 2141, ..., 2007, 2108, 3522]]),
 array([[1608,  183,  622, ...,  980, 1626, 1491],
        [1488, 2705,  233, ..., 1460, 3342, 1719],
        [2047, 2076,  803, ..., 3076, 3361, 3470],
        ...,
        [1354,  633,  316, ..., 2928,  448, 2092],
        [ 513, 2111, 1023, ...,  192, 1419, 1205],
        [2213,  780, 3445, ...,  268, 2157, 1359]])]

In [71]:
# Hit rate of the random baseline at every test step. The results are stored under `_rand`
# names so they do not overwrite the LowerBand / Avg_HitRate / UpperBand values assigned by the
# PSI evaluation earlier in the notebook.
AllSteps_Hitrate_rand, LowerBand_rand, Avg_HitRate_rand, UpperBand_rand = getAll_RandomHitRate(newHOLDOUT_LIST,All_RandPred,'Updated_UserID','Updated_ItemID')

Number of hits:  2
Total Num of users:  126
Recommendation HitRate:  0.015873015873015872
Number of hits:  0
Total Num of users:  114
Recommendation HitRate:  0.0
Number of hits:  0
Total Num of users:  98
Recommendation HitRate:  0.0
Number of hits:  2
Total Num of users:  96
Recommendation HitRate:  0.020833333333333332
Number of hits:  1
Total Num of users:  97
Recommendation HitRate:  0.010309278350515464
Number of hits:  0
Total Num of users:  105
Recommendation HitRate:  0.0
Number of hits:  0
Total Num of users:  114
Recommendation HitRate:  0.0
Number of hits:  0
Total Num of users:  88
Recommendation HitRate:  0.0
Average HitRate for All Recommendations:  0.005876953444608083


### Most Pop Rec

In [74]:
# Most-popular baseline: top-10 lists built from each step's test-increment frame. In the
# recorded output every row within a step is identical, i.e. all holdout users of a step get
# the same list.
All_MostPOPRED_List =  getAll_MOSTPOP_Pred(new_PSIDFlist,newHOLDOUT_LIST,'Updated_UserID','Updated_ItemID',N=10)
All_MostPOPRED_List[:2]

[array([[ 209, 3318,  355, ...,   15, 2363,  902],
        [ 209, 3318,  355, ...,   15, 2363,  902],
        [ 209, 3318,  355, ...,   15, 2363,  902],
        ...,
        [ 209, 3318,  355, ...,   15, 2363,  902],
        [ 209, 3318,  355, ...,   15, 2363,  902],
        [ 209, 3318,  355, ...,   15, 2363,  902]]),
 array([[3367,   10,  337, ...,  221,  317, 2933],
        [3367,   10,  337, ...,  221,  317, 2933],
        [3367,   10,  337, ...,  221,  317, 2933],
        ...,
        [3367,   10,  337, ...,  221,  317, 2933],
        [3367,   10,  337, ...,  221,  317, 2933],
        [3367,   10,  337, ...,  221,  317, 2933]])]

In [75]:
# Hit rate of the most-popular baseline at every test step, stored under `_pop` names so the
# values stay distinguishable from the other evaluations.
AllSteps_Hitrate_pop, LowerBand_pop, Avg_HitRate_pop, UpperBand_pop = getAll_MostPOPHitRate(newHOLDOUT_LIST,All_MostPOPRED_List,'Updated_UserID','Updated_ItemID')
# Unsuffixed aliases kept only for backward compatibility with any cell that still reads them.
AllSteps_Hitrate, LowerBand, Avg_HitRate, UpperBand = AllSteps_Hitrate_pop, LowerBand_pop, Avg_HitRate_pop, UpperBand_pop

Number of hits:  2
Total Num of users:  126
Recommendation HitRate:  0.015873015873015872
Number of hits:  0
Total Num of users:  114
Recommendation HitRate:  0.0
Number of hits:  1
Total Num of users:  98
Recommendation HitRate:  0.01020408163265306
Number of hits:  3
Total Num of users:  96
Recommendation HitRate:  0.03125
Number of hits:  3
Total Num of users:  97
Recommendation HitRate:  0.030927835051546393
Number of hits:  5
Total Num of users:  105
Recommendation HitRate:  0.047619047619047616
Number of hits:  2
Total Num of users:  114
Recommendation HitRate:  0.017543859649122806
Number of hits:  1
Total Num of users:  88
Recommendation HitRate:  0.011363636363636364
Average HitRate for All Recommendations:  0.020597684523627764


In [76]:
print(len(UserItem_MatUpdt),len(newHOLDOUT_LIST))

8 8


### CORRELATION Report


In [77]:
print(UserItem_MatUpdt[0].shape)
print(UserItemMAT_reglist[0].shape)

(6036, 3531)
(6038, 3533)


#### getAll User Pred

In [78]:
# Top-10 predictions for every user (not only the holdout users) at every test step, using the
# step-wise growing matrices and V factors; one array per step (8 here).
AllUsers_updtPRED =  TQDMgetALLTopNPred_ALLUSERS(UserItem_MatUpdt,Vpsi_listUpdt, N=10) 
print(len(AllUsers_updtPRED))
AllUsers_updtPRED[:2]

8it [00:04,  1.69it/s]

8





[array([[ 138,  336,  142, ...,  279,  223,  641],
        [ 563,  223,  398, ...,  140,  136,  412],
        [ 219,  223,  220, ...,   57, 2362,  171],
        ...,
        [   2,  719,  455, ...,   18,  133,   12],
        [ 156, 1929,  709, ...,   43,  208,  841],
        [ 223,  970,  334, ...,  608,  521,  905]]),
 array([[ 138,  336,  142, ...,  279,  223,  641],
        [ 563,  223,  398, ...,  140,  136,  412],
        [ 219,  223,  220, ...,   57, 2362,  171],
        ...,
        [   2,  719,  254, ...,  133,   18,  153],
        [ 156, 1929,  709, ...,   43,  208,  841],
        [ 970,  334,  312, ...,   57,  608,  709]])]

In [79]:
AllUsers_updtPRED[2].shape

(6037, 10)

####  SingleRank Corr 

In [81]:
# Step indices handed to the correlation routine: 1 .. (number of prediction steps - 1).
# Derived from the prediction list so it stays in sync if the number of test steps changes
# (evaluates to [1, 2, ..., 7] for the 8 steps used here).
AllSteps = list(range(1, len(AllUsers_updtPRED)))
print(len(AllSteps))
AllSteps

7


[1, 2, 3, 4, 5, 6, 7]

In [82]:
# Per-user correlation scores of the all-user predictions for each index in AllSteps; the result
# is an object array holding one score array per step (7 here).
# NOTE(review): the exact pairing of steps being compared is defined in
# d_CorrScores.Updt_getAll_AvgCorr.
UpdtPSICorr_ = Updt_getAll_AvgCorr(AllUsers_updtPRED,newUserItem_list,AllSteps,'Updated_UserID')
UpdtPSICorr_

array([array([1.        , 1.        , 1.        , ..., 0.83060516, 1.        ,
       0.31031422]),
       array([1.        , 1.        , 1.        , ..., 0.81329075, 1.        ,
       0.8714503 ]),
       array([1.       , 1.       , 1.       , ..., 1.       , 1.       ,
       0.9711577]),
       array([1., 1., 1., ..., 1., 1., 1.]),
       array([1.        , 1.        , 1.        , ..., 1.        , 1.        ,
       0.78651821]),
       array([1., 1., 1., ..., 1., 1., 1.]),
       array([1., 1., 1., ..., 1., 1., 1.])], dtype=object)

In [84]:
# Mean correlation score per step (one line per entry of UpdtPSICorr_).
for step_corr in UpdtPSICorr_:
    print(step_corr.mean())

0.9957622798367185
0.9943476878706993
0.9967992108398646
0.9934850818426556
0.9969058009851437
0.9957442717314196
0.9966838964624006


#### RegPSI Corr_

In [None]:
# Leftover scratch fragment (it appears to be the tail of a TopNPred argument list); the cell
# was never executed. Commented out so that Run All does not display a tuple of full objects here.
# [0], newHOLDOUT_LIST[0],Vpsi_listReg[0]

In [85]:
# Regular-PSI top-10 predictions for every user at every test step, using the full-size matrices
# and V factors; one array per step (8 here), each with a row for all 6038 users.
AllUsersRegPIS_PRED =  TQDMgetALLTopNPred_ALLUSERS(UserItemMAT_reglist,Vpsi_listReg, N=10) 
print(len(AllUsersRegPIS_PRED))
AllUsersRegPIS_PRED[:2]

8it [00:04,  1.68it/s]

8





[array([[ 138,  336,  142, ...,  279,  223,  641],
        [ 563,  223,  398, ...,  140,  136,  412],
        [ 219,  223,  220, ...,   57, 2362,  171],
        ...,
        [ 223,  970,  334, ...,  608,  521,  905],
        [1181, 1179, 1182, ..., 1177, 1176, 3532],
        [1181, 1179, 1182, ..., 1177, 1176, 3532]]),
 array([[ 138,  336,  142, ...,  279,  223,  641],
        [ 563,  223,  398, ...,  140,  136,  412],
        [ 219,  223,  220, ...,   57, 2362,  171],
        ...,
        [ 970,  334,  312, ...,   57,  608,   62],
        [1181, 1179, 1182, ..., 1177, 1176, 3532],
        [1181, 1179, 1182, ..., 1177, 1176, 3532]])]

In [86]:
AllUsersRegPIS_PRED[2].shape

(6038, 10)

####  RegPSI SingleRankCorr 

In [87]:
# Step indices handed to the correlation routine: 1 .. (number of prediction steps - 1).
# Derived from the regular-PSI prediction list instead of a hard-coded range
# (evaluates to [1, 2, ..., 7] for the 8 steps used here).
AllSteps = list(range(1, len(AllUsersRegPIS_PRED)))
print(len(AllSteps))
print(len(AllUsersRegPIS_PRED))
AllSteps  

7
8


[1, 2, 3, 4, 5, 6, 7]

In [88]:
# Per-user correlation scores of the regular-PSI all-user predictions for each index in AllSteps;
# the result is an object array holding one score array per step (7 here).
# NOTE(review): the exact pairing of steps being compared is defined in
# d_CorrScores.Updt_getAll_AvgCorr.
RegPSICorr_ = Updt_getAll_AvgCorr(AllUsersRegPIS_PRED,newUserItem_list,AllSteps,'Updated_UserID')
RegPSICorr_

array([array([1.        , 1.        , 1.        , ..., 0.83060516, 1.        ,
       0.28164612]),
       array([0.98788042, 1.        , 1.        , ..., 0.79477204, 1.        ,
       0.94467132]),
       array([0.98788042, 1.        , 1.        , ..., 1.        , 1.        ,
       0.98307362]),
       array([0.98788042, 1.        , 1.        , ..., 1.        , 1.        ,
       0.99244163]),
       array([0.99244163, 1.        , 1.        , ..., 1.        , 1.        ,
       0.84271627]),
       array([0.95730575, 1.        , 0.99056095, ..., 1.        , 0.99244163,
       0.97749498]),
       array([1.        , 1.        , 1.        , ..., 1.        , 0.99244163,
       0.94467132])], dtype=object)

In [89]:
# Mean correlation score per step (one line per entry of RegPSICorr_).
for step_corr in RegPSICorr_:
    print(step_corr.mean())

0.9803436334307236
0.9842317028061345
0.9850667025781458
0.9816454010046866
0.9857413417123495
0.9811697275957267
0.981346903617547
