In [1]:
import numpy
print(numpy.__version__)

1.23.5


In [2]:
import time
import pandas as pd
import numpy as np
import scipy.sparse as sps
import random as rnd

from scipy.sparse import *

In [3]:
from scipy.sparse import save_npz
from scipy.sparse import load_npz

In [4]:
urm_path = './content/data_train.csv'
urm_all_df = pd.read_csv(filepath_or_buffer=urm_path,
                                sep=",",
                                header=0,
                                dtype={0:int, 1:int, 2:float},
                                engine='python')

urm_all_df.columns = ["UserID", "ItemID", "Interaction"]
print ("The number of interactions is {}".format(len(urm_all_df)))

The number of interactions is 478730


In [5]:
URM_all = sps.coo_matrix((urm_all_df["Interaction"].values,
                          (urm_all_df["UserID"].values, urm_all_df["ItemID"].values)))

URM_all

<13025x22348 sparse matrix of type '<class 'numpy.float64'>'
	with 478730 stored elements in COOrdinate format>

In [6]:
from scipy.sparse import load_npz

In [7]:
S_slim_elastic = load_npz("./content/item_item_similarity/slim_elastic_complete.npz")

In [8]:
S_slim_elastic

<22348x22348 sparse matrix of type '<class 'numpy.float32'>'
	with 2197697 stored elements in Compressed Sparse Row format>

In [9]:
S_easer = load_npz("./content/item_item_similarity/easer_complete.npz")

In [10]:
S_easer

<22348x22348 sparse matrix of type '<class 'numpy.float32'>'
	with 5044394 stored elements in Compressed Sparse Row format>

In [11]:
S_IBCF = load_npz("./content/item_item_similarity/IBCF_complete.npz")

In [12]:
S_IBCF

<22348x22348 sparse matrix of type '<class 'numpy.float32'>'
	with 310959 stored elements in Compressed Sparse Row format>

In [13]:
S_rp3beta = load_npz("./content/item_item_similarity/rp3beta_complete.npz")

In [14]:
S_rp3beta

<22348x22348 sparse matrix of type '<class 'numpy.float32'>'
	with 3619038 stored elements in Compressed Sparse Row format>

---

In [15]:
profile_length = np.ediff1d(sps.csr_matrix(URM_all).indptr)
profile_length, profile_length.shape

(array([ 0, 44, 38, ...,  8, 26, 71]), (13025,))

In [16]:
block_size = int(len(profile_length)*0.05)
block_size

651

In [17]:
sorted_users = np.argsort(profile_length)
sorted_users

array([    0,  9683,  4492, ...,  5659,  9912, 12096], dtype=int64)

In [18]:
for group_id in range(0, 20):
    start_pos = group_id * block_size
    end_pos = min((group_id+1) * block_size, len(profile_length))
    
    users_in_group = sorted_users[start_pos:end_pos]
    
    users_in_group_p_len = profile_length[users_in_group]
    
    print("Group {}, #users in group {}, average p.len {:.2f}, median {}, min {}, max {}".format(
        group_id, 
        users_in_group.shape[0],
        users_in_group_p_len.mean(),
        np.median(users_in_group_p_len),
        users_in_group_p_len.min(),
        users_in_group_p_len.max()))

Group 0, #users in group 651, average p.len 0.41, median 0.0, min 0, max 1
Group 1, #users in group 651, average p.len 1.00, median 1.0, min 1, max 1
Group 2, #users in group 651, average p.len 1.95, median 2.0, min 1, max 2
Group 3, #users in group 651, average p.len 3.04, median 3.0, min 2, max 4
Group 4, #users in group 651, average p.len 4.43, median 4.0, min 4, max 5
Group 5, #users in group 651, average p.len 5.99, median 6.0, min 5, max 7
Group 6, #users in group 651, average p.len 7.78, median 8.0, min 7, max 9
Group 7, #users in group 651, average p.len 9.89, median 10.0, min 9, max 11
Group 8, #users in group 651, average p.len 12.33, median 12.0, min 11, max 14
Group 9, #users in group 651, average p.len 15.40, median 15.0, min 14, max 17
Group 10, #users in group 651, average p.len 18.81, median 19.0, min 17, max 21
Group 11, #users in group 651, average p.len 23.00, median 23.0, min 21, max 25
Group 12, #users in group 651, average p.len 28.09, median 28.0, min 25, max 31


## Top Pop

In [19]:
from Recommenders.NonPersonalizedRecommender import TopPop

In [20]:
topPopRecommender_removeSeen = TopPop(URM_all)
topPopRecommender_removeSeen.fit()


TopPopRecommender: URM Detected 387 ( 3.0%) users with no interactions.
TopPopRecommender: URM Detected 126 ( 0.6%) items with no interactions.


## Best 4

In [21]:
from Recommenders.KNN.ItemKNNCustomSimilarityRecommender import ItemKNNCustomSimilarityRecommender

In [22]:
slim_elastic = ItemKNNCustomSimilarityRecommender(URM_all)
slim_elastic.fit(S_slim_elastic)

ItemKNNCustomSimilarityRecommender: URM Detected 387 ( 3.0%) users with no interactions.
ItemKNNCustomSimilarityRecommender: URM Detected 126 ( 0.6%) items with no interactions.


In [23]:
easer = ItemKNNCustomSimilarityRecommender(URM_all)
easer.fit(S_easer)

ItemKNNCustomSimilarityRecommender: URM Detected 387 ( 3.0%) users with no interactions.
ItemKNNCustomSimilarityRecommender: URM Detected 126 ( 0.6%) items with no interactions.


In [24]:
IBCF = ItemKNNCustomSimilarityRecommender(URM_all)
IBCF.fit(S_IBCF)

ItemKNNCustomSimilarityRecommender: URM Detected 387 ( 3.0%) users with no interactions.
ItemKNNCustomSimilarityRecommender: URM Detected 126 ( 0.6%) items with no interactions.


In [25]:
rp3beta = ItemKNNCustomSimilarityRecommender(URM_all)
rp3beta.fit(S_rp3beta)

ItemKNNCustomSimilarityRecommender: URM Detected 387 ( 3.0%) users with no interactions.
ItemKNNCustomSimilarityRecommender: URM Detected 126 ( 0.6%) items with no interactions.


In [26]:
alpha = 0.12
new_similarity = (1 - alpha) * S_slim_elastic + alpha * S_IBCF
hybrid = ItemKNNCustomSimilarityRecommender(URM_all)
hybrid.fit(new_similarity)

ItemKNNCustomSimilarityRecommender: URM Detected 387 ( 3.0%) users with no interactions.
ItemKNNCustomSimilarityRecommender: URM Detected 126 ( 0.6%) items with no interactions.


In [29]:
from sklearn.preprocessing import normalize

In [30]:
beta = 0.276238
alpha = 0.210134
new_similarity = alpha * normalize(S_slim_elastic) + beta * normalize(S_IBCF) + (1-alpha-beta)*normalize(S_easer)
hybrid2 = ItemKNNCustomSimilarityRecommender(URM_all)
hybrid2.fit(new_similarity)

ItemKNNCustomSimilarityRecommender: URM Detected 387 ( 3.0%) users with no interactions.
ItemKNNCustomSimilarityRecommender: URM Detected 126 ( 0.6%) items with no interactions.


In [31]:
recommender_object_dict = {}
recommender_object_dict["TopPop"] = topPopRecommender_removeSeen
recommender_object_dict["slim_elastic"] = slim_elastic
recommender_object_dict["easer"] = easer
recommender_object_dict["IBCF"] = IBCF
recommender_object_dict["rp3beta"] = rp3beta
recommender_object_dict["hybrid"] = hybrid
recommender_object_dict["hybrid2"] = hybrid2

In [35]:
class Predictor(object):

    def __init__(self, URM, recommender_object_dict):
        self.URM = URM
        self.recommender_object_dict = recommender_object_dict

    def recommend(self, user_id, at=10, exclude_seen=True, users_not_in_train=[]):
        # Check if user_id is a valid index
        if user_id < 0 or user_id >= self.URM.shape[0]:
            print(f"Invalid user_id: {user_id}")
            return
        group = -1
        
        # Check if user_id not in train use the topRec
        if user_id in users_not_in_train:
            group = 0
        else:
            # Find the index of user_id in sorted_users using numpy.where
            index = np.where(sorted_users == user_id)[0][0]
            # Calculate the group using integer division
            group = index // block_size
            
        print('Group of the user:')
        print(group)
        
        if group == 0 :
            model = self.recommender_object_dict['TopPop']
        else:
            model = self.recommender_object_dict['hybrid2']
        
        print('Model for the group:')
        print(model)
        # compute the scores using the fitted model
        if group == 0:
            scores = model._compute_item_score([user_id])[0]
        else:
            scores = model._compute_item_score(user_id)[0]

        if exclude_seen:
            scores = self.filter_seen(user_id, scores)

        # rank items
        ranking = scores.argsort()[::-1]

        return ranking[:at]


    def filter_seen(self, user_id, scores):

        start_pos = self.URM.indptr[user_id]
        end_pos = self.URM.indptr[user_id+1]

        user_profile = self.URM.indices[start_pos:end_pos]

        scores[user_profile] = -np.inf

        return scores

# Predictions

In [36]:
urm_pred_path = './content/data_target_users_test.csv'

urm_pred_df = pd.read_csv(filepath_or_buffer=urm_pred_path,
                                sep=",",
                                header=0,
                                dtype={0:int},
                                engine='python')

urm_pred_df.columns = ["UserID"]
len(urm_pred_df['UserID'])
print('Unique user id to predict:', urm_pred_df['UserID'].nunique())

Unique user id to predict: 10882


In [37]:
users_not_in_train = urm_pred_df[~urm_pred_df['UserID'].isin(urm_all_df['UserID'])]

print("Users in urm_pred_df but not in urm_all_orgdf:")
print(users_not_in_train)
print(len(users_not_in_train))

users_not_in_train = users_not_in_train['UserID'].to_numpy()

Users in urm_pred_df but not in urm_all_orgdf:
       UserID
54         60
58         65
147       168
223       261
272       316
...       ...
10682   12775
10699   12798
10729   12837
10802   12921
10856   12992

[221 rows x 1 columns]
221


In [38]:
URM_all = URM_all.tocsr()

In [39]:
recommender = Predictor(URM = URM_all, recommender_object_dict = recommender_object_dict)

In [40]:
user_id = 61
print(f'Predicting for user - {user_id}')
prediction = recommender.recommend(user_id,users_not_in_train = users_not_in_train)
print(f"The prediction is {prediction}")

Predicting for user - 61
Group of the user:
3
Model for the group:
<Recommenders.KNN.ItemKNNCustomSimilarityRecommender.ItemKNNCustomSimilarityRecommender object at 0x000001E7CAD26510>
The prediction is [ 1076   192  6887   487  3260 14256   180  3580  9336   393]


In [41]:
user_id = 60
print(f'Predicting for user - {user_id}')
prediction = recommender.recommend(user_id,users_not_in_train = users_not_in_train)
print(f"The prediction is {prediction}")

Predicting for user - 60
Group of the user:
0
Model for the group:
<Recommenders.NonPersonalizedRecommender.TopPop object at 0x000001E7CB5FF010>
The prediction is [ 2  4  1  7  3  6  8  9 15 20]


In [42]:
topPopRecommender_removeSeen.recommend(1, cutoff=20)

[2, 4, 1, 3, 6, 8, 9, 20, 14, 10, 5, 19, 11, 25, 22, 26, 17, 33, 31, 32]

In [43]:
topPopRecommender_removeSeen.recommend(60, cutoff=20)

[2, 4, 1, 7, 3, 6, 8, 9, 15, 20, 14, 10, 5, 19, 11, 25, 22, 26, 16, 17]

In [44]:
pred_df = pd.DataFrame(columns = ['user_id','item_list'])

for userid in urm_pred_df['UserID']:
  recommendations = recommender.recommend(userid, at=10,exclude_seen = True, users_not_in_train=users_not_in_train)
  recommendations = " ".join(str(item) for item in recommendations)
  pred_df.loc[len(pred_df)] = [userid,recommendations]

Group of the user:
14
Model for the group:
<Recommenders.KNN.ItemKNNCustomSimilarityRecommender.ItemKNNCustomSimilarityRecommender object at 0x000001E7CAD26510>
Group of the user:
14
Model for the group:
<Recommenders.KNN.ItemKNNCustomSimilarityRecommender.ItemKNNCustomSimilarityRecommender object at 0x000001E7CAD26510>
Group of the user:
4
Model for the group:
<Recommenders.KNN.ItemKNNCustomSimilarityRecommender.ItemKNNCustomSimilarityRecommender object at 0x000001E7CAD26510>
Group of the user:
14
Model for the group:
<Recommenders.KNN.ItemKNNCustomSimilarityRecommender.ItemKNNCustomSimilarityRecommender object at 0x000001E7CAD26510>
Group of the user:
17
Model for the group:
<Recommenders.KNN.ItemKNNCustomSimilarityRecommender.ItemKNNCustomSimilarityRecommender object at 0x000001E7CAD26510>
Group of the user:
10
Model for the group:
<Recommenders.KNN.ItemKNNCustomSimilarityRecommender.ItemKNNCustomSimilarityRecommender object at 0x000001E7CAD26510>
Group of the user:
11
Model for the

In [45]:
pred_df

Unnamed: 0,user_id,item_list
0,1,101 36 506 403 515 123 977 1546 112 183
1,2,1095 12 949 196 102 359 47 422 50 416
2,3,59 4252 3849 536 857 648 9467 956 259 2172
3,4,28 249 50 7 136 5 1 145 139 171
4,5,131 1570 77 95 1511 135 170 766 8 238
...,...,...
10877,13020,6450 6198 6452 7395 6749 7394 105 155 345 4323
10878,13021,6451 13621 17942 6179 20518 20505 133 6749 201...
10879,13022,1668 1411 1446 809 1674 1013 1258 5228 4688 10336
10880,13023,706 329 1107 1124 1290 456 1534 828 408 639


In [46]:
pred_df.to_csv('./content/hybridMatrixNormalization.csv',index=False)