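Best hybrid model found: a weighted sum of four item-item similarity matrices, using these weights:
alpha = 0.2 (weight of S_easer)
beta = 0.1 (weight of S_IBCF)
gamma = 0.05 (weight of S_rp3beta)
S_slim_elastic receives the remaining weight, 1 - alpha - beta - gamma = 0.65.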

In [1]:
import time
import pandas as pd
import numpy as np
import scipy.sparse as sps
import random as rnd

from scipy.sparse import *

In [2]:
# Load the training interactions; the first CSV row is treated as the header.
urm_path = "./content/data_train.csv"
urm_all_df = pd.read_csv(
    filepath_or_buffer=urm_path,
    sep=",",
    header=0,
    dtype={0: int, 1: int, 2: float},
    engine="python",
)

# Replace the header names from the file with the names used in the rest of the notebook.
urm_all_df.columns = ["UserID", "ItemID", "Interaction"]
print("The number of interactions is {}".format(len(urm_all_df)))

The number of interactions is 478730


In [3]:
# Assemble the user-item matrix in COO format: row index = UserID,
# column index = ItemID, stored value = Interaction.
URM_all = sps.coo_matrix(
    (
        urm_all_df["Interaction"].values,
        (urm_all_df["UserID"].values, urm_all_df["ItemID"].values),
    )
)

# Last expression of the cell: shows the matrix summary (shape, dtype, stored elements).
URM_all

<13025x22348 sparse matrix of type '<class 'numpy.float64'>'
	with 478730 stored elements in COOrdinate format>

In [4]:
from scipy.sparse import load_npz

In [5]:
# Load the precomputed item-item similarity matrix stored in slim_elastic_complete.npz.
S_slim_elastic = load_npz("./content/item_item_similarity/slim_elastic_complete.npz")

In [11]:
# Display the summary (shape, dtype, stored elements) of the matrix loaded from slim_elastic_complete.npz.
S_slim_elastic

<22348x22348 sparse matrix of type '<class 'numpy.float32'>'
	with 2197697 stored elements in Compressed Sparse Row format>

In [15]:
# Load the precomputed item-item similarity matrix stored in easer_complete.npz.
S_easer = load_npz("./content/item_item_similarity/easer_complete.npz")

In [16]:
# Display the summary (shape, dtype, stored elements) of the matrix loaded from easer_complete.npz.
S_easer

<22348x22348 sparse matrix of type '<class 'numpy.float32'>'
	with 5044394 stored elements in Compressed Sparse Row format>

In [7]:
# Load the precomputed item-item similarity matrix stored in IBCF_complete.npz.
S_IBCF = load_npz("./content/item_item_similarity/IBCF_complete.npz")

In [13]:
# Display the summary (shape, dtype, stored elements) of the matrix loaded from IBCF_complete.npz.
S_IBCF

<22348x22348 sparse matrix of type '<class 'numpy.float32'>'
	with 310959 stored elements in Compressed Sparse Row format>

In [8]:
# Load the precomputed item-item similarity matrix stored in rp3beta_complete.npz.
S_rp3beta = load_npz("./content/item_item_similarity/rp3beta_complete.npz")

In [14]:
# Display the summary (shape, dtype, stored elements) of the matrix loaded from rp3beta_complete.npz.
S_rp3beta

<22348x22348 sparse matrix of type '<class 'numpy.float32'>'
	with 3619038 stored elements in Compressed Sparse Row format>

In [9]:
from Recommenders.KNN.ItemKNNCustomSimilarityRecommender import ItemKNNCustomSimilarityRecommender

In [17]:
# Blend weights for S_easer, S_IBCF and S_rp3beta respectively;
# S_slim_elastic gets whatever remains of the unit total.
alpha = 0.2
beta = 0.1
gamma = 0.05

# Weighted sum of the four item-item similarity matrices.
new_similarity = (
    (1 - alpha - beta - gamma) * S_slim_elastic
    + alpha * S_easer
    + beta * S_IBCF
    + gamma * S_rp3beta
)

# Fit the custom-similarity recommender on the blended matrix.
recommender_object = ItemKNNCustomSimilarityRecommender(URM_all)
recommender_object.fit(new_similarity)

ItemKNNCustomSimilarityRecommender: URM Detected 387 ( 3.0%) users with no interactions.
ItemKNNCustomSimilarityRecommender: URM Detected 126 ( 0.6%) items with no interactions.


In [18]:
class Predictor(object):
    """Produce top-N item recommendations for one user from a fitted model.

    Attributes:
        URM: user-item interaction matrix held in CSR format (rows index users).
        model: fitted recommender exposing ``_compute_item_score(user_id)``.
    """

    def __init__(self, URM, model):
        """Store the interaction matrix and the fitted model.

        Args:
            URM: user-item interaction matrix. Input that is not already CSR
                (e.g. COO) is converted, because ``filter_seen`` reads the
                CSR-only ``indptr`` / ``indices`` arrays.
            model: object whose ``_compute_item_score(user_id)`` returns a
                sequence whose first element is the per-item score vector.
        """
        # Keep a CSR input as-is (no copy); convert every other format.
        if getattr(URM, "format", None) == "csr":
            self.URM = URM
        else:
            self.URM = sps.csr_matrix(URM)
        self.model = model

    def recommend(self, user_id, at=10, exclude_seen=True, users_not_in_train=None):
        """Return the recommendations for ``user_id``.

        Args:
            user_id: row index of the user in ``self.URM``.
            at: number of items to return for users scored by the model.
            exclude_seen: when True, items the user already interacted with
                are pushed to the bottom of the ranking.
            users_not_in_train: optional collection of user ids that have no
                training interactions; ``None`` (the default) means no such users.

        Returns:
            * ``None`` (after printing a message) if ``user_id`` is out of range.
            * A one-element list holding a space-separated string of ten fixed
              item ids if ``user_id`` is in ``users_not_in_train``. This
              fallback is independent of ``at``.
            * Otherwise an array with the ``at`` highest-scoring item indices,
              best first.
        """
        # Check if user_id is a valid index
        if user_id < 0 or user_id >= self.URM.shape[0]:
            print(f"Invalid user_id: {user_id}")
            return

        # Users without training data get the fixed fallback list.
        if users_not_in_train is not None and user_id in users_not_in_train:
            return ["517 189 44 0 284 808 285 1 557 1266"]

        # Work on a copy: filter_seen writes -inf into the array, and the model
        # may hand back a view of memory it still owns.
        scores = np.array(self.model._compute_item_score(user_id)[0], copy=True)

        if exclude_seen:
            scores = self.filter_seen(user_id, scores)

        # Rank items by descending score.
        ranking = scores.argsort()[::-1]

        return ranking[:at]

    def filter_seen(self, user_id, scores):
        """Set the score of every item stored in the user's URM row to ``-inf``.

        ``scores`` is modified in place and also returned.
        """
        # CSR layout: the column indices of row ``user_id`` live in
        # indices[indptr[user_id]:indptr[user_id + 1]].
        start_pos = self.URM.indptr[user_id]
        end_pos = self.URM.indptr[user_id + 1]

        user_profile = self.URM.indices[start_pos:end_pos]

        scores[user_profile] = -np.inf

        return scores

# Predictions

In [20]:
# Read the users for whom recommendations must be produced (single-column CSV with a header row).
urm_pred_path = "./content/data_target_users_test.csv"

urm_pred_df = pd.read_csv(
    filepath_or_buffer=urm_pred_path,
    sep=",",
    header=0,
    dtype={0: int},
    engine="python",
)

urm_pred_df.columns = ["UserID"]
# NOTE(review): this value is neither stored nor displayed, because it is not
# the last expression of the cell.
len(urm_pred_df["UserID"])
print("Unique user id to predict:", urm_pred_df["UserID"].nunique())

Unique user id to predict: 10882


In [21]:
# Keep only the target users whose id never appears in the training interactions.
users_not_in_train = urm_pred_df.loc[
    ~urm_pred_df["UserID"].isin(urm_all_df["UserID"])
]

print("Users in urm_pred_df but not in urm_all_orgdf:")
print(users_not_in_train)
print(len(users_not_in_train))

# From here on the name refers to a plain array of user ids rather than a DataFrame.
users_not_in_train = users_not_in_train["UserID"].to_numpy()

Users in urm_pred_df but not in urm_all_orgdf:
       UserID
54         60
58         65
147       168
223       261
272       316
...       ...
10682   12775
10699   12798
10729   12837
10802   12921
10856   12992

[221 rows x 1 columns]
221


In [24]:
# Convert the interaction matrix to CSR: Predictor.filter_seen slices rows
# through the indptr / indices arrays of that format.
URM_all = URM_all.tocsr()

In [25]:
# Wrap the interaction matrix and the fitted hybrid model in the Predictor helper.
recommender = Predictor(URM=URM_all, model = recommender_object)

In [26]:
# Spot check: a user that is scored by the fitted model.
user_id = 61
print(f"Predicting for user - {user_id}")
prediction = recommender.recommend(
    user_id,
    users_not_in_train=users_not_in_train,
)
print(f"The prediction is {prediction}")

Predicting for user - 61
The prediction is [  192  1076   393  3260  6887  3580  9336  7978   487 15142]


In [27]:
# Spot check: a user listed in users_not_in_train, which gets the fixed fallback list.
user_id = 60
print(f"Predicting for user - {user_id}")
prediction = recommender.recommend(
    user_id,
    users_not_in_train=users_not_in_train,
)
print(f"The prediction is {prediction}")

Predicting for user - 60
The prediction is ['517 189 44 0 284 808 285 1 557 1266']


In [29]:
# Collect one (user_id, item_list) record per target user and build the frame
# once at the end, instead of enlarging the DataFrame one row at a time
# (row-by-row enlargement gets slower as the frame grows).
submission_rows = []

for userid in urm_pred_df["UserID"]:
    recommendations = recommender.recommend(
        userid,
        at=10,
        exclude_seen=True,
        users_not_in_train=users_not_in_train,
    )
    # Submission format: item ids separated by single spaces.
    recommendations = " ".join(str(item) for item in recommendations)
    submission_rows.append((userid, recommendations))

# Same columns and 0..n-1 index as before; user_id is now inferred as an integer column.
pred_df = pd.DataFrame(submission_rows, columns=["user_id", "item_list"])

In [30]:
# Display the submission table (one row per target user).
pred_df

Unnamed: 0,user_id,item_list
0,1,101 36 506 515 403 123 1546 52 694 254
1,2,1095 47 50 12 11 28 4 2 1522 5
2,3,59 857 4252 2172 536 648 956 259 584 1281
3,4,28 249 50 136 145 139 5 146 314 277
4,5,1570 77 170 116 95 131 148 8 135 5138
...,...,...
10877,13020,6450 6198 6452 7395 6749 7394 4323 161 105 627
10878,13021,6179 6451 7027 6426 6720 7395 6749 13621 133 2...
10879,13022,1668 1411 1446 809 1674 10789 4688 1013 1258 776
10880,13023,1124 706 329 1290 1107 138 1534 96 208 32


In [32]:
# Write the submission file; index=False leaves the DataFrame row index out of the CSV.
pred_df.to_csv('./content/predHybridBest4.csv',index=False)