In [13]:
import pandas as pd
import numpy as np
import scipy.sparse as sps

# Location of the training interactions, relative to the notebook's working directory.
filePath = "./data/data_train.csv"

# Read the CSV into a DataFrame; later cells use its `row`, `col` and `data` columns.
df = pd.read_csv(filepath_or_buffer=filePath)
df

Unnamed: 0,row,col,data
0,0,3568,1.0
1,0,3827,1.0
2,0,4844,1.0
3,0,5734,1.0
4,0,6518,1.0
...,...,...,...
398631,30910,18176,1.0
398632,30910,18185,1.0
398633,30910,18248,1.0
398634,30910,18349,1.0


In [14]:
# Extract the three interaction columns as Python lists in one pass; the names on
# the left line up positionally with the column names in the tuple.
ratingList, userList, itemList = (
    list(df[column_name]) for column_name in ('data', 'row', 'col')
)

In [15]:
# Build the user-rating matrix from (value, (row index, column index)) triplets.
# The shape is not given explicitly, so it is inferred from the largest indices.
interaction_triplets = (ratingList, (userList, itemList))
URM_all = sps.coo_matrix(interaction_triplets).tocsr()  # CSR format for the row slicing done below
URM_all

<30911x18495 sparse matrix of type '<class 'numpy.float64'>'
	with 398636 stored elements in Compressed Sparse Row format>

In [16]:
# Consecutive differences of the CSC column pointer give the number of stored
# interactions in each column; an item is "warm" when that count is positive.
item_pointer = URM_all.tocsc().indptr
warm_items_mask = (item_pointer[1:] - item_pointer[:-1]) > 0
warm_items = np.arange(URM_all.shape[1])[warm_items_mask]

# Keep only the warm columns. The slice renumbers the columns, so `warm_items`
# is the lookup from new column index back to the original item index.
URM_all = URM_all[:, warm_items]
URM_all

<30911x15277 sparse matrix of type '<class 'numpy.float64'>'
	with 398636 stored elements in Compressed Sparse Row format>

In [17]:
# Consecutive differences of the CSR row pointer give the number of stored
# interactions in each row; a user is "warm" when that count is positive.
user_pointer = URM_all.tocsr().indptr
warm_users_mask = (user_pointer[1:] - user_pointer[:-1]) > 0
warm_users = np.arange(URM_all.shape[0])[warm_users_mask]

# Keep only the warm rows. The slice renumbers the rows, so `warm_users`
# is the lookup from new row index back to the original user index.
URM_all = URM_all[warm_users, :]
URM_all

<27255x15277 sparse matrix of type '<class 'numpy.float64'>'
	with 398636 stored elements in Compressed Sparse Row format>

In [6]:
from Algorithms.Notebooks_utils.data_splitter import train_test_holdout

# Seed the RNG before splitting so the hold-out (and every metric computed from
# it below) is reproducible across kernel restarts.
# NOTE(review): assumes train_test_holdout draws from NumPy's global RNG —
# confirm against Algorithms/Notebooks_utils/data_splitter.py.
np.random.seed(42)

# train_perc=0.7 requests 70% of the interactions for training; the rest go to URM_test.
URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.7)
URM_train

<30911x18495 sparse matrix of type '<class 'numpy.float64'>'
	with 279464 stored elements in Compressed Sparse Row format>

In [7]:
from CF.user_cf import UserBasedCollaborativeFiltering
from CF.item_cf import ItemBasedCollaborativeFiltering

# Fit a user-based collaborative filtering model on the training split.
ub_recommender = UserBasedCollaborativeFiltering(
    URM_train,
    topK=100,
    shrink=20,
)
ub_recommender.fit()

# Sanity check: ask for 10 recommendations for the user at row 0.
ub_recommender.recommend(0, at=10)

UserBasedCF created!
Similarity column 30911 ( 100 % ), 1871.97 column/sec, elapsed time 0.28 min


array([ 5300,  4169,  6447, 14145,  7246, 11941,  4420, 11783,  9475,
        3601])

In [8]:
from Algorithms.Notebooks_utils.evaluation_function import evaluate_algorithm
# topK values to sweep; shrink is held at 20 for every run.
x_tick = [3, 5, 7, 10]
MAP_per_k = []

for topK in x_tick:
    # Train a fresh model for this topK value.
    recommender = UserBasedCollaborativeFiltering(URM_train, shrink=20, topK=topK)
    recommender.fit()

    # Score it on the held-out interactions and keep the MAP entry of the result.
    result_dict = evaluate_algorithm(URM_test, recommender)
    MAP_per_k += [result_dict["MAP"]]

UserBasedCF created!
Similarity column 30911 ( 100 % ), 1865.60 column/sec, elapsed time 0.28 min
Evaluated user 0 of 30911
Evaluated user 10000 of 30911
Evaluated user 20000 of 30911
Evaluated user 30000 of 30911
Recommender performance is: Precision = 0.0119, Recall = 0.0143, MAP = 0.0111
UserBasedCF created!
Similarity column 30911 ( 100 % ), 1828.83 column/sec, elapsed time 0.28 min
Evaluated user 0 of 30911
Evaluated user 10000 of 30911
Evaluated user 20000 of 30911
Evaluated user 30000 of 30911
Recommender performance is: Precision = 0.0116, Recall = 0.0110, MAP = 0.0090
UserBasedCF created!
Similarity column 30911 ( 100 % ), 1716.19 column/sec, elapsed time 0.30 min
Evaluated user 0 of 30911
Evaluated user 10000 of 30911
Evaluated user 20000 of 30911
Evaluated user 30000 of 30911
Recommender performance is: Precision = 0.0112, Recall = 0.0105, MAP = 0.0087
UserBasedCF created!
Similarity column 30911 ( 100 % ), 1700.86 column/sec, elapsed time 0.30 min
Evaluated user 0 of 30911


In [9]:
# Shrink values to sweep; topK is held at 10 for every run.
x_tick = [500, 600]
MAP_per_shrinkage = []

for shrinkage in x_tick:
    # Train a fresh model for this shrink value.
    recommender = UserBasedCollaborativeFiltering(URM_train, topK=10, shrink=shrinkage)
    recommender.fit()

    result_dict = evaluate_algorithm(URM_test, recommender)
    # Store in this sweep's own list; appending to MAP_per_k would leave
    # MAP_per_shrinkage empty and mix shrink results into the topK sweep.
    MAP_per_shrinkage.append(result_dict["MAP"])

UserBasedCF created!
Similarity column 30911 ( 100 % ), 1969.86 column/sec, elapsed time 0.26 min
Evaluated user 0 of 30911
Evaluated user 10000 of 30911
Evaluated user 20000 of 30911
Evaluated user 30000 of 30911
Recommender performance is: Precision = 0.0007, Recall = 0.0007, MAP = 0.0003
UserBasedCF created!
Similarity column 30911 ( 100 % ), 1680.56 column/sec, elapsed time 0.31 min
Evaluated user 0 of 30911
Evaluated user 10000 of 30911
Evaluated user 20000 of 30911
Evaluated user 30000 of 30911
Recommender performance is: Precision = 0.0080, Recall = 0.0096, MAP = 0.0069
UserBasedCF created!
Similarity column 30911 ( 100 % ), 1562.42 column/sec, elapsed time 0.33 min
Evaluated user 0 of 30911
Evaluated user 10000 of 30911
Evaluated user 20000 of 30911
Evaluated user 30000 of 30911
Recommender performance is: Precision = 0.0197, Recall = 0.0245, MAP = 0.0198
UserBasedCF created!
Similarity column 30911 ( 100 % ), 1794.55 column/sec, elapsed time 0.29 min
Evaluated user 0 of 30911


In [11]:
# Train the model used to produce the output file.
ub_recommender = UserBasedCollaborativeFiltering(URM_train, topK=10, shrink=500)
ub_recommender.fit()

# One list of (up to) 10 recommended column indices per row of URM_all.
array = [
    ub_recommender.recommend(user_id, at=10)
    for user_id in range(URM_all.shape[0])
]

# `with` closes the file when the block exits, so buffered rows are flushed to
# disk even if a write raises part-way through.
with open("./output/file_out.csv", "w") as out_file:
    out_file.write("user_id,item_list\n")

    for user_index, user_array in enumerate(array):
        # URM_all was sliced down to warm users/items in earlier cells, which
        # renumbered its rows and columns. Translate both back to the indices
        # used in the input CSV before writing.
        # NOTE(review): users removed by the warm-user filter get no row in this
        # file — confirm whether the consumer of the CSV expects them.
        original_user_id = warm_users[user_index]
        # Joining whatever was returned avoids an IndexError when the
        # recommender yields fewer than 10 items.
        item_list = " ".join(str(warm_items[item_index]) for item_index in user_array)
        out_file.write(f"{original_user_id},{item_list}\n")

UserBasedCF created!
Similarity column 30911 ( 100 % ), 1546.10 column/sec, elapsed time 0.33 min
