# Data Load
### Create the Pandas Dataframe

In [17]:
import pandas as pd
path = "recommender-system-2022-challenge-polimi-data/interactions_and_impressions.csv"

Interactions_and_Impressions = pd.read_csv(filepath_or_buffer=path,
                               sep=",",
                               header=1,
                               engine='python',
                               names=['UserId', 'ItemId', 'ImpressionList', 'Data'])
Interactions_and_Impressions

Unnamed: 0,UserId,ItemId,ImpressionList,Data
0,0,21,,0
1,0,21,,0
2,0,21,20212223242526272829,0
3,0,21,,1
4,0,21,,1
...,...,...,...,...
5826500,41628,20448,,0
5826501,41628,20896,,1
5826502,41628,21506,,1
5826503,41628,22882,,0


In [18]:
print(Interactions_and_Impressions["UserId"].max())
print(Interactions_and_Impressions["ItemId"].max())

41628
24506


### Data Preparation
First we remove empty indeces:

In [19]:
userId_unique = Interactions_and_Impressions["UserId"].unique()
itemId_unique = Interactions_and_Impressions["ItemId"].unique()

In [20]:
mapped_id, original_id = pd.factorize(Interactions_and_Impressions["UserId"].unique())
user_original_Id_to_index = pd.Series(mapped_id, index=original_id)

mapped_id, original_id = pd.factorize(Interactions_and_Impressions["ItemId"].unique())
item_original_Id_to_index = pd.Series(mapped_id, index=original_id)

In [21]:
Interactions_and_Impressions["UserId"] = Interactions_and_Impressions["UserId"].map(user_original_Id_to_index)
Interactions_and_Impressions["ItemId"] = Interactions_and_Impressions["ItemId"].map(item_original_Id_to_index)

In [22]:
Interactions_and_Impressions.head(15)

Unnamed: 0,UserId,ItemId,ImpressionList,Data
0,0,0,,0
1,0,0,,0
2,0,0,20212223242526272829,0
3,0,0,,1
4,0,0,,1
5,0,0,,1
6,0,0,,1
7,0,0,,1
8,0,0,,1
9,0,0,,1


We want an Implicit/Binary Matrix, so:
1) We drop the last two columns, only keeping User and Item Id
2) We remove duplicates
3) Split data into Train/Test/Validation
4) We build the Sparse URM using Scipy

##### Step 1

In [23]:
Distinct_Interactions = Interactions_and_Impressions.copy(deep=True)
Distinct_Interactions = Distinct_Interactions.drop(["ImpressionList", "Data"], axis=1)

##### Step 2

In [24]:
Distinct_Interactions = Distinct_Interactions.drop_duplicates()
Distinct_Interactions['Rating'] = 1
Distinct_Interactions

Unnamed: 0,UserId,ItemId,Rating
0,0,0,1
12,0,1,1
27,0,2,1
28,0,3,1
29,0,4,1
...,...,...,...
5826500,41628,3699,1
5826501,41628,12693,1
5826502,41628,15131,1
5826503,41628,18127,1


In [25]:
unique_userId = Distinct_Interactions.UserId.unique()
num_users = len(unique_userId)
print(num_users)

unique_itemId = Distinct_Interactions.ItemId.unique()
num_items = len(unique_itemId)
print(num_items)

41629
24507


In [26]:
import scipy.sparse as sps

urm_train = sps.coo_matrix((Distinct_Interactions["Rating"].values,
                               (Distinct_Interactions["UserId"].values, Distinct_Interactions["ItemId"].values)))
urm_train

<41629x24507 sparse matrix of type '<class 'numpy.int64'>'
	with 1554639 stored elements in COOrdinate format>

# Now we try running SLIM

In [27]:
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender

In [28]:
MySlimRecommender = SLIMElasticNetRecommender(urm_train)

In [29]:
MySlimRecommender.fit(l1_ratio=0.085, alpha = 0.001, positive_only=True, topK = 100)

SLIMElasticNetRecommender: Processed 4747 (19.4%) in 5.00 min. Items per second: 15.82
SLIMElasticNetRecommender: Processed 9770 (39.9%) in 10.00 min. Items per second: 16.28
SLIMElasticNetRecommender: Processed 15019 (61.3%) in 15.00 min. Items per second: 16.68
SLIMElasticNetRecommender: Processed 20173 (82.3%) in 20.00 min. Items per second: 16.81
SLIMElasticNetRecommender: Processed 24507 (100.0%) in 23.93 min. Items per second: 17.07


### No parameter tuning for now

# Computing the submission
First we compute the original indexes

In [30]:
target = pd.read_csv(filepath_or_buffer="recommender-system-2022-challenge-polimi-data/data_target_users_test.csv",
                                          sep=",",
                                          header=0,
                                          engine='python',
                                          names=['id'])
target

Unnamed: 0,id
0,0
1,1
2,2
3,3
4,4
...,...
41111,41624
41112,41625
41113,41626
41114,41627


In [43]:
def write_recommendations(file_name):
    recommendations = 'user_id,item_list'
    f = open("submissions/" + file_name + ".csv", "w")
    # f = open("slim_whole_dataset.txt", "w")

    for id in target['id']:
        recommendations_per_user = MySlimRecommender.recommend(user_id_array=id, remove_seen_flag=True, cutoff=10)

        recommendation_string = str(user_original_Id_to_index[user_original_Id_to_index==id].index.item()
                                    ) + ','

        for rec in recommendations_per_user:
            recommendation_string = recommendation_string + str(item_original_Id_to_index[item_original_Id_to_index==rec].index.item()) + ' '

        recommendation_string = recommendation_string[:-1]
        recommendations = recommendations + '\n' + recommendation_string

    f.write(recommendations)
    f.close()

In [44]:
write_recommendations("slim_whole_dataset.csv")