<a href="https://colab.research.google.com/github/Lausti98/bsc-recsys/blob/main/bsc_steam_initial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install recpack

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting recpack
  Downloading recpack-0.3.5-py3-none-any.whl (241 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m241.2/241.2 KB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
Collecting hyperopt==0.2.*,>=0.2.7
  Downloading hyperopt-0.2.7-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m38.4 MB/s[0m eta [36m0:00:00[0m
Collecting PyYAML==5.*,>=5.4.1
  Downloading PyYAML-5.4.1-cp38-cp38-manylinux1_x86_64.whl (662 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m662.4/662.4 KB[0m [31m35.7 MB/s[0m eta [36m0:00:00[0m
Collecting py4j
  Downloading py4j-0.10.9.7-py2.py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.5/200.5 KB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: py4j, PyYAML, hyperopt, recpack
  Attempting uni

In [15]:
# import required module
from pathlib import Path
import pandas as pd
import re
from sklearn import preprocessing
# Load the raw interaction log; column names are supplied explicitly via `names`.
fpath = 'steam-200k.csv'
raw_df = pd.read_csv(fpath, names=['user_id', 'game_id', 'interaction', 'rating', 'n'])

# Keep only 'play' rows (this filters out purchase interactions), then drop the
# columns that are not needed afterwards.
# NOTE(review): for 'play' rows the `rating` column looks like a play-time style
# value rather than an explicit rating -- confirm against the dataset description.
is_play = raw_df['interaction'].astype('string') == 'play'
df = raw_df.loc[is_play].drop(columns=['interaction', 'n'])

# Replace the raw identifiers with integer codes. Each column gets its own
# encoder: refitting one shared encoder on games would overwrite the user
# mapping, making it impossible to translate user codes back to original IDs.
user_encoder = preprocessing.LabelEncoder()
game_encoder = preprocessing.LabelEncoder()
df['user_id'] = user_encoder.fit_transform(df['user_id'])
df['game_id'] = game_encoder.fit_transform(df['game_id'])

# Backward-compatible alias: `le` previously ended up fitted on `game_id`.
le = game_encoder

print(df.dtypes)
df


user_id      int64
game_id      int64
rating     float64
dtype: object


Unnamed: 0,user_id,game_id,rating
1,5088,3067,273.0
3,5088,1162,87.0
5,5088,2813,14.9
7,5088,1163,12.1
9,5088,1733,8.9
...,...,...,...
199991,4056,1155,2.4
199993,4056,1833,2.2
199995,4056,3220,1.5
199997,4056,1375,1.5


In [17]:
from recpack.preprocessing.preprocessors import DataFramePreprocessor
from recpack.preprocessing.filters import MinItemsPerUser, MinUsersPerItem

# Configure the preprocessor: `game_id` is the item column, `user_id` the user column.
# No timestamp column is configured (`timestamp_ix='ts'` was left disabled).
proc = DataFramePreprocessor(user_ix='user_id', item_ix='game_id')

# Register the filters in order: the per-item filter first, then the per-user
# filter, both with a threshold of 5.
column_args = {'item_ix': 'game_id', 'user_ix': 'user_id'}
for interaction_filter in (
    MinUsersPerItem(5, **column_args),
    MinItemsPerUser(5, **column_args),
):
    proc.add_filter(interaction_filter)

# Run the preprocessor on the `df` DataFrame prepared in the earlier cell.
interaction_matrix = proc.process(df)

  0%|          | 0/54083 [00:00<?, ?it/s]

  0%|          | 0/54083 [00:00<?, ?it/s]

In [18]:
from recpack.scenarios import WeakGeneralization

# Split data into train, test and validation set. Random split not accounting for time data.
# Split data into train, test and validation set. Random split not accounting for time data.
# A fixed seed makes the random split -- and therefore every metric computed
# from it -- reproducible across kernel restarts.
# NOTE(review): 0.75 is passed positionally; presumably it is the fraction of
# interactions kept as training input -- confirm against the recpack docs.
SPLIT_SEED = 42
scenario = WeakGeneralization(0.75, validation=True, seed=SPLIT_SEED)
scenario.split(interaction_matrix)

0it [00:00, ?it/s]

0it [00:00, ?it/s]

In [19]:
from recpack.pipelines import PipelineBuilder

# Create the pipeline builder and hand it the data held by the split scenario.
builder = PipelineBuilder()
builder.set_data_from_scenario(scenario)

In [20]:
# Popularity baseline: added without a grid, as there are no real parameters to optimise.
builder.add_algorithm('Popularity')

In [21]:
# Item-based nearest-neighbours recommendation
# (see "Recommender Systems: An Introduction", section 2.2).
# Only cosine similarity is searched; 'conditional_probability' is currently disabled.
item_knn_grid = {
    'K': [100, 200, 500],
    'similarity': ['cosine'],
}
builder.add_algorithm('ItemKNN', grid=item_knn_grid)

# Currently disabled alternatives:
#   - 'SLIM' (no grid)
#   - 'KUNN' with grid {'Ku': [100, 200, 500], 'Ki': [100, 200, 500]}



In [22]:
# Metric used to pick the best hyperparameters during optimisation.
builder.set_optimisation_metric('NDCGK', K=10)

# Metrics reported in the final results, with the K values passed for each.
# Dict insertion order fixes the order in which the metrics are registered.
evaluation_cutoffs = {
    'NDCGK': [10, 20, 50],
    'CoverageK': [10, 20],
    'RecallK': [10, 20, 50],
    'PrecisionK': [10, 20, 50],
}
for metric_name, cutoffs in evaluation_cutoffs.items():
    builder.add_metric(metric_name, K=cutoffs)

In [23]:
# Build the configured pipeline, run it, and show the collected metrics
# as the cell's output.
pipeline = builder.build()
pipeline.run()
metrics = pipeline.get_metrics()
metrics

  0%|          | 0/2 [00:00<?, ?it/s]

2023-03-03 10:43:44,587 - base - recpack - INFO - Fitting Popularity complete - Took 0.0233s


INFO:recpack:Fitting Popularity complete - Took 0.0233s
  self._set_arrayXarray(i, j, x)


2023-03-03 10:43:46,431 - base - recpack - INFO - Fitting ItemKNN complete - Took 0.235s


INFO:recpack:Fitting ItemKNN complete - Took 0.235s


2023-03-03 10:43:47,201 - base - recpack - INFO - Fitting ItemKNN complete - Took 0.433s


INFO:recpack:Fitting ItemKNN complete - Took 0.433s


2023-03-03 10:43:48,986 - base - recpack - INFO - Fitting ItemKNN complete - Took 1.16s


INFO:recpack:Fitting ItemKNN complete - Took 1.16s


2023-03-03 10:43:50,929 - base - recpack - INFO - Fitting ItemKNN complete - Took 1.05s


INFO:recpack:Fitting ItemKNN complete - Took 1.05s


Unnamed: 0,NDCGK_10,NDCGK_20,NDCGK_50,CoverageK_10,CoverageK_20,RecallK_10,RecallK_20,RecallK_50,PrecisionK_10,PrecisionK_20,PrecisionK_50
Popularity(K=200),0.174141,0.195392,0.225902,0.018939,0.031566,0.198563,0.279364,0.386196,0.074405,0.052797,0.032601
"ItemKNN(K=500,normalize_X=False,normalize_sim=False,pop_discount=None,similarity=cosine)",0.278475,0.29965,0.333545,0.354798,0.518308,0.320859,0.402615,0.521089,0.107056,0.072505,0.042372
