# Benchmarks

## Surprise on Data

In [153]:
import pandas as pd
from surprise import Dataset, Reader, SVD, KNNWithMeans
from surprise.model_selection import KFold, cross_validate

In [29]:
# Load the cleaned interaction table with pandas and preview its first rows.
df = pd.read_csv(filepath_or_buffer='data/cleaned.csv')
df.head(n=5)

Unnamed: 0,app_id,is_recommended,user_id
0,534380,1,10531
1,42700,0,185293
2,602960,1,319249
3,976730,0,747545
4,1091500,1,2113544


In [30]:
# Parse the same CSV with Surprise. Each row is read as "item, rating, user"
# (comma-separated, one header line skipped) with ratings on a 0-1 scale.
reader = Reader(
    rating_scale=(0, 1),
    skip_lines=1,
    sep=',',
    line_format='item rating user',
)
data = Dataset.load_from_file('data/cleaned.csv', reader=reader)

# Trainset built from the whole dataset (no hold-out); used for the final fits.
full_train = data.build_full_trainset()

In [None]:
# Estimate SVD's out-of-sample RMSE with 5-fold cross-validation.
# Both the fold split and SVD's parameter initialisation are random; seeding
# them makes the reported numbers (and the later full fit) reproducible when
# the notebook is re-run from a fresh kernel.
SEED = 42
svd = SVD(random_state=SEED)
results = cross_validate(
    svd,
    data,
    measures=['RMSE'],
    cv=KFold(n_splits=5, random_state=SEED, shuffle=True),
    verbose=True,
)

In [31]:
# Fit the SVD model on the full trainset. `svd_fit` is the object the later
# cells query via `.bi` and `.predict(...)`.
svd_fit = svd.fit(full_train)

In [44]:
# Inspect the fitted model's `bi` array (presumably the learned per-item
# biases, per Surprise's SVD attribute naming — confirm against the docs).
svd_fit.bi

array([-0.07391577,  0.02504232,  0.08813493, ...,  0.07392884,
       -0.41369429,  0.04063683])

In [272]:
# Top-10 SVD recommendations for one user, as (estimate, raw item id) pairs.
#
# Every item in the trainset is scored and the ten highest estimates are kept.
# The sort is stable, so items with equal estimates stay in trainset iteration
# order (the same result as re-sorting and truncating after every append, but
# done once and without leaving throwaway loop variables in the namespace).
#
# NOTE(review): estimates appear to be capped at the top of the rating scale
# (the printed top-10 are all exactly 1), so the order among them is just
# iteration order; consider `predict(..., clip=False)` for a meaningful ranking.
# NOTE(review): items the user has already rated are not filtered out.
user = full_train.to_raw_uid(90)  # 90 is an inner trainset id; predict() is given the raw id
top = sorted(
    (
        (prediction.est, prediction.iid)
        for prediction in (
            svd_fit.predict(user, full_train.to_raw_iid(inner_iid))
            for inner_iid in full_train.all_items()
        )
    ),
    key=lambda scored: scored[0],
    reverse=True,
)[:10]

In [273]:
# Game catalogue, used to look up the recommended app ids in the cells below.
df2 = pd.read_csv(filepath_or_buffer='data/games.csv')

In [274]:
# Show the current top-10 list of (estimate, raw item id) pairs.
print(top)

[(1, '546560'), (1, '1222140'), (1, '435150'), (1, '1426210'), (1, '1435790'), (1, '105600'), (1, '526870'), (1, '1794680'), (1, '239030'), (1, '294100')]


In [275]:
# Catalogue rows for the recommended items. Raw item ids in `top` are strings,
# so they are converted with int() before matching against `app_id`.
# Rows come back in df2's own order, not in ranking order.
df2.loc[df2['app_id'].isin([int(raw_iid) for _est, raw_iid in top])]

Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,price_original,discount,steam_deck
2671,1794680,Vampire Survivors,2022-10-20,True,True,False,Overwhelmingly Positive,98,175903,3.99,4.99,20.0,True
3027,526870,Satisfactory,2020-06-08,True,False,False,Overwhelmingly Positive,97,106760,29.99,29.99,0.0,True
5353,294100,RimWorld,2018-10-17,True,True,True,Overwhelmingly Positive,98,130570,34.99,34.99,0.0,True
6422,239030,Papers Please,2013-08-08,True,True,True,Overwhelmingly Positive,97,51975,9.99,9.99,0.0,True
7345,105600,Terraria,2011-05-16,True,True,True,Overwhelmingly Positive,97,867226,9.99,9.99,0.0,True
8506,1222140,Detroit: Become Human,2020-06-18,True,False,False,Very Positive,94,65412,39.99,39.99,0.0,True
9455,1426210,It Takes Two,2021-03-25,True,False,False,Overwhelmingly Positive,95,102750,39.99,39.99,0.0,True
12529,435150,Divinity: Original Sin 2 - Definitive Edition,2017-09-14,True,True,False,Overwhelmingly Positive,95,137279,44.99,44.99,0.0,True
14371,1435790,Escape Simulator,2021-10-19,True,True,True,Very Positive,93,8852,14.99,14.99,0.0,True
19634,546560,Half-Life: Alyx,2020-03-23,True,False,False,Overwhelmingly Positive,98,67547,59.99,59.99,0.0,True


## ItemKNN

In [185]:
# KNNWithMeans configured for item-item similarity (user_based=False) using
# the Pearson correlation.
knn = KNNWithMeans(sim_options=dict(name='pearson', user_based=False))

In [186]:
# Fit the item-based KNN model on the full trainset; this is the step that
# computes the Pearson similarity matrix (see the log output below).
knn_fit = knn.fit(full_train)

Computing the pearson similarity matrix...
Done computing similarity matrix.


In [265]:
# Top-10 item-KNN recommendations for one user, as (estimate, raw item id) pairs.
#
# Every item in the trainset is scored and the ten highest estimates are kept.
# The sort is stable, so items with equal estimates stay in trainset iteration
# order (the same result as re-sorting and truncating after every append, but
# done once and without leaving throwaway loop variables in the namespace).
#
# NOTE(review): estimates appear to be capped at the top of the rating scale
# (the printed top-10 are all exactly 1), so the order among them is just
# iteration order; consider `predict(..., clip=False)` for a meaningful ranking.
# NOTE(review): items the user has already rated are not filtered out.
user = full_train.to_raw_uid(300)  # 300 is an inner trainset id; predict() is given the raw id
top = sorted(
    (
        (prediction.est, prediction.iid)
        for prediction in (
            knn_fit.predict(user, full_train.to_raw_iid(inner_iid))
            for inner_iid in full_train.all_items()
        )
    ),
    key=lambda scored: scored[0],
    reverse=True,
)[:10]

In [266]:
# Show the current top-10 list of (estimate, raw item id) pairs.
print(top)

[(1, '546560'), (1, '1222140'), (1, '1426210'), (1, '1435790'), (1, '105600'), (1, '250900'), (1, '1092790'), (1, '960090'), (1, '1190000'), (1, '1290000')]


In [267]:
# Catalogue rows for the recommended items. Raw item ids in `top` are strings,
# so they are converted with int() before matching against `app_id`.
# Rows come back in df2's own order, not in ranking order.
df2.loc[df2['app_id'].isin([int(raw_iid) for _est, raw_iid in top])]

Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,price_original,discount,steam_deck
4332,960090,Bloons TD 6,2018-12-17,True,True,False,Overwhelmingly Positive,97,209592,13.99,13.99,0.0,True
4996,1190000,Car Mechanic Simulator 2021,2021-08-11,True,False,False,Overwhelmingly Positive,95,17310,24.99,24.99,0.0,True
7345,105600,Terraria,2011-05-16,True,True,True,Overwhelmingly Positive,97,867226,9.99,9.99,0.0,True
8310,1290000,PowerWash Simulator,2022-07-14,True,False,False,Overwhelmingly Positive,97,29465,24.99,24.99,0.0,True
8506,1222140,Detroit: Become Human,2020-06-18,True,False,False,Very Positive,94,65412,39.99,39.99,0.0,True
9455,1426210,It Takes Two,2021-03-25,True,False,False,Overwhelmingly Positive,95,102750,39.99,39.99,0.0,True
10677,250900,The Binding of Isaac: Rebirth,2014-11-04,True,True,True,Overwhelmingly Positive,97,204840,14.99,14.99,0.0,True
14358,1092790,Inscryption,2021-10-19,True,True,True,Overwhelmingly Positive,97,79617,19.99,19.99,0.0,True
14371,1435790,Escape Simulator,2021-10-19,True,True,True,Very Positive,93,8852,14.99,14.99,0.0,True
19634,546560,Half-Life: Alyx,2020-03-23,True,False,False,Overwhelmingly Positive,98,67547,59.99,59.99,0.0,True
