# MetaData

In [1]:
import pandas as pd
from RecData import RecData
from pprint import pprint
import numpy as np
import random

In [2]:
# recs = pd.read_csv('data/recommendations.csv')
# recs = pd.read_csv('data/pruned.csv')
# recs = pd.read_csv('data/2plus.csv')
recs = pd.read_csv('data/full_pruned.csv')

# recs = recs[:5]


recs = recs.sort_values(by='date')
recs = recs.drop_duplicates(subset=['user_id', 'app_id'], keep='last')

USED_COLS = ['app_id', 'is_recommended', 'user_id']
recs = recs[USED_COLS]

item_data = pd.read_csv('data/games.csv')
titles = item_data[['app_id', 'title']]

print("Shape:", recs.shape)
recs.sort_values(by=['user_id', 'app_id']).head()


Shape: (1482464, 3)


Unnamed: 0,app_id,is_recommended,user_id
420790,12210,True,240
675882,22380,True,240
246231,239140,True,240
539676,251570,True,240
521289,270880,True,240


In [3]:
random.seed(42)
np.random.seed(42)
rec_data = RecData()
rec_data.create_from_dataframe(recs)
rec_data.set_titles(titles)

# del recs

print("Creating splits...")
train_data, val = rec_data.leave_k_out_split(k=1, create_val=False)
print("Done creating splits.")

Creating utility matrix...
Done utility matrix.
Creating splits...
Done user 1 / 63175
Done user 10001 / 63175
Done user 20001 / 63175
Done user 30001 / 63175
Done user 40001 / 63175
Done user 50001 / 63175
Done user 60001 / 63175
Done creating splits.


## Content Based

In [4]:
meta_data = pd.read_json('data/games_metadata.json', lines=True)
meta_data.head()

Unnamed: 0,app_id,description,tags
0,10090,"Call of Duty is back, redefining war like you'...","[Zombies, World War II, FPS, Multiplayer, Acti..."
1,13500,Enter the dark underworld of Prince of Persia ...,"[Action, Adventure, Parkour, Third Person, Gre..."
2,22364,,[Action]
3,113020,Monaco: What's Yours Is Mine is a single playe...,"[Co-op, Stealth, Indie, Heist, Local Co-Op, St..."
4,226560,Escape Dead Island is a Survival-Mystery adven...,"[Zombies, Adventure, Survival, Action, Third P..."


In [5]:
meta_data = meta_data[meta_data['app_id'].isin(recs['app_id'])]
meta_data.shape

(2215, 3)

In [6]:
meta_data['app_id'] = meta_data['app_id'].apply(lambda id: train_data.item_id_to_index(id))
meta_data.head()

Unnamed: 0,app_id,description,tags
0,34,"Call of Duty is back, redefining war like you'...","[Zombies, World War II, FPS, Multiplayer, Acti..."
1,98,Enter the dark underworld of Prince of Persia ...,"[Action, Adventure, Parkour, Third Person, Gre..."
3,173,Monaco: What's Yours Is Mine is a single playe...,"[Co-op, Stealth, Indie, Heist, Local Co-Op, St..."
4,327,Escape Dead Island is a Survival-Mystery adven...,"[Zombies, Adventure, Survival, Action, Third P..."
6,229,"“METAL SLUG 3”, the masterpiece in SNK’s emble...","[Arcade, Classic, Action, Co-op, Side Scroller..."


In [7]:
meta_data = meta_data.sort_values(by='app_id')
meta_data.head()

Unnamed: 0,app_id,description,tags
267,0,The sequel to the million-plus selling Dead Ri...,"[Zombies, Action, Open World, Co-op, Adventure..."
9698,1,"Create, discover, and download new player-crea...","[Turn-Based Strategy, Strategy, Turn-Based, Mu..."
11955,2,Fight in the theatre of war that changed the w...,"[World War II, Action, FPS, Realistic, Multipl..."
16484,3,Ride your music. Audiosurf is a music-adapting...,"[Music, Rhythm, Indie, Casual, Music-Based Pro..."
10445,4,Counter-Strike: Source blends Counter-Strike's...,"[Shooter, Action, FPS, Multiplayer, Team-Based..."


In [8]:
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [9]:
mlb = MultiLabelBinarizer(sparse_output=True)
meta_tags = meta_data.join(pd.DataFrame.sparse.from_spmatrix(
    mlb.fit_transform(meta_data.pop('tags')),
    index=meta_data.index,
    columns=mlb.classes_
))
meta_tags.head()

Unnamed: 0,app_id,description,1980s,1990's,2.5D,2D,2D Fighter,2D Platformer,360 Video,3D,...,Well-Written,Werewolves,Western,Wholesome,Word Game,World War I,World War II,Wrestling,Zombies,eSports
267,0,The sequel to the million-plus selling Dead Ri...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
9698,1,"Create, discover, and download new player-crea...",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11955,2,Fight in the theatre of war that changed the w...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
16484,3,Ride your music. Audiosurf is a music-adapting...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10445,4,Counter-Strike: Source blends Counter-Strike's...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [10]:
genre_data = meta_tags.drop(columns=['description'])
genre_data.head()

Unnamed: 0,app_id,1980s,1990's,2.5D,2D,2D Fighter,2D Platformer,360 Video,3D,3D Fighter,...,Well-Written,Werewolves,Western,Wholesome,Word Game,World War I,World War II,Wrestling,Zombies,eSports
267,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
9698,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11955,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
16484,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10445,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [11]:
desc_data = meta_tags[['app_id', 'description']]
desc_data.head()

Unnamed: 0,app_id,description
267,0,The sequel to the million-plus selling Dead Ri...
9698,1,"Create, discover, and download new player-crea..."
11955,2,Fight in the theatre of war that changed the w...
16484,3,Ride your music. Audiosurf is a music-adapting...
10445,4,Counter-Strike: Source blends Counter-Strike's...


In [12]:
tf = TfidfVectorizer()
X = tf.fit_transform(desc_data['description'])

In [13]:
desc_feats = desc_data.join(pd.DataFrame.sparse.from_spmatrix(X, index=desc_data.index))
desc_feats = desc_feats.drop(columns=['description'])
desc_feats.head()

Unnamed: 0,app_id,0,1,2,3,4,5,6,7,8,...,10057,10058,10059,10060,10061,10062,10063,10064,10065,10066
267,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9698,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11955,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16484,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10445,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
meta_all = genre_data.merge(desc_feats, on='app_id')
meta_all.head()

Unnamed: 0,app_id,1980s,1990's,2.5D,2D,2D Fighter,2D Platformer,360 Video,3D,3D Fighter,...,10057,10058,10059,10060,10061,10062,10063,10064,10065,10066
0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Trying to add more information.

In [15]:
games = pd.read_csv('data/games.csv')
games.head()

Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,price_original,discount,steam_deck
0,10090,Call of Duty: World at War,2008-11-18,True,False,False,Very Positive,92,37039,19.99,19.99,0.0,True
1,13500,Prince of Persia: Warrior Within™,2008-11-21,True,False,False,Very Positive,84,2199,9.99,9.99,0.0,True
2,22364,BRINK: Agents of Change,2011-08-03,True,False,False,Positive,85,21,2.99,2.99,0.0,True
3,113020,Monaco: What's Yours Is Mine,2013-04-24,True,True,True,Very Positive,92,3722,14.99,14.99,0.0,True
4,226560,Escape Dead Island,2014-11-18,True,False,False,Mixed,61,873,14.99,14.99,0.0,True


In [16]:
games = games[games['app_id'].isin(recs['app_id'])]
games['app_id'] = games['app_id'].apply(lambda id: train_data.item_id_to_index(id))
games = games.sort_values(by='app_id')
games.head()

Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,price_original,discount,steam_deck
267,0,Dead Rising® 2,2010-09-27,True,False,False,Mostly Positive,78,4317,19.99,19.99,0.0,True
9698,1,Sid Meier's Civilization® V,2010-09-21,True,True,True,Overwhelmingly Positive,96,115749,29.99,29.99,0.0,True
11955,2,Red Orchestra: Ostfront 41-45,2006-03-14,True,True,True,Very Positive,84,1282,4.99,4.99,0.0,True
16484,3,AudioSurf,2008-02-15,True,False,False,Overwhelmingly Positive,95,8396,9.99,9.99,0.0,True
10445,4,Counter-Strike: Source,2004-11-01,True,True,True,Overwhelmingly Positive,96,99952,9.99,9.99,0.0,True


In [17]:
mlb = LabelBinarizer(sparse_output=True)
games = games.join(pd.DataFrame.sparse.from_spmatrix(
    mlb.fit_transform(games.pop('rating')),
    index=games.index,
    columns=mlb.classes_
))
games.head()

Unnamed: 0,app_id,title,date_release,win,mac,linux,positive_ratio,user_reviews,price_final,price_original,discount,steam_deck,Mixed,Mostly Negative,Mostly Positive,Overwhelmingly Positive,Positive,Very Positive
267,0,Dead Rising® 2,2010-09-27,True,False,False,78,4317,19.99,19.99,0.0,True,0,0,1,0,0,0
9698,1,Sid Meier's Civilization® V,2010-09-21,True,True,True,96,115749,29.99,29.99,0.0,True,0,0,0,1,0,0
11955,2,Red Orchestra: Ostfront 41-45,2006-03-14,True,True,True,84,1282,4.99,4.99,0.0,True,0,0,0,0,0,1
16484,3,AudioSurf,2008-02-15,True,False,False,95,8396,9.99,9.99,0.0,True,0,0,0,1,0,0
10445,4,Counter-Strike: Source,2004-11-01,True,True,True,96,99952,9.99,9.99,0.0,True,0,0,0,1,0,0


In [18]:
games['date_release'] = pd.to_datetime(games['date_release']).dt.strftime("%Y%m%d")
games['date_release'] = games['date_release'].astype(int)
print(games['date_release'].dtype)
games.head()

int32


Unnamed: 0,app_id,title,date_release,win,mac,linux,positive_ratio,user_reviews,price_final,price_original,discount,steam_deck,Mixed,Mostly Negative,Mostly Positive,Overwhelmingly Positive,Positive,Very Positive
267,0,Dead Rising® 2,20100927,True,False,False,78,4317,19.99,19.99,0.0,True,0,0,1,0,0,0
9698,1,Sid Meier's Civilization® V,20100921,True,True,True,96,115749,29.99,29.99,0.0,True,0,0,0,1,0,0
11955,2,Red Orchestra: Ostfront 41-45,20060314,True,True,True,84,1282,4.99,4.99,0.0,True,0,0,0,0,0,1
16484,3,AudioSurf,20080215,True,False,False,95,8396,9.99,9.99,0.0,True,0,0,0,1,0,0
10445,4,Counter-Strike: Source,20041101,True,True,True,96,99952,9.99,9.99,0.0,True,0,0,0,1,0,0


In [19]:
tfidf = TfidfVectorizer()
games = games.join(pd.DataFrame.sparse.from_spmatrix(
    tfidf.fit_transform(games.pop('title')),
    index=games.index,
))
games.head()

Unnamed: 0,app_id,date_release,win,mac,linux,positive_ratio,user_reviews,price_final,price_original,discount,...,3035,3036,3037,3038,3039,3040,3041,3042,3043,3044
267,0,20100927,True,False,False,78,4317,19.99,19.99,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9698,1,20100921,True,True,True,96,115749,29.99,29.99,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11955,2,20060314,True,True,True,84,1282,4.99,4.99,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16484,3,20080215,True,False,False,95,8396,9.99,9.99,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10445,4,20041101,True,True,True,96,99952,9.99,9.99,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
meta_all.merge(games, on='app_id')
meta_all.head()

Unnamed: 0,app_id,1980s,1990's,2.5D,2D,2D Fighter,2D Platformer,360 Video,3D,3D Fighter,...,10057,10058,10059,10060,10061,10062,10063,10064,10065,10066
0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
desc_sims = cosine_similarity(desc_feats.drop(columns=['app_id']), dense_output=True)
genre_sims = cosine_similarity(genre_data.drop(columns=['app_id']), dense_output=True)
meta_sims = cosine_similarity(meta_all.drop(columns=['app_id']), dense_output=True)

In [22]:
from KNN import ContentKNN
from pprint import pprint

In [23]:
genre_knn = ContentKNN(k=40)
genre_knn.fit(genre_data.drop(columns=['app_id']))
desc_knn = ContentKNN(k=40)
desc_knn.fit(desc_feats.drop(columns=['app_id']))
meta_knn = ContentKNN(k=40)
meta_knn.fit(meta_all.drop(columns=['app_id']))

## Genre KNN

In [24]:
train_data.search_title('dave')

[('DAVE THE DIVER', 2147)]

In [25]:
prefs = train_data.create_prefs([(810, 1), (0, 0), (618, 1), (642, 1), (39, 0), (1397, 0), (1292, 0)])
top = genre_knn.top_n(13, 10, prefs=prefs)
pprint(top)
pprint([train_data.index_to_title(i) for r, i in top])

[(0.14363035503530272, 988),
 (0.13613746333517893, 767),
 (0.13610089180336565, 1033),
 (0.1345486157918661, 1422),
 (0.13428636817775733, 1123),
 (0.13141650915222222, 1032),
 (0.12896449260870121, 1086),
 (0.1289580346605085, 731),
 (0.1289580346605085, 1532),
 (0.1289580346605085, 1746)]
['The Golf Club™ 2019 featuring PGA TOUR',
 'Capitalism 2',
 'Project Hospital',
 'Blackjack Championship',
 'Star Chef: Cooking & Restaurant Game',
 'Virtual Villagers Origins 2',
 'Wizard And Minion Idle',
 'Production Line : Car factory simulation',
 'Nebuchadnezzar',
 'Lords and Villeins']


## Description KNN

In [26]:
train_data.search_title('runescape')

[('Old School RuneScape', 1537), ('RuneScape ®', 1454)]

In [27]:
prefs = train_data.create_prefs([(810, 1), (0, 0), (145, 0), (1326, 1), (285, 0), (1032, 0), (26, 0), (2122, 0)])
top = desc_knn.top_n(13, 10, prefs=prefs)
pprint(top)
pprint([train_data.index_to_title(i) for r, i in top])

[(0.034450092510193084, 1247),
 (0.034445284008807996, 2139),
 (0.03199737286510107, 1121),
 (0.031389347624519816, 628),
 (0.02831234758390965, 1149),
 (0.027882929723099026, 2127),
 (0.025575072100385555, 1089),
 (0.024389197606743955, 1202),
 (0.02409232868631431, 308),
 (0.024025346315006513, 595)]
['Unity of Command II',
 'Cosmoteer: Starship Architect & Commander',
 'SUPER DRAGON BALL HEROES WORLD MISSION',
 'Shadowverse CCG',
 'Iron Marines',
 'Right and Down',
 "Conqueror's Blade",
 'Fantasy General II',
 'TRON 2.0',
 'Crossout']


## Ensemble KNN

In [21]:
from KNN import EnsembleKNN, ItemKNN
import pickle

In [29]:
import numba as nb
from numba import jit
from KNN import ItemKNN

model_dir = "saved_models/knn/sim1.pkl" 

with open(model_dir, 'rb') as file:
    train_data, sims, means = pickle.load(file)

@jit
def make_dict(items):
    return {k: v for k,v in items}

means_prime = make_dict(tuple(means.items()))
knn = ItemKNN(k=40, mean_centered=True, iuf=False)
knn._sims = sims
knn._item_means = means_prime
knn._num_users, knn._num_items = train_data.get_matrix().shape
knn._M = train_data.get_matrix()
knn._store_rating_pairs(knn._M)
knn._store_item_means(knn._M)


  @jit
[1m
File "..\..\..\AppData\Local\Temp\ipykernel_19088\1534073109.py", line 10:[0m
[1m<source missing, REPL/exec in use?>[0m
[0m
Fall-back from the nopython compilation path to the object mode compilation path has been detected. This is deprecated behaviour that will be removed in Numba 0.59.0.

For more information visit https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit
[1m
File "..\..\..\AppData\Local\Temp\ipykernel_19088\1534073109.py", line 10:[0m
[1m<source missing, REPL/exec in use?>[0m
[0m


Storing ratings in dictionary...
Done storing in dictionary.
Computing item means...
Done item 1 / 2215
Done item 101 / 2215
Done item 201 / 2215
Done item 301 / 2215
Done item 401 / 2215
Done item 501 / 2215
Done item 601 / 2215
Done item 701 / 2215
Done item 801 / 2215
Done item 901 / 2215
Done item 1001 / 2215
Done item 1101 / 2215
Done item 1201 / 2215
Done item 1301 / 2215
Done item 1401 / 2215
Done item 1501 / 2215
Done item 1601 / 2215
Done item 1701 / 2215
Done item 1801 / 2215
Done item 1901 / 2215
Done item 2001 / 2215
Done item 2101 / 2215
Done item 2201 / 2215
Done computing item means.


### Collaborative Filtering Test

In [4]:
from KNN import ItemKNN

In [5]:
knn = ItemKNN(k=40, mean_centered=False, iuf=False)
knn.fit(train_data.get_matrix())

Storing ratings in dictionary...
Done storing in dictionary.
Upto row 0
Upto row 1
Upto row 2
Upto row 3
Upto row 4
Upto row 5
Upto row 6
Upto row 7
Upto row 8
Upto row 9
Upto row 10
Upto row 11
Upto row 12
Upto row 13
Upto row 14
Upto row 15
Upto row 16
Upto row 17
Upto row 18
Upto row 19
Upto row 20
Upto row 21
Upto row 22
Upto row 23
Upto row 24
Upto row 25
Upto row 26
Upto row 27
Upto row 28
Upto row 29
Upto row 30
Upto row 31
Upto row 32
Upto row 33
Upto row 34
Upto row 35
Upto row 36
Upto row 37
Upto row 38
Upto row 39
Upto row 40
Upto row 41
Upto row 42
Upto row 43
Upto row 44
Upto row 45
Upto row 46
Upto row 47
Upto row 48
Upto row 49
Upto row 50
Upto row 51
Upto row 52
Upto row 53
Upto row 54
Upto row 55
Upto row 56
Upto row 57
Upto row 58
Upto row 59
Upto row 60
Upto row 61
Upto row 62
Upto row 63
Upto row 64
Upto row 65
Upto row 66
Upto row 67
Upto row 68
Upto row 69
Upto row 70
Upto row 71
Upto row 72
Upto row 73
Upto row 74
Upto row 75
Upto row 76
Upto row 77
Upto row 78
U

In [None]:
# Ensure file exists
model_dir = "saved_models/knn/jac_sims.pkl" 
file = open(model_dir, 'a')
file.close()

# Save model
print("Saving model...")
with open(model_dir, 'wb') as file:
    pickle.dump([train_data, knn._sims], file)
print("Done saving model.")

In [18]:
train_data.search_title('fishing')

[('Fishing Planet', 426),
 ('SEGA Bass Fishing', 90),
 ('Fishing: Barents Sea', 868),
 ('Russian Fishing 4', 957),
 ('Cat Goes Fishing', 348),
 ('Fishing: North Atlantic - Enhanced Edition', 1462),
 ("Luna's Fishing Garden", 1637),
 ('Fishing Sim World®: Pro Tour', 998),
 ('Fishing Paradiso', 1962)]

In [19]:
prefs = train_data.create_prefs([(426, 1)])
top = knn.top_n(13, 10, prefs=prefs)
pprint(top)
pprint([train_data.index_to_title(i) for r, i in top])

[(0.03969128996692393, 957),
 (0.03064798598949212, 677),
 (0.0241234221598878, 500),
 (0.023066485753052916, 181),
 (0.02171767028627838, 510),
 (0.020780856423173802, 263),
 (0.020535714285714286, 328),
 (0.01904296875, 272),
 (0.01866151866151866, 1043),
 (0.018404907975460124, 26)]
['Russian Fishing 4',
 'theHunter: Call of the Wild™',
 'American Truck Simulator',
 'DCS World Steam Edition',
 'Governor of Poker 3',
 'theHunter Classic',
 'Gems of War - Puzzle RPG',
 'Unturned',
 'Farming Simulator 19',
 'EVE Online']


In [22]:
ens_knn = EnsembleKNN(k=40)
ens_knn.set_sims([(knn._sims, 1)])

In [23]:
train_data.search_title('fishing')

[('Fishing Planet', 426),
 ('SEGA Bass Fishing', 90),
 ('Fishing: Barents Sea', 868),
 ('Russian Fishing 4', 957),
 ('Cat Goes Fishing', 348),
 ('Fishing: North Atlantic - Enhanced Edition', 1462),
 ("Luna's Fishing Garden", 1637),
 ('Fishing Sim World®: Pro Tour', 998),
 ('Fishing Paradiso', 1962)]

In [24]:
prefs = train_data.create_prefs([(426, 1)])
top = ens_knn.top_n(13, 10, prefs=prefs)
pprint(top)
pprint([train_data.index_to_title(i) for r, i in top])

[(0.03969128996692393, 957),
 (0.03064798598949212, 677),
 (0.0241234221598878, 500),
 (0.023066485753052916, 181),
 (0.02171767028627838, 510),
 (0.020780856423173802, 263),
 (0.020535714285714286, 328),
 (0.01904296875, 272),
 (0.01866151866151866, 1043),
 (0.018404907975460124, 26)]
['Russian Fishing 4',
 'theHunter: Call of the Wild™',
 'American Truck Simulator',
 'DCS World Steam Edition',
 'Governor of Poker 3',
 'theHunter Classic',
 'Gems of War - Puzzle RPG',
 'Unturned',
 'Farming Simulator 19',
 'EVE Online']


In [25]:
# Ensure file exists
model_dir = "saved_models/knn/ens_knn.pkl" 
file = open(model_dir, 'a')
file.close()

# Save model
print("Saving model...")
with open(model_dir, 'wb') as file:
    pickle.dump([train_data, ens_knn], file)
print("Done saving model.")

Saving model...
Done saving model.
