In [7]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors

In [8]:
# Function to reduce the memory usage of a DataFrame.
def reduce_memory(df):
    """Downcast 64-bit numeric columns of ``df`` to 32-bit where it is safe.

    ``float64`` columns become ``float32`` (this lowers precision to roughly
    7 significant digits). ``int64`` columns become ``int32`` only when every
    value fits in the int32 range; otherwise the column is left as ``int64``,
    because a plain ``astype('int32')`` would silently wrap out-of-range values.

    The frame is modified in place and is also returned so the call can be
    chained.

    Args:
        df: DataFrame whose columns should be downcast.

    Returns:
        The same ``df`` object, with narrowed column dtypes.
    """
    int32_min, int32_max = -2**31, 2**31 - 1
    for col in df.columns:
        column = df[col]
        if column.dtype == 'float64':
            df[col] = column.astype('float32')
        elif column.dtype == 'int64':
            # An empty column has no value that could overflow.
            fits_int32 = column.empty or (
                int32_min <= column.min() and column.max() <= int32_max
            )
            if fits_int32:
                df[col] = column.astype('int32')
    return df

# Generator that walks an in-memory DataFrame in fixed-size row chunks.
def data_generator(df, chunksize=10000):
    """Yield consecutive row slices of ``df``.

    Args:
        df: DataFrame to slice.
        chunksize: Maximum number of rows per yielded slice.

    Yields:
        ``df.iloc[start:start + chunksize]`` for ``start`` = 0, chunksize,
        2 * chunksize, ... until all rows are covered; the last slice may be
        shorter.
    """
    row_count = df.shape[0]
    chunk_starts = range(0, row_count, chunksize)
    yield from (df.iloc[start:start + chunksize] for start in chunk_starts)

In [9]:
# Read the dataset from disk, then downcast its 64-bit numeric columns.
df = pd.read_csv("data/Dataset.csv").pipe(reduce_memory)

In [10]:
# Show the loaded DataFrame (the notebook renders a truncated preview).
df

Unnamed: 0,UserID,Game,purchase/play,Heure_jouee,AppID,Release date,Estimated owners,Peak CCU,Required age,Price,...,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,Score,Recommandable,GameID
0,1,The Elder Scrolls V Skyrim,play,273.0,72850,"Nov 10, 2011",5000000 - 10000000,4383,17,19.99,...,50.533333,14.633333,Bethesda Game Studios,Bethesda Softworks,"Single-player,Steam Achievements,Steam Trading...",RPG,"Open World,RPG,Fantasy,Adventure,Dragons,Modda...",5.0,True,1
1,2,The Elder Scrolls V Skyrim,play,58.0,72850,"Nov 10, 2011",5000000 - 10000000,4383,17,19.99,...,50.533333,14.633333,Bethesda Game Studios,Bethesda Softworks,"Single-player,Steam Achievements,Steam Trading...",RPG,"Open World,RPG,Fantasy,Adventure,Dragons,Modda...",5.0,True,1
2,3,The Elder Scrolls V Skyrim,play,110.0,72850,"Nov 10, 2011",5000000 - 10000000,4383,17,19.99,...,50.533333,14.633333,Bethesda Game Studios,Bethesda Softworks,"Single-player,Steam Achievements,Steam Trading...",RPG,"Open World,RPG,Fantasy,Adventure,Dragons,Modda...",5.0,True,1
3,4,The Elder Scrolls V Skyrim,play,465.0,72850,"Nov 10, 2011",5000000 - 10000000,4383,17,19.99,...,50.533333,14.633333,Bethesda Game Studios,Bethesda Softworks,"Single-player,Steam Achievements,Steam Trading...",RPG,"Open World,RPG,Fantasy,Adventure,Dragons,Modda...",5.0,True,1
4,5,The Elder Scrolls V Skyrim,play,220.0,72850,"Nov 10, 2011",5000000 - 10000000,4383,17,19.99,...,50.533333,14.633333,Bethesda Game Studios,Bethesda Softworks,"Single-player,Steam Achievements,Steam Trading...",RPG,"Open World,RPG,Fantasy,Adventure,Dragons,Modda...",5.0,True,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56784,676,The Bug Butcher,play,0.3,350740,"Jan 19, 2016",100000 - 200000,1,0,7.99,...,2.516667,0.000000,Awfully Nice Studios,Awfully Nice Studios,"Single-player,Multi-player,Co-op,Shared/Split ...","Action,Indie","Indie,Action,Shoot 'Em Up,Arcade,Local Co-Op,C...",0.5,False,2513
56785,1363,Romance of the Three Kingdoms Maker,play,0.3,397720,"Dec 9, 2015",500000 - 1000000,7,0,0.00,...,0.016667,0.000000,"KOEI TECMO GAMES CO., LTD.","KOEI TECMO GAMES CO., LTD.","Single-player,Includes level editor",Simulation,"Simulation,Strategy",5.0,True,2514
56786,840,Life is Hard,play,3.1,414080,"Aug 14, 2021",50000 - 100000,2,0,9.99,...,5.366667,0.000000,"Pirozhok Studio,Vitaliy Ruzankin",Pirozhok Studio,"Single-player,Steam Achievements,Steam Trading...","Adventure,Indie,Simulation,Strategy","Strategy,Indie,Simulation,Adventure,Early Acce...",2.5,False,2515
56787,840,Executive Assault,play,2.7,331500,"Jul 17, 2015",20000 - 50000,1,0,12.99,...,1.633333,0.000000,Hesketh Studios Ltd,Hesketh Studios Ltd,"Single-player,Multi-player,PvP,Online PvP,Co-o...","Action,Strategy","Strategy,RTS,Action,FPS,Base-Building,Sci-fi,R...",5.0,True,2516


In [13]:
from scipy.sparse import coo_matrix

# Build each categorical once: `.cat.codes` gives dense 0-based positions and
# `.cat.categories` maps a position back to the original id.
user_categorical = df['UserID'].astype('category')
item_categorical = df['GameID'].astype('category')

# map each user and item to a unique numeric value
user_ids = user_categorical.cat.codes
item_ids = item_categorical.cat.codes

# Get the unique user and game ids
unique_user_ids = user_categorical.cat.categories
unique_item_ids = item_categorical.cat.categories

# create a sparse matrix: one row per user, one column per game, with the
# 'Heure_jouee' value as the entry (presumably hours played -- confirm).
user_game_matrix = coo_matrix(
    (df['Heure_jouee'], (user_ids, item_ids)),
    shape=(len(unique_user_ids), len(unique_item_ids)),
)

# Fit the model
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(user_game_matrix)

# Get top 5 recommendations for first user
query_idx = 0
n_recommendations = 5
# Ask for one extra neighbour because the query row is itself part of the
# fitted data and will normally be returned as a neighbour.
distances, indices = model_knn.kneighbors(
    user_game_matrix.getrow(query_idx), n_neighbors=n_recommendations + 1
)
# Remove the query user by index rather than by position: when several users
# are at the same distance, the query is not guaranteed to come back first.
neighbor_indices = [i for i in indices.flatten() if i != query_idx][:n_recommendations]
recommended_users = [unique_user_ids[i] for i in neighbor_indices]
print(f'Recommended users for the first user are: {recommended_users}')

Recommended users for the first user are: [332, 338, 598, 370, 140]


In [30]:
## Inspect UserID 1 (the query user)

# df[df["UserID"] == 1]

## The algorithm reports the following 5 UserIDs as the nearest neighbours

# df[df["UserID"] == 332]
# df[df["UserID"] == 338]
# df[df["UserID"] == 598]
# df[df["UserID"] == 370]
# df[df["UserID"] == 140]

Unnamed: 0,UserID,Game,purchase/play,Heure_jouee,AppID,Release date,Estimated owners,Peak CCU,Required age,Price,...,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,Score,Recommandable,GameID
331,332,The Elder Scrolls V Skyrim,play,235.0,72850,"Nov 10, 2011",5000000 - 10000000,4383,17,19.99,...,50.533333,14.633333,Bethesda Game Studios,Bethesda Softworks,"Single-player,Steam Achievements,Steam Trading...",RPG,"Open World,RPG,Fantasy,Adventure,Dragons,Modda...",5.0,True,1
765,332,Fallout 4,play,64.0,377160,"Nov 9, 2015",5000000 - 10000000,23636,17,19.99,...,36.049999,14.833333,Bethesda Game Studios,Bethesda Softworks,"Single-player,Steam Achievements,Full controll...",RPG,"Open World,Post-apocalyptic,Exploration,Single...",5.0,True,2
984,332,Fallout New Vegas,play,1.6,22380,"Oct 19, 2010",2000000 - 5000000,5776,16,9.99,...,16.266666,2.55,Obsidian Entertainment,Bethesda Softworks,"Single-player,Steam Achievements,Partial Contr...","Action,RPG","Open World,RPG,Post-apocalyptic,Singleplayer,M...",0.0,False,3
5164,332,Dragon Age Origins - Ultimate Edition,play,1.9,47810,"Oct 26, 2010",1000000 - 2000000,1183,17,29.99,...,12.933333,1.683333,BioWare,Electronic Arts,Single-player,RPG,"RPG,Fantasy,Story Rich,Choices Matter,Singlepl...",0.5,False,13
13105,332,Bastion,play,7.2,107100,"Aug 16, 2011",2000000 - 5000000,43,0,14.99,...,3.533333,0.25,Supergiant Games,Supergiant Games,"Single-player,Steam Achievements,Full controll...","Action,Indie,RPG","Great Soundtrack,Indie,Action,Narration,Single...",5.0,True,30
21142,332,Goat Simulator,play,2.3,265930,"Apr 1, 2014",2000000 - 5000000,123,0,9.99,...,2.9,0.0,Coffee Stain Studios,Coffee Stain Publishing,"Single-player,Multi-player,Shared/Split Screen...","Casual,Indie,Simulation","Simulation,Funny,Comedy,Open World,Sandbox,Phy...",3.5,False,108
32258,332,Chivalry Medieval Warfare,play,1.9,219640,"Oct 16, 2012",5000000 - 10000000,118,0,24.99,...,3.816667,0.0,Torn Banner Studios,Torn Banner Studios,"Multi-player,Steam Achievements,Full controlle...","Action,Indie","Medieval,Multiplayer,Action,Gore,First-Person,...",2.0,False,264
37980,332,Anno 2070,play,0.1,48240,"Nov 17, 2011",500000 - 1000000,136,0,19.99,...,14.2,0.65,"Blue Byte,Related Designs",Ubisoft,"Single-player,Multi-player,Steam Trading Cards",Strategy,"Strategy,City Builder,Simulation,Futuristic,RT...",0.0,False,390
46966,332,Project Zomboid,play,0.4,108600,"Nov 8, 2013",2000000 - 5000000,22793,0,19.99,...,10.75,3.283333,The Indie Stone,The Indie Stone,"Single-player,Multi-player,PvP,Online PvP,Co-o...","Indie,RPG,Simulation,Early Access","Survival,Zombies,Open World,Open World Surviva...",0.0,False,629
50906,332,Brothers - A Tale of Two Sons,play,4.2,225080,"Sep 3, 2013",1000000 - 2000000,28,0,14.99,...,2.733333,0.0,Starbreeze Studios AB,505 Games,"Single-player,Full controller support","Action,Adventure,Indie","Adventure,Atmospheric,Story Rich,Controller,In...",5.0,True,913


In [33]:
# Build a Hugging Face fill-mask pipeline; the first call downloads the
# model files (config and weights).
from transformers import pipeline

pipe = pipeline(
    task="fill-mask",
    model="vocab-transformers/distilbert-word2vec_256k-MLM_1M",
)

  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)


config.json:   0%|          | 0.00/589 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)


pytorch_model.bin:   0%|          | 0.00/962M [00:00<?, ?B/s]

KeyboardInterrupt: 