In [202]:
from gensim.models import Word2Vec
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models import Word2Vec
import pandas as pd
import ast  
import pyarrow.parquet as pq


In [213]:
steam_games = pd.read_parquet('steam_games.parquet')

In [214]:
steam_games

Unnamed: 0,genres,app_name,title,release_date,tags,specs,price,id,developer
0,"[action, casual, indie, simulation, strategy]",Lost Summoner Kitty,Lost Summoner Kitty,2018-01-04,"[strategy, action, indie, casual, simulation]",[single-player],4.99,761140,Kotoshiro
1,"[free to play, indie, rpg, strategy]",Ironbound,Ironbound,2018-01-04,"[free to play, strategy, indie, rpg, card game...","[single-player, multi-player, online multi-pla...",0.00,643980,Secret Level SRL
2,"[casual, free to play, indie, simulation, sports]",Real Pool 3D - Poolians,Real Pool 3D - Poolians,2017-07-24,"[free to play, simulation, sports, casual, ind...","[single-player, multi-player, online multi-pla...",0.00,670290,Poolians.com
3,"[action, adventure, casual]",弹炸人2222,弹炸人2222,2017-12-07,"[action, adventure, casual]",[single-player],0.99,767400,彼岸领域
5,"[action, adventure, simulation]",Battle Royale Trainer,Battle Royale Trainer,2018-01-04,"[action, adventure, simulation, fps, shooter, ...","[single-player, steam achievements]",3.99,772540,Trickjump Games Ltd
...,...,...,...,...,...,...,...,...,...
32129,"[action, adventure, casual, indie]",Kebab it Up!,Kebab it Up!,2018-01-04,"[action, indie, casual, violent, adventure]","[single-player, steam achievements, steam cloud]",1.99,745400,Bidoniera Games
32130,"[casual, indie, simulation, strategy]",Colony On Mars,Colony On Mars,2018-01-04,"[strategy, indie, casual, simulation]","[single-player, steam achievements]",1.99,773640,"Nikita ""Ghost_RUS"""
32131,"[casual, indie, strategy]",LOGistICAL: South Africa,LOGistICAL: South Africa,2018-01-04,"[strategy, indie, casual]","[single-player, steam achievements, steam clou...",4.99,733530,Sacada
32132,"[indie, racing, simulation]",Russian Roads,Russian Roads,2018-01-04,"[indie, simulation, racing]","[single-player, steam achievements, steam trad...",1.99,610660,Laush Dmitriy Sergeevich


In [192]:
steam_games['genres'] = steam_games.genres.apply(ast.literal_eval)
steam_games['tags'] = steam_games.tags.apply(ast.literal_eval)
steam_games['specs'] = steam_games.specs.apply(ast.literal_eval)

In [193]:
steam_games['keys_vec'] = steam_games.apply(lambda row: list(set(row['tags'] + row['specs'] + row['genres'] )), axis=1)


In [194]:
steam_games[['title','keys_vec']]

Unnamed: 0,title,keys_vec
0,Lost Summoner Kitty,"[single-player, casual, simulation, action, in..."
1,Ironbound,"[replay value, 2d, fantasy, card game, multi-p..."
2,Real Pool 3D - Poolians,"[in-app purchases, multiplayer, single-player,..."
3,弹炸人2222,"[action, single-player, adventure, casual]"
5,Battle Royale Trainer,"[single-player, adventure, simulation, shooter..."
...,...,...
32129,Kebab it Up!,"[single-player, adventure, casual, steam cloud..."
32130,Colony On Mars,"[single-player, casual, simulation, indie, ste..."
32131,LOGistICAL: South Africa,"[single-player, steam leaderboards, casual, st..."
32132,Russian Roads,"[steam trading cards, single-player, racing, s..."


In [195]:
steam_games.drop(columns=['tags','specs','genres','app_name','release_date','price','id','developer'], inplace=True)

In [203]:
steam_games

Unnamed: 0,title,keys_vec
0,Lost Summoner Kitty,"[single-player, casual, simulation, action, in..."
1,Ironbound,"[replay value, 2d, fantasy, card game, multi-p..."
2,Real Pool 3D - Poolians,"[in-app purchases, multiplayer, single-player,..."
3,弹炸人2222,"[action, single-player, adventure, casual]"
5,Battle Royale Trainer,"[single-player, adventure, simulation, shooter..."
...,...,...
32129,Kebab it Up!,"[single-player, adventure, casual, steam cloud..."
32130,Colony On Mars,"[single-player, casual, simulation, indie, ste..."
32131,LOGistICAL: South Africa,"[single-player, steam leaderboards, casual, st..."
32132,Russian Roads,"[steam trading cards, single-player, racing, s..."


In [200]:
df = steam_games.loc[:, ('title', 'keys_vec')]

model = Word2Vec(df['keys_vec'], vector_size=400, window=10, min_count=1, workers=5)  

def get_vector_for_list(word_list):
    vectors = [model.wv[word] for word in word_list if word in model.wv]
    return sum(vectors) / len(vectors) if vectors else None


df['vector'] = df['keys_vec'].apply(get_vector_for_list)

df = df.dropna(subset=['vector'])
df.reset_index(drop=True, inplace=True)

cosine_similarities = cosine_similarity(list(df['vector']), list(df['vector']))

def get_top_n_recommendations(game_title, n=5):
    game_index = df.query('title==@game_title').index[0]
    
    sim_scores = list(enumerate(cosine_similarities[game_index]))
    
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    top_n_indices = [i for i, _ in sim_scores[1:n+1]]

    recommended_games = df['title'].iloc[top_n_indices].tolist()

    return recommended_games



In [201]:
get_top_n_recommendations('Final DOOM')

['DOOM II',
 'Quake IV',
 'Return to Castle Wolfenstein',
 'Ultimate Doom',
 'Master Levels for Doom II']