In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

url_movies = 'https://drive.google.com/file/d/1DGY7oIpMq262Sh0_AcP6sGDWemxsrgCB/view?usp=sharing'

In [2]:
games = pd.read_json('../data/output_steam_games.json', lines=True)
games.dropna(how='all', inplace=True)
games.head()

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,specs,price,early_access,id,developer
88310,Kotoshiro,"[Action, Casual, Indie, Simulation, Strategy]",Lost Summoner Kitty,Lost Summoner Kitty,http://store.steampowered.com/app/761140/Lost_...,2018-01-04,"[Strategy, Action, Indie, Casual, Simulation]",http://steamcommunity.com/app/761140/reviews/?...,[Single-player],4.99,0.0,761140.0,Kotoshiro
88311,"Making Fun, Inc.","[Free to Play, Indie, RPG, Strategy]",Ironbound,Ironbound,http://store.steampowered.com/app/643980/Ironb...,2018-01-04,"[Free to Play, Strategy, Indie, RPG, Card Game...",http://steamcommunity.com/app/643980/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free To Play,0.0,643980.0,Secret Level SRL
88312,Poolians.com,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,Real Pool 3D - Poolians,http://store.steampowered.com/app/670290/Real_...,2017-07-24,"[Free to Play, Simulation, Sports, Casual, Ind...",http://steamcommunity.com/app/670290/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free to Play,0.0,670290.0,Poolians.com
88313,彼岸领域,"[Action, Adventure, Casual]",弹炸人2222,弹炸人2222,http://store.steampowered.com/app/767400/2222/,2017-12-07,"[Action, Adventure, Casual]",http://steamcommunity.com/app/767400/reviews/?...,[Single-player],0.99,0.0,767400.0,彼岸领域
88314,,,Log Challenge,,http://store.steampowered.com/app/773570/Log_C...,,"[Action, Indie, Casual, Sports]",http://steamcommunity.com/app/773570/reviews/?...,"[Single-player, Full controller support, HTC V...",2.99,0.0,773570.0,


In [3]:
games.columns

Index(['publisher', 'genres', 'app_name', 'title', 'url', 'release_date',
       'tags', 'reviews_url', 'specs', 'price', 'early_access', 'id',
       'developer'],
      dtype='object')

In [4]:
games = games[[
'id',
#'title', 
'app_name', 
#'url', 
'publisher', 
'developer',
#'release_date',
#'price', 
#'reviews_url', 
'genres', 
'tags', 
'specs', 
#'early_access', 
]]
games.rename(columns={
    'id':'game_id',
    'app_name':'title',
}, inplace=True)
games.head()

Unnamed: 0,game_id,title,publisher,developer,genres,tags,specs
88310,761140.0,Lost Summoner Kitty,Kotoshiro,Kotoshiro,"[Action, Casual, Indie, Simulation, Strategy]","[Strategy, Action, Indie, Casual, Simulation]",[Single-player]
88311,643980.0,Ironbound,"Making Fun, Inc.",Secret Level SRL,"[Free to Play, Indie, RPG, Strategy]","[Free to Play, Strategy, Indie, RPG, Card Game...","[Single-player, Multi-player, Online Multi-Pla..."
88312,670290.0,Real Pool 3D - Poolians,Poolians.com,Poolians.com,"[Casual, Free to Play, Indie, Simulation, Sports]","[Free to Play, Simulation, Sports, Casual, Ind...","[Single-player, Multi-player, Online Multi-Pla..."
88313,767400.0,弹炸人2222,彼岸领域,彼岸领域,"[Action, Adventure, Casual]","[Action, Adventure, Casual]",[Single-player]
88314,773570.0,Log Challenge,,,,"[Action, Indie, Casual, Sports]","[Single-player, Full controller support, HTC V..."


In [5]:
games.isna().sum()

game_id         2
title           2
publisher    8052
developer    3299
genres       3283
tags          163
specs         670
dtype: int64

In [6]:
games.shape

(32135, 7)

In [7]:
games.dropna(subset='game_id', inplace=True)
games.replace(np.nan, '', inplace=True)

In [8]:
games['genres'] = games['genres'].apply(lambda x: ' '.join(x) if isinstance(x, list) else ' ')
games['tags'] = games['tags'].apply(lambda x: ' '.join(x) if isinstance(x, list) else ' ')
games['specs'] = games['specs'].apply(lambda x: ' '.join(x) if isinstance(x, list) else ' ')

In [9]:
games.head()

Unnamed: 0,game_id,title,publisher,developer,genres,tags,specs
88310,761140.0,Lost Summoner Kitty,Kotoshiro,Kotoshiro,Action Casual Indie Simulation Strategy,Strategy Action Indie Casual Simulation,Single-player
88311,643980.0,Ironbound,"Making Fun, Inc.",Secret Level SRL,Free to Play Indie RPG Strategy,Free to Play Strategy Indie RPG Card Game Trad...,Single-player Multi-player Online Multi-Player...
88312,670290.0,Real Pool 3D - Poolians,Poolians.com,Poolians.com,Casual Free to Play Indie Simulation Sports,Free to Play Simulation Sports Casual Indie Mu...,Single-player Multi-player Online Multi-Player...
88313,767400.0,弹炸人2222,彼岸领域,彼岸领域,Action Adventure Casual,Action Adventure Casual,Single-player
88314,773570.0,Log Challenge,,,,Action Indie Casual Sports,Single-player Full controller support HTC Vive...


In [10]:
games['game_id'] = games['game_id'].astype('int64')
#games.set_index('game_id',inplace=True)
games['data'] = games['title'] + ' ' + games['publisher'] + ' ' + games['developer'] + ' ' + games['genres'] + ' ' + games['tags'] + ' ' + games['specs']
games.set_index('game_id').to_csv('../data/csv/games_to_recommend.csv')
games.head()

Unnamed: 0,game_id,title,publisher,developer,genres,tags,specs,data
88310,761140,Lost Summoner Kitty,Kotoshiro,Kotoshiro,Action Casual Indie Simulation Strategy,Strategy Action Indie Casual Simulation,Single-player,Lost Summoner Kitty Kotoshiro Kotoshiro Action...
88311,643980,Ironbound,"Making Fun, Inc.",Secret Level SRL,Free to Play Indie RPG Strategy,Free to Play Strategy Indie RPG Card Game Trad...,Single-player Multi-player Online Multi-Player...,"Ironbound Making Fun, Inc. Secret Level SRL Fr..."
88312,670290,Real Pool 3D - Poolians,Poolians.com,Poolians.com,Casual Free to Play Indie Simulation Sports,Free to Play Simulation Sports Casual Indie Mu...,Single-player Multi-player Online Multi-Player...,Real Pool 3D - Poolians Poolians.com Poolians....
88313,767400,弹炸人2222,彼岸领域,彼岸领域,Action Adventure Casual,Action Adventure Casual,Single-player,弹炸人2222 彼岸领域 彼岸领域 Action Adventure Casual Acti...
88314,773570,Log Challenge,,,,Action Indie Casual Sports,Single-player Full controller support HTC Vive...,Log Challenge Action Indie Casual Sports S...


In [21]:
games = pd.read_csv('../data/csv/games_to_recommend.csv')
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(games['data'])
def Recommendation(title):
    title = title.lower()
    game_index = games[games['title'].str.lower() == title].index.values
    if len(game_index) > 0:
        similarity_scores = cosine_similarity(
            tfidf_matrix[game_index],
            tfidf_matrix
        )
        similar_games_index = similarity_scores.argsort()[0][-6:][::-1]
        similar_games = games.iloc[similar_games_index]
        game_index = similar_games[similar_games['title'].str.lower() == title ].index.values
        if len(game_index) > 0:
            similar_games.drop(index=game_index, inplace=True)
        else:
            similar_games.drop(similar_games.index[-1], inplace=True)

        return list(similar_games['game_id'])
    return 'Game not found'

In [22]:
Recommendation('Half-Life')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  similar_games.drop(index=game_index, inplace=True)


[280, 50, 220, 323130, 360]

In [24]:
from scipy.sparse import save_npz, load_npz
save_npz('../data/matrix/tfidf_matrix.npz', tfidf_matrix)

In [None]:
loaded_tfidf_matrix = load_npz('../data/matrix/tfidf_matrix.npz')
loaded_tfidf_matrix