# Game Recommendation System

In [1]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
import matplotlib as plt
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
def combine_features(row):
    return row["publisher"]+" "+row["categories"]+" "+row["genres"]


In [3]:
df = pd.read_csv("steam_games.csv", encoding = "ISO-8859-1")

In [4]:
df.columns

Index(['index', 'name', 'publisher', 'categories', 'genres', 'steamspy_tags'], dtype='object')

In [5]:
features = ['genres','steamspy_tags','categories','publisher']
for feature in features:
    df[feature] = df[feature].fillna('')

In [6]:
df.head()

Unnamed: 0,index,name,publisher,categories,genres,steamspy_tags
0,1,Counter-Strike,Valve,Multi-player,Action,Action FPS Multiplayer
1,2,Team Fortress Classic,Valve,Multi-player,Action,Action FPS Multiplayer
2,3,Day of Defeat,Valve,Multi-player,Action,FPS World War II Multiplayer
3,4,Deathmatch Classic,Valve,Multi-player,Action,Action FPS Multiplayer
4,5,Half-Life: Opposing Force,Valve,Single-player Multi-player,Action,FPS Action Sci-fi


In [7]:
df["combined_features"] = df.apply(combine_features,axis=1)

In [8]:
df.head()

Unnamed: 0,index,name,publisher,categories,genres,steamspy_tags,combined_features
0,1,Counter-Strike,Valve,Multi-player,Action,Action FPS Multiplayer,Valve Multi-player Action
1,2,Team Fortress Classic,Valve,Multi-player,Action,Action FPS Multiplayer,Valve Multi-player Action
2,3,Day of Defeat,Valve,Multi-player,Action,FPS World War II Multiplayer,Valve Multi-player Action
3,4,Deathmatch Classic,Valve,Multi-player,Action,Action FPS Multiplayer,Valve Multi-player Action
4,5,Half-Life: Opposing Force,Valve,Single-player Multi-player,Action,FPS Action Sci-fi,Valve Single-player Multi-player Action


In [9]:
df4 = df
df4.head()

Unnamed: 0,index,name,publisher,categories,genres,steamspy_tags,combined_features
0,1,Counter-Strike,Valve,Multi-player,Action,Action FPS Multiplayer,Valve Multi-player Action
1,2,Team Fortress Classic,Valve,Multi-player,Action,Action FPS Multiplayer,Valve Multi-player Action
2,3,Day of Defeat,Valve,Multi-player,Action,FPS World War II Multiplayer,Valve Multi-player Action
3,4,Deathmatch Classic,Valve,Multi-player,Action,Action FPS Multiplayer,Valve Multi-player Action
4,5,Half-Life: Opposing Force,Valve,Single-player Multi-player,Action,FPS Action Sci-fi,Valve Single-player Multi-player Action


In [10]:
tfidf = TfidfVectorizer(stop_words='english')
count_matrix = tfidf.fit_transform(df["combined_features"])
count_matrix.shape

(27059, 13804)

In [11]:
count_matrix

<27059x13804 sparse matrix of type '<class 'numpy.float64'>'
	with 193171 stored elements in Compressed Sparse Row format>

In [12]:
cosine_sim = cosine_similarity(count_matrix)

In [13]:
user_tags = ['user_id', 'game', 'behavior', 'play_time', '0']
df2 = pd.read_csv("steam_user.csv", names=user_tags)
df2 = df2.drop(['0'],axis=1)
df2.head()

Unnamed: 0,user_id,game,behavior,play_time
0,151603712,The Elder Scrolls V Skyrim,purchase,1.0
1,151603712,The Elder Scrolls V Skyrim,play,273.0
2,151603712,Fallout 4,purchase,1.0
3,151603712,Fallout 4,play,87.0
4,151603712,Spore,purchase,1.0


In [14]:
 user_id_ip = int(input("Enter your user id"))

Enter your user id 151603712


In [15]:
user_data_ipr = df2[(df2.user_id == user_id_ip) & (df2.behavior == 'play') & (df2.play_time >= 10)]
user_data_ipr.head()

Unnamed: 0,user_id,game,behavior,play_time
1,151603712,The Elder Scrolls V Skyrim,play,273.0
3,151603712,Fallout 4,play,87.0
5,151603712,Spore,play,14.9
7,151603712,Fallout New Vegas,play,12.1


In [16]:
df3 = pd.merge(df, user_data_ipr, left_on="name", right_on="game")
df3.head()

Unnamed: 0,index,name,publisher,categories,genres,steamspy_tags,combined_features,user_id,game,behavior,play_time
0,5917,Fallout 4,Bethesda Softworks,Single-player,RPG,Open World Post-apocalyptic Exploration,Bethesda Softworks Single-player RPG,151603712,Fallout 4,play,87.0


In [17]:
print(df4['categories'].dtypes)

object


In [18]:
temp = df4["categories"].copy()
df4["publisher"]= df4["publisher"].str.cat(temp, sep =" ")

In [19]:
df4.head()

Unnamed: 0,index,name,publisher,categories,genres,steamspy_tags,combined_features
0,1,Counter-Strike,Valve Multi-player,Multi-player,Action,Action FPS Multiplayer,Valve Multi-player Action
1,2,Team Fortress Classic,Valve Multi-player,Multi-player,Action,Action FPS Multiplayer,Valve Multi-player Action
2,3,Day of Defeat,Valve Multi-player,Multi-player,Action,FPS World War II Multiplayer,Valve Multi-player Action
3,4,Deathmatch Classic,Valve Multi-player,Multi-player,Action,Action FPS Multiplayer,Valve Multi-player Action
4,5,Half-Life: Opposing Force,Valve Single-player Multi-player,Single-player Multi-player,Action,FPS Action Sci-fi,Valve Single-player Multi-player Action


In [20]:
temp2 = df4["genres"].copy()
df4["publisher"]= df4["publisher"].str.cat(temp2, sep =" ")
df4.head()

Unnamed: 0,index,name,publisher,categories,genres,steamspy_tags,combined_features
0,1,Counter-Strike,Valve Multi-player Action,Multi-player,Action,Action FPS Multiplayer,Valve Multi-player Action
1,2,Team Fortress Classic,Valve Multi-player Action,Multi-player,Action,Action FPS Multiplayer,Valve Multi-player Action
2,3,Day of Defeat,Valve Multi-player Action,Multi-player,Action,FPS World War II Multiplayer,Valve Multi-player Action
3,4,Deathmatch Classic,Valve Multi-player Action,Multi-player,Action,Action FPS Multiplayer,Valve Multi-player Action
4,5,Half-Life: Opposing Force,Valve Single-player Multi-player Action,Single-player Multi-player,Action,FPS Action Sci-fi,Valve Single-player Multi-player Action


In [21]:
df4 = df4.drop(["categories", "genres"], axis=1)
df4.head()

Unnamed: 0,index,name,publisher,steamspy_tags,combined_features
0,1,Counter-Strike,Valve Multi-player Action,Action FPS Multiplayer,Valve Multi-player Action
1,2,Team Fortress Classic,Valve Multi-player Action,Action FPS Multiplayer,Valve Multi-player Action
2,3,Day of Defeat,Valve Multi-player Action,FPS World War II Multiplayer,Valve Multi-player Action
3,4,Deathmatch Classic,Valve Multi-player Action,Action FPS Multiplayer,Valve Multi-player Action
4,5,Half-Life: Opposing Force,Valve Single-player Multi-player Action,FPS Action Sci-fi,Valve Single-player Multi-player Action


In [22]:
df4 = df4.rename(columns = {"publisher":"tags"}, inplace = False)
df4.head()

Unnamed: 0,index,name,tags,steamspy_tags,combined_features
0,1,Counter-Strike,Valve Multi-player Action,Action FPS Multiplayer,Valve Multi-player Action
1,2,Team Fortress Classic,Valve Multi-player Action,Action FPS Multiplayer,Valve Multi-player Action
2,3,Day of Defeat,Valve Multi-player Action,FPS World War II Multiplayer,Valve Multi-player Action
3,4,Deathmatch Classic,Valve Multi-player Action,Action FPS Multiplayer,Valve Multi-player Action
4,5,Half-Life: Opposing Force,Valve Single-player Multi-player Action,FPS Action Sci-fi,Valve Single-player Multi-player Action


In [23]:
df3["combined_features1"] = df3.apply(combine_features,axis=1)

In [24]:
df3.sort_values("play_time", axis = 0, ascending = False,
                 inplace = True, na_position ='first')
df3.head()

Unnamed: 0,index,name,publisher,categories,genres,steamspy_tags,combined_features,user_id,game,behavior,play_time,combined_features1
0,5917,Fallout 4,Bethesda Softworks,Single-player,RPG,Open World Post-apocalyptic Exploration,Bethesda Softworks Single-player RPG,151603712,Fallout 4,play,87.0,Bethesda Softworks Single-player RPG


In [25]:
count_matrix1 = tfidf.fit_transform(df3["combined_features1"])
count_matrix1.shape

(1, 5)

In [26]:
def get_combinef_from_name(name):
    return df[df.name == name]["combined_features"].values[0]

In [27]:
def get_game_from_user(user_id):
    return df3[df3.user_id == user_id][["index","game","combined_features1"]].values[0]

In [28]:
game_index = get_game_from_user(user_id_ip)

In [29]:
print(game_index)

[5917 'Fallout 4' 'Bethesda Softworks Single-player  RPG']


In [30]:
def get_index_from_user_id(user_id):
    return df3[df3.user_id == user_id]["index"].values[0]


In [31]:
g_index = get_index_from_user_id(user_id_ip)
print(g_index)


5917


In [32]:
sim_games =  list(enumerate(cosine_sim[g_index]))

In [33]:
sorted_sim_games = sorted(sim_games,key=lambda x:x[1],reverse=True)

In [34]:
def get_title_from_index(index):
    return df[df.index == index]["name"].values[0]
i=0
for game in sorted_sim_games:
    print(get_title_from_index(game[0]))
    i=i+1
    if i>15:
        break

Edolie
Echoes of Aetheria
Pale Echoes
ã«ããã£ãã¯ãã¼ã³ åéã¸ãéæ¨
ã«ããã£ãã¯ãã¼ã³ ã¬ã¸ã§ã³ãããã¯
Heroes of Legionwood
Aveyond 3-3: The Lost Orb
Remnants Of Isolation
3 Stars of Destiny
Sweet Lily Dreams
Whisper of a Rose
The Princess' Heart
Aveyond 4: Shadow of the Mist
Skyborn
Deadly Sin 2
Legionwood 2: Rise of the Eternal's Realm - Director's Cut
