In [40]:
import pandas as pd
import numpy as np

In [41]:
# Main games table. Columns read later in this notebook: name, categories,
# genres, developer, publisher, release_date and owners.
prime = pd.read_csv("../DATA/steam.csv")

In [42]:
# Per-game tag table; it carries an 'appid' column that is dropped before use.
# NOTE(review): Similarity() indexes this table with row positions taken from
# `prime`, so both files are presumably in the same row order - confirm.
tags = pd.read_csv("../DATA/steamspy_tag_data.csv")

In [43]:
# Each of these columns packs several labels into one ';'-separated string;
# break every cell into a list of labels, one Series per column.
cats, gens, devs, pubs = (
    prime[column].str.split(';')
    for column in ('categories', 'genres', 'developer', 'publisher')
)

In [44]:
from sklearn.preprocessing import MultiLabelBinarizer
# One shared binarizer instance. Every fit_transform() call refits it, so
# mlb.classes_ only ever describes the most recent column it was fitted on.
mlb = MultiLabelBinarizer()

In [45]:
# Turn each list-of-labels Series into a 0/1 indicator frame (one row per game,
# one column per distinct label). Order matters only for the shared `mlb`,
# which ends up fitted on publishers. Columns are plain integers because the
# frames are built from the raw arrays.
cats, gens, devs, pubs = [
    pd.DataFrame(mlb.fit_transform(label_lists))
    for label_lists in (cats, gens, devs, pubs)  # Categories, Genres, Devs, Publishers
]

In [None]:
# Tag feature matrix: everything in `tags` except the identifier column.
tagsf = tags.drop(columns='appid')

In [46]:
# First '-'-separated piece of release_date, kept as a string
# (presumably the year of a YYYY-MM-DD date - confirm against the data).
# expand=True splits in one vectorized pass instead of building a pd.Series
# per row via .apply(pd.Series); n=1 stops after the first separator since
# only column 0 is needed.
year = prime['release_date'].str.split('-', n=1, expand=True)[0]

In [47]:
# Collapse the '-'-separated `owners` string (presumably a "low-high" range -
# confirm against the data) into a single number per game: the mean of its
# numeric parts.
own = (
    prime['owners']
    .str.split('-', expand=True)   # vectorized; avoids a pd.Series per row
    .apply(pd.to_numeric)          # each split column: str -> number
    .mean(axis=1)                  # row-wise mean -> one Series
    # The result is a Series, so assigning `.columns` would only attach a stray
    # attribute; naming the Series is what actually labels it.
    .rename('mean')
)

In [48]:
# Two-column lookup table: game title alongside its mean owner estimate.
st = pd.DataFrame({
    'name': prime['name'],
    'own': own,
})

In [49]:
# Candidate pool for recommendations: titles whose owner estimate exceeds 10000.
names = st.loc[st['own'] > 10000, 'name']

In [52]:
from sklearn.metrics.pairwise import cosine_similarity as cs

In [53]:
def Similarity(game1, game2, weights=(10, 6.125, 6.125, 10, 2.5)):
    """Weighted cosine similarity between two games.

    A cosine similarity is computed separately on each feature table
    (tags, categories, genres, developers, publishers) and the five
    scores are combined as a weighted sum.

    Parameters
    ----------
    game1, game2 : str
        Exact titles as they appear in ``prime['name']``. If a title occurs
        more than once, its first occurrence is used.
    weights : sequence of 5 numbers, optional
        Multipliers for the (tags, categories, genres, developers,
        publishers) scores, in that order. The defaults sum to 34.75.

    Returns
    -------
    numpy.ndarray
        A 1x1 array holding the combined score (callers read it as
        ``result[0][0]``).

    Raises
    ------
    IndexError
        If either title is not present in ``prime['name']``.
    ValueError
        If ``weights`` does not contain exactly five entries.
    """
    # NOTE(review): every table is indexed by row position taken from `prime`,
    # so tagsf/cats/gens/devs/pubs are assumed to be row-aligned with it.
    feature_tables = (tagsf, cats, gens, devs, pubs)
    if len(weights) != len(feature_tables):
        raise ValueError(
            "weights must have %d entries, got %d"
            % (len(feature_tables), len(weights))
        )

    def _row_of(title):
        # Positional lookup: .iloc below needs a position, not an index label,
        # so this stays correct even if `prime` has a non-default index.
        hits = np.flatnonzero((prime['name'] == title).to_numpy())
        if hits.size == 0:
            raise IndexError("game not found in dataset: %r" % (title,))
        return hits[0]

    pos1 = _row_of(game1)
    pos2 = _row_of(game2)

    # Accumulate in table order so the floating-point sum is formed in the
    # same sequence as tags + categories + genres + developers + publishers.
    total = 0.0
    for weight, table in zip(weights, feature_tables):
        vec1 = table.iloc[pos1].values.reshape(1, -1)
        vec2 = table.iloc[pos2].values.reshape(1, -1)
        total = total + weight * cs(vec1, vec2)

    return total

In [55]:
import operator

In [56]:
def predict():
    """Interactively recommend the ten games most similar to a chosen title.

    Prompts for a search string, prints every title in ``prime['name']``
    containing it, then prompts for the exact title to use. That title is
    scored with ``Similarity`` against every other title in ``names``.

    Returns
    -------
    list of (str, float)
        Up to ten ``(title, score)`` pairs, highest score first. Empty if the
        chosen title is not in the dataset.
    """
    text = input('Enter a game title : ')
    print("Games that match : ")
    # regex=False: treat the input as literal text, so titles containing
    # characters such as '+', '(' or '[' cannot raise a regex error.
    # na=False: a missing name counts as "no match" instead of producing a
    # NaN that breaks the boolean mask.
    matches = prime['name'].str.contains(text, regex=False, na=False)
    game = prime.loc[matches, 'name']
    print(game.to_string(index = False))
    new = input('Enter the game : ')
    print('Selected game : ', new)

    # Fail softly on a typo rather than crashing inside Similarity().
    if not (prime['name'] == new).any():
        print('Game not found in dataset : ', new)
        return []

    sims = [
        (title, Similarity(new, title)[0][0])
        for title in names
        if title != new
    ]

    # Highest similarity first; sort is stable, so ties keep `names` order.
    sims.sort(key=operator.itemgetter(1), reverse = True)

    return sims[:10]

In [57]:
# Interactive demo: asks for a search string and then an exact title on stdin;
# the returned top-10 (title, score) list is shown as the cell output.
predict()

Enter a game title : Counter
Games that match : 
                      Counter-Strike
      Counter-Strike: Condition Zero
              Counter-Strike: Source
    Counter-Strike: Global Offensive
       Counter-Strike Nexon: Zombies
                       Counter Spell
         CTU: Counter Terrorism Unit
                       CounterAttack
                      Counter Agents
                       Counter Fight
      Counter Fight: Samurai Edition
                       Graze Counter
        Special Counter Force Attack
祭品的逆襲 The Counterattack Of Sacrifice
                     Counter Fight 3
Enter the game : Counter-Strike: Global Offensive
Selected game :  Counter-Strike: Global Offensive


[('Team Fortress 2', 26.569375864877323),
 ('Counter-Strike: Source', 26.509255051107218),
 ('Day of Defeat: Source', 25.500124426074834),
 ('Counter-Strike: Condition Zero', 24.768705594546557),
 ('Counter-Strike', 24.465001147524923),
 ('Day of Defeat', 24.252964897346565),
 ('Left 4 Dead 2', 24.18548430938463),
 ('Half-Life Deathmatch: Source', 23.94867392302621),
 ('Half-Life 2: Deathmatch', 23.8798293227111),
 ('Team Fortress Classic', 23.546664203084582)]