In [1]:
import pandas as pd
# Read a small chunk of the file to inspect column data types
games = pd.read_csv('games.csv', encoding='ISO-8859-1')
print(games.dtypes)

id                              int64
title                          object
release_date                   object
developer                      object
publisher                      object
genres                         object
multiplayer_or_singleplayer    object
price                          object
percent_positive               object
win_support                     int64
mac_support                     int64
lin_support                     int64
dtype: object


In [2]:
games.isnull().sum()

id                                0
title                             0
release_date                      0
developer                         0
publisher                         7
genres                            0
multiplayer_or_singleplayer       0
price                             0
percent_positive               1426
win_support                       0
mac_support                       0
lin_support                       0
dtype: int64

In [3]:
games.columns

Index(['id', 'title', 'release_date', 'developer', 'publisher', 'genres',
       'multiplayer_or_singleplayer', 'price', 'percent_positive',
       'win_support', 'mac_support', 'lin_support'],
      dtype='object')

In [4]:
games=games[['id', 'title', 'developer', 'genres']]

In [5]:
games

Unnamed: 0,id,title,developer,genres
0,1,Counter-Strike: Global Offensive,Valve;Hidden Path Entertainment,Action;Free to Play
1,2,Dota 2,Valve,Action;Free to Play;Strategy
2,3,Dead by Daylight,Behaviour Digital Inc.,Action
3,4,Warframe,Digital Extremes,Action;Free to Play
4,5,War Thunder,Gaijin Entertainment,Action;Free to Play;Massively Multiplayer;Simu...
...,...,...,...,...
18583,18584,Malkia,Sports interactive,Indie
18584,18585,Rise,New State,Action;Indie
18585,18586,MERCS,Munchkin's Lair,Indie;Strategy;Early Access
18586,18587,Evil Genius,Elixir Studios,Strategy


In [6]:
games['tags'] = games['developer']+games['genres']

In [7]:
games

Unnamed: 0,id,title,developer,genres,tags
0,1,Counter-Strike: Global Offensive,Valve;Hidden Path Entertainment,Action;Free to Play,Valve;Hidden Path EntertainmentAction;Free to ...
1,2,Dota 2,Valve,Action;Free to Play;Strategy,ValveAction;Free to Play;Strategy
2,3,Dead by Daylight,Behaviour Digital Inc.,Action,Behaviour Digital Inc.Action
3,4,Warframe,Digital Extremes,Action;Free to Play,Digital ExtremesAction;Free to Play
4,5,War Thunder,Gaijin Entertainment,Action;Free to Play;Massively Multiplayer;Simu...,Gaijin EntertainmentAction;Free to Play;Massiv...
...,...,...,...,...,...
18583,18584,Malkia,Sports interactive,Indie,Sports interactiveIndie
18584,18585,Rise,New State,Action;Indie,New StateAction;Indie
18585,18586,MERCS,Munchkin's Lair,Indie;Strategy;Early Access,Munchkin's LairIndie;Strategy;Early Access
18586,18587,Evil Genius,Elixir Studios,Strategy,Elixir StudiosStrategy


In [8]:
new_data  = games.drop(columns=['developer', 'genres'])

In [9]:
new_data

Unnamed: 0,id,title,tags
0,1,Counter-Strike: Global Offensive,Valve;Hidden Path EntertainmentAction;Free to ...
1,2,Dota 2,ValveAction;Free to Play;Strategy
2,3,Dead by Daylight,Behaviour Digital Inc.Action
3,4,Warframe,Digital ExtremesAction;Free to Play
4,5,War Thunder,Gaijin EntertainmentAction;Free to Play;Massiv...
...,...,...,...
18583,18584,Malkia,Sports interactiveIndie
18584,18585,Rise,New StateAction;Indie
18585,18586,MERCS,Munchkin's LairIndie;Strategy;Early Access
18586,18587,Evil Genius,Elixir StudiosStrategy


In [10]:
from sklearn.feature_extraction.text import CountVectorizer

In [11]:
cv=CountVectorizer(max_features=10000, stop_words='english')

In [12]:
cv

In [13]:
vector=cv.fit_transform(new_data['tags'].values.astype('U')).toarray()

In [14]:
vector.shape

(18588, 10000)

In [15]:
from sklearn.metrics.pairwise import cosine_similarity

In [16]:
similarity=cosine_similarity(vector)

In [17]:
similarity

array([[1.        , 0.40824829, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.40824829, 1.        , 0.        , ..., 0.25      , 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.25      , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [18]:
new_data[new_data['title']=="Dead by Daylight"].index[0]

2

In [19]:
distance = sorted(list(enumerate(similarity[2])), reverse=True, key=lambda vector:vector[1])
for i in distance[0:5]:
    print(new_data.iloc[i[0]].title)

Dead by Daylight
BLACK CLOVER: QUARTET KNIGHTS
Wild Guns Reloaded
DisneyPixar Brave: The Video Game
Hentai Shooter 3D


In [20]:
def recommand(movies):
    index=new_data[new_data['title']==movies].index[0]
    distance = sorted(list(enumerate(similarity[index])), reverse=True, key=lambda vector:vector[1])
    for i in distance[0:5]:
        print(new_data.iloc[i[0]].title)

In [21]:
recommand("Insurgency: Sandstorm")

Insurgency: Sandstorm
Insurgency
Day of Infamy
Nancy Drew®: Secret of the Scarlet Hand
The Last Operator


In [22]:
import pickle

In [23]:
pickle.dump(new_data, open('games_list.pkl', 'wb'))

In [24]:
pickle.dump(similarity, open('similarity.pkl', 'wb'))

In [25]:
pickle.load(open('games_list.pkl', 'rb'))

Unnamed: 0,id,title,tags
0,1,Counter-Strike: Global Offensive,Valve;Hidden Path EntertainmentAction;Free to ...
1,2,Dota 2,ValveAction;Free to Play;Strategy
2,3,Dead by Daylight,Behaviour Digital Inc.Action
3,4,Warframe,Digital ExtremesAction;Free to Play
4,5,War Thunder,Gaijin EntertainmentAction;Free to Play;Massiv...
...,...,...,...
18583,18584,Malkia,Sports interactiveIndie
18584,18585,Rise,New StateAction;Indie
18585,18586,MERCS,Munchkin's LairIndie;Strategy;Early Access
18586,18587,Evil Genius,Elixir StudiosStrategy
