In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the four source tables. Every path is relative to the notebook's
# working directory.
games = pd.read_csv("games.csv")
users = pd.read_csv("users.csv")
recommendations = pd.read_csv("recommendations.csv")
# The metadata file is JSON Lines: one JSON record per line.
meta_data = pd.read_json(
    "games_metadata.json",
    orient="records",
    lines=True,
)

In [3]:
games.columns

Index(['app_id', 'title', 'date_release', 'win', 'mac', 'linux', 'rating',
       'positive_ratio', 'user_reviews', 'price_final', 'price_original',
       'discount', 'steam_deck'],
      dtype='object')

In [4]:
meta_data.columns

Index(['app_id', 'description', 'tags'], dtype='object')

In [5]:
meta_data['description'][0]

"Call of Duty is back, redefining war like you've never experienced before. Building on the Call of Duty 4®: Modern Warfare engine, Call of Duty: World at War immerses players into the most gritty and chaotic WWII combat ever experienced."

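No preprocessing is applied, since the data contains no null values. Note, however, that some `description` entries are empty strings rather than nulls (see the `meta_data` preview further down).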

In [6]:
games[["positive_ratio", "user_reviews", "price_final", "discount"]].describe()

Unnamed: 0,positive_ratio,user_reviews,price_final,discount
count,48854.0,48854.0,48854.0,48854.0
mean,76.883981,1751.241,8.669736,5.051378
std,18.228963,37958.91,11.522157,17.923545
min,0.0,10.0,0.0,0.0
25%,66.0,20.0,0.99,0.0
50%,81.0,50.0,4.99,0.0
75%,91.0,212.75,11.8775,0.0
max,100.0,6941137.0,299.99,90.0


In [7]:
# Read the app_id stored in the metadata row labelled 0, then echo it.
appid = meta_data.loc[0, 'app_id']
appid

10090

In [8]:
games.columns

Index(['app_id', 'title', 'date_release', 'win', 'mac', 'linux', 'rating',
       'positive_ratio', 'user_reviews', 'price_final', 'price_original',
       'discount', 'steam_deck'],
      dtype='object')

In [9]:
games['app_id'].unique()

array([  10090,   13500,   22364, ..., 1905020, 2439040, 2063610],
      dtype=int64)

In [10]:
meta_data

Unnamed: 0,app_id,description,tags
0,10090,"Call of Duty is back, redefining war like you'...","[Zombies, World War II, FPS, Multiplayer, Acti..."
1,13500,Enter the dark underworld of Prince of Persia ...,"[Action, Adventure, Parkour, Third Person, Gre..."
2,22364,,[Action]
3,113020,Monaco: What's Yours Is Mine is a single playe...,"[Co-op, Stealth, Indie, Heist, Local Co-Op, St..."
4,226560,Escape Dead Island is a Survival-Mystery adven...,"[Zombies, Adventure, Survival, Action, Third P..."
...,...,...,...
48849,1769980,A marvellous romance with a hint of murder. Lo...,"[Visual Novel, Dating Sim, Detective, Romance,..."
48850,2399890,,[Strategy]
48851,1905020,This story-driven Survival-Horror game is abou...,"[Survival Horror, Horror, First-Person, Single..."
48852,2439040,,"[Action, Adventure, RPG, Massively Multiplayer..."


In [11]:
meta_data['description']

0        Call of Duty is back, redefining war like you'...
1        Enter the dark underworld of Prince of Persia ...
2                                                         
3        Monaco: What's Yours Is Mine is a single playe...
4        Escape Dead Island is a Survival-Mystery adven...
                               ...                        
48849    A marvellous romance with a hint of murder. Lo...
48850                                                     
48851    This story-driven Survival-Horror game is abou...
48852                                                     
48853    Manage a team of professional cyclists: contro...
Name: description, Length: 48854, dtype: object

In [12]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [13]:
# Create the TF-IDF vectorizer. stop_words="english" selects scikit-learn's
# built-in English stop-word list, so those words are ignored when the
# vocabulary is built.
tfid = TfidfVectorizer(stop_words="english")

In [14]:
# Learn the vocabulary from the game descriptions and encode them as a sparse
# TF-IDF matrix with one row per row of meta_data.
# NOTE(review): some descriptions are empty strings; presumably those rows end
# up with no stored terms and a similarity of 0 to everything, themselves
# included - confirm before relying on their scores.
tf_matrix = tfid.fit_transform(meta_data['description'])

In [15]:
tf_matrix

<48854x49599 sparse matrix of type '<class 'numpy.float64'>'
	with 717833 stored elements in Compressed Sparse Row format>

In [16]:
# Pairwise dot products between every pair of TF-IDF rows. With the second
# argument omitted, linear_kernel compares the matrix against itself.
# The result is a dense square float64 array: for 48854 descriptions that is
# roughly 19 GB of memory, so this cell needs a machine that can hold it.
similarity = linear_kernel(tf_matrix)

In [17]:
similarity

array([[1.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.00732723],
       [0.        , 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.00732723, 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [18]:
# Reverse lookup: game title -> row label in `games`.
# Titles are used as the index as-is; if a title occurs more than once, a
# lookup by that title returns several rows instead of a single value.
indices = pd.Series(data=games.index, index=games["title"])

In [19]:
indices

title
Call of Duty: World at War                                          0
Prince of Persia: Warrior Within™                                   1
BRINK: Agents of Change                                             2
Monaco: What's Yours Is Mine                                        3
Escape Dead Island                                                  4
                                                                ...  
Mask of the Rose                                                48849
Warhammer 40000: Gladius - Firepower Pack                       48850
Greyhill Incident                                               48851
Black Desert - [Pre-Order] Land of the Morning Light Edition    48852
Pro Cycling Manager 2023                                        48853
Length: 48854, dtype: int64

In [20]:
def recommendations(title,similarity = similarity, top_n = 10):
    """Print the titles of the games whose descriptions best match ``title``.

    Parameters
    ----------
    title : str
        Exact game title to look up in ``indices``.
    similarity : array-like, optional
        Square matrix of pairwise similarity scores, indexed by row position.
        Defaults to the module-level ``similarity`` that exists when this
        function is defined.
        NOTE(review): the matrix is built from ``meta_data`` while positions
        come from ``games``; this assumes both tables share the same row
        order (the previews suggest so) - verify.
    top_n : int, optional
        How many recommendations to print. Defaults to 10.

    Raises
    ------
    KeyError
        If ``title`` does not occur in ``indices``.
    ValueError
        If ``top_n`` is negative.

    Notes
    -----
    Defining this function rebinds the name ``recommendations``, which the
    data-loading cell assigned to the DataFrame read from
    ``recommendations.csv``; that DataFrame is no longer reachable under
    this name afterwards.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")
    if title not in indices.index:
        raise KeyError(f"No game titled {title!r} was found")

    idx = indices[title]
    if not np.isscalar(idx):
        # The title occurs more than once, so the lookup returned a Series;
        # fall back to its first occurrence.
        idx = idx.iloc[0]
    idx = int(idx)

    scores = np.asarray(similarity[idx]).ravel()
    # Stable descending sort: ties keep ascending row order, matching what
    # sorted(..., reverse=True) produced on the enumerated scores.
    ranked = np.argsort(-scores, kind="stable")
    # Drop the queried game explicitly instead of assuming it ranks first:
    # a game with an empty description has a self-similarity of 0, and games
    # with identical descriptions tie with it at the top.
    ranked = ranked[ranked != idx][:top_n]

    print(games['title'].iloc[ranked])

In [25]:
recommendations('Grand Theft Auto V')

25995           Battle Strike World War
41715      Super Powered Battle Friends
5844                      SMASH LEGENDS
44366                  Colors! Platform
19906                          Brawlout
35510                         Conquest!
29630                   Kickoff Legends
10090    Dark Roll: Free Kick Challenge
18624                            KUBOOM
23337                  SLAYERS FOR HIRE
Name: title, dtype: object
