In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from nltk.corpus import stopwords
sw = set(stopwords.words('english'))

from nltk.stem import PorterStemmer
ps = PorterStemmer()

In [2]:
# Load the raw game table. The file contains an extra integer column that
# pandas names 'Unnamed: 0' (listed by df.info() below) -- presumably a row
# index saved along with the data; it is dropped in a later cell.
df = pd.read_csv('PC_Games_2021.csv')

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,Title,Description,Release Date,Metascore,User Score
0,0,Disco Elysium: The Final Cut,Disco Elysium - The Final Cut is the definitiv...,"March 30, 2021",97,8.2
1,1,Half-Life 2,[Metacritic's 2004 PC Game of the Year] By ta...,"November 16, 2004",96,9.2
2,2,Grand Theft Auto V,Los Santos: a sprawling sun-soaked metropolis ...,"April 13, 2015",96,7.8
3,3,The Orange Box,Games included in The Orange Box compilation: ...,"October 10, 2007",96,9.3
4,4,Half-Life,Half-Life combines great storytelling in the t...,"November 19, 1998",96,9.1


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4500 entries, 0 to 4499
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Unnamed: 0    4500 non-null   int64 
 1   Title         4500 non-null   object
 2   Description   4468 non-null   object
 3   Release Date  4500 non-null   object
 4   Metascore     4500 non-null   int64 
 5   User Score    4500 non-null   object
dtypes: int64(2), object(4)
memory usage: 211.1+ KB


In [5]:
# Moves the current row index into a new 'index' column (visible in the next
# df.head()); that column is removed again by the drop() cell that follows.
df.reset_index(inplace=True)

In [6]:
df.head()

Unnamed: 0.1,index,Unnamed: 0,Title,Description,Release Date,Metascore,User Score
0,0,0,Disco Elysium: The Final Cut,Disco Elysium - The Final Cut is the definitiv...,"March 30, 2021",97,8.2
1,1,1,Half-Life 2,[Metacritic's 2004 PC Game of the Year] By ta...,"November 16, 2004",96,9.2
2,2,2,Grand Theft Auto V,Los Santos: a sprawling sun-soaked metropolis ...,"April 13, 2015",96,7.8
3,3,3,The Orange Box,Games included in The Orange Box compilation: ...,"October 10, 2007",96,9.3
4,4,4,Half-Life,Half-Life combines great storytelling in the t...,"November 19, 1998",96,9.1


In [7]:
# Remove the two redundant row-number columns: 'index' (added by the
# reset_index() cell above) and 'Unnamed: 0' (read from the CSV).
# errors='ignore' keeps this cell safe to re-run once the columns are gone;
# without it a second run raises KeyError.
df.drop(columns=['index', 'Unnamed: 0'], errors='ignore', inplace=True)

In [8]:
df.head()

Unnamed: 0,Title,Description,Release Date,Metascore,User Score
0,Disco Elysium: The Final Cut,Disco Elysium - The Final Cut is the definitiv...,"March 30, 2021",97,8.2
1,Half-Life 2,[Metacritic's 2004 PC Game of the Year] By ta...,"November 16, 2004",96,9.2
2,Grand Theft Auto V,Los Santos: a sprawling sun-soaked metropolis ...,"April 13, 2015",96,7.8
3,The Orange Box,Games included in The Orange Box compilation: ...,"October 10, 2007",96,9.3
4,Half-Life,Half-Life combines great storytelling in the t...,"November 19, 1998",96,9.1


In [9]:
# Discard every row containing at least one missing value. Per the earlier
# df.info() output only 'Description' has gaps (4468 of 4500 non-null).
df.dropna(axis=0, how='any', inplace=True)

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4468 entries, 0 to 4499
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Title         4468 non-null   object
 1   Description   4468 non-null   object
 2   Release Date  4468 non-null   object
 3   Metascore     4468 non-null   int64 
 4   User Score    4468 non-null   object
dtypes: int64(1), object(4)
memory usage: 209.4+ KB


In [11]:
# Renumber rows 0..n-1 after dropna() so that row labels line up with the
# positional rows of the similarity matrix built later.
# drop=True discards the old labels instead of inserting them as an unused
# 'index' column, and makes the cell safe to run more than once.
df.reset_index(drop=True, inplace=True)

In [12]:
# Work on a copy so the column changes made below do not touch df.
complete_data = df.copy()

In [13]:
# The Title column, kept under the name `pub`; the next cell stores it as 'Publisher'.
pub = complete_data['Title']

In [14]:
# NOTE(review): 'Publisher' is a straight copy of the Title column -- no
# publisher data is loaded anywhere in this notebook. The column name ends up
# in the pickled dict written at the end, so confirm with consumers before
# renaming it.
complete_data['Publisher'] = pub

In [15]:
# Keep only the two columns the recommender needs. The explicit .copy() makes
# the result an independent frame, so adding the 'Pub' column later does not
# trigger pandas' SettingWithCopyWarning.
complete_data = complete_data[['Title','Publisher']].copy()
complete_data.head()

Unnamed: 0,Title,Publisher
0,Disco Elysium: The Final Cut,Disco Elysium: The Final Cut
1,Half-Life 2,Half-Life 2
2,Grand Theft Auto V,Grand Theft Auto V
3,The Orange Box,The Orange Box
4,Half-Life,Half-Life


In [16]:
def _stem_title(text):
    """Split ``text`` on whitespace, drop English stop words, stem the rest.

    Stop-word matching is case-insensitive; the surviving words are passed to
    the Porter stemmer ``ps``. Returns the list of stemmed tokens.
    """
    return [ps.stem(word) for word in text.split() if word.lower() not in sw]


# One token list per row. A comprehension with its own loop variable is used
# so that the `pub` Series defined earlier is not overwritten by a plain
# string (which would corrupt 'Publisher' if that earlier cell were re-run).
new_desc = [_stem_title(title_text) for title_text in complete_data['Publisher']]

In [17]:
# Attach the per-row token lists from new_desc as a new 'Pub' column.
complete_data['Pub'] = new_desc

In [18]:
# Collapse each token list into one space-separated string for the vectorizer.
# Building from new_desc (rather than from the column itself) keeps the cell
# idempotent: calling .str.join(" ") a second time on already-joined strings
# would insert a space between every character.
complete_data['Pub'] = [" ".join(tokens) for tokens in new_desc]

In [19]:
complete_data.head()

Unnamed: 0,Title,Publisher,Pub
0,Disco Elysium: The Final Cut,Disco Elysium: The Final Cut,disco elysium: final cut
1,Half-Life 2,Half-Life 2,half-lif 2
2,Grand Theft Auto V,Grand Theft Auto V,grand theft auto v
3,The Orange Box,The Orange Box,orang box
4,Half-Life,Half-Life,half-lif


In [20]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words counts over the stemmed titles; `cv` holds the resulting
# document-term matrix (one row per game), not the vectorizer object.
vectorizer = CountVectorizer()
cv = vectorizer.fit_transform(complete_data['Pub'])

In [21]:
from sklearn.metrics.pairwise import cosine_similarity
# Pairwise cosine similarity between every two rows of cv. The result is a
# square array (4468 x 4468 in the recorded run) whose entry [i, j] is the
# similarity between game i and game j.
cs = cosine_similarity(cv)

In [22]:
cs

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [23]:
cs.shape

(4468, 4468)

In [27]:
Title = "Assassin's Creed II"
Title

"Assassin's Creed II"

In [28]:
def recommend_game(game_name, top_n=15, data=None, similarity=None):
    """Print the titles most similar to ``game_name``, best match first.

    Parameters
    ----------
    game_name : str
        Exact value to look up in the ``Title`` column.
    top_n : int, default 15
        Maximum number of recommendations to print.
    data : pandas.DataFrame, optional
        Frame with a ``Title`` column. Defaults to the notebook-level
        ``complete_data``.
    similarity : array-like, optional
        Square similarity matrix whose row/column order matches the row order
        of ``data``. Defaults to the notebook-level ``cs``.

    Returns
    -------
    None
        The titles are printed, one per line.

    Raises
    ------
    IndexError
        If no row has ``Title == game_name`` (same exception type as the
        previous implementation, with an explicit message).
    """
    if data is None:
        data = complete_data
    if similarity is None:
        similarity = cs

    # Look the title up by *position*, because the similarity matrix is
    # positional; this stays correct even if data's index is not 0..n-1.
    matches = [pos for pos, title in enumerate(data["Title"]) if title == game_name]
    if not matches:
        raise IndexError(f"No game titled {game_name!r} was found")
    row = matches[0]

    # Exclude the queried row explicitly instead of assuming it sorts first:
    # another row can tie with it at similarity 1.0 and take the first slot.
    candidates = [
        (pos, score) for pos, score in enumerate(similarity[row]) if pos != row
    ]
    # sorted() is stable, so ties keep their original row order.
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:top_n]

    for pos, _score in ranked:
        print(data.iloc[pos].Title)

In [36]:
recommend_game("FIFA 16")

FIFA 17
FIFA 15
FIFA 18
FIFA 21
FIFA Soccer 12
FIFA Soccer 13
FIFA Soccer 11
FIFA Soccer 2003
FIFA Soccer 06
FIFA 07 Soccer
FIFA Soccer 2002
FIFA Soccer 09
FIFA Soccer 08
FIFA Soccer 2005
FIFA Soccer 2004


In [33]:
import pickle

In [34]:
# Persist the title table as a plain dict. The context manager guarantees the
# file is flushed and closed, even if pickling raises.
with open('PC_Games_withpub.pkl', 'wb') as fh:
    pickle.dump(complete_data.to_dict(), fh)

In [35]:
# Persist the similarity matrix. The context manager guarantees the file is
# flushed and closed, even if pickling raises.
with open('cs1.pkl', 'wb') as fh:
    pickle.dump(cs, fh)