In [94]:
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

In [95]:
#importing and cleaning dataset
gamedf=pd.read_csv(r"\Users\Zain\Downloads\Dataset\vgsales.csv")
gamedf2=gamedf
gamedf.convert_dtypes().dtypes

Rank              Int64
Name             string
Platform         string
Year              Int64
Genre            string
Publisher        string
NA_Sales        Float64
EU_Sales        Float64
JP_Sales        Float64
Other_Sales     Float64
Global_Sales    Float64
Unnamed: 11       Int64
dtype: object

In [96]:
gamedf=gamedf.dropna()
gamedf=gamedf.drop('Year',axis=1)
gamedf=gamedf.drop('NA_Sales',axis=1)
gamedf=gamedf.drop('EU_Sales',axis=1)
gamedf=gamedf.drop('JP_Sales',axis=1)
gamedf=gamedf.drop('Other_Sales',axis=1)

In [97]:
#lists of columns
genreList = gamedf['Genre'].unique().tolist()
publisherList =gamedf['Publisher'].unique().tolist()
platformList=gamedf['Platform'].unique().tolist()

In [98]:
#for multiple genres
def binarygen(genre_list):
    binaryList = []

    for genre in genreList:
        if genre in genre_list:
            binaryList.append(1)
        else:
            binaryList.append(0)
    return binaryList

In [99]:
def binarypub(publisher_list):
    binaryList = []

    for pub in publisherList:
        if pub in publisher_list:
            binaryList.append(1)
        else:
            binaryList.append(0)

    return binaryList

In [100]:
def binaryplat(plat_list):
    binaryList=[]

    for plat in platformList:
        if plat in plat_list:
            binaryList.append(1)
        else:
            binaryList.append(0)

    return binaryList


In [101]:
gamedf['Genre_multiple'] = gamedf['Genre'].apply(lambda x: binarygen(x))
gamedf['Plat_multiple'] = gamedf['Platform'].apply(lambda x: binaryplat(x))
gamedf['Publisher_multiple'] = gamedf['Publisher'].apply(lambda x: binarypub(x))

In [102]:
#sciPy Spatial library helps in implementing K nearest neighbor point, and utilities for distance computation
#Using Cosine Similarity for finding the similarity between 2 games.
from scipy import spatial
def CosineSimilarity(n1, n2):
    a = gamedf.iloc[n1]
    b = gamedf.iloc[n2]

    genreA = a['Genre_multiple']
    genreB = b['Genre_multiple']

    genreCoDistance = spatial.distance.cosine(genreA, genreB)
    PlatformA = a['Plat_multiple']
    PlatformB = b['Plat_multiple']
    PlatformCoDistance = spatial.distance.cosine(PlatformA, PlatformB)

    PublisherA = a['Publisher_multiple']
    PublisherB = b['Publisher_multiple']
    PublisherCoDistance = spatial.distance.cosine(PublisherA, PublisherB)
    return genreCoDistance+PlatformCoDistance+PublisherCoDistance
print(CosineSimilarity(9,85))


3.0


In [103]:
import operator
def getCosineNeighbors(basegame, K):
    distances = []

    for index, game in gamedf.iterrows():
        if game['Rank'] == 16291:
            break
        if game['Rank'] != basegame['Rank'].values[0]:
            dist = CosineSimilarity(basegame['Rank'].values[0], game['Rank'])
            distances.append((game['Rank'], dist))

    distances.sort(key=operator.itemgetter(1))
    neighbors = []

    for x in range(K):
        neighbors.append(distances[x])
    return neighbors

In [104]:
def predictscore():
    name = input('Enter a game name :  ')
    new_game = gamedf[gamedf['Name'].str.contains(name)].iloc[0].to_frame().T
    print('Selected Game: ',new_game.Name.values[0])
    Cosinepredict_score(new_game)
    Euclideanpredict_score(new_game)
    return new_game
    

In [105]:
def Cosinepredict_score(new_game):
    
    K = 10
    neighbors = getCosineNeighbors(new_game, K)
    print(neighbors)
    print('\nCosine Recommended Games: \n')
    for neighbor in neighbors:
        print( gamedf.iloc[neighbor[0]][1] )

    print('\n')

In [106]:
def EuclideanSimilarity(n1, n2):
    a = gamedf.iloc[n1]
    b = gamedf.iloc[n2]

    genreA = a['Genre_multiple']
    genreB = b['Genre_multiple']

    genreCoDistance = spatial.distance.euclidean(genreA, genreB)
    PlatformA = a['Plat_multiple']
    PlatformB = b['Plat_multiple']
    PlatformCoDistance = spatial.distance.euclidean(PlatformA, PlatformB)

    PublisherA = a['Publisher_multiple']
    PublisherB = b['Publisher_multiple']
    PublisherCoDistance = spatial.distance.euclidean(PublisherA, PublisherB)
    return genreCoDistance+PlatformCoDistance+PublisherCoDistance
print(EuclideanSimilarity(9,85))


4.242640687119286


In [107]:
def getEuclideanNeighbors(basegame, K):
    distances = []

    for index, game in gamedf.iterrows():
        if game['Rank'] == 16291:
            break
        if game['Rank'] != basegame['Rank'].values[0]:
            dist = EuclideanSimilarity(basegame['Rank'].values[0], game['Rank'])
            distances.append((game['Rank'], dist))

    distances.sort(key=operator.itemgetter(1))
    print(distances)
    neighbors = []
    for x in range(K):
        neighbors.append(distances[x])
    return neighbors

In [108]:
def Euclideanpredict_score(new_game):
    K = 10
    neighbors = getCosineNeighbors(new_game, K)
    print('\nEuclidean Recommended Games: \n')
    for neighbor in neighbors:
        print( gamedf.iloc[neighbor[0]][1] )

    print('\n')

In [109]:
def getCosineDistances(basegame):
    distances = []

    for index, game in gamedf.iterrows():
        if game['Rank'] == 16291:
            break
        if game['Rank'] != basegame['Rank'].values[0]:
            dist = CosineSimilarity(basegame['Rank'].values[0], game['Rank'])
            distances.append((game['Rank'], dist))

    list= []
    distances.sort(key=operator.itemgetter(1))
    for i in distances :
        list.append(i[0])
    return list
    

In [110]:
def getEuclideanDistances(basegame):
    distances = []

    for index, game in gamedf.iterrows():
        if game['Rank'] == 16291:
            break
        if game['Rank'] != basegame['Rank'].values[0]:
            dist = EuclideanSimilarity(basegame['Rank'].values[0], game['Rank'])
            distances.append((game['Rank'], dist))

    list= []
    distances.sort(key=operator.itemgetter(1))
    for i in distances :
        list.append(i[0])
    return list

bg=predictscore()

Selected Game:  Call of Duty: Modern Warfare 3
[(31, 0), (35, 0), (36, 0), (61, 0), (70, 0)]

Cosine Recommended Games: 

Call of Duty: Black Ops
Call of Duty: Black Ops II
Call of Duty: Modern Warfare 2
Call of Duty: Ghosts
Call of Duty 4: Modern Warfare



Euclidean Recommended Games: 

Call of Duty: Black Ops
Call of Duty: Black Ops II
Call of Duty: Modern Warfare 2
Call of Duty: Ghosts
Call of Duty 4: Modern Warfare




In [111]:
ed=getEuclideanDistances(bg)
cd=getCosineDistances(bg)
publabel=bg['Publisher_multiple'].tolist()
genlabel=bg['Genre_multiple'].tolist()
platlabel=bg['Plat_multiple'].tolist()

In [124]:
# Evaluate metrics for cosine similarity
list_acc=[]
list_rc=[]
list_f1=[]
list_precision=[]
for i in range(100) :
    cosine_predicted_labels = gamedf['Publisher_multiple'].values[cd[i]]

    pubcosine_accuracy = accuracy_score(publabel[0], cosine_predicted_labels)
    pubcosine_precision = precision_score(publabel[0], cosine_predicted_labels)
    pubcosine_recall = recall_score(publabel[0], cosine_predicted_labels)
    pubcosine_f1 = f1_score(publabel[0], cosine_predicted_labels)

    cosine_predicted_labels = gamedf['Genre_multiple'].values[cd[i]]

    gencosine_accuracy = accuracy_score(genlabel[0], cosine_predicted_labels)
    gencosine_precision = precision_score(genlabel[0], cosine_predicted_labels)
    gencosine_recall = recall_score(genlabel[0], cosine_predicted_labels)
    gencosine_f1 = f1_score(genlabel[0], cosine_predicted_labels)

    cosine_predicted_labels = gamedf['Plat_multiple'].values[cd[i]]

    platcosine_accuracy = accuracy_score(platlabel[0], cosine_predicted_labels)
    platcosine_precision = precision_score(platlabel[0], cosine_predicted_labels)
    platcosine_recall = recall_score(platlabel[0], cosine_predicted_labels)
    platcosine_f1 = f1_score(platlabel[0], cosine_predicted_labels)
#mean accuracy
    cosine_accuracy=(pubcosine_accuracy+gencosine_accuracy+platcosine_accuracy)/3
    cosine_precision=(pubcosine_precision+gencosine_precision+platcosine_precision)/3
    cosine_recall=(pubcosine_recall+gencosine_recall+platcosine_recall)/3
    cosine_f1=(pubcosine_f1+gencosine_f1+platcosine_f1)/3
    
    list_acc.append(cosine_accuracy)
    list_rc.append(cosine_recall)
    list_f1.append(cosine_f1)
    list_precision.append(cosine_precision)

cosine_accuracy=sum(list_acc)/100
cosine_f1=sum(list_f1)/100
cosine_recall=sum(list_rc)/100
cosine_precision=sum(list_precision)/100

print("Cosine Accuracy : ")
print(cosine_accuracy*100)
print("Cosine f1 : ")
print(cosine_f1*100)
print("Cosine Recall : ")
print(cosine_recall*100)
print("Cosine Precision : ")
print(cosine_precision*100)

Cosine Accuracy : 
98.7156511350061
Cosine f1 : 
76.88888888888887
Cosine Recall : 
76.99999999999999
Cosine Precision : 
76.83333333333331


In [125]:
# Evaluate metrics for euclidean similarity
list_acc=[]
list_rc=[]
list_f1=[]
list_precision=[]
for i in range(100) :
    euclidean_predicted_labels = gamedf['Publisher_multiple'].values[ed[i]]

    pubeuclidean_accuracy = accuracy_score(publabel[0], euclidean_predicted_labels)
    pubeuclidean_precision = precision_score(publabel[0], euclidean_predicted_labels)
    pubeuclidean_recall = recall_score(publabel[0], euclidean_predicted_labels)
    pubeuclidean_f1 = f1_score(publabel[0], euclidean_predicted_labels)

    euclidean_predicted_labels = gamedf['Genre_multiple'].values[ed[i]]

    geneuclidean_accuracy = accuracy_score(genlabel[0], euclidean_predicted_labels)
    geneuclidean_precision = precision_score(genlabel[0], euclidean_predicted_labels)
    geneuclidean_recall = recall_score(genlabel[0], euclidean_predicted_labels)
    geneuclidean_f1 = f1_score(genlabel[0], euclidean_predicted_labels)

    euclidean_predicted_labels = gamedf['Plat_multiple'].values[ed[i]]

    plateuclidean_accuracy = accuracy_score(platlabel[0], euclidean_predicted_labels)
    plateuclidean_precision = precision_score(platlabel[0], euclidean_predicted_labels)
    plateuclidean_recall = recall_score(platlabel[0], euclidean_predicted_labels)
    plateuclidean_f1 = f1_score(platlabel[0], euclidean_predicted_labels)
#mean accuracy
    euclidean_accuracy=(pubeuclidean_accuracy+geneuclidean_accuracy+plateuclidean_accuracy)/3
    euclidean_precision=(pubeuclidean_precision+geneuclidean_precision+plateuclidean_precision)/3
    euclidean_recall=(pubeuclidean_recall+geneuclidean_recall+plateuclidean_recall)/3
    euclidean_f1=(pubeuclidean_f1+geneuclidean_f1+plateuclidean_f1)/3
    
    list_acc.append(euclidean_accuracy)
    list_rc.append(euclidean_recall)
    list_f1.append(euclidean_f1)
    list_precision.append(euclidean_precision)

euclidean_accuracy=sum(list_acc)/100
euclidean_f1=sum(list_f1)/100
euclidean_recall=sum(list_rc)/100
euclidean_precision=sum(list_precision)/100

print("euclidean Accuracy : ")
print(euclidean_accuracy*100)
print("euclidean f1 : ")
print(euclidean_f1*100)
print("euclidean Recall : ")
print(euclidean_recall*100)
print("euclidean Precision : ")
print(euclidean_precision*100)

euclidean Accuracy : 
98.84612081839917
euclidean f1 : 
76.88888888888887
euclidean Recall : 
76.99999999999999
euclidean Precision : 
76.83333333333331
