# Building the Recommendation system

In this notebook, I am building a table for the consine dist for both content and reviews

In [2]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import csv
from collections import Counter

# Content: 

Import Game Meta Data

In [3]:
from os import listdir
from os.path import isfile, join
onlyfiles = [f for f in listdir('./game_meta_data/') if isfile(join('./game_meta_data/', f))]
list_of_meta = []
feature_list = ['guid', 'name', 'concepts', 'themes', 'deck', 'developers', 'publishers', 'genres', 'people']
for file in onlyfiles:
    with open(f'./game_meta_data/{file}', 'r') as f:
        game = json.load(f)
        game_dict = {key:value for key, value in game.items() if key in feature_list}

        # convert lists to strings 
        for key, value in game_dict.items():
            if type(value) == list:
                game_dict[key] = ', '.join(value)
        list_of_meta.append(game_dict)
df = pd.DataFrame(list_of_meta)
df.fillna("", inplace=True)

In [4]:
df.head()

Unnamed: 0,concepts,deck,developers,genres,guid,name,people,publishers,themes
0,"Achievements, PlayStation Trophies, Steam, Dig...",Kill The Bad Guy is a puzzle-game where physic...,Exkee,"Strategy, Simulation",3030-46539,Kill the Bad Guy,"Iaromil Stievenard, Anibal G. Hormeche, Sylvai...",,
1,,Zeal is an indie online ARPG developed by Lyca...,Lycanic Studios,"Action, Role-Playing, MOBA",3030-68714,Zeal,,,"Fantasy, Medieval"
2,,Vertical Drop Heroes HD is an action platformer.,Nerdook Productions,"Action, Role-Playing, Platformer",3030-48249,Vertical Drop Heroes HD,Yong Cheol Sim,,Fantasy
3,"Unreal Engine 4, PlayStation VR Support",A puzzle mystery game for PS VR.,Tarsier Studios,Puzzle,3030-57976,Statik,,,
4,,A compilation of all three Banner Saga titles.,Stoic,"Strategy, Role-Playing, Compilation",3030-68731,The Banner Saga Trilogy,,,Fantasy


In [5]:
def split_features_from_col(df, col):
    '''
    Returns a dataframe of 1 hot encoded features from the selected col 
    '''
    df[col] = ['' if entry == None else entry for entry in df[col] ]
    cvec = CountVectorizer(stop_words='english', tokenizer=lambda x: x.split(', '))
    bow = cvec.fit_transform(df[col])
    ret_df  = pd.DataFrame(bow.todense(),
                       columns=map(lambda x: col + "_" + x, cvec.get_feature_names()))
    ret_df.drop(col+'_', 1, inplace=True)
    return ret_df

def split_features(df, list_of_cols):
    '''
    Returns a dataframe of 1 hot encoded features from a list of cols
    '''
    ret_df = df.loc[:, ['name', 'guid']]
    for col in list_of_cols:
        ret_df = pd.merge(ret_df, split_features_from_col(df, col), left_index=True, right_index=True)
        
    return ret_df

In [6]:
dummied_df = split_features(df, ['concepts', 'genres', 'themes', 'developers', 'people'])

Get the Columns that have at least 6 games

In [7]:
thresh = 5
len(dummied_df.iloc[:,2:].sum().sort_values()[dummied_df.iloc[:,2:].sum().sort_values() > thresh])

MemoryError: 

Get the games that have at least 6 features

In [7]:
bit_map = dummied_df.iloc[:,2:].T.sum()>thresh

In [8]:
dummied_df.loc[bit_map, :].shape

(1203, 5071)

In [12]:
dummied_df.set_index("name", inplace=True)

In [13]:
dummied_df.drop("guid", 1, inplace=True)

In [9]:
from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances

In [15]:
sparse_content = sparse.csr_matrix(dummied_df)
distances = pairwise_distances(sparse_content, metric='cosine')
distance_df = pd.DataFrame(distances, index=dummied_df.index, columns=dummied_df.index)
distance_df.head()

name,Kill the Bad Guy,Zeal,Vertical Drop Heroes HD,Statik,The Banner Saga Trilogy,Koi,Giana Sisters: Twisted Dreams,M3 Sono Kuroki Hagane: Mission Memento Mori,Toukiden 2,Atomine,...,Rollercoaster Dreams,Hotline Miami 2: Wrong Number,Flipping Death,Huntdown,The Magic Circle,Ghostbusters,Gran Turismo Sport,Guts and Glory,Mighty No. 9,Sonic Forces
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Kill the Bad Guy,0.0,1.0,1.0,1.0,0.880477,0.890891,0.790835,1.0,1.0,1.0,...,0.732739,0.953476,1.0,0.880477,0.937006,1.0,1.0,1.0,0.798544,1.0
Zeal,1.0,0.0,0.452277,1.0,0.634852,1.0,0.954356,1.0,0.528595,1.0,...,1.0,0.928933,1.0,1.0,0.903775,0.764298,1.0,1.0,0.938454,0.910913
Vertical Drop Heroes HD,1.0,0.452277,0.0,1.0,0.6,1.0,0.9,1.0,0.483602,1.0,...,1.0,0.92215,1.0,1.0,0.894591,0.741801,1.0,1.0,0.86516,0.80482
Statik,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.75,1.0,1.0,1.0,1.0,1.0,0.885292,1.0,1.0,1.0
The Banner Saga Trilogy,0.880477,0.634852,0.6,1.0,0.0,1.0,0.95,1.0,0.741801,1.0,...,1.0,1.0,1.0,1.0,0.894591,1.0,1.0,1.0,1.0,0.90241


In [32]:
search = 'Call of Duty'
for title in df.loc[df['name'].str.contains(search), 'name'].values:
    if title in distance_df.columns:
        print(title)
        print()
        print('Similar Games:')
        print(distance_df[title].sort_values()[1:11])
        print()
        print()


Call of Duty: Black Ops III

Similar Games:
name
Battleborn                        0.671734
FlatOut 4: Total Insanity         0.683277
Call of Duty: WWII                0.684736
Get Even                          0.694871
Call of Duty: Black Ops 4         0.696761
Infinity Runner                   0.708657
Super Mega Baseball 2             0.708657
The Golf Club 2                   0.709341
Call of Duty: Advanced Warfare    0.713466
Defense Grid 2                    0.716736
Name: Call of Duty: Black Ops III, dtype: float64


Call of Duty 4: Modern Warfare

Similar Games:
name
Call of Duty: Ghosts                    0.641836
Battlefield 4                           0.710387
Metal Gear Solid V: The Phantom Pain    0.742681
Grand Theft Auto V                      0.745412
Far Cry 3                               0.746664
Call of Duty: Advanced Warfare          0.759808
Resident Evil 5                         0.764389
Shadow Complex                          0.778199
Far Cry 4                

157.5