In [None]:
import pandas as pd
import numpy as np
import json
import re

#https://bl.ocks.org/rofrischmann/0de01de85296591eb45a1dde2040c5a1

In [None]:
# Load the extra-details table and order it by metascore, highest first
details = (
    pd.read_csv("../Data/extra_details_complete.csv", index_col=0)
    .sort_values('metascore', ascending=False)
)

## Find specific game and all its neighbours

In [None]:
# Filter database first for one platform only
ps3_details = details.loc[details['console'] == 'PS3']

# Substring search used to find the exact spelling of a title.
# na=False treats missing names as non-matches, and the search is the cell's
# last expression so that its result is actually rendered (an expression in
# the middle of a cell is evaluated but never displayed).
ps3_details.loc[ps3_details['name'].str.contains('Club', na=False)]


## Functions for finding root, finding possible neighbours, making links and making nodes

In [None]:
def GetGameFeatures(game, console_dataframe):
    '''Summarise the features of one game from a console-specific dataframe.

    The first row whose 'name' equals `game` is read. Text cells holding
    comma-separated values are split into lists; the two multiplayer columns
    are reduced to booleans.

    Returns a dict with the keys
      'genres', 'developers', 'publisher' : list of strings, or '' when the
          cell is not a string
      'franchise' : list of strings, or False when the cell is not a string
      'online mp' : False when the cell is not a string or is
          'No Online Multiplayer', '' or ' '; True otherwise
      'offline mp' : False when the cell is not a string or is
          '1 Player', '' or ' '; True otherwise

    Raises IndexError when no row matches `game`.
    '''
    matched = console_dataframe.loc[console_dataframe['name'] == game]

    def first_cell(column):
        # Value of `column` in the first matching row
        return list(matched[column])[0]

    def as_tokens(cell, fallback):
        # Comma-split string cells; anything else collapses to `fallback`
        return cell.split(',') if type(cell) == str else fallback

    def has_multiplayer(cell, single_player_label):
        # Only a non-blank string other than the single-player label counts
        if type(cell) != str:
            return False
        return cell not in (single_player_label, '', ' ')

    franchise_cell = first_cell('franchise')
    developer_cell = first_cell('developer')
    publisher_cell = first_cell('publisher')
    genre_cell = first_cell('genre(s)')
    online_cell = first_cell('number of online players')
    offline_cell = first_cell('number of players')

    return {
        'genres': as_tokens(genre_cell, ''),
        'developers': as_tokens(developer_cell, ''),
        'publisher': as_tokens(publisher_cell, ''),
        'franchise': as_tokens(franchise_cell, False),
        'online mp': has_multiplayer(online_cell, 'No Online Multiplayer'),
        'offline mp': has_multiplayer(offline_cell, '1 Player'),
    }

def GetReccomendedGames(game_feat_dict, details_df):
    '''Collect, for each category of the root game, the rows of `details_df` sharing it.

    Parameters
    ----------
    game_feat_dict : dict
        Feature dictionary with the keys 'franchise', 'developers', 'publisher',
        'genres', 'offline mp' and 'online mp' (the shape GetGameFeatures returns).
    details_df : pandas.DataFrame
        Games to search. The columns 'franchise', 'developer', 'publisher',
        'genre(s)', 'number of players' and 'number of online players' are read.

    Returns
    -------
    list
        Six dicts in the order [franchise, developer, publisher, genre,
        offline, online]. Each maps a category label to the dataframe of
        matching rows; duplicates across categories are kept.

    Notes
    -----
    Only the first franchise entry is searched, by exact equality, and
    publishers are also compared by exact equality. Developers and genres are
    matched as whole entries of a comma-separated cell, so 'Action' matches
    neither 'Action Adventure' nor 'Sci-Fi Action'. Rows whose cell is missing
    never match.
    '''
    franchise = game_feat_dict['franchise']
    developers = game_feat_dict['developers']
    publishers = game_feat_dict['publisher']
    genres = game_feat_dict['genres']
    offline = game_feat_dict['offline mp']
    online = game_feat_dict['online mp']

    no_rows = details_df.iloc[0:0]

    def rows_listing(column, token):
        # Whole-entry match inside a comma-separated cell. The token is
        # escaped so names such as 'Studio (UK)' are taken literally, the
        # groups are non-capturing to avoid the pandas match-group warning,
        # and na=False keeps missing cells out of the boolean mask.
        pattern = r'(?:^|,)\s*' + re.escape(token.strip()) + r'\s*(?:,|$)'
        return details_df.loc[details_df[column].str.contains(pattern, na=False)]

    if franchise:
        franchise_games = {franchise[0]: details_df.loc[details_df['franchise'] == franchise[0]]}
    else:
        franchise_games = {'No Franchise': no_rows}

    dev_games = {dev: rows_listing('developer', dev) for dev in developers}

    pub_games = {pub: details_df.loc[details_df['publisher'] == pub] for pub in publishers}

    genre_games = {genre: rows_listing('genre(s)', genre) for genre in genres}

    # A player-count cell containing an 's' (as in "Players") is treated as multiplayer
    if offline:
        offline_games = {'offline': details_df.loc[details_df['number of players'].str.contains('s', na=False)]}
    else:
        offline_games = {'offline': no_rows}

    if online:
        online_games = {'online': details_df.loc[details_df['number of online players'].str.contains('s', na=False)]}
    else:
        online_games = {'online': no_rows}

    return [franchise_games, dev_games, pub_games, genre_games, offline_games, online_games]
        
def MakeTopNLinks(source, values_df, N, already_included):
    '''Link a category node to the first N unused games of `values_df`.

    Names are taken from values_df['name'] in their existing order. Each
    visited name yields one link dict {'source', 'target', 'strength'}:
      0.2   when the name is the root (the first entry of `already_included`),
      0.001 when the name was already used,
      0.1   when the name is new; it is then recorded as used and counted.
    Walking stops once N new names have been counted.

    Returns a tuple (links, used_names); `already_included` itself is not
    modified, the returned list is an extended copy.
    '''
    seen = already_included[:]
    root_name = seen[0]
    links = []
    fresh_count = 0
    for candidate in list(values_df['name']):
        if fresh_count >= N:
            break
        if candidate == root_name:
            weight = 0.2
        elif candidate in seen:
            weight = 0.001
        else:
            weight = 0.1
            seen.append(candidate)
            fresh_count += 1
        links.append({'source': source, 'target': candidate, 'strength': weight})
    return links, seen

def LinkMakerWrapper(game, category_games_dictionary, N = 5):
    '''Build the category-to-game links for every category of a root game.

    Parameters
    ----------
    game : str
        Name of the root game; it seeds the list of already used games.
    category_games_dictionary : list of dict
        Category dicts in priority order (franchise, developer, publisher,
        genres, then the two multiplayer dicts, as returned by
        GetReccomendedGames). Each maps a category label to a dataframe of
        candidate games.
    N : int, default 5
        Maximum number of not-yet-used games linked per category.

    Returns
    -------
    tuple
        (links, used_games): the concatenated link dicts of all categories
        and the list of game names used, starting with `game`.
    '''
    # Seed from the `game` argument rather than a notebook-level variable so
    # the function works for whichever root it is called with.
    already_list = [game]
    all_leaf_nodes = []

    # Earlier categories claim games first; the used list is threaded through
    # so a later category does not count the same game as new again.
    for category_games in category_games_dictionary:
        for key, val in category_games.items():
            topn_links, already_list = MakeTopNLinks(key, val, N, already_list)
            all_leaf_nodes += topn_links

    return all_leaf_nodes, already_list

def MakeListOfCategories(list_category_game_dicts):
    '''Flatten the category dicts into one list of category labels.

    Labels are kept in input order. A dict whose only key is 'online' or
    'offline' is left out when its dataframe is empty; every other dict
    contributes all of its keys, whether or not its dataframes have rows.
    '''
    multiplayer_only = (['online'], ['offline'])
    category_names = []
    for mapping in list_category_game_dicts:
        labels = list(mapping)
        empty_multiplayer = labels in multiplayer_only and next(iter(mapping.values())).empty
        if not empty_multiplayer:
            category_names.extend(labels)
    return category_names

def MakeNodes(root_game, list_of_categories, list_of_games):
    '''Build the node dictionaries for the graph.

    The returned list holds, in order: the root game (group 1, level 1), one
    node per category (group 3, level 2) and one node per entry of
    `list_of_games` that is not the root (group 1, level 3). Every node uses
    its name as both 'id' and 'label'.
    '''
    def node(name, group, level):
        return {'id': name, 'group': group, 'label': name, 'level': level}

    root_nodes = [node(root_game, 1, 1)]
    category_nodes = [node(category, 3, 2) for category in list_of_categories]
    # The root already has its own node, so it is skipped among the games
    game_nodes = [node(title, 1, 3) for title in list_of_games if not (title == root_game)]
    return root_nodes + category_nodes + game_nodes

## Need to make a list of dictionaries for the nodes and links 


In [None]:
# Root game of the graph; it is looked up by exact name in the PS3-only frame
a_game_name = 'The Last of Us'
a_games_features = GetGameFeatures(a_game_name, ps3_details)
# One dict of candidate-game frames per category type of the root game
list_rec_game_dicts = GetReccomendedGames(a_games_features, ps3_details)
# Link each category to at most 10 games not already used by an earlier category
links, leaf_game_list = LinkMakerWrapper(a_game_name, list_rec_game_dicts, 10)
categories_list = MakeListOfCategories(list_rec_game_dicts)
# Root becomes the level-1 node, categories level 2, the collected games level 3
nodes = MakeNodes(a_game_name, categories_list, leaf_game_list)

nodes

## Need to also make a list of dictionaries for nodes

In [None]:
# Show the link dictionaries returned by LinkMakerWrapper
links