# On-the-fly game computation

In [7]:
import pandas as pd
import seaborn as sns
import numpy as np
import streamlit as st
import sklearn.metrics.pairwise as sklpw
from PIL import Image
from fuzzywuzzy import fuzz
import re

In [8]:
# LOAD DATA
# NOTE: pd.read_pickle unpickles arbitrary Python objects; only point these
# paths at files from a trusted source.
allgamedata_df = pd.read_pickle('datasources/gamerankurl.pkl')  # USE ONLY FOR URLS
allgamedata_df = allgamedata_df.astype({'game_rank': 'int32'}, copy=True)

# np.load on an .npz archive returns a lazy NpzFile that keeps the file open;
# read the array inside a `with` block so the handle is closed afterwards.
with np.load('datasources/allgamedocvects_v3.npz') as docvect_archive:
    allgamedocvects = docvect_archive['arr_0']

# reset_index: so that row ids are indices into the game vector array
finalgamelist_df = (
    pd.read_pickle('datasources/BGG_GameSimilarityKey.pkl')
    .astype({'game_rank': 'int32'}, copy=True)
    .reset_index(drop=True)
)

# For gameplay-based vectors: drop rows with missing values, then renumber the
# rows so row ids line up with the rows of the feature array built below.
bgg_gameplay_df = (
    pd.read_pickle('datasources/bgg_gameplayfeatures.pkl')
    .dropna()
    .reset_index(drop=True)
)
# Every column except the first becomes the feature matrix
# (presumably the first column is game_rank -- TODO confirm).
allgamePLAYdocvects = np.array(bgg_gameplay_df.iloc[:, 1:])

## Verify data sizes

In [None]:
# Show both shapes for a manual check. Rows of finalgamelist_df are used as row
# indices into allgamedocvects, so the first dimensions are the ones to compare.
print('finalgamelist_df {} should match allgamedocvects {}'.format(
    *(obj.shape for obj in (finalgamelist_df, allgamedocvects))))

In [None]:
# Show both shapes for a manual check. allgamePLAYdocvects is built from
# bgg_gameplay_df without its first column, so only the row counts can agree.
print('bgg_gameplay_df {} should match allgamePLAYdocvects {}'.format(
    *(obj.shape for obj in (bgg_gameplay_df, allgamePLAYdocvects))))

In [None]:
# FUNCTIONS

def getcompute_similar_by_gameplay(gamename, allgamedata_df, bgg_gameplay_df, allgamePLAYdocvects):
    """Score every game's gameplay-feature vector against one game's vector.

    The game name is resolved to a rank through ``allgamedata_df``; that rank
    is then located in ``bgg_gameplay_df`` to pick the query row of
    ``allgamePLAYdocvects``.

    Args:
        gamename: Exact ``game_name`` value to look up in ``allgamedata_df``.
        allgamedata_df: DataFrame with ``game_name`` and ``game_rank`` columns.
        bgg_gameplay_df: DataFrame with a ``game_rank`` column; row *i* of this
            frame describes row *i* of ``allgamePLAYdocvects``.
        allgamePLAYdocvects: 2-D array-like with one feature vector per row.

    Returns:
        DataFrame with columns ``game_rank`` and ``GameplaySimilarity`` (the
        cosine similarity to the query game), one row per row of
        ``bgg_gameplay_df``, on a fresh 0..n-1 index.

    Raises:
        IndexError: ``gamename`` is not in ``allgamedata_df`` or its rank is
            not in ``bgg_gameplay_df``.
        ValueError: ``bgg_gameplay_df`` and ``allgamePLAYdocvects`` have
            different numbers of rows.
    """
    # Get game rank from game name
    matching_ranks = allgamedata_df.loc[allgamedata_df['game_name'] == gamename, 'game_rank']
    if matching_ranks.empty:
        raise IndexError(f'game name {gamename!r} not found in allgamedata_df')
    gamerank = int(matching_ranks.iloc[0])

    vectors = np.asarray(allgamePLAYdocvects)
    if not np.issubdtype(vectors.dtype, np.floating):
        # Feature columns of mixed dtype come out of pandas as int/object arrays.
        vectors = vectors.astype(float)
    if vectors.shape[0] != len(bgg_gameplay_df):
        raise ValueError(
            f'bgg_gameplay_df has {len(bgg_gameplay_df)} rows but '
            f'allgamePLAYdocvects has {vectors.shape[0]}'
        )

    # Use the row POSITION of the rank, not the index label: the array is
    # positional, so this also works when the frame's index was not reset.
    row_positions = np.flatnonzero((bgg_gameplay_df['game_rank'] == gamerank).to_numpy())
    if row_positions.size == 0:
        raise IndexError(f'game rank {gamerank} not found in bgg_gameplay_df')
    query_pos = row_positions[0]

    # Cosine similarity of every row against the query row in one pass.
    # All-zero vectors get norm 1 so they score 0 instead of dividing by zero.
    norms = np.linalg.norm(vectors, axis=1)
    norms[norms == 0] = 1.0
    similarities = (vectors @ vectors[query_pos]) / (norms * norms[query_pos])

    return pd.DataFrame({
        'game_rank': bgg_gameplay_df['game_rank'].to_numpy(),
        'GameplaySimilarity': similarities,
    })


def getcompute_similar_games_by_id(mygameid,mygamename,allgamedata_df,allgamedocvects,finalgamelist_df,bgg_gameplay_df, allgamePLAYdocvects,W1,W2):
    """Return the ten games most similar to one game under a weighted score.

    Two cosine similarities are blended: one over the rows of
    ``allgamedocvects`` (weight ``W1``) and one over the gameplay-feature
    vectors computed by ``getcompute_similar_by_gameplay`` (weight ``W2``).
    Only games present in both sources (matched on ``game_rank``) are ranked.

    Args:
        mygameid: Row position of the query game in ``allgamedocvects`` and
            ``finalgamelist_df``.
        mygamename: ``game_name`` of the query game, used for the gameplay lookup.
        allgamedata_df: DataFrame with ``game_name``, ``game_rank`` and
            ``bgg_url`` columns.
        allgamedocvects: 2-D array-like, one vector per row of ``finalgamelist_df``.
        finalgamelist_df: DataFrame with ``game_rank`` and ``game_name`` columns.
        bgg_gameplay_df: Passed through to ``getcompute_similar_by_gameplay``.
        allgamePLAYdocvects: Passed through to ``getcompute_similar_by_gameplay``.
        W1: Weight of the document-vector similarity.
        W2: Weight of the gameplay similarity.

    Returns:
        DataFrame with columns ``Game`` and ``url``, at most ten rows, indexed
        from 1 in descending order of weighted similarity. The query game
        itself is never included.

    Raises:
        IndexError: a game name cannot be found in ``allgamedata_df`` (also
            propagated from the gameplay lookup).
    """
    # Cosine similarity of every document vector against the query row in one
    # pass; all-zero vectors get norm 1 so they score 0.
    doc_vectors = np.asarray(allgamedocvects)
    if not np.issubdtype(doc_vectors.dtype, np.floating):
        doc_vectors = doc_vectors.astype(float)
    norms = np.linalg.norm(doc_vectors, axis=1)
    norms[norms == 0] = 1.0
    semantic_sims = (doc_vectors @ doc_vectors[mygameid]) / (norms * norms[mygameid])

    # Build positionally (row i of the frame <-> row i of the array) rather
    # than relying on index alignment between the frame and a fresh column.
    semantic_df = pd.DataFrame({
        'game_rank': finalgamelist_df['game_rank'].to_numpy(),
        'game_name': finalgamelist_df['game_name'].to_numpy(),
        'Similarity': semantic_sims,
    }).astype({'game_rank': 'int32'}).set_index('game_rank')

    # Get this also for GAMEPLAY data
    gameplay_df = getcompute_similar_by_gameplay(mygamename, allgamedata_df, bgg_gameplay_df, allgamePLAYdocvects)
    gameplay_df = gameplay_df.astype({'game_rank': 'int32'}).set_index('game_rank')

    # Put the similarity lists together; the inner join keeps only games that
    # have both kinds of vector.
    combined_df = semantic_df.join(gameplay_df, how='inner')
    combined_df['WghtdSimilarity'] = (
        combined_df['Similarity'] * W1 + combined_df['GameplaySimilarity'] * W2
    )

    # Drop the query game explicitly instead of assuming it sorts first: with
    # ties (or W1 == 0) another game can share the top score.
    query_rank = int(finalgamelist_df['game_rank'].iloc[mygameid])
    candidates = combined_df[combined_df.index != query_rank]
    # mergesort is stable, so tied scores keep a deterministic order.
    top10 = candidates.sort_values(
        by='WghtdSimilarity', ascending=False, kind='mergesort'
    ).head(10)

    # Create output list
    urllist = [
        allgamedata_df.loc[allgamedata_df['game_name'] == name, 'bgg_url'].iloc[0]
        for name in top10['game_name']
    ]
    mytop10simlist_df = pd.DataFrame({'Game': top10['game_name'].to_numpy(), 'url': urllist})
    mytop10simlist_df.index = mytop10simlist_df.index + 1  # 1-based positions for display
    return mytop10simlist_df


def get_real_name_fuzzy(usergamename,finalgamelist_df):
    """Find the known game name that best matches free-text user input.

    Args:
        usergamename: Name as typed by the user.
        finalgamelist_df: DataFrame with a ``game_name`` column of candidates.

    Returns:
        Tuple ``(name, score)``: the first ``game_name`` with the highest
        ``fuzz.token_sort_ratio`` against the cleaned input, and that score.

    Raises:
        IndexError: ``finalgamelist_df`` has no rows to match against.
    """
    # Clean up: strip @handles, every character that is not an ASCII letter,
    # digit, space or tab, and URL-like tokens.
    # NOTE(review): the pattern looks like a tweet cleaner; its `^rt` branch
    # also removes a leading "rt" from a game name -- confirm that is intended.
    usergamename = re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?", "", usergamename)
    gamename_matchlist = [fuzz.token_sort_ratio(candidate, usergamename) for candidate in finalgamelist_df['game_name']]
    if not gamename_matchlist:
        raise IndexError('finalgamelist_df has no game names to match against')

    # Compute the maximum once (not once per element); list.index returns the
    # first position holding it.
    best_score = max(gamename_matchlist)
    best_position = gamename_matchlist.index(best_score)
    # Read from the same column that was scored instead of a hard-coded
    # column position.
    possiblegame_name = finalgamelist_df['game_name'].iloc[best_position]

    return possiblegame_name, best_score

def make_clickable(url,text): # Make links in pd df for url in table
    """Return an HTML anchor that opens ``url`` in a new tab, labelled ``text``.

    Both values are HTML-escaped so that characters such as ``&``, ``<`` or
    ``"`` in a name or URL cannot break the generated markup.

    Args:
        url: Link target.
        text: Visible link label.

    Returns:
        The ``<a>`` element as a string.
    """
    from html import escape  # local import keeps this helper self-contained

    safe_url = escape(str(url), quote=True)
    safe_text = escape(str(text), quote=True)
    return f'<a target="_blank" href="{safe_url}">{safe_text}</a>'

def streamlitify_df(df):
    """Add a ``Game_link`` column of HTML anchors to ``df`` and return it.

    Each link is built by ``make_clickable`` from the row's ``url`` and
    ``Game`` values. The frame is modified in place; the returned object is
    the same ``df``.
    """
    # Walk the two columns in lockstep, one anchor per row.
    df['Game_link'] = list(map(make_clickable, df['url'], df['Game']))
    return df


In [None]:
# CREATES THE DEMO GAME LIST
# Plain-int copy of the rank column, stored as its own column.
allgamedata_df['numeric_ranks'] = list(map(int, allgamedata_df['game_rank']))
# Names of the games ranked 50 or better, sorted alphabetically; the frame
# keeps allgamedata_df's row labels so they can be reused below.
topranked_df = allgamedata_df.loc[allgamedata_df['numeric_ranks'] <= 50, 'game_name'].to_frame()
topranked_df = topranked_df.sort_values(by='game_name')
topranked_idx = topranked_df.index
# NOTE(review): these labels come from allgamedata_df but are looked up in
# finalgamelist_df -- this assumes both frames share row labels; confirm.
demo_gamelist = tuple(finalgamelist_df.loc[topranked_idx, 'game_name'])

## Pretend a game has been selected

In [None]:
mygamename = 'blood rage'
# IF NAME IS UNCLEAR: swap the typed name for the closest known game name
mygamename, qltynum = get_real_name_fuzzy(mygamename, finalgamelist_df)
print('Best guess: {} (match score: {}/100)'.format(mygamename, str(qltynum)))
# Need the frame's INDEX label of the first matching row, not an idx column
name_matches = finalgamelist_df['game_name'] == mygamename
mygameid = finalgamelist_df.index[name_matches].tolist()[0]
# URL of the first allgamedata_df row carrying that name
mygameurl = allgamedata_df.loc[allgamedata_df['game_name'] == mygamename, 'bgg_url'].tolist()[0]

In [None]:
W1 = 0  # Semantic (document-vector) similarity weight
W2 = 1  # Feature (gameplay) similarity weight
mytop10simlist_df = getcompute_similar_games_by_id(mygameid,mygamename,allgamedata_df,allgamedocvects,finalgamelist_df,bgg_gameplay_df, allgamePLAYdocvects,W1,W2)
mygamevect_df = streamlitify_df(mytop10simlist_df)

# set_properties returns a Styler and leaves the DataFrame untouched, so the
# Styler has to be kept and displayed for the colours to take effect.
mygamevect_styled = mygamevect_df.style.set_properties(**{'background-color': 'black',
                           'color': 'lawngreen',
                           'border-color': 'white'})
mygamevect_styled

In [None]:
# Link for the selected game itself, built with the same helper that produces
# the links in the results table so the markup stays in one place.
mygamename_st_url = make_clickable(mygameurl, mygamename)
mygamename_st_url

#  ******** ROUGH STUFF ****************