In [46]:
from sql_functions import get_dataframe
import pandas as pd
import capstone_functions as cf
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats
from scipy import interpolate
from sklearn import linear_model
import seaborn as sns

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
# NOTE(review): this hides warnings raised by in-place edits of filtered frames further down.
pd.options.mode.chained_assignment = None  # default='warn'

############################
# VARIABLES
############################
# Number of best-rated mechanics that count as "top mechanics".
# Also used to build the flag column name f"top_{top_XX_mechanic}_mechanic".
top_XX_mechanic = 25

# Database schema that all tables below are read from
schema = 'bgg_data'

# Table names, interpolated into the SELECT statements as {schema}.{table}
table_main = 'unfiltered_main_stats_cleaned'
table_subdomain = 'subdomain'
table_unique_subdomain = 'unique_subdomain'
table_kickstarter = 'kickstarter_unique_campaigns'
table_slug = 'unique_slug_bgg_id'
table_mechanic = 'mechanics'
table_unique_mechanics = 'unique_mechanics'
table_family = 'family_bgg'

### Build base dataframes

In [47]:
# Full-table SELECTs for the base frames, issued in the order listed here.
base_queries = {
    'main': f"SELECT * FROM {schema}.{table_main}",
    'family': f"SELECT * FROM {schema}.{table_family};",
    'mechanic': f"SELECT * FROM {schema}.{table_mechanic}",
    'unique_mechanics': f"SELECT * FROM {schema}.{table_unique_mechanics}",
}
df_main, df_family, mech_links, df_u_mech = (
    get_dataframe(sql) for sql in base_queries.values()
)

# Attach the unique-mechanics columns to every row of the mechanics table
# (inner join on mechanic_id).
df_mech = mech_links.merge(df_u_mech, on='mechanic_id')

### CREATE Dataframe for upcoming games

In [48]:
# One row per (game, family) pair; inner join on the game id.
df_main_family = df_main.merge(df_family, on='id')

# Keep only the rows tagged with the admin family 'upcoming releases', then
# discard the family columns and the stats columns listed below.
df_upcoming_games = (
    df_main_family
    .query("family_type == 'admin' and family_value == 'upcoming releases'")
    .drop(columns=[
        'yearpublished', 'trading', 'numcomments', 'family_type',
        'family_value', 'family_id', 'average', 'user_rated',
    ])
)

### CREATE Dataframe for mechanics and list for top mechanics

In [49]:
# NOTE: this cell rebinds df_mech to the per-game flag table, so it cannot be
# re-run without first re-running the cell that loads df_mech.

# Mean 'average' value per mechanic, highest first; the first top_XX_mechanic
# names form the "top mechanics" list.
# The column is selected explicitly: the previous `.mean('average')` passed the
# string into GroupBy.mean's first parameter (numeric_only) and only worked
# because a non-empty string is truthy.
df_temp = pd.merge(df_main, df_mech, on='id')
top_rated_mechanics_list = (
    df_temp.groupby('mechanic')['average']
    .mean()
    .sort_values(ascending=False)
    .head(top_XX_mechanic)
    .index
    .tolist()
)

# Collapse df_mech to one row per game id with a single boolean column that is
# True when at least one of the game's mechanics is in the top list.
top_mechanic_col = f"top_{top_XX_mechanic}_mechanic"
df_mech = (
    df_mech
    .assign(is_in_top_XX_mechanics=df_mech['mechanic'].isin(top_rated_mechanics_list))
    .groupby('id')['is_in_top_XX_mechanics']
    .any()
    .rename(top_mechanic_col)
    .reset_index()
)

### MERGE upcoming_games and mech

In [50]:
# Inner join on the game id: upcoming games without a row in df_mech are dropped.
df_upcoming_games_mech = df_upcoming_games.merge(df_mech, on='id')

### Filter for only upcoming games on kickstarter

In [51]:
# Row mask taken straight from the `kickstarter` column
# (presumably boolean -- TODO confirm dtype and that it has no missing values).
# NOTE(review): despite the "_and_topXX" suffix, no top-mechanic filter is applied here.
is_kickstarter = df_upcoming_games_mech['kickstarter']
df_upcoming_games_mech_only_ks_and_topXX = df_upcoming_games_mech.loc[is_kickstarter]

In [57]:
# Remove rows with any missing value. The name is rebound (rather than using
# inplace=True on a filtered frame) so later cells still see the NaN-free data.
df_upcoming_games_mech_only_ks_and_topXX = df_upcoming_games_mech_only_ks_and_topXX.dropna()

# Most-wished games first; id, playtime and the kickstarter flag are dropped.
df_upcoming = (
    df_upcoming_games_mech_only_ks_and_topXX
    .sort_values('wishing', ascending=False)
    .drop(columns=['id', 'playtime', 'kickstarter'])
)

# The flag column is named after top_XX_mechanic, so derive the name from the
# config value; the former hard-coded `top_25_mechanic` broke for any value
# other than 25. Cast the boolean flag to 0/1.
top_mechanic_col = f"top_{top_XX_mechanic}_mechanic"
df_upcoming[top_mechanic_col] = df_upcoming[top_mechanic_col].astype(int)

### CREATE dataframe for recent games

In [59]:
# One row per (game, family) pair; inner join on the game id.
df_main_family = pd.merge(df_main, df_family, on='id')

# Ids of games that carry the 'upcoming releases' family on any of their rows.
upcoming_ids = df_main_family.loc[
    df_main_family['family_value'] == 'upcoming releases', 'id'
].unique()

# Exclude those games entirely. Filtering row by row on
# family_value != 'upcoming releases' removed only the tagged row, so an
# upcoming game stayed in the frame through its other family rows.
# NOTE(review): the result still has one row per (game, family) pair, so a game
# with several families appears several times once the family columns are
# dropped -- confirm whether later cells de-duplicate.
df_existing_games = (
    df_main_family
    .loc[~df_main_family['id'].isin(upcoming_ids)]
    .drop(columns=[
        'yearpublished', 'trading', 'numcomments', 'family_type',
        'family_value', 'family_id', 'average', 'user_rated',
    ])
)