In [1]:
import pandas as pd
import plotly.express as px
# Use plotly instead of the default backend for DataFrame.plot / Series.plot.
pd.options.plotting.backend = 'plotly'
from functools import reduce

In [2]:
# Base table: one of the inputs that later gets the other tables joined onto it.
business = pd.read_csv('./data/business.csv')

# Feature tables.
# low_memory=False: parse the file in a single pass instead of in chunks,
# which avoids mixed-dtype inference within a column.
attributes = pd.read_csv('./data/atributtes_clean.csv', low_memory=False)
categories = pd.read_csv('./data/categories_fixed.csv')
hours = pd.read_csv('./data/hours_fixed.csv')
gps = pd.read_csv('./data/gps_clustering_fixed.csv')

# Activity counts and tips.
checkins = pd.read_csv('./data/n_checkins.csv')
top_tips = pd.read_csv('./data/top_tips.csv')
sentiment = pd.read_csv('./data/sentiment_ok_unique.csv')

# Candidate target tables.
target1 = pd.read_csv('./data/target.csv')
target2 = pd.read_csv('./data/target2.csv')
target3 = pd.read_csv('./data/target_3_influencer_modified.csv')

In [3]:
# Binary target: 1 when a row's success_score is at least twice the mean
# success_score of the table, 0 otherwise (missing scores compare False -> 0).
target3['target'] = (
    target3['success_score']
    .ge(target3['success_score'].mean() * 2)
    .astype('int64')
)

In [4]:
# Tables to combine, in join order; the first one is the left-most table.
data_frames = [business, attributes, categories, hours, gps, checkins, top_tips, target3]


def _left_join_on_business_id(left, right):
    '''Left-join `right` onto `left` using the shared `business_id` column.'''
    return left.merge(right, on='business_id', how='left')


# Fold the list into a single frame, joining one table at a time.
df = reduce(_left_join_on_business_id, data_frames)

In [5]:
def lower_col_names(cols):
    '''
    Build a rename mapping that lower-cases column names

    Parameters
    ----------
    cols: iterable
        Column labels, e.g. a list or a DataFrame's ``columns`` index

    Returns
    -------
    new_names: dict
        Maps each original label to its lower-cased form. Labels that are
        not strings (e.g. integer column labels) map to themselves.
    '''
    # Non-string labels have no .lower(); keep them unchanged instead of raising.
    return {x: x.lower() if isinstance(x, str) else x for x in cols}

In [6]:
# Lower-case every column label of the merged frame, modifying `df` in place.
df.rename(mapper=lower_col_names(df.columns), axis='columns', inplace=True)

In [84]:
# Inspect the merged, lower-cased column names. The `_x` / `_y` suffixes in the
# output are pandas' default merge suffixes for non-key columns that appear in
# more than one of the joined tables.
df.columns

Index(['business_id', 'name', 'postal_code', 'latitude_x', 'longitude_x',
       'stars', 'review_count_x', 'is_open', 'good_ambience', 'garage',
       'businessacceptscreditcards', 'restaurantspricerange2', 'bikeparking',
       'wifi', 'delivery', 'goodforkids', 'outdoorseating',
       'restaurantsreservations', 'hastv', 'restaurantsgoodforgroups',
       'alcohol', 'byappointmentonly', 'caters', 'restaurantsattire',
       'noiselevel', 'meal_diversity', 'restaurants', 'food', 'shopping',
       'home services', 'beauty & spas', 'nightlife', 'health & medical',
       'local services', 'bars', 'automotive', 'total_categories', '7days',
       'weekends', 'n_open_days', 'mean_total_hours_open', 'mean_open_hour',
       'mean_close_hour', 'latitude_y', 'longitude_y', 'latlong_div', 'areas',
       'number_visits', 'number_tips', 'unnamed: 0', 'review_count_y',
       'avg(reactions)', 'avg(stars)', 'avg(influencer_2)', 'success_score',
       'target'],
      dtype='object')

In [7]:
# Display names for the integer codes stored in the `areas` column
# (presumably the cluster ids from the GPS clustering table -- confirm).
labels = {
    0: 'Philadelphia',
    1: 'Reno',
    2: 'Indianapolis',
    3: 'Tucson',
    4: 'New Orleans',
    5: 'St. Louis',
    6: 'Tampa',
    7: 'Boise',
    8: 'Canada',
    9: 'Nashville',
    10: 'Santa Barbara',
}

# Series.map turns every value that is not a key of `labels` into NaN, so
# re-running this cell on the already-mapped names would erase them. Only map
# while the column still holds the numeric codes; a second run is then a no-op.
if pd.api.types.is_numeric_dtype(df['areas']):
    df['areas'] = df['areas'].map(labels)

In [8]:
import plotly.express as px

# Treemap with one tile per value of `areas`, sized by `success_score`.
fig = px.treemap(
    df,
    path=['areas'],
    values='success_score',
    # `labels` renames *columns* for display (column name -> shown text); it is
    # not a lookup for cell values, so the code -> city dict does not belong here.
    labels={'areas': 'City', 'success_score': 'Success score'},
    title='Success Score by City',
)

# Render the figure in the notebook output.
fig.show()
