In [1]:
import pandas as pd
import json
import numpy as np

In [2]:
df_covid = pd.read_json('yelp_academic_dataset_covid_features.json', lines=True)
# https://medium.com/13-fund/analyzing-covid-19-restaurant-closures-with-yelp-data-f9116c7d563a
# November 2020

In [3]:
df_businesses = pd.read_json('yelp_academic_dataset_business.json', lines=True)

In [4]:
df_all = pd.merge(df_businesses, df_covid, on='business_id', how='inner')

In [15]:
states_to_keep = ['AZ', 'NC', 'NV', 'OH', 'ON', 'PA', 'QC']
columns_to_group = ['state','delivery or takeout']

In [16]:
df_all = df_all.loc[lambda f: f['is_open'] == 1]
df_all = df_all.loc[lambda f: f['state'].isin(states_to_keep)]

In [25]:
df_all['category'] = df_all['categories'].apply(lambda x: 'Other' if x is None
                                 else'Restaurant' if 'restaurant' in str.lower(x)
                                 else 'Health' if 'health' in str.lower(x)
                                 else 'Services' if 'services' in str.lower(x)
                                 else 'Services' if 'auto' in str.lower(x)
                                 else 'Shopping' if 'shopping' in str.lower(x)
                                # else 'Entertainment' if 'art' in str.lower(x)       
                                 #else 'Entertainment' if 'entertainment' in str.lower(x)
                                 else 'Shopping' if 'food' in str.lower(x)
                                 else 'Other')
df_all['Delivery or virtual'] = (df_all['delivery or takeout'] == 'TRUE') | (df_all['Grubhub enabled'] == 'TRUE')  | (df_all['Virtual Services Offered'] == 'TRUE')
df_all['Delivery or virtual'] = np.where(df_all['Delivery or virtual']==True, 'Delivery or virtual', 'No delivery or virtual')
df_all['Call To Action enabled'] = np.where(df_all['Call To Action enabled']=='TRUE', 'Call to Action Enabled', 'No Call to Action')

In [26]:
# Level 1: State
# Level 2: Category
# Level 3: Call To Action enabled 
# Level 4: Delivery or virtual

In [27]:
df_gp = df_all.groupby(['state', 'category', #'Call To Action enabled', 
                        'Delivery or virtual'])['business_id'].count().reset_index()

In [28]:
class Node(object):
    def __init__(self, name, children = None, value = None):
        self.name = name
        self.children = children
        self.value = value


In [29]:
def create_node(field, name, df):
    order = ['Overall', 'state', 'category',# 'Call To Action enabled',
             'Delivery or virtual']
    ix_field = order.index(field)
    if (ix_field == (len(order) - 1)): # If last level
        cur_df = df[df[field] == name]
        return Node(name, value=str(cur_df['business_id'].values[0]))
    else:
        next_field = order[ix_field+1]
        children = [create_node(next_field,name_val, df[df[next_field] == name_val]) for name_val in df[next_field].drop_duplicates().values]
        return Node(name, children=children, value='')

In [30]:
df_on = df_gp[df_gp['state'] == 'ON']
#new_node = create_node('state', 'ON', df_on)
new_node = create_node('Overall', 'State / Province', df_gp)

In [31]:
def print_node(node):
    if node.children is not None:
        print('{')
        print('"name": "{}",'.format(node.name))
        print('"children": [')
        for child in node.children:
            print_node(child)
        print("]")
        print("},")
    else:
        print("{" + '"name": "{}", "value": {}'.format(node.name, node.value) + "},")
    
        

In [32]:
print_node(new_node)

{
"name": "All",
"children": [
{
"name": "AZ",
"children": [
{
"name": "Health",
"children": [
{"name": "Delivery or virtual", "value": 187},
{"name": "No delivery or virtual", "value": 7421},
]
},
{
"name": "Other",
"children": [
{"name": "Delivery or virtual", "value": 38},
{"name": "No delivery or virtual", "value": 6328},
]
},
{
"name": "Restaurant",
"children": [
{"name": "Delivery or virtual", "value": 7365},
{"name": "No delivery or virtual", "value": 514},
]
},
{
"name": "Services",
"children": [
{"name": "Delivery or virtual", "value": 810},
{"name": "No delivery or virtual", "value": 21284},
]
},
{
"name": "Shopping",
"children": [
{"name": "Delivery or virtual", "value": 1504},
{"name": "No delivery or virtual", "value": 4148},
]
},
]
},
{
"name": "NC",
"children": [
{
"name": "Health",
"children": [
{"name": "Delivery or virtual", "value": 77},
{"name": "No delivery or virtual", "value": 1268},
]
},
{
"name": "Other",
"children": [
{"name": "Delivery or virtual", "value": 7