In [1]:
import pandas as pd
import numpy as np
import os
import csv
import ast
from collections import Counter
import geopandas as gpd
import h5py
from datetime import datetime, timedelta
#### personal packages####
import sys
sys.path.append("../Codes/")
# import mobility as mbl
from mobility import mobility_matrix_extract_modzcta as mm
from mobility import parameters as mp
from mobility import analysis as ma
##### for plotting ###
import matplotlib.pyplot as plt
from matplotlib.colors import TwoSlopeNorm, LogNorm
import matplotlib.gridspec as gridspec
import matplotlib.dates as mdates

my package for mobility extraction has been imported!


In [3]:
# Cluster index -> category label. The labels are concatenated into the
# per-week column names further down, so their spelling must stay exactly as
# it appears in the input table.
cluster_name = dict(enumerate([
    'Glocery&Pharmacies',
    'Retails',
    'Arts&Entertainment',
    'Restaurants&Bars',
    'Educations',
    'Healthcares',
    'others',
]))

In [3]:
# Load the wide per-zone table and store the MODZCTA key as text, since it is
# later joined against string zip codes.
gdf_merged = pd.read_csv('move_7c_wk_raw.csv').assign(
    MODZCTA=lambda frame: frame['MODZCTA'].astype(str)
)

In [9]:
# Display (n_rows, n_columns) of the loaded wide table as a quick sanity check.
gdf_merged.shape

(177, 407)

In [9]:
# Reshape the wide table (one column per cluster per week) into a long table
# with one row per (zone, week) and one '<cluster>_visits_weekly' column per
# cluster. Every cluster goes through the same melt; the first one seeds
# `final_df` and the remaining ones are inner-joined onto it.
id_cols = ['MODZCTA', 'MODIFIED_ZCTA', 'borough', 'mod_idx', 'POP_DENOMINATOR']
merge_keys = ['MODZCTA', 'MODIFIED_ZCTA', 'borough', 'mod_idx', 'week', 'POP_DENOMINATOR']
n_week_cols = 56     # columns '<cluster>_0' .. '<cluster>_55' are selected per cluster
first_week_col = 3   # the column with this suffix becomes week 0
n_weeks_kept = 51    # number of consecutive weekly columns that are melted

final_df = None
for c_idx, label in cluster_name.items():
    if final_df is not None:
        # Progress output for every cluster merged onto the base table.
        print(label)
    cluster_cols = ['{}_{}'.format(label, i) for i in range(n_week_cols)]
    # Selecting all weekly columns up front fails fast if any are missing.
    df_vist_cluster_i = gdf_merged[id_cols + cluster_cols].copy()
    temp_df = pd.melt(
        df_vist_cluster_i,
        id_vars=id_cols,
        value_vars=cluster_cols[first_week_col:first_week_col + n_weeks_kept],
        var_name='week',
        value_name=label + '_visits_weekly',
    )
    # '<cluster>_<k>' -> integer week index k - first_week_col.
    temp_df['week'] = temp_df['week'].str.split('_').str[1].astype('int64') - first_week_col
    if final_df is None:
        final_df = temp_df
    else:
        final_df = pd.merge(final_df, temp_df, on=merge_keys)

Retails
Arts&Entertainment
Restaurants&Bars
Educations
Healthcares
others


In [10]:
# List the columns of the long-format table to confirm one visits column per cluster.
final_df.columns

Index(['MODZCTA', 'MODIFIED_ZCTA', 'borough', 'mod_idx', 'POP_DENOMINATOR',
       'week', 'Glocery&Pharmacies_visits_weekly', 'Retails_visits_weekly',
       'Arts&Entertainment_visits_weekly', 'Restaurants&Bars_visits_weekly',
       'Educations_visits_weekly', 'Healthcares_visits_weekly',
       'others_visits_weekly'],
      dtype='object')

In [12]:
### 2 read scores data #########
# Score table keyed by `zipcode`; its first five characters form the join key
# that is matched against MODZCTA.
df_zip_score = pd.read_csv('zipcode_scores_v2.csv')
zipcode_text = df_zip_score['zipcode'].astype(str)
df_zip_score['zipcode'] = zipcode_text
df_zip_score['zipcode_5'] = zipcode_text.str[:5]
# Left join: every row of final_df is kept even when no score row matches.
df_2 = pd.merge(final_df, df_zip_score, how='left',
                left_on='MODZCTA', right_on='zipcode_5')
### 3 select the zipcodes #########
# Keep only rows whose zip code appears in the first 22 rows of the counts file.
# NOTE(review): 22 is a positional cut-off; presumably the file is ordered by
# count so this picks the top zip codes — confirm.
df_zipcode = pd.read_csv('../results/zipcode_counts.csv')
df_zip_selected = df_zipcode.head(22)
zips = df_zip_selected['zip_code'].values
zips_plot = list(map(str, zips))
keep_rows = df_2['zipcode_5'].isin(zips_plot)
df_3 = df_2.loc[keep_rows]
##### 4 social economic data ######
nyc_socialecon = pd.read_csv('../Data/zipcode_data_FINAL.csv')
nyc_socialecon['zipcode_5'] = nyc_socialecon['ZIPCODE'].astype(str)
# Column -> aggregation applied when several input rows share one zipcode_5.
nyc_socialecon_s_cols_dict = {
    'AREA': 'sum',
    'POPULATION': 'sum',
    'HOUSEHOLD_SIZE': 'mean',
    'HOUSEHOLD_INCOME': 'mean',
    'BLACK': 'sum',
    'HISPANIC': 'sum',
    'AGE65_PLUS': 'sum',
    'BACHELOR_S': 'sum',
    'NO_HEALTH_INSURANCE': 'mean',
    'RESTAURANT_COUNT': 'sum',
    'GROCERY_COUNT': 'sum',
    'EDUCATION_COUNT': 'sum',
    'ENTERTAINMENT_COUNT': 'sum',
    'HEALTHCARE_COUNT': 'sum',
    'RETAIL_COUNT': 'sum',
}
nyc_socialecon_s_cols = [*nyc_socialecon_s_cols_dict, 'zipcode_5']
df_social = nyc_socialecon.loc[:, nyc_socialecon_s_cols].copy()
# Collapse to one row per zipcode_5 using the per-column rules above.
grouped_social = df_social.groupby('zipcode_5')
aggregated_df = grouped_social.agg(nyc_socialecon_s_cols_dict).reset_index()
# Inner join (merge default), then expose the key under the name used by later joins.
df_4 = pd.merge(df_3, aggregated_df, on='zipcode_5').rename(columns={'zipcode_5': 'zip_char'})
######### 5 merge with ages ########
df_ages_vehicles = pd.read_csv('../Data/estimated_average_age_vehicles.csv')
df_nyc_tract_sorted = pd.read_csv('df_nyc_tract_modidx_v33.csv')
df_tract_zip = df_nyc_tract_sorted.loc[:, ['tract', 'zip']]
# Right join: every tract listed in the tract -> zip table is kept, with or
# without a matching age/vehicle row.
df_ages_nyc = pd.merge(df_ages_vehicles, df_tract_zip, how='right', on='tract')
df_ages_nyc['zip_char'] = df_ages_nyc['zip'].astype(str)
# Roll tract-level values up to one row per zip: age is averaged, the other
# columns are summed.
zip_level_aggs = {
    'EstimatedAverageAge': 'mean',
    'no_vehicles': 'sum',
    'vehicle_owned': 'sum',
    'public_transportation': 'sum',
    'household_num': 'sum',
}
aggregated_data = df_ages_nyc.groupby('zip_char').agg(zip_level_aggs).reset_index()
# Inner join (merge default) on the string zip key.
df_5 = pd.merge(df_4, aggregated_data, on='zip_char')
### 6 merge with cases ###
covid_cases = pd.read_csv('cases_borough_weekly.csv')
# Zero counts are replaced by 0.1 so the natural log below stays finite.
# NOTE(review): the substitution is written back into the count columns
# themselves, so the non-log columns also hold 0.1 instead of 0 — confirm
# this is intended before using the raw counts downstream.
for col in ('CASE_COUNT', 'DEATH_COUNT', 'borough_case_count', 'borough_death_count'):
    counts = covid_cases[col].astype(float)
    covid_cases[col] = counts.mask(counts == 0, 0.1)
    covid_cases[col + '_log'] = np.log(covid_cases[col])
# Outer join keeps (borough, week) pairs that appear on only one side.
df_6 = covid_cases.merge(df_5, how='outer', on=['borough', 'week'])
### 7 merge with policies ####
# Attach the weekly policy columns; the left join keeps every row of df_6.
df_policies = pd.read_csv('../Data/stringency_index_ny_wk.csv')
visits_scores_wk = pd.merge(df_6, df_policies, how='left', on='week')

In [13]:
# List the columns of the fully merged table.
visits_scores_wk.columns

Index(['CASE_COUNT', 'DEATH_COUNT', 'week', 'borough_case_count', 'id',
       'borough_death_count', 'borough', 'CASE_COUNT_log', 'DEATH_COUNT_log',
       'borough_case_count_log', 'borough_death_count_log', 'MODZCTA',
       'MODIFIED_ZCTA', 'mod_idx', 'POP_DENOMINATOR',
       'Glocery&Pharmacies_visits_weekly', 'Retails_visits_weekly',
       'Arts&Entertainment_visits_weekly', 'Restaurants&Bars_visits_weekly',
       'Educations_visits_weekly', 'Healthcares_visits_weekly',
       'others_visits_weekly', 'zipcode', 'Age_mean', 'Age_median', 'Age_std',
       'Risk_preference_mean', 'Risk_preference_median', 'Risk_preference_std',
       'score_mean', 'score_median', 'score_std', 'loss_aversion_scores_mean',
       'loss_aversion_scores_median', 'loss_aversion_scores_std',
       'agency_mean', 'agency_median', 'agency_std', 'regulated_scores_mean',
       'regulated_scores_median', 'regulated_scores_std',
       'regulated_agency_mean', 'regulated_agency_median',
       'regulated

In [14]:
# Persist the merged table; index=False keeps the row index out of the file.
visits_scores_wk.to_csv('unpivot_merged_data_raw_v9.csv',index=False)

In [2]:
# NOTE(review): this loads the 'v7' export, while the export cell of this
# notebook writes 'unpivot_merged_data_raw_v9.csv'. On a top-to-bottom run the
# freshly built table is replaced by the contents of a different file version —
# confirm which version the analysis below is meant to use.
visits_scores_wk = pd.read_csv('unpivot_merged_data_raw_v7.csv')

In [4]:
# Preview the first rows of the reloaded table.
visits_scores_wk.head()

Unnamed: 0,CASE_COUNT,DEATH_COUNT,week,borough_case_count,id,borough_death_count,borough,CASE_COUNT_log,DEATH_COUNT_log,borough_case_count_log,...,NO_HEALTH_INSURANCE,RESTAURANT_COUNT,GROCERY_COUNT,EDUCATION_COUNT,ENTERTAINMENT_COUNT,HEALTHCARE_COUNT,RETAIL_COUNT,EstimatedAverageAge,Date,StringencyIndex_WeightedAverage
0,1.0,0.1,0,0.1,51,0.1,BK,0.0,-2.302585,-2.302585,...,0.086066,391,81.0,63.0,69.0,136.0,315.0,34.113808,2020-03-01,14.682857
1,1.0,0.1,0,0.1,51,0.1,BK,0.0,-2.302585,-2.302585,...,0.078994,601,141.0,58.0,48.0,346.0,379.0,37.360114,2020-03-01,14.682857
2,1.0,0.1,0,0.1,51,0.1,BK,0.0,-2.302585,-2.302585,...,0.063365,380,122.0,52.0,36.0,242.0,314.0,37.272314,2020-03-01,14.682857
3,1.0,0.1,0,0.1,51,0.1,BK,0.0,-2.302585,-2.302585,...,0.130641,259,46.0,40.0,89.0,351.0,373.0,40.954869,2020-03-01,14.682857
4,45.0,0.1,1,12.0,52,0.1,BK,3.806662,-2.302585,2.484907,...,0.086066,391,81.0,63.0,69.0,136.0,315.0,34.113808,2020-03-08,19.44


In [5]:
def scale_to_100(x, min_val, max_val):
    """
    Linearly map ``x`` from the range [min_val, max_val] onto [0, 100].

    ``min_val`` maps to 0 and ``max_val`` maps to 100. No clamping is applied,
    so inputs outside the original range land outside [0, 100].

    Parameters:
    - x: Value to rescale (anything supporting ``-``, ``*`` and ``/``).
    - min_val: Lower end of the original range.
    - max_val: Upper end of the original range.

    Returns:
    - ``x`` expressed on the 0-100 scale.
    """
    span = max_val - min_val
    offset = x - min_val
    return offset * 100 / span

In [26]:
# Lower/upper bounds used as the original range of each raw score.
# NOTE(review): presumably the theoretical range of each measure — confirm.
(min_agency, max_agency) = (-32, 32)
(min_scores, max_scores) = (0, 16)
(min_loss, max_loss) = (0, 6)

# Bring the three mean-score columns onto a common 0-100 scale.
scaled_x1 = scale_to_100(visits_scores_wk['agency_mean'],
                         min_val=min_agency, max_val=max_agency)
scaled_x2 = scale_to_100(visits_scores_wk['score_mean'],
                         min_val=min_scores, max_val=max_scores)
scaled_x3 = scale_to_100(visits_scores_wk['loss_aversion_scores_mean'],
                         min_val=min_loss, max_val=max_loss)