In [74]:
import os
import numpy as np
import pandas as pd
import networkx as nx
import json

In [75]:
# Root directory of the user-supplied NYC census inputs; later cells join file names onto it.
# NOTE(review): hard-coded absolute, user-specific path — the notebook only runs where this directory exists.
POPULATION_INP_ROOT = '/u/ayushc/projects/GradABM/MacroEcon/simulator_data/census_populations/NYC/user_input/'

In [76]:
# Pickled census population input located under the input root.
pop_data_file = os.path.join(POPULATION_INP_ROOT, 'NYC_POP.pkl')

In [77]:
from census_loader import CensusDataLoader

In [5]:
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
pop_data_df = pd.read_pickle(pop_data_file)
# Restrict the run to the first entry of the 'area' column of the 'age_gender' table.
# Assumes pop_data_df['age_gender'] is a DataFrame with an 'area' column — TODO confirm.
area_selector = pop_data_df['age_gender']['area'][:1]
# No geographic mapping is passed to the loader.
geo_mapping = None

In [6]:
# Build the census loader for the selected area(s).
# NOTE(review): n_cpu=8 is passed together with use_parallel=False; presumably n_cpu is only
# used when parallelism is enabled — confirm against CensusDataLoader.
loader = CensusDataLoader(n_cpu=8, use_parallel=False, area_list=area_selector, geo_mapping=geo_mapping)

In [None]:
# Output root for everything generated for the selected area(s).
# The directory name is built from the area *values*: formatting the pandas Series
# itself would embed its multi-line repr (index, name, dtype) in the path.
# np.atleast_1d also copes with a single string or a plain list as the selector.
area_tag = '_'.join(str(area) for area in np.atleast_1d(area_selector))
POPULATION_OUT_ROOT = '/u/ayushc/projects/GradABM/MacroEcon/simulator_data/census_populations/NYC/{}_synthetic_populations/'.format(area_tag)


## Create Synthetic Agent Population

In [7]:
# Target file for the generated base population, under the output root.
save_population_data_path = os.path.join(POPULATION_OUT_ROOT, 'population_data.pkl')
# Generate the synthetic base population from the census input; save_path is handed to the
# loader, which presumably writes the result there — confirm against CensusDataLoader.
loader.generate_basepop(input_data=pop_data_df, save_path=save_population_data_path)

INFO:process.base_pop:Processing: 0/1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ethnicity_data['probability'] = ethnicity_data.apply(lambda row: row['count'] / total_population[row['area']], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ethnicity_data['probability'] = ethnicity_data.apply(lambda row: row['count'] / total_population[row['area']], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas

In [8]:
# Preview the first rows of the generated population table.
loader.population_df.head()

Unnamed: 0,area,age,gender,ethnicity
0,BK0101,20t29,male,white
1,BK0101,20t29,female,white
2,BK0101,20t29,female,white
3,BK0101,20t29,male,hispanic
4,BK0101,20t29,female,white


In [9]:
# (rows, columns) of the generated population table.
loader.population_df.shape

(37518, 4)

In [10]:
# Sanity check: list the distinct areas present in the generated population.
loader.population_df['area'].unique()

array(['BK0101'], dtype=object)

## Create Synthetic Mobility Networks

In [11]:
# Number of steps passed to the mobility-network generator.
num_steps = 2

# Input mapping file and output directory for the generated networks.
mobility_mapping_path = os.path.join(POPULATION_INP_ROOT, 'mobility_mapping.json')
save_mobility_network_dir = os.path.join(POPULATION_OUT_ROOT, 'mobility_networks/')

# Report whether the mapping file is present, then load it.
print(os.path.exists(mobility_mapping_path))
with open(mobility_mapping_path) as mapping_file:
    mobility_mapping = json.load(mapping_file)

print("Mobility Network Dir: ", save_mobility_network_dir)

loader.generate_mobility_networks(num_steps, mobility_mapping, save_mobility_network_dir)

True
Mobility Network Dir:  /u/ayushc/projects/GradABM/MacroEcon/simulator_data/census_populations/NYC/synthetic_populations/mobility_networks/


## Generate Household Population

In [None]:
# Relative path: resolved against the kernel's working directory, unlike the absolute roots used elsewhere.
household_data_path = '../../housing_v2.pkl'
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects — only use with a trusted file.
household_data = np.load(household_data_path, allow_pickle=True)
# Age-bucket labels split into adults and children; presumably these match the values of the
# population table's 'age' column (e.g. '20t29') — TODO confirm.
household_mapping = {'adult_list': ['20t29','30t39', '40t49', '50t64', '65A'], 'children_list': ['U19']}

In [None]:
# Store the household output next to the other generated artefacts.
# SAVE_ROOT was never defined in this notebook (NameError on a fresh kernel);
# POPULATION_OUT_ROOT is the output root used by the other generation cells.
save_household_path = os.path.join(POPULATION_OUT_ROOT, 'base_household.pkl')

loader.generate_household(household_data, household_mapping, save_path=save_household_path)

## Generate demographic data files

In [None]:
def preprocess_data(df_path, output_dir):
    """Integer-encode every column of a pickled table and write the results to disk.

    For each column of the table loaded from ``df_path``:

    * the values are factorized with ``pd.factorize`` (codes are assigned in
      order of first appearance; missing values get the code ``-1``),
    * the codes are saved as a pandas Series to ``<output_dir>/<column>.pickle``.

    The code-to-value lookup of all columns is written as JSON to
    ``<output_dir>/population_mapping.json`` in the form ``{column: [value, ...]}``,
    where the list position is the integer code.

    Args:
        df_path: Path of the pickled pandas DataFrame to encode.
        output_dir: Directory receiving the per-column pickles and the mapping
            file. It is created (including parents) if it does not exist.

    Returns:
        None. All results are written to ``output_dir``.
    """
    # NOTE(review): unpickling can execute arbitrary code — only load trusted files.
    df = pd.read_pickle(df_path)

    # Create the target directory up front so the first write cannot fail on a missing folder.
    os.makedirs(output_dir, exist_ok=True)

    mapping_collection = {}
    for attribute in list(df.keys()):
        codes, categories = pd.factorize(df[attribute])
        # Keep the original index and column name, but leave the loaded frame untouched.
        encoded = pd.Series(codes, index=df.index, name=attribute)
        encoded.to_pickle(os.path.join(output_dir, f'{attribute}.pickle'))
        mapping_collection[attribute] = categories.tolist()

    output_mapping_path = os.path.join(output_dir, 'population_mapping.json')

    print("output mapping path: ", output_mapping_path)

    with open(output_mapping_path, 'w') as f:
        json.dump(mapping_collection, f)

In [12]:
# Input path for preprocess_data (the pickled population table); currently disabled.
# df_path = os.path.join(POPULATION_OUT_ROOT, 'population_data.pkl')
# Destination directory for the simulation input files.
# NOTE(review): hard-coded absolute, user-specific path.
output_dir = '/u/ayushc/projects/GradABM/MacroEcon/simulator_data/census_populations/NYC/simulation_input2/'

In [None]:
# Input table for preprocess_data: the same path that is passed as save_path to
# generate_basepop. It is defined here because no executable cell assigns df_path,
# so the call below would raise NameError on a fresh kernel.
df_path = os.path.join(POPULATION_OUT_ROOT, 'population_data.pkl')

# exist_ok=True replaces the separate os.path.exists check and is safe to re-run.
os.makedirs(output_dir, exist_ok=True)

preprocess_data(df_path, output_dir)

In [13]:
# Export the loader's generated data into output_dir.
# NOTE(review): presumably overlaps with what preprocess_data writes to the same directory — confirm.
loader.export(output_dir)