In [15]:
from pathlib import Path

import pandas as pd


# [Block Level]
## South Carolina 
### Get Economic Population Data 

In [16]:
# Load the raw South Carolina income CSV into a DataFrame.
df_sc_econ = pd.read_csv(
    filepath_or_buffer='raw/census_block/income/sc_inc_2022_bg/sc_inc_2022_bg.csv',
)

In [17]:
# Show the available columns, then how often each LESS_10K22 value occurs.
print(
    df_sc_econ.columns,
    df_sc_econ.value_counts(subset='LESS_10K22'),
    sep='\n',
)

Index(['GEOID', 'STATEFP', 'STATE', 'COUNTYFP', 'COUNTY', 'MEDN_INC22',
       'TOT_HOUS22', 'LESS_10K22', '10K_15K22', '15K_20K22', '20K_25K22',
       '25K_30K22', '30K_35K22', '35K_40K22', '40K_45K22', '45K_50K22',
       '50K_60K22', '60K_75K22', '75K_100K22', '100_125K22', '125_150K22',
       '150_200K22', '200K_MOR22'],
      dtype='object')
LESS_10K22
0      795
15      73
13      60
9       59
11      57
      ... 
210      1
214      1
224      1
227      1
657      1
Name: count, Length: 219, dtype: int64


In [31]:
# Income brackets: each key is an output column name, each value the list of
# source columns that match_bins adds together row by row. Key order
# determines the order of the resulting columns.
# NOTE(review): '60K-100K' uses a hyphen while every other key uses an
# underscore. Left unchanged because the key becomes a column name in the
# exported CSVs -- confirm with downstream consumers before renaming.
bins = {
    '0_35K': [
        'LESS_10K22',
        '10K_15K22',
        '15K_20K22',
        '20K_25K22',
        '25K_30K22',
        '30K_35K22',
    ],
    '35K_60K': [
        '35K_40K22',
        '40K_45K22',
        '45K_50K22',
        '50K_60K22',
    ],
    '60K-100K': [
        '60K_75K22',
        '75K_100K22',
    ],
    '100K_125K': ['100_125K22'],
    '125K_150K': ['125_150K22'],
    '150K_MORE': [
        '150_200K22',
        '200K_MOR22',
    ],
}

# Poverty columns: every entry maps to a single source column, so running
# these through match_bins effectively renames the column (drops the "22").
# NOTE(review): the name `property_bins` is presumably a typo for
# "poverty" -- kept as-is because later cells reference it.
property_bins = {
    'TOT_CVAP': ['TOT_CVAP22'],
    'BPV_CVAP': ['BPV_CVAP22'],
    'APV_CVAP': ['APV_CVAP22'],
    'APV_HOUS': ['TOT_APOV22'],
    'BPV_HOUS': ['TOT_BPOV22'],
}

In [21]:
def match_bins(df, bins, columns_to_keep):
    """
    Collapse groups of columns into row-wise totals alongside passthrough columns.

    Parameters:
    - df (pd.DataFrame): source table; it is not modified.
    - bins (dict): maps each output column name to the list of `df` columns
      whose values are summed row by row to produce it.
    - columns_to_keep (list): `df` columns copied into the result unchanged.
    Returns:
    - pd.DataFrame: a copy of `columns_to_keep` with one summed column per
      entry of `bins`, assigned in the dict's iteration order.
    """
    # Compute every total up front, before the passthrough columns are copied.
    totals = {
        label: df[source_cols].sum(axis=1)
        for label, source_cols in bins.items()
    }

    result = df[columns_to_keep].copy()
    for label in totals:
        result[label] = totals[label]
    return result


In [22]:
# Collapse the South Carolina income columns into the brackets defined in
# `bins`, keeping the identifier and summary columns, then print the result.
df_sc_econ_binned = match_bins(
    df_sc_econ,
    bins,
    ['GEOID', 'STATEFP', 'STATE', 'COUNTYFP', 'COUNTY', 'MEDN_INC22', 'TOT_HOUS22'],
)
print(df_sc_econ_binned)

             GEOID  STATEFP            STATE  COUNTYFP            COUNTY  \
0     450219701011       45   South Carolina        21   Cherokee County   
1     450219701012       45   South Carolina        21   Cherokee County   
2     450219701021       45   South Carolina        21   Cherokee County   
3     450219701022       45   South Carolina        21   Cherokee County   
4     450219701023       45   South Carolina        21   Cherokee County   
...            ...      ...              ...       ...               ...   
3403  450339706012       45   South Carolina        33     Dillon County   
3404  450339706013       45   South Carolina        33     Dillon County   
3405  450339706021       45   South Carolina        33     Dillon County   
3406  450339706022       45   South Carolina        33     Dillon County   
3407  450339706023       45   South Carolina        33     Dillon County   

      MEDN_INC22  TOT_HOUS22  0_35K  35K_60K  60K-100K  100K_125K  125K_150K  \
0      

In [9]:
# Write the binned South Carolina income table to disk without the row index.
path = "processed_individual/sc_econ_block.csv"
# to_csv does not create missing directories, so make sure the output folder
# exists (e.g. on a fresh checkout) before writing.
Path(path).parent.mkdir(parents=True, exist_ok=True)
df_sc_econ_binned.to_csv(path, index=False)

### Get Poverty Population Data 

In [28]:
# Load the raw South Carolina poverty CSV into a DataFrame.
df_sc_poverty = pd.read_csv(
    filepath_or_buffer='raw/census_block/poverty/sc_pov_2022_bg/sc_pov_2022_bg.csv',
)

In [30]:
# List the poverty table's columns; the source columns named in
# `property_bins` must appear here for the binning step to work.
print(df_sc_poverty.columns)

Index(['GEOID', 'STATEFP', 'STATE', 'COUNTYFP', 'COUNTY', 'TOT_HOUS22',
       'TOT_CHI22', 'TOT_MAR22', 'TOT_MAL22', 'TOT_FEM22', 'TOT_BPOV22',
       'CHI_BPOV22', 'MAR_BPOV22', 'MAL_BPOV22', 'FEM_BPOV22', 'TOT_APOV22',
       'CHI_APOV22', 'MAR_APOV22', 'MAL_APOV22', 'FEM_APOV22', 'TOT_CVAP22',
       'BPV_CVAP22', 'APV_CVAP22'],
      dtype='object')


In [32]:
# Reduce the South Carolina poverty table to the identifier columns plus the
# renamed poverty/CVAP columns listed in `property_bins`.
# The input is South Carolina data, so the result is stored under an `sc`
# name; the original cell called it `df_md_pov_binned`, which mislabels it as
# Maryland.
df_sc_pov_binned = match_bins(
    df_sc_poverty,
    property_bins,
    ['GEOID', 'STATEFP', 'STATE', 'COUNTYFP', 'COUNTY', 'TOT_HOUS22'],
)
# Keep the original name as an alias so cells that still reference it keep working.
df_md_pov_binned = df_sc_pov_binned

In [34]:
# Confirm the binned poverty frame has the kept columns plus the new bin columns.
# NOTE(review): despite the `md` in its name, this frame was built from
# df_sc_poverty (South Carolina data).
print(df_md_pov_binned.columns)

Index(['GEOID', 'STATEFP', 'STATE', 'COUNTYFP', 'COUNTY', 'TOT_HOUS22',
       'TOT_CVAP', 'BPV_CVAP', 'APV_CVAP', 'APV_HOUS', 'BPV_HOUS'],
      dtype='object')


## Maryland
### Get Economic Population Data 

In [23]:
# Load the raw Maryland income CSV into a DataFrame.
df_md_econ = pd.read_csv(
    filepath_or_buffer='raw/census_block/income/md_inc_2022_bg/md_inc_2022_bg.csv',
)

In [24]:
# List the Maryland income table's columns; the source columns named in
# `bins` must appear here for the binning step to work.
print(df_md_econ.columns)

Index(['GEOID', 'STATEFP', 'STATE', 'COUNTYFP', 'COUNTY', 'MEDN_INC22',
       'TOT_HOUS22', 'LESS_10K22', '10K_15K22', '15K_20K22', '20K_25K22',
       '25K_30K22', '30K_35K22', '35K_40K22', '40K_45K22', '45K_50K22',
       '50K_60K22', '60K_75K22', '75K_100K22', '100_125K22', '125_150K22',
       '150_200K22', '200K_MOR22'],
      dtype='object')


In [25]:
# Collapse the Maryland income columns into the brackets defined in `bins`,
# keeping the identifier and summary columns.
df_md_econ_binned = match_bins(
    df_md_econ,
    bins,
    ['GEOID', 'STATEFP', 'STATE', 'COUNTYFP', 'COUNTY', 'MEDN_INC22', 'TOT_HOUS22'],
)

In [26]:
# Print the binned Maryland income frame to eyeball the result before saving.
print(df_md_econ_binned)

             GEOID  STATEFP      STATE  COUNTYFP                   COUNTY  \
0     240217402001       24   Maryland        21         Frederick County   
1     240217402002       24   Maryland        21         Frederick County   
2     240217402003       24   Maryland        21         Frederick County   
3     240217402004       24   Maryland        21         Frederick County   
4     240217501001       24   Maryland        21         Frederick County   
...            ...      ...        ...       ...                      ...   
4074  240338074102       24   Maryland        33   Prince George's County   
4075  240338075001       24   Maryland        33   Prince George's County   
4076  240338075002       24   Maryland        33   Prince George's County   
4077  240338075003       24   Maryland        33   Prince George's County   
4078  240339800001       24   Maryland        33   Prince George's County   

      MEDN_INC22  TOT_HOUS22  0_35K  35K_60K  60K-100K  100K_125K  125K_150

In [27]:
# Write the binned Maryland income table to disk without the row index.
path = "processed_individual/md_econ_block.csv"
# to_csv does not create missing directories, so make sure the output folder
# exists (e.g. on a fresh checkout) before writing.
Path(path).parent.mkdir(parents=True, exist_ok=True)
df_md_econ_binned.to_csv(path, index=False)