In [21]:
from factfinder.calculate import Calculate
from dotenv import load_dotenv
import os


In [22]:
# Load environment variables (e.g. the API key read in a later cell) from the
# project-level .env file. The file's presence is checked explicitly: a missing
# file is then reported instead of passing silently, and real errors raised
# while loading are no longer swallowed by a bare `except:`.
env_path = "../.env"
if os.path.isfile(env_path):
    load_dotenv(dotenv_path=env_path)
else:
    print(".env file is missing ...")

In [23]:
# Calculator configured for the 2019 ACS source on the '2010_to_2020' geography;
# the API key is read from the environment.
calculate = Calculate(
    api_key=os.environ["API_KEY"],
    year=2019,
    source="acs",
    geography='2010_to_2020',
)

In [24]:
# 'mdvl' computed for the 2020 geotype ('CT20') ...
df2020 = calculate("mdvl", "CT20")
# ... and for the 2010 geotype ('tract'), to compare side by side.
df2010 = calculate("mdvl", "tract")

In [25]:
# Rows of the 2020 result for labs_geoid 1007001 and 1007002
df2020.loc[df2020["labs_geoid"].isin(["1007001", "1007002"])]

Unnamed: 0,census_geoid,labs_geoid,geotype,labs_geotype,pff_variable,c,e,m,p,z
1236,36061007001,1007001,CT20,CT2020,mdvl,25.5,1286266.0,539404.0,100.0,
1237,36061007002,1007002,CT20,CT2020,mdvl,24.1,1278841.0,506524.0,100.0,


In [26]:
# Row of the 2010 result for labs_geoid 1007000
df2010.loc[df2010["labs_geoid"].isin(["1007000"])]

Unnamed: 0,census_geoid,labs_geoid,geotype,labs_geotype,pff_variable,c,e,m,p,z
151,36061007000,1007000,tract,CT2010,mdvl,17.5,1288800.0,371746.0,100.0,


In [27]:
# Variable and geotype used by the cells that follow
pff_variable, geotype = "mdvl", "CT20"

# Metadata lookups for this median variable: first the per-bin ranges ...
ranges = calculate.meta.median_ranges(pff_variable)
print(ranges)

# ... then its design factor
design_factor = calculate.meta.median_design_factor(pff_variable)
print(design_factor)

{'ovlu10': [0, 9999], 'ovl10t14': [10000, 14999], 'ovl15t19': [15000, 19999], 'ovl20t24': [20000, 24999], 'ovl25t29': [25000, 29999], 'ovl30t34': [30000, 34999], 'ovl35t39': [35000, 39999], 'ovl40t49': [40000, 49999], 'ovl50t59': [50000, 59999], 'ovl60t69': [60000, 69999], 'ovl70t79': [70000, 79999], 'ovl80t89': [80000, 89999], 'ovl90t99': [90000, 99999], 'ov100t124': [100000, 124999], 'ov125t149': [125000, 149999], 'ov150t174': [150000, 174999], 'ov175t199': [175000, 199999], 'ov200t249': [200000, 249999], 'ov250t299': [250000, 299999], 'ov300t399': [300000, 399999], 'ov400t499': [400000, 499999], 'ov500t749': [500000, 749999], 'ov750t999': [750000, 999999], 'ov1t149m': [1000000, 1499999], 'ov150t199m': [1500000, 1999999], 'ov2milpl': [2000000, 5000000]}
1.4


In [28]:
# Estimate (e) and margin (m) columns for every range variable, on the 2020 geotype
df = calculate.calculate_e_m_multiprocessing([*ranges], geotype)
print(df.loc[df["census_geoid"].isin(["36061007001", "36061007002"])])

     census_geoid pff_variable geotype           e           m
1236  36061007001       ovlu10    CT20    0.000000         NaN
1237  36061007002       ovlu10    CT20    0.000000         NaN
1236  36061007001     ovl10t14    CT20    0.000000         NaN
1237  36061007002     ovl10t14    CT20    0.000000         NaN
1236  36061007001     ovl15t19    CT20    0.000000         NaN
1237  36061007002     ovl15t19    CT20    0.000000         NaN
1236  36061007001     ovl20t24    CT20    0.000000         NaN
1237  36061007002     ovl20t24    CT20    0.000000         NaN
1236  36061007001     ovl25t29    CT20    0.000000         NaN
1237  36061007002     ovl25t29    CT20    0.000000         NaN
1236  36061007001     ovl30t34    CT20    0.000000         NaN
1237  36061007002     ovl30t34    CT20    0.000000         NaN
1236  36061007001     ovl35t39    CT20    0.000000         NaN
1237  36061007002     ovl35t39    CT20    0.000000         NaN
1236  36061007001     ovl40t49    CT20    0.000000     

In [29]:
# Step 3: pivot the two tracts so that census_geoid is the index and each
# pff_variable becomes a column; df_pivoted.e is then the frame of estimates.
df_pivoted = (
    df.loc[
        df["census_geoid"].isin(["36061007001", "36061007002"]),
        ["census_geoid", "pff_variable", "e"],
    ]
    .pivot(index="census_geoid", columns="pff_variable", values=["e"])
)
print(df_pivoted)

                     e                                                        \
pff_variable ov100t124 ov125t149 ov150t174  ov150t199m  ov175t199   ov1t149m   
census_geoid                                                                   
36061007001        0.0       0.0       0.0   93.781874  13.064142  83.517191   
36061007002        0.0       0.0       0.0  107.218126  14.935858  95.482809   

                                                          ...           \
pff_variable  ov200t249 ov250t299    ov2milpl  ov300t399  ... ovl25t29   
census_geoid                                              ...            
36061007001   23.795401       0.0  107.312592  30.794048  ...      0.0   
36061007002   27.204599       0.0  122.687408  35.205952  ...      0.0   

                                                                             \
pff_variable ovl30t34 ovl35t39 ovl40t49 ovl50t59 ovl60t69 ovl70t79 ovl80t89   
census_geoid                                                          

In [30]:
# Run the complete calculation for the same variable/geotype and show the two tracts
full_calc = calculate(pff_variable, geotype)
print(full_calc.loc[full_calc["census_geoid"].isin(["36061007001", "36061007002"])])

     census_geoid labs_geoid geotype labs_geotype pff_variable     c  \
1236  36061007001    1007001    CT20       CT2020         mdvl  25.5   
1237  36061007002    1007002    CT20       CT2020         mdvl  24.1   

              e         m      p   z  
1236  1286266.0  539404.0  100.0 NaN  
1237  1278841.0  506524.0  100.0 NaN  
