In [1]:
#Initialize
from factfinder.calculate import Calculate
from factfinder.median import Median
from factfinder.utils import get_median
from dotenv import load_dotenv
import os
import numpy as np
import pandas as pd

# pd.options.display.float_format = "{:,.18f}".format

# Load the API key (and any other settings) from the project-level .env file.
# load_dotenv() does not raise when the file is absent, so check for the file
# explicitly instead of relying on an exception to detect that case.
env_path = "../.env"
if not os.path.isfile(env_path):
    print(".env file is missing ...")
else:
    try:
        load_dotenv(dotenv_path=env_path)
    except OSError as err:
        # Keep the notebook running (API_KEY may already be exported in the
        # shell), but report why the file could not be read.
        print(f".env file could not be read: {err}")

# Raises KeyError here if API_KEY is neither in the .env file nor in the environment.
calculate = Calculate(
    api_key=os.environ["API_KEY"],
    year=2019,
    source="acs",
    geography="2010_to_2020",
)

In [2]:
## INPUTS -- edit the three values below to inspect another median
# Census tract GEOIDs to trace through the calculation
census_geoid_list = ["36005017902"]
# Geography type the calculation is run for
geotype = "CT20"
# Median variable whose estimate / MOE is reproduced step by step
pff_variable = "mdvl"

In [3]:
# Display the ratio rows of the selected 2020 tracts with every digit visible
ratio = calculate.geo.ratio
selected_tracts = ratio["geoid_ct2020"].isin(census_geoid_list)
ratio.loc[selected_tracts]


Unnamed: 0,geoid_ct2010,geoid_ct2020,ratio
121,36005017902,36005017902,1.0


In [4]:
# Pull the bin ranges and the design factor for this median from the metadata
metadata = calculate.meta
ranges = metadata.median_ranges(pff_variable)
design_factor = metadata.median_design_factor(pff_variable)
print(f"\nDesign factor: {design_factor}")


Design factor: 1.4


In [12]:
# Calculate the median's input variables (the keys of `ranges`) for the
# requested geotype, i.e. in 2020 geographies
input_variables = list(ranges.keys())
df = calculate.calculate_e_m_multiprocessing(input_variables, geotype)

In [6]:
# 3. Reshape the long table into one row per census_geoid with one column per
# pff_variable. Only the estimate column "e" is pivoted, so the estimates are
# reachable as df_pivoted.e
in_selected_tracts = df.census_geoid.isin(census_geoid_list)
estimates_long = df.loc[in_selected_tracts, ["census_geoid", "pff_variable", "e"]]
df_pivoted = estimates_long.pivot(
    index="census_geoid",
    columns="pff_variable",
    values=["e"],
).round(16)
df_pivoted

Unnamed: 0_level_0,e,e,e,e,e,e,e,e,e,e,e,e,e,e,e,e,e,e,e,e,e
pff_variable,ov100t124,ov125t149,ov150t174,ov150t199m,ov175t199,ov1t149m,ov200t249,ov250t299,ov2milpl,ov300t399,...,ovl25t29,ovl30t34,ovl35t39,ovl40t49,ovl50t59,ovl60t69,ovl70t79,ovl80t89,ovl90t99,ovlu10
census_geoid,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
36005017902,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Skeleton of the results table: one row per pivoted census_geoid, tagged with
# the variable and geotype being calculated
results = pd.DataFrame({"census_geoid": df_pivoted.index}).assign(
    pff_variable=pff_variable,
    geotype=geotype,
)
results

Unnamed: 0,census_geoid,pff_variable,geotype
0,36005017902,mdvl,CT20


In [8]:
# 4. Calculate the median estimate of every row with Median(...).median
range_columns = list(ranges.keys())
# Element-wise comparison of the pivot index with results.census_geoid;
# both were built from the same index, in the same order
row_mask = df_pivoted.e.index == results.census_geoid
estimate_inputs = df_pivoted.e.loc[row_mask, range_columns]
median_estimates = estimate_inputs.apply(
    lambda row: Median(ranges, row, design_factor).median, axis=1
)
results["e"] = median_estimates.to_list()

DEBUG:root:Cumulative frequency is 0
DEBUG:root:
MEDIAN: 0.0



In [9]:
# 5. Calculate the median MOE of every row with Median(...).median_moe
# The MOE calculation also needs the median estimate, so the bin estimates
# are copied into a separate dataframe and the median is appended as column "e"
e = df_pivoted.e.copy()
row_mask = e.index == results.census_geoid
e["e"] = results.loc[row_mask, "e"].to_list()
moe_columns = list(ranges.keys()) + ["e"]
median_moes = e.loc[row_mask, moe_columns].apply(
    lambda row: Median(ranges, row, design_factor).median_moe, axis=1
)
results["m"] = median_moes.to_list()

DEBUG:root:Cumulative frequency is 0
DEBUG:root:
MEDIAN: 0.0

DEBUG:root:Cumulative frequency is 0
DEBUG:root:
MEDIAN: 0.0

DEBUG:root:
MEDIAN STATS:
-----
Median = 0.0
Median_MOE = nan
B = 0.0
se_50 = nan
p_lower = nan
p_upper = nan
lower_bin = nan
upper_bin = nan
first_non_zero_bin = 0

DISTRIBUTION:
-----
- [0, 9999]: nan
- [10000, 14999]: nan
- [15000, 19999]: nan
- [20000, 24999]: nan
- [25000, 29999]: nan
- [30000, 34999]: nan
- [35000, 39999]: nan
- [40000, 49999]: nan
- [50000, 59999]: nan
- [60000, 69999]: nan
- [70000, 79999]: nan
- [80000, 89999]: nan
- [90000, 99999]: nan
- [100000, 124999]: nan
- [125000, 149999]: nan
- [150000, 174999]: nan
- [175000, 199999]: nan
- [200000, 249999]: nan
- [250000, 299999]: nan
- [300000, 399999]: nan
- [400000, 499999]: nan
- [500000, 749999]: nan
- [750000, 999999]: nan
- [1000000, 1499999]: nan
- [1500000, 1999999]: nan
- [2000000, 5000000]: nan


In [10]:
# Preview the assembled results: median estimate (e) and median MOE (m) per tract
results.head()

Unnamed: 0,census_geoid,pff_variable,geotype,e,m
0,36005017902,mdvl,CT20,0.0,


In [11]:
# Perform the full calculation (including cleaning/rounding) to show the display output
import logging

# basicConfig() does nothing once the root logger already has handlers, and the
# DEBUG:root records emitted by the median cells show that it does. Set the
# root logger's level explicitly so records below ERROR are really suppressed.
logging.basicConfig(level="ERROR")
logging.getLogger().setLevel(logging.ERROR)

full_calc = calculate(pff_variable, geotype)
full_calc.loc[full_calc.census_geoid.isin(census_geoid_list), :]

Unnamed: 0,census_geoid,labs_geoid,geotype,labs_geotype,pff_variable,c,e,m,p,z
116,36005017902,2017902,CT20,CT2020,mdvl,,0.0,0.0,,
