In [1]:
!pip -q install -r ./config_files/requirements.txt

In [2]:
import re

import numpy as np
import pandas as pd

In [3]:
# Turn off pandas' chained-assignment check for the whole session.
pd.set_option('mode.chained_assignment', None)

# Forecast Automation Tool Documentation

**Authors: CRA, EWA**

Before proceeding, please connect to the SANDAG network. Additionally, it is highly recommended to configure your Jupyter environment to include nbextensions, specifically the "Collapsible Headings" extension, in order to better view the threshold dictionaries.

Definitions of the dataset's features can be found here: https://github.com/SANDAG/ABM/wiki/input-files#input-files-data-dictionary

## Part 1
The purpose of Part 1 is to concatenate all the ABM data stored as CSV files in SANDAG's T-Drive and aggregate them at the MGRA, CPA, Jurisdiction, and Region levels. If you need to download data for a single datasource ID **OR** download data for two specific datasource IDs to compare, please run `initiate_window()` in a code cell below.

In [4]:
# Execute the Part 1 notebook from this kernel.
# NOTE(review): initiate_window(), mentioned above, is presumably defined there — confirm.
%run ./scripts/Part_1.ipynb

In [5]:
# Optional: uncomment to run initiate_window(), which the Part 1 text above describes as
# the way to download data for one datasource ID or for two IDs to compare.
# initiate_window()

## Part 2
Part 2 performs a series of checks that identify anomalies in the ABM data. The Part 2 checks will flag anomalies and output them in the directory: `part_2_outputs`.

In [6]:
# Execute the Part 2 notebook from this kernel.
# NOTE(review): the check functions called below (download_ds_data, check_cols, ...) are
# not defined in this notebook, so they presumably come from this run — confirm.
%run ./scripts/Part_2.ipynb

#### Inputs

The following cell should be filled out according to the desired checks.

- download_ds_data(first_ID, second_ID=None, folder='./outputs/'): Downloads necessary datafiles as dataframes to run the following checks on.
    - Inputs: first_ID, second_ID (optional), folder (***Make sure this is the directory path that contains Part 1 generated files***)
    - Outputs: dataframes that can be accessed: 
        - If second_ID is not provided: mgra_first, cpa_first, jur_first, reg_first
        - If second_ID is provided: mgra_first, cpa_first, jur_first, reg_first, mgra_second, cpa_second, jur_second, reg_second, mgra_both, mgra_diff

In [7]:
# First datasource ID.
first_ID = "DS41"
# Second datasource ID; only used when running the comparison functions.
second_ID = None
# Folder that contains all the files created in Part 1.
file_path = "./outputs/"

In [8]:
# Load the Part 1 output files for the chosen ID(s). Per the description above, this makes
# mgra_first, cpa_first, jur_first and reg_first available (plus the *_second, mgra_both
# and mgra_diff frames when second_ID is provided).
download_ds_data(first_ID, second_ID=second_ID, folder=file_path)

You have all the files you need to run the non-comparison functions


In [9]:
# Open an ODBC connection to the demographic_warehouse database using a trusted connection.
# NOTE(review): pyodbc is not imported in this notebook; presumably it is brought into
# scope by the %run of Part_2 above — confirm.
conn = pyodbc.connect(
    ';'.join([
        'Driver={ODBC Driver 17 for SQL Server}',
        'Server=DDAMWSQL16.sandag.org',
        'Database=demographic_warehouse',
        'Trusted_Connection=yes',
    ]) + ';'
)

In [10]:
# Series-14 rows of the MGRA dimension table: maps the warehouse mgra_id to the mgra
# number, CPA and jurisdiction columns.
dim_mgra = (
    "SELECT [mgra_id], [mgra], [cpa], [cpa_id], [jurisdiction] "
    "FROM [demographic_warehouse].[dim].[mgra_denormalize]  WHERE series=14"
)
d_mgra = pd.read_sql_query(dim_mgra, conn)

In [11]:
# Build the jobs query for the datasource chosen in the inputs cell instead of a
# hard-coded 41, so the SQL side always matches the CSV data loaded for first_ID.
# first_ID is expected to look like 'DS41'; anything else is rejected rather than
# silently querying a different datasource.
_ds_match = re.fullmatch(r'DS(\d+)', str(first_ID).strip(), flags=re.IGNORECASE)
if _ds_match is None:
    raise ValueError(f"first_ID must look like 'DS41', got {first_ID!r}")
# int() keeps the interpolated value strictly numeric, so the SQL text cannot be altered.
jobs_query = (
    "SELECT mgra_id, yr_id, jobs"
    "  FROM [demographic_warehouse].[fact].[jobs]"
    f"  WHERE datasource_id = {int(_ds_match.group(1))} AND yr_id >= 2016"
)

In [12]:
# Run jobs_query against the warehouse and keep the result as a DataFrame.
jobs = pd.read_sql_query(sql=jobs_query, con=conn)

In [13]:
# Display the jobs rows returned by jobs_query.
jobs

Unnamed: 0,mgra_id,yr_id,jobs
0,1400000101,2029,0
1,1400000102,2029,0
2,1400000201,2029,0
3,1400000202,2029,0
4,1400000301,2029,0
...,...,...,...
5326615,1401262501,2020,5
5326616,1401264201,2020,5
5326617,1401264401,2020,5
5326618,1401267501,2020,5


In [14]:
# Attach the MGRA dimension columns to every jobs row, matching on mgra_id.
jobs_merged = pd.merge(jobs, d_mgra, on='mgra_id')

In [15]:
# Keep only the year, the jobs count and the mgra number.
jobs_merged = jobs_merged.loc[:, ['yr_id', 'jobs', 'mgra']]

In [16]:
# Display the trimmed jobs/MGRA join.
jobs_merged

Unnamed: 0,yr_id,jobs,mgra
0,2029,0,1
1,2023,0,1
2,2045,0,1
3,2032,0,1
4,2016,0,1
...,...,...,...
5326615,2020,1,23002
5326616,2023,2,23002
5326617,2025,2,23002
5326618,2029,2,23002


In [17]:
# Total SQL jobs for each (mgra, yr_id) pair.
jobs_merged.groupby(['mgra', 'yr_id'])['jobs'].sum()

mgra   yr_id
1      2016     10
       2018     10
       2020     10
       2023      0
       2025     10
                ..
23002  2032      2
       2035      2
       2040      2
       2045      2
       2050      2
Name: jobs, Length: 299026, dtype: int64

In [18]:
# Display jobs_merged again (same frame as shown above; the groupby did not modify it).
jobs_merged

Unnamed: 0,yr_id,jobs,mgra
0,2029,0,1
1,2023,0,1
2,2045,0,1
3,2032,0,1
4,2016,0,1
...,...,...,...
5326615,2020,1,23002
5326616,2023,2,23002
5326617,2025,2,23002
5326618,2029,2,23002


In [19]:
# Display the CSV-side emp_total column of mgra_first for comparison with the SQL totals.
mgra_first['emp_total']#.sum()

mgra   year
1      2016    10
       2018    10
       2020    10
       2023    10
       2025    10
               ..
23002  2032     2
       2035     2
       2040     2
       2045     2
       2050     2
Name: emp_total, Length: 299026, dtype: int64

In [20]:
# Flatten the index into columns once, then keep every row whose year is not 2023.
mgra_first.reset_index().loc[lambda flat: flat['year'] != 2023]

Unnamed: 0,mgra,year,taz,hs,hs_Single_Family,hs_Multiple_Family,hs_Mobile_Homes,Household Population (hh),hh_Single_Family,hh_Multiple_Family,...,American Indian,Asian,Black,Hispanic,Other,Pacific Islander,Two or More,White,Female,Male
0,1,2016,3331,19,19,0,0,18,18,0,...,0.0,1.0,0.0,1.0,1.0,0.0,2.0,36.0,23.0,18.0
1,1,2018,3331,19,19,0,0,18,18,0,...,0.0,1.0,0.0,11.0,1.0,0.0,2.0,35.0,26.0,24.0
2,1,2020,3331,19,19,0,0,18,18,0,...,0.0,1.0,0.0,4.0,1.0,0.0,6.0,31.0,23.0,20.0
4,1,2025,3331,20,20,0,0,18,18,0,...,0.0,1.0,0.0,4.0,1.0,0.0,2.0,34.0,24.0,18.0
5,1,2026,3331,20,20,0,0,18,18,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299021,23002,2032,1254,120,20,100,0,98,17,81,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
299022,23002,2035,1254,120,20,100,0,98,17,81,...,2.0,44.0,3.0,52.0,1.0,1.0,3.0,133.0,125.0,114.0
299023,23002,2040,1254,120,20,100,0,102,18,84,...,1.0,69.0,2.0,60.0,0.0,1.0,14.0,109.0,136.0,120.0
299024,23002,2045,1254,120,20,100,0,109,20,89,...,0.0,77.0,5.0,69.0,1.0,0.0,9.0,102.0,134.0,129.0


In [21]:
# CSV emp_total values that are not equal to the SQL jobs total for the same index entry.
mgra_first['emp_total'].loc[
    ~mgra_first['emp_total'].eq(jobs_merged.groupby(['mgra', 'yr_id'])['jobs'].sum())
]

mgra   year
1      2023     10
       2026     10
       2029     10
       2032     10
3      2023      5
              ... 
22999  2032    131
23001  2023      7
       2026      7
       2029      7
       2032      7
Name: emp_total, Length: 63536, dtype: int64

In [22]:
# Years that appear in only one of the two sources: the 'year' level of the CSV index
# or the yr_id column of the warehouse's dim.yr table (symmetric difference).
yr_query = 'SELECT [yr_id] FROM [demographic_warehouse].[dim].[yr]'
d_year = pd.read_sql_query(sql=yr_query, con=conn)
unshared_years = set(mgra_first.index.get_level_values('year')).symmetric_difference(
    d_year['yr_id'].to_list()
)

In [23]:
# Display the years present in only one of the two sources.
unshared_years

{2000,
 2008,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2017,
 2019,
 2023,
 2026,
 2029,
 2032}

In [74]:
# CSV side: emp_total summed per (mgra, year), excluding the years in unshared_years.
csv_jobs = (
    mgra_first.reset_index()
    .loc[lambda flat: ~flat['year'].isin(unshared_years), ['mgra', 'year', 'emp_total']]
    .groupby(['mgra', 'year'])
    .sum()
)

In [75]:
# Display the CSV-side totals per (mgra, year).
csv_jobs

Unnamed: 0_level_0,Unnamed: 1_level_0,emp_total
mgra,year,Unnamed: 2_level_1
1,2016,10
1,2018,10
1,2020,10
1,2025,10
1,2030,10
...,...,...
23002,2030,2
23002,2035,2
23002,2040,2
23002,2045,2


In [72]:
# SQL side: jobs summed per (mgra, year), excluding the years in unshared_years.
# yr_id is renamed to 'year' so the index lines up with csv_jobs.
sql_jobs = (
    jobs_merged.loc[~jobs_merged['yr_id'].isin(unshared_years)]
    .rename(columns={'yr_id': 'year'})
    .groupby(['mgra', 'year'])
    .sum()
)

In [73]:
# Display the SQL-side totals per (mgra, year).
sql_jobs

Unnamed: 0_level_0,Unnamed: 1_level_0,jobs
mgra,year,Unnamed: 2_level_1
1,2016,10
1,2018,10
1,2020,10
1,2025,10
1,2030,10
...,...,...
23002,2030,2
23002,2035,2
23002,2040,2
23002,2045,2


In [77]:
# Put the CSV and SQL totals side by side, keeping every (mgra, year) row of csv_jobs.
combined_temp = csv_jobs.join(sql_jobs, how='left')

In [78]:
# Absolute gap between the SQL jobs total and the CSV emp_total for each row.
combined_temp['diffs'] = (combined_temp['jobs'] - combined_temp['emp_total']).abs()

In [79]:
# Display the side-by-side totals with their absolute difference.
combined_temp

Unnamed: 0_level_0,Unnamed: 1_level_0,emp_total,jobs,diffs
mgra,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2016,10,10,0
1,2018,10,10,0
1,2020,10,10,0
1,2025,10,10,0
1,2030,10,10,0
...,...,...,...,...
23002,2030,2,2,0
23002,2035,2,2,0
23002,2040,2,2,0
23002,2045,2,2,0


In [80]:
# Rows where the CSV and SQL totals disagree.
combined_temp.loc[combined_temp['diffs'].ne(0)]

Unnamed: 0_level_0,Unnamed: 1_level_0,emp_total,jobs,diffs
mgra,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5,2018,0,1,1
5,2025,1,0,1
5,2035,1,0,1
5,2040,1,0,1
8,2018,1,0,1
...,...,...,...,...
22980,2020,10,11,1
22998,2018,22,21,1
22998,2020,22,21,1
22998,2035,21,22,1


In [25]:
# Rebind sql_jobs as a Series of jobs totals indexed by (mgra, yr_id), excluding the
# years in unshared_years.
sql_jobs = (
    jobs_merged.loc[~jobs_merged['yr_id'].isin(unshared_years)]
    .groupby(['mgra', 'yr_id'])['jobs']
    .sum()
)

In [26]:
# Pair the CSV and SQL totals row by row. Both sides are re-indexed 0..n-1 by
# reset_index(), so the pairing is positional and relies on both listing the same
# keys in the same order.
temp = pd.DataFrame({
    'csv': csv_jobs.reset_index().loc[:, 'emp_total'],
    'sql': sql_jobs.reset_index().loc[:, 'jobs'],
})

In [27]:
# Display the CSV totals as fed into temp['csv'].
csv_jobs.reset_index()['emp_total']

0         10
1         10
2         10
3         10
4         10
          ..
207013     2
207014     2
207015     2
207016     2
207017     2
Name: emp_total, Length: 207018, dtype: int64

In [28]:
# Display the SQL totals as fed into temp['sql'].
sql_jobs.reset_index()['jobs']

0         10
1         10
2         10
3         10
4         10
          ..
207013     2
207014     2
207015     2
207016     2
207017     2
Name: jobs, Length: 207018, dtype: int64

In [29]:
# Display the positional CSV/SQL pairing.
temp

Unnamed: 0,csv,sql
0,10,10
1,10,10
2,10,10
3,10,10
4,10,10
...,...,...
207013,2,2
207014,2,2
207015,2,2
207016,2,2


In [30]:
# Absolute gap between the CSV and SQL totals for each paired row.
temp['diffs'] = (temp['csv'] - temp['sql']).abs()

In [31]:
# Paired rows whose totals disagree.
temp.loc[temp['diffs'].ne(0)]

Unnamed: 0,csv,sql,diffs
37,0,1,1
39,1,0,1
41,1,0,1
42,1,0,1
64,1,0,1
...,...,...,...
206813,10,11,1
206974,22,21,1
206975,22,21,1
206978,21,22,1


In [32]:
# Paired rows whose CSV and SQL totals agree (diffs == 0).
# The previous form, `~temp['diffs'] != 0`, applied `~` to the integer column first
# (bitwise NOT, e.g. ~0 == -1) and only then compared with 0, so it kept every row.
temp[temp['diffs'] == 0]

Unnamed: 0,csv,sql,diffs
0,10,10,0
1,10,10,0
2,10,10,0
3,10,10,0
4,10,10,0
...,...,...,...
207013,2,2,0
207014,2,2,0
207015,2,2,0
207016,2,2,0


In [33]:
# Flattened csv_jobs rows whose emp_total is not equal to the SQL jobs value at the same
# row position.
csv_jobs.reset_index().loc[
    lambda flat: ~flat['emp_total'].eq(sql_jobs.reset_index()['jobs'])
]

Unnamed: 0,index,emp_total
37,53,0
39,56,1
41,61,1
42,62,1
64,92,1
...,...,...
206813,298729,10
206974,298962,22
206975,298963,22
206978,298970,21


In [34]:
# Grand total of the jobs column across every row returned by jobs_query.
jobs['jobs'].sum()

16861609

In [35]:
# Number of SQL jobs rows per yr_id.
jobs['yr_id'].value_counts()

2029    409740
2023    409740
2045    409740
2032    409740
2016    409740
2050    409740
2035    409740
2025    409740
2018    409740
2026    409740
2040    409740
2030    409740
2020    409740
Name: yr_id, dtype: int64

In [36]:
# Number of CSV rows per year, for comparison with the SQL per-year row counts.
mgra_first.reset_index()['year'].value_counts()

2016    23002
2018    23002
2020    23002
2023    23002
2025    23002
2026    23002
2029    23002
2030    23002
2032    23002
2035    23002
2040    23002
2045    23002
2050    23002
Name: year, dtype: int64

In [None]:
# Display the CSV-side emp_total column.
mgra_first['emp_total']

In [None]:
# Grand total of emp_total across every row of mgra_first.
mgra_first['emp_total'].sum()

In [None]:
# List every column label of mgra_first, one per line (iterating a DataFrame yields
# its column labels).
for col in mgra_first:
    print(col)

In [None]:
# From the dim table, bring in the elementary, secondary and unified school district info.
query = "SELECT * FROM [demographic_warehouse].[dim].[mgra_denormalize]"
school_data = pd.read_sql_query(query, conn)

s_14_school_data = school_data[school_data['series'] == 14]  # This is forecast 14

# Take an explicit copy: a column is added below, and assigning into a slice of
# s_14_school_data is a chained assignment (it only goes unreported because that
# warning is disabled near the top of this notebook).
mgra_school_data = s_14_school_data[
    ['mgra_id', 'mgra', 'secondary', 'elementary', 'unified']
].copy()

# Label each row by which of the school district columns are populated.
conditions = [
    mgra_school_data['secondary'].isna()
    & mgra_school_data['elementary'].isna()
    & mgra_school_data['unified'].notna(),
    mgra_school_data['secondary'].notna()
    & mgra_school_data['elementary'].notna()
    & mgra_school_data['unified'].isna(),
]

values = ['Only Unified', 'S&E No Unified']

# np.select's implicit default is the integer 0, which has no common dtype with the
# string labels (recent NumPy raises TypeError for that mix). Rows matching neither
# pattern get the string '0', the value the implicit default yielded on NumPy
# versions that accepted the mix.
mgra_school_data['School Data Present'] = np.select(conditions, values, default='0')

# reset_index() without drop=True keeps the previous row labels as an 'index' column.
mgra_school_data = mgra_school_data.reset_index()

In [None]:
# 'unified' values for the rows labelled 'S&E No Unified'.
mgra_school_data.loc[mgra_school_data['School Data Present'] == 'S&E No Unified', 'unified']

In [None]:
# Frequency of 'elementary' values among the rows labelled 'Only Unified'.
mgra_school_data.loc[
    mgra_school_data['School Data Present'] == 'Only Unified', 'elementary'
].value_counts()

#### Input and Consistency Checks

These functions will compare the csv files to an ideal output and flag any anomalies that may occur.

- **check_cols(dataframe)**: checks that the necessary columns exist in the imported dataset and returns which columns are missing, or True if all columns exist.
    - Inputs: any geography level dataframe
    - Outputs: string stating check outcome


- **compare_totals(mgra_dataframe, jur_dataframe, reg_dataframe)**: checks that the totals of each column for each geography level (MGRA, jurisdiction, and region) match. Returns dictionary where keys are the non-MGRA geography levels and the value is a string describing how many columns match with the MGRA geography level.
    - Inputs: MGRA-level dataframe, jurisdiction-level dataframe, region-level dataframe
    - Outputs: dictionary containing strings stating the check outcome for each geography level


- **database_comparison(dataframe, first_ID)**: compares total population values (sum of database values for gender, ethnicity, and age groups) from SQL data to population values in the CSV values. Outputs the mismatched rows as an xlsx file with a sheet for each category called "database_pop_mismatches_DSID".
    - Inputs: MGRA-level dataframe, datasource-ID
    - Outputs: string stating status of output, creates an xlsx file with comparison differences for each category (ethn, income, age group, etc.)
    
   
- **check_vacancy_rate(mgra_dataframe)**: checks and flags any rows with a vacancy rate of at least 4 percent.
    - Inputs: MGRA-level dataframe
    - Outputs: dataframe with new `Flag` column which holds True values for rows with a vacancy rate of 4 percent or higher.
    
    
    
    
    
    
    

In [38]:
# Verify that the MGRA-level frame has the columns the checks need (see check_cols above).
check_cols(mgra_first)

'All desired columns exist.'

In [39]:
# Compare column totals of the MGRA frame against the jurisdiction and region frames;
# the result is a per-geography summary of how many columns matched.
compare_totals(mgra_first, jur_first, reg_first)

{'jurisdiction': '129 columns did not match out of 139 columns.',
 'region': 'all columns matched.'}

In [40]:
# Compare CSV population values with the SQL data for first_ID (see database_comparison
# above) and keep the returned mismatches for inspection below.
pop_mismatches = database_comparison(mgra_first, first_ID)

Output generated successfully.


In [41]:
# Display the 'Gender' entry of the population mismatch results.
pop_mismatches['Gender']

Unnamed: 0_level_0,Unnamed: 1_level_0,Female,Male,gender_total,pop
mgra,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
13338,2016,2.0,0.0,2.0,1
13338,2018,0.0,0.0,0.0,5
13338,2020,0.0,0.0,0.0,6
13338,2025,0.0,0.0,0.0,7
13338,2030,0.0,0.0,0.0,1
13338,2035,0.0,0.0,0.0,1
13338,2040,0.0,0.0,0.0,1
13338,2045,0.0,0.0,0.0,1
13338,2050,0.0,0.0,0.0,1
19598,2025,18.0,23.0,41.0,55


In [42]:
# Flag MGRA rows by vacancy rate (see check_vacancy_rate above); the result carries a
# `Flag` column.
vacancy_df = check_vacancy_rate(mgra_first)

In [43]:
# Display the vacancy-rate check result, including its `Flag` column.
vacancy_df

Unnamed: 0_level_0,Unnamed: 1_level_0,taz,hs,hs_Single_Family,hs_Multiple_Family,hs_Mobile_Homes,Household Population (hh),hh_Single_Family,hh_Multiple_Family,hh_Mobile_Homes,gq_civ,...,Asian,Black,Hispanic,Other,Pacific Islander,Two or More,White,Female,Male,Flag
mgra,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,2016,3331,19,19,0,0,18,18,0,0,0,...,1.0,0.0,1.0,1.0,0.0,2.0,36.0,23.0,18.0,True
1,2018,3331,19,19,0,0,18,18,0,0,0,...,1.0,0.0,11.0,1.0,0.0,2.0,35.0,26.0,24.0,True
1,2020,3331,19,19,0,0,18,18,0,0,0,...,1.0,0.0,4.0,1.0,0.0,6.0,31.0,23.0,20.0,True
1,2023,3331,20,20,0,0,18,18,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False
1,2025,3331,20,20,0,0,18,18,0,0,0,...,1.0,0.0,4.0,1.0,0.0,2.0,34.0,24.0,18.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23002,2032,1254,120,20,100,0,98,17,81,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False
23002,2035,1254,120,20,100,0,98,17,81,0,0,...,44.0,3.0,52.0,1.0,1.0,3.0,133.0,125.0,114.0,True
23002,2040,1254,120,20,100,0,102,18,84,0,0,...,69.0,2.0,60.0,0.0,1.0,14.0,109.0,136.0,120.0,True
23002,2045,1254,120,20,100,0,109,20,89,0,0,...,77.0,5.0,69.0,1.0,0.0,9.0,102.0,134.0,129.0,True


#### Threshold Analysis Checks

These functions will calculate differences across datasources, years, or proportions, and identify any anomalies using specified thresholds. Thresholds should be specified using dictionaries containing both the value threshold and the percentage threshold. The functions use **OR** logic, so specifying multiple column thresholds will flag any rows that meet any of the specified thresholds.

- **yearly_diff_threshold(dataframe, threshold dictionary)**: given a dictionary with columns and thresholds, flags any differences between years that meet the threshold level. 
    - Inputs: Any geography level dataframe, thresholds dictionary with both actual and percentage thresholds (*The input dictionary should have 0 values for columns without a specified threshold.*)
    - Outputs: dataframe with differences by year with a `Flag` column that indicates whether the specified threshold(s) were met.


- **ds_diff_threshold(mgra_diff, mgra_second, threshold dictionary)**: given a dictionary with columns and thresholds, flags any differences between datasource_id's that meet the threshold level.
    - Inputs: datasource-ID difference MGRA-level dataframe, second datasource-ID MGRA-level dataframe, thresholds dictionary with both actual and percentage thresholds (*The input dictionary should have 0 values for columns without a specified threshold.*)
    - Outputs: dataframe with differences by datasource-ID with a `Flag` column that indicates whether the specified threshold(s) were met.
    

- **shares(dataframe, threshold dictionary)**: given a dictionary with columns and thresholds, flags proportions of yearly percent change within designated columns that meet the threshold level.
    - Inputs: Any geography level dataframe, thresholds dictionary (*The input dictionary should have 0 values for columns without a specified threshold.*)
    - Outputs: dataframe with yearly percent change with a `Flag` column that indicates whether the specified threshold(s) were met.
    - Example: For an income category (\\$15,000 to \\$29,999), the value = (difference in that income category from 2016 to 2018) / (Sum of all the income categories)



##### Yearly Difference Thresholds

In [44]:
# Yearly-difference thresholds, keyed by column name. Every column starts with 0 for both
# 'value_threshold' and 'percentage_threshold' (0 = no threshold specified). To set one,
# update that column's entry after this cell, e.g.
#   year_thresholds['pop']['value_threshold'] = 500
# Each column gets its own inner dict, so editing one entry never affects another.
year_thresholds = {
    column: {'value_threshold': 0, 'percentage_threshold': 0}
    for column in [
        # housing structures, households and group quarters
        'taz',
        'hs',
        'hs_Single_Family',
        'hs_Multiple_Family',
        'hs_Mobile_Homes',
        'Household Population (hh)',
        'hh_Single_Family',
        'hh_Multiple_Family',
        'hh_Mobile_Homes',
        'gq_civ',
        'Group Quarters - Military (gq_mil)',
        # household income categories
        'Less than $15,000',
        '$15,000 to $29,999',
        '$30,000 to $44,999',
        '$45,000 to $59,999',
        '$60,000 to $74,999',
        '$75,000 to $99,999',
        '$100,000 to $124,999',
        '$125,000 to $149,999',
        '$150,000 to $199,999',
        '$200,000 or more',
        'hhs',
        'pop',
        'hhp',
        # employment
        'emp_Agricultural_and_Extractive',
        'emp_const_non_bldg_prod',
        'emp_const_non_bldg_Office',
        'emp_utilities_prod',
        'emp_utilities_Office',
        'emp_const_bldg_prod',
        'emp_const_bldg_Office',
        'emp_Manufacturing_prod',
        'emp_Manufacturing_Office',
        'emp_whsle_whs',
        'emp_trans',
        'emp_retail',
        'emp_prof_bus_svcs',
        'emp_prof_bus_svcs_bldg_maint',
        'emp_pvt_ed_k12',
        'emp_pvt_ed_post_k12_Other_Residential',
        'emp_health',
        'emp_personal_svcs_Office',
        'emp_amusement',
        'emp_hotel',
        'emp_restaurant_bar',
        'emp_personal_svcs_retail',
        'emp_religious',
        'emp_pvt_hh',
        'emp_state_local_Government_ent',
        'emp_fed_non_Military',
        'emp_fed_Military',
        'emp_state_local_Government_blue',
        'emp_state_local_Government_white',
        'emp_public_ed',
        'emp_own_occ_dwell_mgmt',
        'emp_fed_Government_accts',
        'emp_st_lcl_Government_accts',
        'emp_cap_accts',
        'emp_total',
        # enrollment
        'enrollgradekto8',
        'enrollgrade9to12',
        'collegeenroll',
        'othercollegeenroll',
        'adultschenrl',
        'ech_dist',
        'hch_dist',
        'pseudomsa',
        # parking
        'parkarea',
        'hstallsoth',
        'hstallssam',
        'hparkcost',
        'numfreehrs',
        'dstallsoth',
        'dstallssam',
        'dparkcost',
        'mstallsoth',
        'mstallssam',
        'mparkcost',
        # densities and bins
        'totint',
        'duden',
        'empden',
        'popden',
        'retempden',
        'totintbin',
        'empdenbin',
        'dudenbin',
        'zip09',
        # parks, beaches and hotel rooms
        'parkactive',
        'openspaceparkpreserve',
        'beachactive',
        'budgetroom',
        'economyroom',
        'luxuryroom',
        'midpriceroom',
        'upscaleroom',
        'hotelroomtotal',
        # geography and land
        'luz_id',
        'truckregiontype',
        'district27',
        'milestocoast',
        'acres',
        'effective_acres',
        'land_acres',
        'units',
        'vacancy',
        'unoccupiable',
        'vacancy_rate',
        'elem_population',
        'high_population',
        # age groups
        '10 to 14',
        '15 to 17',
        '18 and 19',
        '20 to 24',
        '25 to 29',
        '30 to 34',
        '35 to 39',
        '40 to 44',
        '45 to 49',
        '5 to 9',
        '50 to 54',
        '55 to 59',
        '60 and 61',
        '62 to 64',
        '65 to 69',
        '70 to 74',
        '75 to 79',
        '80 to 84',
        '85 and Older',
        'Under 5',
        # ethnicity
        'American Indian',
        'Asian',
        'Black',
        'Hispanic',
        'Other',
        'Pacific Islander',
        'Two or More',
        'White',
        # gender
        'Female',
        'Male',
    ]
}

In [45]:
# Compute year-over-year differences for mgra_first and flag rows that meet any threshold
# in year_thresholds (see yearly_diff_threshold above); the result has a `Flag` column.
year_thresh_df = yearly_diff_threshold(mgra_first, year_thresholds)

In [46]:
# Only the year-over-year rows that were flagged.
year_thresh_df.loc[year_thresh_df['Flag']]

Unnamed: 0_level_0,Unnamed: 1_level_0,taz,hs,hs_Single_Family,hs_Multiple_Family,hs_Mobile_Homes,Household Population (hh),hh_Single_Family,hh_Multiple_Family,hh_Mobile_Homes,gq_civ,...,Asian,Black,Hispanic,Other,Pacific Islander,Two or More,White,Female,Male,Flag
mgra,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,2016-2018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,10.0,0.0,0.0,0.0,-1.0,3.0,6.0,True
1,2018-2020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-7.0,0.0,0.0,4.0,-4.0,-3.0,-4.0,True
1,2020-2023,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-1.0,0.0,-4.0,-1.0,0.0,-6.0,-31.0,-23.0,-20.0,True
1,2023-2025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,4.0,1.0,0.0,2.0,34.0,24.0,18.0,True
1,2025-2026,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-1.0,0.0,-4.0,-1.0,0.0,-2.0,-34.0,-24.0,-18.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23002,2030-2032,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-35.0,-5.0,-51.0,-1.0,-1.0,-2.0,-134.0,-116.0,-114.0,True
23002,2032-2035,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,44.0,3.0,52.0,1.0,1.0,3.0,133.0,125.0,114.0,True
23002,2035-2040,0.0,0.0,0.0,0.0,0.0,4.0,1.0,3.0,0.0,0.0,...,25.0,-1.0,8.0,-1.0,0.0,11.0,-24.0,11.0,6.0,True
23002,2040-2045,0.0,0.0,0.0,0.0,0.0,7.0,2.0,5.0,0.0,0.0,...,8.0,3.0,9.0,1.0,-1.0,-5.0,-7.0,-2.0,9.0,True


##### DS Difference Thresholds (requires two DS_ID's)

In [47]:
# Per-column thresholds for the datasource-difference check; this dict is passed
# to ds_diff_threshold(...) in the next cell. Each column carries two settings,
# 'value_threshold' and 'percentage_threshold', and every one of them is 0 here.
# Edit the numbers on a column's line to tune that column.
# NOTE(review): how the two thresholds are compared/combined is defined in
# Part_2 (ds_diff_threshold) -- confirm there before relying on a value.
ds_thresholds = {
    'taz': {'value_threshold': 0, 'percentage_threshold': 0},
    'hs': {'value_threshold': 0, 'percentage_threshold': 0},
    'hs_Single_Family': {'value_threshold': 0, 'percentage_threshold': 0},
    'hs_Multiple_Family': {'value_threshold': 0, 'percentage_threshold': 0},
    'hs_Mobile_Homes': {'value_threshold': 0, 'percentage_threshold': 0},
    'Household Population (hh)': {'value_threshold': 0, 'percentage_threshold': 0},
    'hh_Single_Family': {'value_threshold': 0, 'percentage_threshold': 0},
    'hh_Multiple_Family': {'value_threshold': 0, 'percentage_threshold': 0},
    'hh_Mobile_Homes': {'value_threshold': 0, 'percentage_threshold': 0},
    'gq_civ': {'value_threshold': 0, 'percentage_threshold': 0},
    'Group Quarters - Military (gq_mil)': {'value_threshold': 0, 'percentage_threshold': 0},

    # Same columns as income_thresholds (Share Thresholds section).
    'Less than $15,000': {'value_threshold': 0, 'percentage_threshold': 0},
    '$15,000 to $29,999': {'value_threshold': 0, 'percentage_threshold': 0},
    '$30,000 to $44,999': {'value_threshold': 0, 'percentage_threshold': 0},
    '$45,000 to $59,999': {'value_threshold': 0, 'percentage_threshold': 0},
    '$60,000 to $74,999': {'value_threshold': 0, 'percentage_threshold': 0},
    '$75,000 to $99,999': {'value_threshold': 0, 'percentage_threshold': 0},
    '$100,000 to $124,999': {'value_threshold': 0, 'percentage_threshold': 0},
    '$125,000 to $149,999': {'value_threshold': 0, 'percentage_threshold': 0},
    '$150,000 to $199,999': {'value_threshold': 0, 'percentage_threshold': 0},
    '$200,000 or more': {'value_threshold': 0, 'percentage_threshold': 0},

    'hhs': {'value_threshold': 0, 'percentage_threshold': 0},
    'pop': {'value_threshold': 0, 'percentage_threshold': 0},
    'hhp': {'value_threshold': 0, 'percentage_threshold': 0},

    # Same columns as employment_thresholds (Share Thresholds section).
    'emp_Agricultural_and_Extractive': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_const_non_bldg_prod': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_const_non_bldg_Office': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_utilities_prod': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_utilities_Office': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_const_bldg_prod': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_const_bldg_Office': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_Manufacturing_prod': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_Manufacturing_Office': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_whsle_whs': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_trans': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_retail': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_prof_bus_svcs': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_prof_bus_svcs_bldg_maint': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_pvt_ed_k12': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_pvt_ed_post_k12_Other_Residential': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_health': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_personal_svcs_Office': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_amusement': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_hotel': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_restaurant_bar': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_personal_svcs_retail': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_religious': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_pvt_hh': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_state_local_Government_ent': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_fed_non_Military': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_fed_Military': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_state_local_Government_blue': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_state_local_Government_white': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_public_ed': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_own_occ_dwell_mgmt': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_fed_Government_accts': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_st_lcl_Government_accts': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_cap_accts': {'value_threshold': 0, 'percentage_threshold': 0},
    'emp_total': {'value_threshold': 0, 'percentage_threshold': 0},

    'enrollgradekto8': {'value_threshold': 0, 'percentage_threshold': 0},
    'enrollgrade9to12': {'value_threshold': 0, 'percentage_threshold': 0},
    'collegeenroll': {'value_threshold': 0, 'percentage_threshold': 0},
    'othercollegeenroll': {'value_threshold': 0, 'percentage_threshold': 0},
    'adultschenrl': {'value_threshold': 0, 'percentage_threshold': 0},
    'ech_dist': {'value_threshold': 0, 'percentage_threshold': 0},
    'hch_dist': {'value_threshold': 0, 'percentage_threshold': 0},
    'pseudomsa': {'value_threshold': 0, 'percentage_threshold': 0},
    'parkarea': {'value_threshold': 0, 'percentage_threshold': 0},
    'hstallsoth': {'value_threshold': 0, 'percentage_threshold': 0},
    'hstallssam': {'value_threshold': 0, 'percentage_threshold': 0},
    'hparkcost': {'value_threshold': 0, 'percentage_threshold': 0},
    'numfreehrs': {'value_threshold': 0, 'percentage_threshold': 0},
    'dstallsoth': {'value_threshold': 0, 'percentage_threshold': 0},
    'dstallssam': {'value_threshold': 0, 'percentage_threshold': 0},
    'dparkcost': {'value_threshold': 0, 'percentage_threshold': 0},
    'mstallsoth': {'value_threshold': 0, 'percentage_threshold': 0},
    'mstallssam': {'value_threshold': 0, 'percentage_threshold': 0},
    'mparkcost': {'value_threshold': 0, 'percentage_threshold': 0},
    'totint': {'value_threshold': 0, 'percentage_threshold': 0},
    'duden': {'value_threshold': 0, 'percentage_threshold': 0},
    'empden': {'value_threshold': 0, 'percentage_threshold': 0},
    'popden': {'value_threshold': 0, 'percentage_threshold': 0},
    'retempden': {'value_threshold': 0, 'percentage_threshold': 0},
    'totintbin': {'value_threshold': 0, 'percentage_threshold': 0},
    'empdenbin': {'value_threshold': 0, 'percentage_threshold': 0},
    'dudenbin': {'value_threshold': 0, 'percentage_threshold': 0},
    'zip09': {'value_threshold': 0, 'percentage_threshold': 0},
    'parkactive': {'value_threshold': 0, 'percentage_threshold': 0},
    'openspaceparkpreserve': {'value_threshold': 0, 'percentage_threshold': 0},
    'beachactive': {'value_threshold': 0, 'percentage_threshold': 0},
    'budgetroom': {'value_threshold': 0, 'percentage_threshold': 0},
    'economyroom': {'value_threshold': 0, 'percentage_threshold': 0},
    'luxuryroom': {'value_threshold': 0, 'percentage_threshold': 0},
    'midpriceroom': {'value_threshold': 0, 'percentage_threshold': 0},
    'upscaleroom': {'value_threshold': 0, 'percentage_threshold': 0},
    'hotelroomtotal': {'value_threshold': 0, 'percentage_threshold': 0},
    'luz_id': {'value_threshold': 0, 'percentage_threshold': 0},
    'truckregiontype': {'value_threshold': 0, 'percentage_threshold': 0},
    'district27': {'value_threshold': 0, 'percentage_threshold': 0},
    'milestocoast': {'value_threshold': 0, 'percentage_threshold': 0},
    'acres': {'value_threshold': 0, 'percentage_threshold': 0},
    'effective_acres': {'value_threshold': 0, 'percentage_threshold': 0},
    'land_acres': {'value_threshold': 0, 'percentage_threshold': 0},
    'units': {'value_threshold': 0, 'percentage_threshold': 0},
    'vacancy': {'value_threshold': 0, 'percentage_threshold': 0},
    'unoccupiable': {'value_threshold': 0, 'percentage_threshold': 0},
    'vacancy_rate': {'value_threshold': 0, 'percentage_threshold': 0},
    'elem_population': {'value_threshold': 0, 'percentage_threshold': 0},
    'high_population': {'value_threshold': 0, 'percentage_threshold': 0},

    '10 to 14': {'value_threshold': 0, 'percentage_threshold': 0},
    '15 to 17': {'value_threshold': 0, 'percentage_threshold': 0},
    '18 and 19': {'value_threshold': 0, 'percentage_threshold': 0},
    '20 to 24': {'value_threshold': 0, 'percentage_threshold': 0},
    '25 to 29': {'value_threshold': 0, 'percentage_threshold': 0},
    '30 to 34': {'value_threshold': 0, 'percentage_threshold': 0},
    '35 to 39': {'value_threshold': 0, 'percentage_threshold': 0},
    '40 to 44': {'value_threshold': 0, 'percentage_threshold': 0},
    '45 to 49': {'value_threshold': 0, 'percentage_threshold': 0},
    '5 to 9': {'value_threshold': 0, 'percentage_threshold': 0},
    '50 to 54': {'value_threshold': 0, 'percentage_threshold': 0},
    '55 to 59': {'value_threshold': 0, 'percentage_threshold': 0},
    '60 and 61': {'value_threshold': 0, 'percentage_threshold': 0},
    '62 to 64': {'value_threshold': 0, 'percentage_threshold': 0},
    '65 to 69': {'value_threshold': 0, 'percentage_threshold': 0},
    '70 to 74': {'value_threshold': 0, 'percentage_threshold': 0},
    '75 to 79': {'value_threshold': 0, 'percentage_threshold': 0},
    '80 to 84': {'value_threshold': 0, 'percentage_threshold': 0},
    '85 and Older': {'value_threshold': 0, 'percentage_threshold': 0},
    'Under 5': {'value_threshold': 0, 'percentage_threshold': 0},

    # Includes the columns used by ethnicity_thresholds (Share Thresholds section).
    'American Indian': {'value_threshold': 0, 'percentage_threshold': 0},
    'Asian': {'value_threshold': 0, 'percentage_threshold': 0},
    'Black': {'value_threshold': 0, 'percentage_threshold': 0},
    'Hispanic': {'value_threshold': 0, 'percentage_threshold': 0},
    'Other': {'value_threshold': 0, 'percentage_threshold': 0},
    'Pacific Islander': {'value_threshold': 0, 'percentage_threshold': 0},
    'Two or More': {'value_threshold': 0, 'percentage_threshold': 0},
    'White': {'value_threshold': 0, 'percentage_threshold': 0},

    'Female': {'value_threshold': 0, 'percentage_threshold': 0},
    'Male': {'value_threshold': 0, 'percentage_threshold': 0},
}

In [48]:
# Run the datasource-difference check only when a second datasource ID was
# supplied; mgra_diff and mgra_second are only produced by download_ds_data
# in that case.
if second_ID:
    ds_diff_example = ds_diff_threshold(mgra_diff, mgra_second, ds_thresholds)
    # Jupyter only auto-renders the last *top-level* expression of a cell, so a
    # bare name inside this `if` body would show nothing. Render it explicitly
    # (`display` is provided by the IPython kernel).
    display(ds_diff_example)

##### Share Thresholds

###### Employment

In [49]:
# Share thresholds for the employment columns, one entry per column; passed to
# shares(...) in the next cell. All thresholds are 0 here -- edit per column.
# NOTE(review): threshold units (presumably percentage points of share) are
# defined by `shares` in Part_2 -- confirm there.
employment_thresholds = {
    'emp_Agricultural_and_Extractive': 0,
    'emp_const_non_bldg_prod': 0,
    'emp_const_non_bldg_Office': 0,
    'emp_utilities_prod': 0,
    'emp_utilities_Office': 0,
    'emp_const_bldg_prod': 0,
    'emp_const_bldg_Office': 0,
    'emp_Manufacturing_prod': 0,
    'emp_Manufacturing_Office': 0,
    'emp_whsle_whs': 0,
    'emp_trans': 0,
    'emp_retail': 0,
    'emp_prof_bus_svcs': 0,
    'emp_prof_bus_svcs_bldg_maint': 0,
    'emp_pvt_ed_k12': 0,
    'emp_pvt_ed_post_k12_Other_Residential': 0,
    'emp_health': 0,
    'emp_personal_svcs_Office': 0,
    'emp_amusement': 0,
    'emp_hotel': 0,
    'emp_restaurant_bar': 0,
    'emp_personal_svcs_retail': 0,
    'emp_religious': 0,
    'emp_pvt_hh': 0,
    'emp_state_local_Government_ent': 0,
    'emp_fed_non_Military': 0,
    'emp_fed_Military': 0,
    'emp_state_local_Government_blue': 0,
    'emp_state_local_Government_white': 0,
    'emp_public_ed': 0,
    'emp_own_occ_dwell_mgmt': 0,
    'emp_fed_Government_accts': 0,
    'emp_st_lcl_Government_accts': 0,
    'emp_cap_accts': 0,
    'emp_total': 0,
}

In [50]:
# Run the share check on the first datasource's MGRA-level frame with the
# employment thresholds defined in the previous cell.
# NOTE(review): `shares` is defined in Part_2; judging by the output below it
# reports per-period changes in each column's share plus a Flag column -- confirm.
employment_shares = shares(mgra_first, threshold_dict=employment_thresholds)

In [51]:
# Preview the result: rows are indexed by (mgra, year range) and end with a Flag column.
employment_shares

Unnamed: 0_level_0,Unnamed: 1_level_0,emp_Agricultural_and_Extractive,emp_const_non_bldg_prod,emp_const_non_bldg_Office,emp_utilities_prod,emp_utilities_Office,emp_const_bldg_prod,emp_const_bldg_Office,emp_Manufacturing_prod,emp_Manufacturing_Office,emp_whsle_whs,...,emp_fed_Military,emp_state_local_Government_blue,emp_state_local_Government_white,emp_public_ed,emp_own_occ_dwell_mgmt,emp_fed_Government_accts,emp_st_lcl_Government_accts,emp_cap_accts,emp_total,Flag
mgra,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,2016-2018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True
1,2018-2020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True
1,2020-2023,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True
1,2023-2025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True
1,2025-2026,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23002,2030-2032,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True
23002,2032-2035,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True
23002,2035-2040,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True
23002,2040-2045,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True


###### Income

In [52]:
# Share thresholds for the household-income bracket columns; passed to
# shares(...) in the next cell. All thresholds are 0 here -- edit per column.
income_thresholds = {
    'Less than $15,000': 0,
    '$15,000 to $29,999': 0,
    '$30,000 to $44,999': 0,
    '$45,000 to $59,999': 0,
    '$60,000 to $74,999': 0,
    '$75,000 to $99,999': 0,
    '$100,000 to $124,999': 0,
    '$125,000 to $149,999': 0,
    '$150,000 to $199,999': 0,
    '$200,000 or more': 0,
}

In [53]:
# Run the share check on the first datasource's MGRA-level frame with the
# income thresholds defined in the previous cell.
# NOTE(review): `shares` is defined in Part_2; the output below suggests the
# values are per-period changes in share (percent) -- confirm there.
income_shares = shares(mgra_first, threshold_dict=income_thresholds)

In [54]:
# Preview the result: rows are indexed by (mgra, year range) and end with a Flag column.
income_shares

Unnamed: 0_level_0,Unnamed: 1_level_0,"Less than $15,000","$15,000 to $29,999","$30,000 to $44,999","$45,000 to $59,999","$60,000 to $74,999","$75,000 to $99,999","$100,000 to $124,999","$125,000 to $149,999","$150,000 to $199,999","$200,000 or more",Flag
mgra,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,2016-2018,-11.111111,5.555556,-11.111111,16.666667,-5.555556,-5.555556,0.000000,-11.111111,-16.666667,38.888889,True
1,2018-2020,11.111111,11.111111,-5.555556,-5.555556,0.000000,0.000000,-11.111111,0.000000,11.111111,-11.111111,True
1,2020-2023,0.000000,-5.555556,0.000000,0.000000,0.000000,11.111111,5.555556,11.111111,-5.555556,-16.666667,True
1,2023-2025,-5.555556,0.000000,5.555556,-5.555556,0.000000,-16.666667,11.111111,-5.555556,0.000000,16.666667,True
1,2025-2026,5.555556,-11.111111,-5.555556,-5.555556,0.000000,0.000000,-5.555556,-5.555556,0.000000,27.777778,True
...,...,...,...,...,...,...,...,...,...,...,...,...
23002,2030-2032,0.000000,0.000000,2.040816,-3.061224,1.020408,4.081633,-5.102041,-1.020408,-3.061224,5.102041,True
23002,2032-2035,-1.020408,-2.040816,0.000000,1.020408,4.081633,-5.102041,3.061224,-3.061224,1.020408,2.040816,True
23002,2035-2040,2.821128,0.660264,1.840736,1.760704,-2.440976,-2.521008,-3.421369,2.741096,1.600640,-3.041216,True
23002,2040-2045,-1.295197,0.350783,-0.314805,-5.027883,-1.547041,6.583918,-4.236373,-1.421119,4.812017,2.095701,True


###### Ethnicities

In [55]:
# Share thresholds for the ethnicity columns; passed to shares(...) in the
# next cell. All thresholds are 0 here -- edit per column.
ethnicity_thresholds = {
    'Hispanic': 0,
    'White': 0,
    'Black': 0,
    'American Indian': 0,
    'Asian': 0,
    'Pacific Islander': 0,
    'Other': 0,
    'Two or More': 0,
}

In [56]:
# Run the share check on the first datasource's MGRA-level frame with the
# ethnicity thresholds defined in the previous cell.
# NOTE(review): `shares` is defined in Part_2; the output below suggests the
# values are per-period changes in share (percent) -- confirm there.
ethnicity_shares = shares(mgra_first, threshold_dict=ethnicity_thresholds)

In [57]:
# Preview the result: rows are indexed by (mgra, year range) and end with a Flag column.
ethnicity_shares

Unnamed: 0_level_0,Unnamed: 1_level_0,Hispanic,White,Black,American Indian,Asian,Pacific Islander,Other,Two or More,Flag
mgra,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,2016-2018,19.560976,-17.804878,0.000000,0.000000,-0.439024,0.000000,-0.439024,-0.878049,True
1,2018-2020,-12.697674,2.093023,0.000000,0.000000,0.325581,0.000000,0.325581,9.953488,True
1,2035-2040,0.000000,-2.564103,0.000000,2.564103,0.000000,0.000000,0.000000,0.000000,True
1,2040-2045,7.852564,-3.365385,0.000000,-2.564103,1.121795,0.000000,-0.480769,-2.564103,True
1,2045-2050,9.095528,-15.396341,0.000000,7.317073,-1.371951,0.000000,0.355691,0.000000,True
...,...,...,...,...,...,...,...,...,...,...
23001,2040-2045,-4.602967,0.774433,0.010908,0.002727,5.350131,0.000000,0.000000,-1.535231,True
23001,2045-2050,3.256813,1.052490,-1.068600,-0.010740,-4.148208,0.000000,0.000000,0.918244,True
23002,2035-2040,1.680178,-13.070411,-0.473980,-0.446195,8.543083,-0.027785,-0.418410,4.213520,True
23002,2040-2045,2.798241,-3.794855,1.119891,-0.390625,2.324442,-0.390625,0.380228,-2.046697,True


###### Custom

In [58]:
# custom_thresholds = {'column 1': 0,
#  'column 2': 0,
#  'column 3': 0}

In [59]:
# custom_shares = shares(mgra_first, threshold_dict=custom_thresholds)

In [60]:
# custom_shares