# Checks: 2023-040 CCM Forecast Output (series 15)

In [1]:
# module imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

PATH = r'C:\Users\jchu\OneDrive - San Diego Association of Governments\Projects\2023\2023-040 CCM Forecast Output (series 15)\Data\Version 3'

In [2]:
# raw data
household_char = pd.read_excel(PATH + r'\Household_ characteristics_2020_2060_forQA_07072023.xlsx',
                              sheet_name = 'data')
household_char

Unnamed: 0,year,race,sex,age,control_pop,gq_pop,households,WithKids,AnySenior,HouseholderLF
0,2020,AIAN_NH,F,0,73,0,0,0,0,0
1,2020,AIAN_NH,F,1,55,0,0,0,0,0
2,2020,AIAN_NH,F,2,55,0,0,0,0,0
3,2020,AIAN_NH,F,3,74,0,0,0,0,0
4,2020,AIAN_NH,F,4,91,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
63709,2060,White_NH,M,106,7,0,5,0,5,0
63710,2060,White_NH,M,107,4,0,3,0,3,0
63711,2060,White_NH,M,108,2,0,1,0,1,0
63712,2060,White_NH,M,109,1,0,1,0,1,0


# 3. Households with kids comparable with age groups <18 

In [3]:
# Number of houesholds with kids
hh_w_kids = household_char[['year', 'race', 'WithKids']]\
                .groupby(['year', 'race'])\
                .sum()\
                .reset_index()\
                            
hh_w_kids

Unnamed: 0,year,race,WithKids
0,2020,AIAN_NH,1493
1,2020,Asian_NH,46498
2,2020,Black_NH,17839
3,2020,Hispanic,137396
4,2020,Multi-Race NH,16874
...,...,...,...
282,2060,Black_NH,17034
283,2060,Hispanic,145853
284,2060,Multi-Race NH,19856
285,2060,NHPI_NH,1770


In [4]:
# Number of kids in cohort
kids_pop = household_char[household_char['age']<18]\
                [['year', 'race', 'control_pop', 'gq_pop']]\
                .groupby(['year', 'race'])\
                .sum()\
                .reset_index()
kids_pop

Unnamed: 0,year,race,control_pop,gq_pop
0,2020,AIAN_NH,2622,16
1,2020,Asian_NH,74481,183
2,2020,Black_NH,31885,678
3,2020,Hispanic,336988,1411
4,2020,Multi-Race NH,67659,209
...,...,...,...,...
282,2060,Black_NH,29600,549
283,2060,Hispanic,237633,1080
284,2060,Multi-Race NH,69219,210
285,2060,NHPI_NH,2420,0


In [5]:
hh_kids = hh_w_kids.merge(kids_pop, how='outer', on=['year', 'race'])
hh_kids['hh_pop_kids'] = (hh_kids['control_pop']-hh_kids['gq_pop'])
hh_kids['kids/hh'] = hh_kids['hh_pop_kids']/hh_kids['WithKids']
hh_kids.sort_values(['race', 'year']).to_excel('Test 3 - Households with Kids.xlsx', index=False)
hh_kids.sort_values(['race', 'year'])

Unnamed: 0,year,race,WithKids,control_pop,gq_pop,hh_pop_kids,kids/hh
0,2020,AIAN_NH,1493,2622,16,2606,1.745479
7,2021,AIAN_NH,1587,2542,17,2525,1.591052
14,2022,AIAN_NH,1648,2500,19,2481,1.505461
21,2023,AIAN_NH,1601,2451,16,2435,1.520924
28,2024,AIAN_NH,1645,2427,16,2411,1.465653
...,...,...,...,...,...,...,...
258,2056,White_NH,131069,193804,962,192842,1.471301
265,2057,White_NH,130658,193231,959,192272,1.471567
272,2058,White_NH,130216,192647,957,191690,1.472093
279,2059,White_NH,129799,192080,953,191127,1.472484


In [6]:
hh_kids.sort_values(['kids/hh'])

Unnamed: 0,year,race,WithKids,control_pop,gq_pop,hh_pop_kids,kids/hh
266,2058,AIAN_NH,1698,1861,11,1850,1.089517
273,2059,AIAN_NH,1682,1847,11,1836,1.091558
280,2060,AIAN_NH,1667,1832,11,1821,1.092382
252,2056,AIAN_NH,1712,1901,12,1889,1.103388
259,2057,AIAN_NH,1691,1880,11,1869,1.105263
...,...,...,...,...,...,...,...
242,2054,Multi-Race NH,20097,70201,214,69987,3.482460
25,2023,Multi-Race NH,17281,61761,204,61557,3.562120
18,2022,Multi-Race NH,17170,61569,206,61363,3.573850
11,2021,Multi-Race NH,16980,64264,202,64062,3.772792


# 4. Households with senior comparable with age groups >= 65 

In [7]:
# Number of households with seniors
hh_w_senior = household_char[['year', 'race', 'AnySenior']]\
                .groupby(['year', 'race'])\
                .sum()\
                .reset_index()\
                            
hh_w_senior

Unnamed: 0,year,race,AnySenior
0,2020,AIAN_NH,1616
1,2020,Asian_NH,37616
2,2020,Black_NH,13282
3,2020,Hispanic,63294
4,2020,Multi-Race NH,7888
...,...,...,...
282,2060,Black_NH,15476
283,2060,Hispanic,137444
284,2060,Multi-Race NH,19808
285,2060,NHPI_NH,2480


In [8]:
# Number of seniors in cohort
senior_pop = household_char[household_char['age']>=65]\
                [['year', 'race', 'control_pop', 'gq_pop']]\
                .groupby(['year', 'race'])\
                .sum()\
                .reset_index()
senior_pop

Unnamed: 0,year,race,control_pop,gq_pop
0,2020,AIAN_NH,1892,60
1,2020,Asian_NH,63757,1377
2,2020,Black_NH,16294,1421
3,2020,Hispanic,86862,2330
4,2020,Multi-Race NH,8232,406
...,...,...,...,...
282,2060,Black_NH,19795,1926
283,2060,Hispanic,224266,7256
284,2060,Multi-Race NH,24435,1331
285,2060,NHPI_NH,4951,159


In [9]:
hh_senior = hh_w_senior.merge(senior_pop, how='outer', on=['year', 'race'])
hh_senior['hh_pop_senior'] = (hh_senior['control_pop']-hh_senior['gq_pop'])
hh_senior['senior/hh'] = hh_senior['hh_pop_senior']/hh_senior['AnySenior']
hh_senior.sort_values(['race', 'year']).to_excel('Test 4 - Households with senior.xlsx', index=False)
hh_senior.sort_values(['race', 'year'])

Unnamed: 0,year,race,AnySenior,control_pop,gq_pop,hh_pop_senior,senior/hh
0,2020,AIAN_NH,1616,1892,60,1832,1.133663
7,2021,AIAN_NH,1773,1982,56,1926,1.086294
14,2022,AIAN_NH,1795,2110,71,2039,1.135933
21,2023,AIAN_NH,1867,2223,84,2139,1.145688
28,2024,AIAN_NH,1945,2325,72,2253,1.158355
...,...,...,...,...,...,...,...
258,2056,White_NH,252057,346069,11107,334962,1.328914
265,2057,White_NH,250892,344426,11042,333384,1.328795
272,2058,White_NH,249747,342846,10977,331869,1.328821
279,2059,White_NH,248626,341340,10911,330429,1.329020


# 5. No households with householder age <15

Pass. No findings.

In [10]:
# age characteristics
age = household_char.groupby(['year', 'age']).sum().reset_index()
age['num_householders'] = age['households']
age['household_pop'] = age['control_pop']-age['gq_pop']
age['headship_rate'] = (age['household_pop']/age['num_householders'])*100
age

Unnamed: 0,year,age,control_pop,gq_pop,households,WithKids,AnySenior,HouseholderLF,num_householders,household_pop,headship_rate
0,2020,0,38519,0,0,0,0,0,0,38519,inf
1,2020,1,39536,2,0,0,0,0,0,39534,inf
2,2020,2,41869,0,0,0,0,0,0,41869,inf
3,2020,3,43136,2,0,0,0,0,0,43134,inf
4,2020,4,43893,13,0,0,0,0,0,43880,inf
...,...,...,...,...,...,...,...,...,...,...,...
4546,2060,106,56,1,31,0,31,0,31,55,177.419355
4547,2060,107,31,0,17,0,17,0,17,31,182.352941
4548,2060,108,16,0,7,0,7,0,7,16,228.571429
4549,2060,109,8,0,4,0,4,0,4,8,200.000000


In [11]:
age[(age['age']<15) & (age['num_householders']>0)]

Unnamed: 0,year,age,control_pop,gq_pop,households,WithKids,AnySenior,HouseholderLF,num_householders,household_pop,headship_rate


# 6. No householder in LF has age less than 15 years
Pass. No findings.

In [12]:
age[(age['HouseholderLF']>0) & (age['age']<15)]

Unnamed: 0,year,age,control_pop,gq_pop,households,WithKids,AnySenior,HouseholderLF,num_householders,household_pop,headship_rate


# 12. Households with and without children  

Compare forecast and distribution with version 2 

In [13]:
v2 = pd.read_excel(r'C:\Users\jchu\OneDrive - San Diego Association of Governments\Projects\2023\2023-040 CCM Forecast Output (series 15)\Data\Version 2\Household_ characteristics_2020_2060_05312023.xlsx',
                 sheet_name = 'data')
v2

Unnamed: 0,race,sex,age,year,control_pop,gq_pop,households,WithKids,AnySenior,HouseholderLF
0,AIAN_NH,F,0,2020,73,0,0,0,0,0
1,AIAN_NH,F,1,2020,55,0,0,0,0,0
2,AIAN_NH,F,2,2020,55,0,0,0,0,0
3,AIAN_NH,F,3,2020,74,0,0,0,0,0
4,AIAN_NH,F,4,2020,91,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
63709,White_NH,M,106,2060,7,0,5,0,5,0
63710,White_NH,M,107,2060,4,0,3,0,3,0
63711,White_NH,M,108,2060,2,0,1,0,1,0
63712,White_NH,M,109,2060,1,0,1,0,1,0


In [14]:
# v2 kids distribution
v2_kids = v2[['year', 'WithKids', 'race', 'households']]\
            .groupby(['year', 'race'])\
            .sum()\
            .reset_index()
v2_kids['WithoutKids'] = v2_kids['households'] - v2_kids['WithKids']
v2_kids['WithKidsPct'] = (v2_kids['WithKids']/v2_kids['households'])*100
v2_kids['WithoutKidsPct'] = (v2_kids['WithoutKids']/v2_kids['households'])*100
v2_kids

Unnamed: 0,year,race,WithKids,households,WithoutKids,WithKidsPct,WithoutKidsPct
0,2020,AIAN_NH,1493,4583,3090,32.576915,67.423085
1,2020,Asian_NH,46498,129286,82788,35.965224,64.034776
2,2020,Black_NH,17839,54617,36778,32.661992,67.338008
3,2020,Hispanic,137396,298604,161208,46.012779,53.987221
4,2020,Multi-Race NH,16874,43912,27038,38.426854,61.573146
...,...,...,...,...,...,...,...
282,2060,Black_NH,16700,53304,36604,31.329731,68.670269
283,2060,Hispanic,142628,383807,241179,37.161386,62.838614
284,2060,Multi-Race NH,18530,59409,40879,31.190560,68.809440
285,2060,NHPI_NH,1777,5599,3822,31.737810,68.262190


In [15]:
# v3 kids distribution
v3_kids = household_char[['year', 'WithKids', 'race', 'households']]\
            .groupby(['year', 'race'])\
            .sum()\
            .reset_index()
v3_kids['WithoutKids'] = v3_kids['households'] - v3_kids['WithKids']
v3_kids['WithKidsPct'] = (v3_kids['WithKids']/v3_kids['households'])*100
v3_kids['WithoutKidsPct'] = (v3_kids['WithoutKids']/v3_kids['households'])*100
v3_kids

Unnamed: 0,year,race,WithKids,households,WithoutKids,WithKidsPct,WithoutKidsPct
0,2020,AIAN_NH,1493,4583,3090,32.576915,67.423085
1,2020,Asian_NH,46498,129286,82788,35.965224,64.034776
2,2020,Black_NH,17839,54617,36778,32.661992,67.338008
3,2020,Hispanic,137396,298604,161208,46.012779,53.987221
4,2020,Multi-Race NH,16874,43912,27038,38.426854,61.573146
...,...,...,...,...,...,...,...
282,2060,Black_NH,17034,54455,37421,31.280874,68.719126
283,2060,Hispanic,145853,392122,246269,37.195822,62.804178
284,2060,Multi-Race NH,19856,63839,43983,31.103244,68.896756
285,2060,NHPI_NH,1770,5521,3751,32.059410,67.940590


In [16]:
v3_v2_diff = (v3_kids.set_index(['year', 'race'])-v2_kids.set_index(['year', 'race'])).reset_index()
v3_v2_diff

Unnamed: 0,year,race,WithKids,households,WithoutKids,WithKidsPct,WithoutKidsPct
0,2020,AIAN_NH,0,0,0,0.000000,0.000000
1,2020,Asian_NH,0,0,0,0.000000,0.000000
2,2020,Black_NH,0,0,0,0.000000,0.000000
3,2020,Hispanic,0,0,0,0.000000,0.000000
4,2020,Multi-Race NH,0,0,0,0.000000,0.000000
...,...,...,...,...,...,...,...
282,2060,Black_NH,334,1151,817,-0.048857,0.048857
283,2060,Hispanic,3225,8315,5090,0.034436,-0.034436
284,2060,Multi-Race NH,1326,4430,3104,-0.087316,0.087316
285,2060,NHPI_NH,-7,-78,-71,0.321599,-0.321599


In [17]:
# writer = pd.ExcelWriter(r'Test 12 - Households with and without children by race.xlsx', engine='xlsxwriter')
# v3_kids.to_excel(writer, sheet_name = 'v3', index=False)
# v2_kids.to_excel(writer, sheet_name = 'v2', index=False)
# v3_v2_diff.to_excel(writer, sheet_name = 'v3-v2', index=False)
# writer.save()

# 13. Households with and without senior  

Compare forecast and distribution with version 2 

In [18]:
# v2 seniors distribution
v2_seniors = v2[['year', 'AnySenior', 'race', 'households']]\
            .groupby(['year', 'race'])\
            .sum()\
            .reset_index()
v2_seniors['NoSenior'] = v2_seniors['households'] - v2_seniors['AnySenior']
v2_seniors['AnySeniorPct'] = (v2_seniors['AnySenior']/v2_seniors['households'])*100
v2_seniors['NoSeniorPct'] = (v2_seniors['NoSenior']/v2_seniors['households'])*100
v2_seniors

Unnamed: 0,year,race,AnySenior,households,NoSenior,AnySeniorPct,NoSeniorPct
0,2020,AIAN_NH,1616,4583,2967,35.260746,64.739254
1,2020,Asian_NH,37616,129286,91670,29.095184,70.904816
2,2020,Black_NH,13282,54617,41335,24.318436,75.681564
3,2020,Hispanic,63294,298604,235310,21.196635,78.803365
4,2020,Multi-Race NH,7888,43912,36024,17.963199,82.036801
...,...,...,...,...,...,...,...
282,2060,Black_NH,15064,53304,38240,28.260543,71.739457
283,2060,Hispanic,134955,383807,248852,35.162204,64.837796
284,2060,Multi-Race NH,19202,59409,40207,32.321702,67.678298
285,2060,NHPI_NH,2469,5599,3130,44.097160,55.902840


In [19]:
# v3 seniors distribution
v3_seniors = household_char[['year', 'AnySenior', 'race', 'households']]\
            .groupby(['year', 'race'])\
            .sum()\
            .reset_index()
v3_seniors['NoSenior'] = v3_seniors['households'] - v3_seniors['AnySenior']
v3_seniors['AnySeniorPct'] = (v3_seniors['AnySenior']/v3_seniors['households'])*100
v3_seniors['NoSeniorPct'] = (v3_seniors['NoSenior']/v3_seniors['households'])*100
v3_seniors

Unnamed: 0,year,race,AnySenior,households,NoSenior,AnySeniorPct,NoSeniorPct
0,2020,AIAN_NH,1616,4583,2967,35.260746,64.739254
1,2020,Asian_NH,37616,129286,91670,29.095184,70.904816
2,2020,Black_NH,13282,54617,41335,24.318436,75.681564
3,2020,Hispanic,63294,298604,235310,21.196635,78.803365
4,2020,Multi-Race NH,7888,43912,36024,17.963199,82.036801
...,...,...,...,...,...,...,...
282,2060,Black_NH,15476,54455,38979,28.419796,71.580204
283,2060,Hispanic,137444,392122,254678,35.051336,64.948664
284,2060,Multi-Race NH,19808,63839,44031,31.028055,68.971945
285,2060,NHPI_NH,2480,5521,3041,44.919399,55.080601


In [20]:
v3_v2_diff = (v3_seniors.set_index(['year', 'race'])-v2_seniors.set_index(['year', 'race'])).reset_index()
v3_v2_diff

Unnamed: 0,year,race,AnySenior,households,NoSenior,AnySeniorPct,NoSeniorPct
0,2020,AIAN_NH,0,0,0,0.000000,0.000000
1,2020,Asian_NH,0,0,0,0.000000,0.000000
2,2020,Black_NH,0,0,0,0.000000,0.000000
3,2020,Hispanic,0,0,0,0.000000,0.000000
4,2020,Multi-Race NH,0,0,0,0.000000,0.000000
...,...,...,...,...,...,...,...
282,2060,Black_NH,412,1151,739,0.159253,-0.159253
283,2060,Hispanic,2489,8315,5826,-0.110868,0.110868
284,2060,Multi-Race NH,606,4430,3824,-1.293647,1.293647
285,2060,NHPI_NH,11,-78,-89,0.822238,-0.822238


In [21]:
# writer = pd.ExcelWriter(r'Test 13 - Households with and without senior by race.xlsx', engine='xlsxwriter')
# v3_seniors.to_excel(writer, sheet_name = 'v3', index=False)
# v2_seniors.to_excel(writer, sheet_name = 'v2', index=False)
# v3_v2_diff.to_excel(writer, sheet_name = 'v3-v2', index=False)
# writer.save()