In [1]:
import pandas as pd
import sqlite3 as sq
import matplotlib as mpl
from matplotlib import rcParams
import matplotlib.pyplot as plt
import numpy as np

"The average size of households is defined as total population less group-quarters population divided by the number of households. Mean household income is defined as total personal income less estimated income of group-quarters population divided by the number of households." (From the W&P 2023 Technical Document)

In [2]:
# Load the 2017-base household projections and reshape them to long format
# (one row per NAME / Year) under the value column 'Households 2017 Base'.
conn = sq.connect('../../Data-Pipelines/Outputs/Dem_Transpo_Housing_Collection.db')
try:
    # pd.read_sql already returns a DataFrame, so no extra pd.DataFrame() wrap is needed.
    sql_query = pd.read_sql('SELECT * FROM [WPURBANSIM_PopulationHousehold_CurrentandProjected_Annual_WideFormat_Noreplacement]', conn)
finally:
    # sqlite3 connections are not closed automatically; release the database
    # file as soon as the table is in memory, even if the read fails.
    conn.close()

# Counties and regional aggregates to keep. Row order in the result follows
# the order of the source table, not the order of this list.
thelist = ['Cheatham County, Tennessee', 'Davidson County, Tennessee', 'Dickson County, Tennessee', 'Houston County, Tennessee',
           'Humphreys County, Tennessee', 'Maury County, Tennessee', 'Montgomery County, Tennessee', 'Robertson County, Tennessee',
           'Rutherford County, Tennessee', 'Stewart County, Tennessee', 'Sumner County, Tennessee', 'Williamson County, Tennessee',
           'Wilson County, Tennessee', 'Trousdale County, Tennessee', 'GNRC', 'MPO', 'GNRC Region']

# Source column -> bare year label used in the long-format 'Year' column.
year_labels = {'Households 2017': '2017', 'Households 2020': '2020', 'Households 2025': '2025',
               'Households 2035': '2035', 'Households 2045': '2045'}

# An alternative considered earlier was to keep the wide layout and tag it with
# a 'Base' column; the long layout below is what the rest of the notebook uses.
initial = (
    sql_query[['NAME'] + list(year_labels)]
    .loc[lambda frame: frame['NAME'].isin(thelist)]
    .reset_index(drop = True)
    .rename(columns = year_labels)
    .melt(id_vars = ['NAME'], var_name = 'Year', value_name = 'Households 2017 Base')
)

In [3]:
# Preview the last five rows of the long-format 2017-base table.
initial.tail()

Unnamed: 0,NAME,Year,Households 2017 Base
80,"Williamson County, Tennessee",2045,196361.0
81,"Wilson County, Tennessee",2045,96057.0
82,GNRC,2045,1171266.0
83,GNRC Region,2045,1216853.0
84,MPO,2045,1014756.0


In [4]:
# List the distinct NAME values that remain after filtering to `thelist`.
initial['NAME'].unique()

array(['Cheatham County, Tennessee', 'Davidson County, Tennessee',
       'Dickson County, Tennessee', 'Houston County, Tennessee',
       'Humphreys County, Tennessee', 'Maury County, Tennessee',
       'Montgomery County, Tennessee', 'Robertson County, Tennessee',
       'Rutherford County, Tennessee', 'Stewart County, Tennessee',
       'Sumner County, Tennessee', 'Trousdale County, Tennessee',
       'Williamson County, Tennessee', 'Wilson County, Tennessee', 'GNRC',
       'GNRC Region', 'MPO'], dtype=object)

In [5]:
# Load the 2023-base household projections and reshape them to long format
# (one row per NAME / Year) under the value column 'Households 2023 Base'.
conn = sq.connect('../../Data-Pipelines/Outputs/Dem_Transpo_Housing_Collection.db')
try:
    # pd.read_sql already returns a DataFrame, so no extra pd.DataFrame() wrap is needed.
    sql_query = pd.read_sql('SELECT * FROM [WPURBANSIM_PopulationHousehold_CurrentandProjected_Annual_WideFormat_Noreplacement_2023Base]', conn)
finally:
    # sqlite3 connections are not closed automatically; release the database
    # file as soon as the table is in memory, even if the read fails.
    conn.close()

# Counties and regional aggregates to keep. Row order in the result follows
# the order of the source table, not the order of this list.
thelist = ['Cheatham County, Tennessee', 'Davidson County, Tennessee', 'Dickson County, Tennessee', 'Houston County, Tennessee',
           'Humphreys County, Tennessee', 'Maury County, Tennessee', 'Montgomery County, Tennessee', 'Robertson County, Tennessee',
           'Rutherford County, Tennessee', 'Stewart County, Tennessee', 'Sumner County, Tennessee', 'Williamson County, Tennessee',
           'Wilson County, Tennessee', 'Trousdale County, Tennessee', 'GNRC', 'MPO', 'GNRC Region']

# Source column -> bare year label used in the long-format 'Year' column.
year_labels_2023 = {'Households 2017': '2017', 'Households 2020': '2020', 'Households 2025': '2025',
                    'Households 2035': '2035', 'Households 2045': '2045'}

# An alternative considered earlier was to keep the wide layout and tag it with
# a 'Base' column; the long layout below is what the rest of the notebook uses.
initial1 = (
    sql_query[['NAME'] + list(year_labels_2023)]
    .loc[lambda frame: frame['NAME'].isin(thelist)]
    .reset_index(drop = True)
    .rename(columns = year_labels_2023)
    .melt(id_vars = ['NAME'], var_name = 'Year', value_name = 'Households 2023 Base')
)

In [6]:
# Preview the first five rows of the long-format 2023-base table.
initial1.head()

Unnamed: 0,NAME,Year,Households 2023 Base
0,"Cheatham County, Tennessee",2017,16002.0
1,"Davidson County, Tennessee",2017,292104.0
2,"Dickson County, Tennessee",2017,20919.0
3,"Houston County, Tennessee",2017,3515.0
4,"Humphreys County, Tennessee",2017,7951.0


In [7]:
# Outer-join the two projection vintages on NAME and Year so every row carries
# both 'Households 2017 Base' and 'Households 2023 Base'.
data = pd.merge(initial, initial1, how = 'outer', on = ['NAME', 'Year'])

In [8]:
# Preview the merged table: one row per NAME / Year with both household columns.
data.head()

Unnamed: 0,NAME,Year,Households 2017 Base,Households 2023 Base
0,"Cheatham County, Tennessee",2017,16345.0,16002.0
1,"Davidson County, Tennessee",2017,285326.0,292104.0
2,"Dickson County, Tennessee",2017,21352.0,20919.0
3,"Houston County, Tennessee",2017,3591.0,3515.0
4,"Humphreys County, Tennessee",2017,7884.0,7951.0


In [9]:
# Absolute change: 2023-base households minus 2017-base households.
data['Difference 2023-2017 Base'] = data['Households 2023 Base'].sub(data['Households 2017 Base'])
# Relative change, as a percentage of the 2017-base value.
data['Difference % 2023-2017 Base'] = (
    data['Difference 2023-2017 Base']
    .div(data['Households 2017 Base'])
    .mul(100)
)

In [10]:
# Preview the last five rows, now including the two difference columns.
data.tail()

Unnamed: 0,NAME,Year,Households 2017 Base,Households 2023 Base,Difference 2023-2017 Base,Difference % 2023-2017 Base
80,"Williamson County, Tennessee",2045,196361.0,166468.0,-29893.0,-15.223491
81,"Wilson County, Tennessee",2045,96057.0,85752.0,-10305.0,-10.728005
82,GNRC,2045,1171266.0,1128307.0,-42959.0,-3.667741
83,GNRC Region,2045,1216853.0,1179141.0,-37712.0,-3.099142
84,MPO,2045,1014756.0,985492.0,-29264.0,-2.883846


In [11]:
# List the distinct Year labels (they are strings, taken from the renamed column headers).
data['Year'].unique()

array(['2017', '2020', '2025', '2035', '2045'], dtype=object)

In [12]:
# Disabled filters, kept for reference. If re-enabled, the first drops the rows
# named 'GNRC' and 'MPO', and the second keeps only the rows where Year is '2045'.
#data = data.loc[(data['NAME'] != 'GNRC')&(data['NAME'] != 'MPO')]
#data = data.loc[(data['Year'] == '2045')].reset_index(drop = True)

In [13]:
# List the distinct NAME values present in the merged table.
data['NAME'].unique()

array(['Cheatham County, Tennessee', 'Davidson County, Tennessee',
       'Dickson County, Tennessee', 'Houston County, Tennessee',
       'Humphreys County, Tennessee', 'Maury County, Tennessee',
       'Montgomery County, Tennessee', 'Robertson County, Tennessee',
       'Rutherford County, Tennessee', 'Stewart County, Tennessee',
       'Sumner County, Tennessee', 'Trousdale County, Tennessee',
       'Williamson County, Tennessee', 'Wilson County, Tennessee', 'GNRC',
       'GNRC Region', 'MPO'], dtype=object)

In [15]:
# Write the comparison table (both vintages plus difference columns) to CSV,
# omitting the positional index.
output_csv = '../data/households_projectioncomps.csv'
data.to_csv(output_csv, index = False)