In [1]:
import pandas as pd
import sqlite3 as sq
import matplotlib as mpl
from matplotlib import rcParams
import matplotlib.pyplot as plt
import numpy as np

"The average size of households is defined as total population less group-quarters population divided by the number of households. Mean household income is defined as total personal income less estimated income of group-quarters population divided by the number of households" (From the W&P 2023 Technical Document)

In [2]:
# Load the [WPHouseholds_2017Base] table and reduce it to the
# persons-per-household series for the 14 Tennessee counties listed below.
conn = sq.connect('../../Data-Pipelines/Outputs/WoodsandPooleandAffiliated.db')
try:
    # read_sql already returns a DataFrame, so no extra pd.DataFrame() wrap is needed.
    sql_query = pd.read_sql('SELECT * FROM [WPHouseholds_2017Base]', conn)
finally:
    # sqlite3 connections are not closed automatically; release the handle
    # even if the query fails so re-running the cell does not leak connections.
    conn.close()

thelist = ['Cheatham County, Tennessee', 'Davidson County, Tennessee', 'Dickson County, Tennessee', 'Houston County, Tennessee', 
           'Humphreys County, Tennessee', 'Maury County, Tennessee', 'Montgomery County, Tennessee', 'Robertson County, Tennessee', 
           'Rutherford County, Tennessee', 'Stewart County, Tennessee', 'Sumner County, Tennessee', 'Williamson County, Tennessee', 
           'Wilson County, Tennessee', 'Trousdale County, Tennessee']

# Keep rows whose Year is not the literal string 'None' and whose NAME is one
# of the counties above; Year is compared as text, matching how it is stored.
keep_rows = (sql_query['Year'] != 'None') & sql_query['NAME'].isin(thelist)
initial = (
    sql_query
    .loc[keep_rows, ['NAME', 'Year', 'personsperhousehold']]
    .reset_index(drop = True)
    # Label the value column with its base so it stays distinguishable after merging.
    .rename(columns = {'personsperhousehold': 'Persons per Household 2017 Base'})
)

In [3]:
# Show the last five rows of the 2017-base frame as a quick check of the load.
initial.tail(n = 5)

Unnamed: 0,NAME,Year,Persons per Household 2017 Base
849,"Wilson County, Tennessee",2046,2.66
850,"Wilson County, Tennessee",2047,2.66
851,"Wilson County, Tennessee",2048,2.65
852,"Wilson County, Tennessee",2049,2.65
853,"Wilson County, Tennessee",2050,2.64


In [4]:
# List the distinct county names that survived the filter.
initial.loc[:, 'NAME'].unique()

array(['Cheatham County, Tennessee', 'Davidson County, Tennessee',
       'Dickson County, Tennessee', 'Houston County, Tennessee',
       'Humphreys County, Tennessee', 'Maury County, Tennessee',
       'Montgomery County, Tennessee', 'Robertson County, Tennessee',
       'Rutherford County, Tennessee', 'Stewart County, Tennessee',
       'Sumner County, Tennessee', 'Trousdale County, Tennessee',
       'Williamson County, Tennessee', 'Wilson County, Tennessee'],
      dtype=object)

In [5]:
# Load the [WPHouseholds_2023Base] table and reduce it to the
# persons-per-household series for the 14 Tennessee counties listed below.
conn = sq.connect('../../Data-Pipelines/Outputs/WoodsandPooleandAffiliated.db')
try:
    # read_sql already returns a DataFrame, so no extra pd.DataFrame() wrap is needed.
    sql_query = pd.read_sql('SELECT * FROM [WPHouseholds_2023Base]', conn)
finally:
    # sqlite3 connections are not closed automatically; release the handle
    # even if the query fails so re-running the cell does not leak connections.
    conn.close()

thelist = ['Cheatham County, Tennessee', 'Davidson County, Tennessee', 'Dickson County, Tennessee', 'Houston County, Tennessee', 
           'Humphreys County, Tennessee', 'Maury County, Tennessee', 'Montgomery County, Tennessee', 'Robertson County, Tennessee', 
           'Rutherford County, Tennessee', 'Stewart County, Tennessee', 'Sumner County, Tennessee', 'Williamson County, Tennessee', 
           'Wilson County, Tennessee', 'Trousdale County, Tennessee']

# Keep rows whose Year is not the literal string 'None' and whose NAME is one
# of the counties above; Year is compared as text, matching how it is stored.
keep_rows = (sql_query['Year'] != 'None') & sql_query['NAME'].isin(thelist)
initial1 = (
    sql_query
    .loc[keep_rows, ['NAME', 'Year', 'personsperhousehold']]
    .reset_index(drop = True)
    # Label the value column with its base so it stays distinguishable after merging.
    .rename(columns = {'personsperhousehold': 'Persons per Household 2023 Base'})
)

In [6]:
# Show the first five rows of the 2023-base frame as a quick check of the load.
initial1.head(n = 5)

Unnamed: 0,NAME,Year,Persons per Household 2023 Base
0,"Cheatham County, Tennessee",1990,2.83
1,"Cheatham County, Tennessee",1991,2.83
2,"Cheatham County, Tennessee",1992,2.81
3,"Cheatham County, Tennessee",1993,2.82
4,"Cheatham County, Tennessee",1994,2.83


In [7]:
# Combine the two bases side by side, one row per (NAME, Year); the outer join
# keeps rows that appear in only one of the two frames.
data = initial.merge(
    right = initial1,
    how = 'outer',
    on = ['NAME', 'Year'],
)

In [8]:
# Restrict the comparison to the selected years; Year values are strings,
# so the list holds strings as well.
years = ['2020', '2025', '2035', '2045']
in_selected_years = data['Year'].isin(years)
data = data.loc[in_selected_years]

In [9]:
data.head()

Unnamed: 0,NAME,Year,Persons per Household 2017 Base,Persons per Household 2023 Base
30,"Cheatham County, Tennessee",2020,2.45,2.51
35,"Cheatham County, Tennessee",2025,2.44,2.45
45,"Cheatham County, Tennessee",2035,2.48,2.43
55,"Cheatham County, Tennessee",2045,2.48,2.44
91,"Davidson County, Tennessee",2020,2.31,2.28


In [10]:
# Absolute change (2023 base minus 2017 base) and the same change expressed
# as a percentage of the 2017-base value.
base_2017 = data['Persons per Household 2017 Base']
base_2023 = data['Persons per Household 2023 Base']
change = base_2023 - base_2017
data['Difference 2023-2017 Base'] = change
data['Difference % 2023-2017 Base'] = (change / base_2017)*100

In [11]:
# Show the last five rows, including the two new difference columns.
data.tail(n = 5)

Unnamed: 0,NAME,Year,Persons per Household 2017 Base,Persons per Household 2023 Base,Difference 2023-2017 Base,Difference % 2023-2017 Base
787,"Williamson County, Tennessee",2045,2.78,2.63,-0.15,-5.395683
823,"Wilson County, Tennessee",2020,2.57,2.73,0.16,6.225681
828,"Wilson County, Tennessee",2025,2.57,2.63,0.06,2.33463
838,"Wilson County, Tennessee",2035,2.64,2.6,-0.04,-1.515152
848,"Wilson County, Tennessee",2045,2.66,2.6,-0.06,-2.255639


In [12]:
# Confirm that only the selected years remain.
data.loc[:, 'Year'].unique()

array(['2020', '2025', '2035', '2045'], dtype=object)

In [13]:
# Disabled filters, kept for reference only (nothing in this cell executes):
#   1. drop the rows whose NAME is 'GNRC' or 'MPO'
#   2. keep only the rows where Year == '2045' and reset the index
#data = data.loc[(data['NAME'] != 'GNRC')&(data['NAME'] != 'MPO')]
#data = data.loc[(data['Year'] == '2045')].reset_index(drop = True)

In [14]:
# List the distinct county names present in the comparison table.
data.loc[:, 'NAME'].unique()

array(['Cheatham County, Tennessee', 'Davidson County, Tennessee',
       'Dickson County, Tennessee', 'Houston County, Tennessee',
       'Humphreys County, Tennessee', 'Maury County, Tennessee',
       'Montgomery County, Tennessee', 'Robertson County, Tennessee',
       'Rutherford County, Tennessee', 'Stewart County, Tennessee',
       'Sumner County, Tennessee', 'Trousdale County, Tennessee',
       'Williamson County, Tennessee', 'Wilson County, Tennessee'],
      dtype=object)

In [16]:
# Write the comparison table to CSV without the DataFrame index column.
output_csv = '../data/personsperhousehold_projectioncomps.csv'
data.to_csv(output_csv, index = False)