###### Imports and Settings

In [50]:
import pandas as pd
import numpy as np
import requests
from collections import deque
from functools import reduce
import pickle
import matplotlib.pyplot as plt
#pd.options.display.float_format = '{:,.0f}'.format
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 150)
pd.options.mode.chained_assignment = None  # default='warn'
pd.options.mode.chained_assignment = None  # default='warn'
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
import sys
sys.path.append("..") # Adds higher directory to python modules path
import geodict
namestocommon = geodict.namestocommon
geotogeoid = geodict.geotogeoid
GNRC = geodict.GNRC
KY = geodict.KY
censusplaces = geodict.censusplaces
shorttnplaces = geodict.shorttnplaces
import sqlite3 as sq
#functions
def percentchange(x, y):
    try:
        return ((x - y)*100/y)
    except ZeroDivisionError:
        return 0
def realchange(x, y):
    return x-y
#calculate real and percent change between all columns for all possible time frames
def calculate_changes(df, columns, time_frames, years):
    for column in columns:
        for time_frame in time_frames:
            start_year, end_year = time_frame.split('-')
            df[f'{column} % Change', 'None', f'{time_frame}'] = percentchange(df[(column, int(end_year), 'None')], df[(column, int(start_year), 'None')])
            df[f'{column} Change', 'None', f'{time_frame}'] = (df[(column, int(end_year), 'None')] - df[(column, int(start_year), 'None')])

    return df
#generate all possible time frames from a list of years
def generate_time_frames(years):
    time_frames = []
    for i in range(len(years)-1):
        for j in range(i+1, len(years)):
            time_frames.append(f"{years[i]}-{years[j]}")
    return time_frames

In [51]:
#this already has the 2020 PL number for 2020 population
data = pd.read_csv('../../Data Downloads/CensusBureau_HistoricalPopulation.csv')
data = data[['NAME', '1900','1910', '1920', '1930', '1940', '1950', '1960', '1970', '1980', '1990', '2000', '2010', '2020']]
data = data.set_index(['NAME'])
#data.columns = pd.MultiIndex.from_product([['Population'], data.columns, ['None']])
data.head()

Unnamed: 0_level_0,1900,1910,1920,1930,1940,1950,1960,1970,1980,1990,2000,2010,2020
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
"Adams city, Tennessee",0,542,672,0,0,0,0,458,600,587,566,633,624
"Ashland City town, Tennessee",0,641,649,712,957,1024,1400,2027,2329,2552,3641,4541,5193
"Belle Meade city, Tennessee",0,0,0,0,2061,2831,3082,0,3182,2839,2943,2912,2901
"Berry Hill city, Tennessee",0,0,0,0,0,1248,1551,0,1113,802,674,537,2112
"Brentwood city, Tennessee",0,0,0,0,0,0,0,0,9431,16392,23445,37060,45373


In [52]:
data = data.transpose()
GNRCCounties = [data['Stewart County, Tennessee'],data['Montgomery County, Tennessee'],
                data['Houston County, Tennessee'],data['Humphreys County, Tennessee'],
                data['Dickson County, Tennessee'],data['Cheatham County, Tennessee'],
                data['Robertson County, Tennessee'],data['Sumner County, Tennessee'],
                data['Davidson County, Tennessee'],data['Wilson County, Tennessee'],
                data['Trousdale County, Tennessee'],data['Williamson County, Tennessee'],
                data['Rutherford County, Tennessee']]
data['GNRC'] = sum(GNRCCounties)
GNRCCountiesAll = [data['Stewart County, Tennessee'],data['Montgomery County, Tennessee'],
                   data['Houston County, Tennessee'],data['Humphreys County, Tennessee'],
                   data['Dickson County, Tennessee'],data['Cheatham County, Tennessee'],
                   data['Robertson County, Tennessee'],data['Sumner County, Tennessee'],
                   data['Davidson County, Tennessee'],data['Wilson County, Tennessee'],
                   data['Trousdale County, Tennessee'],data['Williamson County, Tennessee'],
                   data['Rutherford County, Tennessee'],data['Maury County, Tennessee']]
data['GNRC Region'] = sum(GNRCCountiesAll)
MPOCounties = [data['Robertson County, Tennessee'],data['Sumner County, Tennessee'],
               data['Davidson County, Tennessee'],data['Wilson County, Tennessee'],
               data['Williamson County, Tennessee'],data['Rutherford County, Tennessee'],
               data['Maury County, Tennessee']]
data['MPO'] = sum(MPOCounties)
RuthInc = [data['Eagleville city, Tennessee'],data['La Vergne city, Tennessee'],
           data['Murfreesboro city, Tennessee'],data['Smyrna town, Tennessee']]
data['Rutherford Incorporated'] = sum(RuthInc)
data['Rutherford Unincorporated'] = data['Rutherford County, Tennessee'] - data['Rutherford Incorporated']
WilsonInc = [data['Lebanon city, Tennessee'],data['Mount Juliet city, Tennessee'],
             data['Watertown city, Tennessee']]
data['Wilson Incorporated'] = sum(WilsonInc)
data['Wilson Unincorporated'] = data['Wilson County, Tennessee'] - data['Wilson Incorporated']
CheathInc = [data['Ashland City town, Tennessee'],data['Kingston Springs town, Tennessee'],
             data['Pegram town, Tennessee'],data['Pleasant View city, Tennessee']]
data['Cheatham Incorporated'] = sum(CheathInc)
data['Cheatham Unincorporated'] = data['Cheatham County, Tennessee'] - data['Cheatham Incorporated']
DicksInc = [data['Burns town, Tennessee'],data['Charlotte town, Tennessee'],
            data['Dickson city, Tennessee'],data['Slayden town, Tennessee'],
            data['Vanleer town, Tennessee'],data['White Bluff town, Tennessee']]
data['Dickson Incorporated'] = sum(DicksInc)
data['Dickson Unincorporated'] = data['Dickson County, Tennessee'] - data['Dickson Incorporated']
HumphInc = [data['McEwen city, Tennessee'],data['New Johnsonville city, Tennessee'],
            data['Waverly city, Tennessee']]
data['Humphreys Incorporated'] = sum(HumphInc)
data['Humphreys Unincorporated'] = data['Humphreys County, Tennessee'] - data['Humphreys Incorporated']
data['Montgomery Incorporated'] = data['Clarksville city, Tennessee']
data['Montgomery Unincorporated'] = data['Montgomery County, Tennessee'] - data['Montgomery Incorporated']

In [53]:
data = data.transpose()

In [54]:
data.head()

Unnamed: 0_level_0,1900,1910,1920,1930,1940,1950,1960,1970,1980,1990,2000,2010,2020
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
"Adams city, Tennessee",0,542,672,0,0,0,0,458,600,587,566,633,624
"Ashland City town, Tennessee",0,641,649,712,957,1024,1400,2027,2329,2552,3641,4541,5193
"Belle Meade city, Tennessee",0,0,0,0,2061,2831,3082,0,3182,2839,2943,2912,2901
"Berry Hill city, Tennessee",0,0,0,0,0,1248,1551,0,1113,802,674,537,2112
"Brentwood city, Tennessee",0,0,0,0,0,0,0,0,9431,16392,23445,37060,45373


In [55]:
cols = data.columns
data = data.reset_index(drop = False)
data = data.melt(id_vars = 'NAME', var_name = 'Year', value_name = 'Population')#, values = cols)

In [56]:
data['Year'] = data['Year'].astype(int)

In [57]:
histpop = data

In [58]:
histpop['GEO_ID'] = histpop['NAME'].map(geotogeoid)
histpop['Source'] = 'Historical Decennial Censuses'

In [59]:
histpop.head()

Unnamed: 0,NAME,Year,Population,GEO_ID,Source
0,"Adams city, Tennessee",1900,0,1600000US4700200,Historical Decennial Censuses
1,"Ashland City town, Tennessee",1900,0,1600000US4702180,Historical Decennial Censuses
2,"Belle Meade city, Tennessee",1900,0,1600000US4704620,Historical Decennial Censuses
3,"Berry Hill city, Tennessee",1900,0,1600000US4705140,Historical Decennial Censuses
4,"Brentwood city, Tennessee",1900,0,1600000US4708280,Historical Decennial Censuses


In [60]:
histpop.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1118 entries, 0 to 1117
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   NAME        1118 non-null   object
 1   Year        1118 non-null   int32 
 2   Population  1118 non-null   int64 
 3   GEO_ID      884 non-null    object
 4   Source      1118 non-null   object
dtypes: int32(1), int64(1), object(3)
memory usage: 39.4+ KB


In [61]:
#export to feather to join to all tidydata before creating change metrics
histpop.to_feather('../../Outputs/HIST_PopulationTIDY')

In [62]:
#create a list of years from the dataframe to pass through our "generate time frames" function to create a list of all possible time frames - need this here for later
years = list(data['Year'].unique().astype(int))
time_frames = generate_time_frames(years)

In [63]:
data.head()

Unnamed: 0,NAME,Year,Population,GEO_ID,Source
0,"Adams city, Tennessee",1900,0,1600000US4700200,Historical Decennial Censuses
1,"Ashland City town, Tennessee",1900,0,1600000US4702180,Historical Decennial Censuses
2,"Belle Meade city, Tennessee",1900,0,1600000US4704620,Historical Decennial Censuses
3,"Berry Hill city, Tennessee",1900,0,1600000US4705140,Historical Decennial Censuses
4,"Brentwood city, Tennessee",1900,0,1600000US4708280,Historical Decennial Censuses


In [64]:
data = data.drop(columns = ['GEO_ID', 'Source'])
data.head()

Unnamed: 0,NAME,Year,Population
0,"Adams city, Tennessee",1900,0
1,"Ashland City town, Tennessee",1900,0
2,"Belle Meade city, Tennessee",1900,0
3,"Berry Hill city, Tennessee",1900,0
4,"Brentwood city, Tennessee",1900,0


In [65]:
#create a multilevel column header with year and placeholder for time frames
#pivot the table and create a multiindex of year and column header
cols = list(data.columns)
cols.remove('NAME')
cols.remove('Year')
df_pivot = data.pivot_table(index = 'NAME', columns = ['Year'], values = cols)
df_pivot.head(2)

Unnamed: 0_level_0,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population
Year,1900,1910,1920,1930,1940,1950,1960,1970,1980,1990,2000,2010,2020
NAME,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
"Adams city, Tennessee",0,542,672,0,0,0,0,458,600,587,566,633,624
"Ashland City town, Tennessee",0,641,649,712,957,1024,1400,2027,2329,2552,3641,4541,5193


In [66]:
#add a level to the multiindex to accomodate the time period metrics
df_pivot.columns = pd.MultiIndex.from_tuples([(col[0], col[1], 'None') for col in df_pivot.columns])
df_pivot.head(3)

Unnamed: 0_level_0,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population
Unnamed: 0_level_1,1900,1910,1920,1930,1940,1950,1960,1970,1980,1990,2000,2010,2020
Unnamed: 0_level_2,None,None,None,None,None,None,None,None,None,None,None,None,None
NAME,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3
"Adams city, Tennessee",0,542,672,0,0,0,0,458,600,587,566,633,624
"Ashland City town, Tennessee",0,641,649,712,957,1024,1400,2027,2329,2552,3641,4541,5193
"Belle Meade city, Tennessee",0,0,0,0,2061,2831,3082,0,3182,2839,2943,2912,2901


In [67]:
#get a list of the varaibles to loop through by indexing into the first level only of the column headers
first_level = data.columns.get_level_values(0).unique().tolist()
first_level.remove('NAME')
first_level.remove('Year')
#remove percentages - don't want change metrics on them
first_level = [item for item in first_level if '%' not in item]

In [68]:
#pass the dataframe, the list of variables, time frames, and years through the "calculate change" function
data = calculate_changes(df_pivot, first_level, time_frames = time_frames, years = years)

In [69]:
data.head()

Unnamed: 0_level_0,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change,Population % Change,Population Change
Unnamed: 0_level_1,1900,1910,1920,1930,1940,1950,1960,1970,1980,1990,2000,2010,2020,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None
Unnamed: 0_level_2,None,None,None,None,None,None,None,None,None,None,None,None,None,1900-1910,1900-1910,1900-1920,1900-1920,1900-1930,1900-1930,1900-1940,1900-1940,1900-1950,1900-1950,1900-1960,1900-1960,1900-1970,1900-1970,1900-1980,1900-1980,1900-1990,1900-1990,1900-2000,1900-2000,1900-2010,1900-2010,1900-2020,1900-2020,1910-1920,1910-1920,1910-1930,1910-1930,1910-1940,1910-1940,1910-1950,1910-1950,1910-1960,1910-1960,1910-1970,1910-1970,1910-1980,1910-1980,1910-1990,1910-1990,1910-2000,1910-2000,1910-2010,1910-2010,1910-2020,1910-2020,1920-1930,1920-1930,1920-1940,1920-1940,1920-1950,1920-1950,1920-1960,1920-1960,1920-1970,1920-1970,1920-1980,1920-1980,1920-1990,1920-1990,1920-2000,1920-2000,1920-2010,1920-2010,1920-2020,1920-2020,1930-1940,1930-1940,1930-1950,1930-1950,1930-1960,1930-1960,1930-1970,1930-1970,1930-1980,1930-1980,1930-1990,1930-1990,1930-2000,1930-2000,1930-2010,1930-2010,1930-2020,1930-2020,1940-1950,1940-1950,1940-1960,1940-1960,1940-1970,1940-1970,1940-1980,1940-1980,1940-1990,1940-1990,1940-2000,1940-2000,1940-2010,1940-2010,1940-2020,1940-2020,1950-1960,1950-1960,1950-1970,1950-1970,1950-1980,1950-1980,1950-1990,1950-1990,1950-2000,1950-2000,1950-2010,1950-2010,1950-2020,1950-2020,1960-1970,1960-1970,1960-1980,1960-1980,1960-1990,1960-1990,1960-2000,1960-2000,1960-2010,1960-2010,1960-2020,1960-2020,1970-1980,1970-1980,1970-1990,1970-1990,1970-2000,1970-2000,1970-2010,1970-2010,1970-2020,1970-2020,1980-1990,1980-1990,1980-2000,1980-2000,1980-2010,1980-2010,1980-2020,1980-2020,1990-2000,1990-2000,1990-2010,1990-2010,1990-2020,1990-2020,2000-2010,2000-2010,2000-2020,2000-2020,2010-2020,2010-2020
NAME,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3,Unnamed: 31_level_3,Unnamed: 32_level_3,Unnamed: 33_level_3,Unnamed: 34_level_3,Unnamed: 35_level_3,Unnamed: 36_level_3,Unnamed: 37_level_3,Unnamed: 38_level_3,Unnamed: 39_level_3,Unnamed: 40_level_3,Unnamed: 41_level_3,Unnamed: 42_level_3,Unnamed: 43_level_3,Unnamed: 44_level_3,Unnamed: 45_level_3,Unnamed: 46_level_3,Unnamed: 47_level_3,Unnamed: 48_level_3,Unnamed: 49_level_3,Unnamed: 50_level_3,Unnamed: 51_level_3,Unnamed: 52_level_3,Unnamed: 53_level_3,Unnamed: 54_level_3,Unnamed: 55_level_3,Unnamed: 56_level_3,Unnamed: 57_level_3,Unnamed: 58_level_3,Unnamed: 59_level_3,Unnamed: 60_level_3,Unnamed: 61_level_3,Unnamed: 62_level_3,Unnamed: 63_level_3,Unnamed: 64_level_3,Unnamed: 65_level_3,Unnamed: 66_level_3,Unnamed: 67_level_3,Unnamed: 68_level_3,Unnamed: 69_level_3,Unnamed: 70_level_3,Unnamed: 71_level_3,Unnamed: 72_level_3,Unnamed: 73_level_3,Unnamed: 74_level_3,Unnamed: 75_level_3,Unnamed: 76_level_3,Unnamed: 77_level_3,Unnamed: 78_level_3,Unnamed: 79_level_3,Unnamed: 80_level_3,Unnamed: 81_level_3,Unnamed: 82_level_3,Unnamed: 83_level_3,Unnamed: 84_level_3,Unnamed: 85_level_3,Unnamed: 86_level_3,Unnamed: 87_level_3,Unnamed: 88_level_3,Unnamed: 89_level_3,Unnamed: 90_level_3,Unnamed: 91_level_3,Unnamed: 92_level_3,Unnamed: 93_level_3,Unnamed: 94_level_3,Unnamed: 95_level_3,Unnamed: 96_level_3,Unnamed: 97_level_3,Unnamed: 98_level_3,Unnamed: 99_level_3,Unnamed: 100_level_3,Unnamed: 101_level_3,Unnamed: 102_level_3,Unnamed: 103_level_3,Unnamed: 104_level_3,Unnamed: 105_level_3,Unnamed: 106_level_3,Unnamed: 107_level_3,Unnamed: 108_level_3,Unnamed: 109_level_3,Unnamed: 110_level_3,Unnamed: 111_level_3,Unnamed: 112_level_3,Unnamed: 113_level_3,Unnamed: 114_level_3,Unnamed: 115_level_3,Unnamed: 116_level_3,Unnamed: 117_level_3,Unnamed: 118_level_3,Unnamed: 119_level_3,Unnamed: 120_level_3,Unnamed: 121_level_3,Unnamed: 122_level_3,Unnamed: 123_level_3,Unnamed: 124_level_3,Unnamed: 125_level_3,Unnamed: 126_level_3,Unnamed: 127_level_3,Unnamed: 128_level_3,Unnamed: 129_level_3,Unnamed: 130_level_3,Unnamed: 131_level_3,Unnamed: 132_level_3,Unnamed: 133_level_3,Unnamed: 134_level_3,Unnamed: 135_level_3,Unnamed: 136_level_3,Unnamed: 137_level_3,Unnamed: 138_level_3,Unnamed: 139_level_3,Unnamed: 140_level_3,Unnamed: 141_level_3,Unnamed: 142_level_3,Unnamed: 143_level_3,Unnamed: 144_level_3,Unnamed: 145_level_3,Unnamed: 146_level_3,Unnamed: 147_level_3,Unnamed: 148_level_3,Unnamed: 149_level_3,Unnamed: 150_level_3,Unnamed: 151_level_3,Unnamed: 152_level_3,Unnamed: 153_level_3,Unnamed: 154_level_3,Unnamed: 155_level_3,Unnamed: 156_level_3,Unnamed: 157_level_3,Unnamed: 158_level_3,Unnamed: 159_level_3,Unnamed: 160_level_3,Unnamed: 161_level_3,Unnamed: 162_level_3,Unnamed: 163_level_3,Unnamed: 164_level_3,Unnamed: 165_level_3,Unnamed: 166_level_3,Unnamed: 167_level_3,Unnamed: 168_level_3,Unnamed: 169_level_3
"Adams city, Tennessee",0,542,672,0,0,0,0,458,600,587,566,633,624,inf,542,inf,672,,0,,0,,0,,0,inf,458,inf,600,inf,587,inf,566,inf,633,inf,624,23.98524,130,-100.0,-542,-100.0,-542,-100.0,-542,-100.0,-542,-15.498155,-84,10.701107,58,8.302583,45,4.428044,24,16.789668,91,15.129151,82,-100.0,-672,-100.0,-672,-100.0,-672,-100.0,-672,-31.845238,-214,-10.714286,-72,-12.64881,-85,-15.77381,-106,-5.803571,-39,-7.142857,-48,,0,,0,,0,inf,458,inf,600,inf,587,inf,566,inf,633,inf,624,,0,,0,inf,458,inf,600,inf,587,inf,566,inf,633,inf,624,,0,inf,458,inf,600,inf,587,inf,566,inf,633,inf,624,inf,458,inf,600,inf,587,inf,566,inf,633,inf,624,31.004367,142,28.165939,129,23.580786,108,38.209607,175,36.244541,166,-2.166667,-13,-5.666667,-34,5.5,33,4.0,24,-3.577513,-21,7.836457,46,6.303237,37,11.837456,67,10.24735,58,-1.421801,-9
"Ashland City town, Tennessee",0,641,649,712,957,1024,1400,2027,2329,2552,3641,4541,5193,inf,641,inf,649,inf,712,inf,957,inf,1024,inf,1400,inf,2027,inf,2329,inf,2552,inf,3641,inf,4541,inf,5193,1.24805,8,11.076443,71,49.297972,316,59.75039,383,118.408736,759,216.224649,1386,263.338534,1688,298.127925,1911,468.018721,3000,608.424337,3900,710.140406,4552,9.707242,63,47.457627,308,57.781202,375,115.716487,751,212.326656,1378,258.859784,1680,293.220339,1903,461.016949,2992,599.691834,3892,700.154083,4544,34.410112,245,43.820225,312,96.629213,688,184.691011,1315,227.106742,1617,258.426966,1840,411.376404,2929,537.780899,3829,629.353933,4481,7.001045,67,46.290491,443,111.807732,1070,143.364681,1372,166.666667,1595,280.45977,2684,374.503657,3584,442.633229,4236,36.71875,376,97.949219,1003,127.441406,1305,149.21875,1528,255.566406,2617,343.457031,3517,407.128906,4169,44.785714,627,66.357143,929,82.285714,1152,160.071429,2241,224.357143,3141,270.928571,3793,14.898865,302,25.900345,525,79.625062,1614,124.025654,2514,156.191416,3166,9.574925,223,56.33319,1312,94.976385,2212,122.971232,2864,42.672414,1089,77.938871,1989,103.487461,2641,24.718484,900,42.625652,1552,14.358071,652
"Belle Meade city, Tennessee",0,0,0,0,2061,2831,3082,0,3182,2839,2943,2912,2901,,0,,0,,0,inf,2061,inf,2831,inf,3082,,0,inf,3182,inf,2839,inf,2943,inf,2912,inf,2901,,0,,0,inf,2061,inf,2831,inf,3082,,0,inf,3182,inf,2839,inf,2943,inf,2912,inf,2901,,0,inf,2061,inf,2831,inf,3082,,0,inf,3182,inf,2839,inf,2943,inf,2912,inf,2901,inf,2061,inf,2831,inf,3082,,0,inf,3182,inf,2839,inf,2943,inf,2912,inf,2901,37.360505,770,49.539059,1021,-100.0,-2061,54.391072,1121,37.748666,778,42.79476,882,41.290636,851,40.756914,840,8.866125,251,-100.0,-2831,12.398446,351,0.282586,8,3.956199,112,2.86118,81,2.472625,70,-100.0,-3082,3.244646,100,-7.884491,-243,-4.510058,-139,-5.515899,-170,-5.87281,-181,inf,3182,inf,2839,inf,2943,inf,2912,inf,2901,-10.779384,-343,-7.510999,-239,-8.485229,-270,-8.830924,-281,3.663262,104,2.571328,73,2.183868,62,-1.053347,-31,-1.427115,-42,-0.377747,-11
"Berry Hill city, Tennessee",0,0,0,0,0,1248,1551,0,1113,802,674,537,2112,,0,,0,,0,,0,inf,1248,inf,1551,,0,inf,1113,inf,802,inf,674,inf,537,inf,2112,,0,,0,,0,inf,1248,inf,1551,,0,inf,1113,inf,802,inf,674,inf,537,inf,2112,,0,,0,inf,1248,inf,1551,,0,inf,1113,inf,802,inf,674,inf,537,inf,2112,,0,inf,1248,inf,1551,,0,inf,1113,inf,802,inf,674,inf,537,inf,2112,inf,1248,inf,1551,,0,inf,1113,inf,802,inf,674,inf,537,inf,2112,24.278846,303,-100.0,-1248,-10.817308,-135,-35.737179,-446,-45.99359,-574,-56.971154,-711,69.230769,864,-100.0,-1551,-28.239845,-438,-48.291425,-749,-56.544165,-877,-65.377176,-1014,36.170213,561,inf,1113,inf,802,inf,674,inf,537,inf,2112,-27.942498,-311,-39.442947,-439,-51.752022,-576,89.757412,999,-15.9601,-128,-33.042394,-265,163.341646,1310,-20.326409,-137,213.353116,1438,293.296089,1575
"Brentwood city, Tennessee",0,0,0,0,0,0,0,0,9431,16392,23445,37060,45373,,0,,0,,0,,0,,0,,0,,0,inf,9431,inf,16392,inf,23445,inf,37060,inf,45373,,0,,0,,0,,0,,0,,0,inf,9431,inf,16392,inf,23445,inf,37060,inf,45373,,0,,0,,0,,0,,0,inf,9431,inf,16392,inf,23445,inf,37060,inf,45373,,0,,0,,0,,0,inf,9431,inf,16392,inf,23445,inf,37060,inf,45373,,0,,0,,0,inf,9431,inf,16392,inf,23445,inf,37060,inf,45373,,0,,0,inf,9431,inf,16392,inf,23445,inf,37060,inf,45373,,0,inf,9431,inf,16392,inf,23445,inf,37060,inf,45373,inf,9431,inf,16392,inf,23445,inf,37060,inf,45373,73.809776,6961,148.595059,14014,292.959389,27629,381.104867,35942,43.027086,7053,126.085896,20668,176.799658,28981,58.072084,13615,93.529537,21928,22.431193,8313


In [70]:
#reformat and rename columns
data = data.stack([1, 1])
data = data.reset_index(drop = False)
data = data.rename(columns = {'level_1':'Year', 'level_2':'Time Frame'})

In [71]:
data.head()

Unnamed: 0,NAME,Year,Time Frame,Population,Population % Change,Population Change
0,"Adams city, Tennessee",1900,,0.0,,
1,"Adams city, Tennessee",1910,,542.0,,
2,"Adams city, Tennessee",1920,,672.0,,
3,"Adams city, Tennessee",1930,,0.0,,
4,"Adams city, Tennessee",1940,,0.0,,


In [72]:
#map to geoid dictionary from module and add source
data['GEO_ID'] = data['NAME'].map(geotogeoid)
data['Source'] = 'Historical Decennial Censuses'

In [73]:
#final check
data.head()

Unnamed: 0,NAME,Year,Time Frame,Population,Population % Change,Population Change,GEO_ID,Source
0,"Adams city, Tennessee",1900,,0.0,,,1600000US4700200,Historical Decennial Censuses
1,"Adams city, Tennessee",1910,,542.0,,,1600000US4700200,Historical Decennial Censuses
2,"Adams city, Tennessee",1920,,672.0,,,1600000US4700200,Historical Decennial Censuses
3,"Adams city, Tennessee",1930,,0.0,,,1600000US4700200,Historical Decennial Censuses
4,"Adams city, Tennessee",1940,,0.0,,,1600000US4700200,Historical Decennial Censuses


In [75]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7826 entries, 0 to 7825
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   NAME                 7826 non-null   object 
 1   Year                 7826 non-null   object 
 2   Time Frame           7826 non-null   object 
 3   Population           1118 non-null   float64
 4   Population % Change  6056 non-null   float64
 5   Population Change    6708 non-null   float64
 6   GEO_ID               6188 non-null   object 
 7   Source               7826 non-null   object 
dtypes: float64(3), object(5)
memory usage: 489.2+ KB


In [74]:
#export to the SQLite database
conn = sq.connect('../../Outputs/CensusBureau.db')
data.to_sql('CensusBureau_HistoricalPopulation_Annual_Change', conn, if_exists = 'replace', index = False)

7826