In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from functools import reduce
# Display configuration: passing None removes the column/row truncation limits,
# so displayed DataFrames and Series are shown in full; the text display width
# is set to 150 characters.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 150)
import datetime
def percentchange(x, y):
    """Return the percent change from the baseline ``y`` to ``x``.

    Computed as ``(x - y) * 100 / y``.

    Parameters
    ----------
    x : number or array-like
        New value(s).
    y : number or array-like
        Baseline value(s); used as the denominator.

    Returns
    -------
    number or array-like
        The percent change, or ``0`` when ``y`` is a scalar equal to zero.

    Notes
    -----
    NumPy scalars (which is what pandas returns for individual cells) do not
    raise ``ZeroDivisionError`` when divided by zero -- they produce
    ``inf``/``nan`` instead -- so relying on the exception alone lets a zero
    baseline slip through. The explicit scalar check makes the "return 0"
    fallback apply to NumPy scalars as well as built-in numbers.
    Array-like denominators are divided element-wise and are not adjusted.
    """
    try:
        # Evaluate the numerator first so invalid operands fail exactly as before.
        numerator = (x - y) * 100
        if np.isscalar(y) and y == 0:
            return 0
        return numerator / y
    except ZeroDivisionError:
        # Still reachable for types that raise on zero division
        # (e.g. object-dtype arrays holding Python numbers).
        return 0
def realchange(x, y):
    """Return the absolute (non-percentage) change from ``y`` to ``x``, i.e. ``x - y``."""
    difference = x - y
    return difference
def percent(x, y):
    """Return ``x`` expressed as a percentage of ``y``.

    There is no zero guard here: dividing built-in numbers by a zero ``y``
    raises ``ZeroDivisionError``.
    """
    share = x / y
    return share * 100

## This notebook outlines the high-level industry data formatting for the Comprehensive Plans with place-level primary geographies. A separate document covers the information that is pulled for all relevant geographies.

The Woods & Poole data, which is used more heavily at the county level, is used at the place level only for total employment projections, for consistency. Because this is ultimately a land use model, JobsEQ employment is used to model place-level employment by place of work. JobsEQ data is derived from the QCEW and other inputs, which makes it the better option here: both sources are modeled, so at a geography this granular we use the one modeled directly from UI claims rather than the one modeled from land use.

To retrieve data:
+ Log into JobsEQ and navigate to the "Data Explorer" function  
+ Go to "Draft Mode"
+ Select the "Industry" dataset, quarterly  
+ Select the following dimensions: Region, Industry, Year. Fill Region with all geographies (US, State, all regions, all counties, all places), then select the total and the high-level industries for Industry. Fill the year/quarter with 2019-current.
+ Select the following values: "Employment (4Q Moving Average)"

Once the export is complete, open the file and delete the top row that says "Industry Data", delete the FIPS column, rename the Region column to NAME, and delete the data notes at the bottom. Save the file as "JobsEQIndustryEmployment_quarterlycovid19.csv" in the "Data Downloads" folder.

In [30]:
# Load the manually cleaned JobsEQ export described in the retrieval steps above.
jobseq_csv = '../../Data Downloads/JobsEQIndustryEmployment_quarterlycovid19.csv'
data = pd.read_csv(jobseq_csv)

In [31]:
# Preview the first rows to check the columns that were read from the CSV.
data.head()

Unnamed: 0,NAME,Time,Total - All Industries,Construction (23),Manufacturing (31),Information (51),Public Administration (92),Other/Unclassified,Leisure & Hospitality,Education & Health Services,Professional & Business Services,Financial Activities,"Trade, Transportation, & Utilities",Natural Resources & Mining
0,USA,2019Q1,158031299.4,9094807.625,13015409.16,3081771.166,7426728.516,7248880.031,17441949.38,35570293.91,23193839.0,8966304.61,30156671.43,2834644.558
1,USA,2019Q2,158451024.1,9153189.405,13056876.64,3089322.21,7440921.997,7239161.091,17510564.61,35698475.54,23276916.36,8989721.82,30168449.41,2827424.982
2,USA,2019Q3,158927138.5,9218993.26,13079814.86,3098997.34,7458965.343,7236964.46,17575453.79,35848167.12,23367768.7,9023208.262,30191717.14,2827088.183
3,USA,2019Q4,159354215.7,9269804.102,13075923.91,3109391.242,7477155.748,7232671.375,17643544.13,35997550.04,23437696.69,9058956.382,30235859.5,2815662.548
4,USA,2020Q1,159692496.4,9313433.837,13059381.21,3125891.264,7507789.12,7237951.751,17657119.85,36149418.82,23502857.33,9091833.402,30266053.65,2780766.158


In [32]:
# Trim leading/trailing whitespace from the region labels so that the
# exact-match renaming done further down is not defeated by stray spaces.
name_stripped = data['NAME'].str.strip()
data['NAME'] = name_stripped

In [33]:
# Shorten the industry column headers: drop the "Total - " prefix and the
# parenthesised numeric codes. Columns not listed here keep their names.
industry_short_names = {
    'Total - All Industries': 'Total',
    'Construction (23)': 'Construction',
    'Manufacturing (31)': 'Manufacturing',
    'Information (51)': 'Information',
    'Public Administration (92)': 'Public Administration',
}
data = data.rename(columns=industry_short_names)

In [36]:
# Snapshot of the wide-format column labels. It is no longer passed to melt
# (see below) but is kept in case other cells reference `cols`.
cols = data.columns

# Reshape wide -> long: one row per (NAME, Time, Industry) with the value in
# `Employment`. `value_vars` is deliberately omitted so that melt unpivots every
# column that is not an id column. Passing the full column list (which includes
# NAME and Time) asked melt to treat the id columns as value columns too, which
# only works on pandas versions that de-duplicate the selection.
data = data.melt(id_vars=['NAME', 'Time'], var_name='Industry', value_name='Employment')

In [37]:
# Preview the reshaped (long-format) table.
data.head()

Unnamed: 0,NAME,Time,Industry,Employment
0,USA,2019Q1,Total,158031299.4
1,USA,2019Q2,Total,158451024.1
2,USA,2019Q3,Total,158927138.5
3,USA,2019Q4,Total,159354215.7
4,USA,2020Q1,Total,159692496.4


The quarter-string-to-timestamp conversion below is adapted from: https://stackoverflow.com/questions/53898482/clean-way-to-convert-quarterly-periods-to-datetime-in-pandas

In [38]:
# Normalise quarter labels written as "Q1 2019" into "2019-Q1".
# regex=True is passed explicitly: the pattern uses capture groups and
# back-references, and pandas no longer treats str.replace patterns as regular
# expressions by default (the implicit behaviour emitted a FutureWarning and,
# on pandas >= 2.0, the pattern would be matched as a literal string).
# Labels already in the "2019Q1" form do not match the pattern and pass
# through unchanged.
qs = data['Time'].str.replace(r'(Q\d) (\d+)', r'\2-\1', regex=True)

  qs = data['Time'].str.replace(r'(Q\d) (\d+)', r'\2-\1')


In [39]:
# Parse the quarter labels as quarterly periods, then convert each period to a
# timestamp and store it alongside the original `Time` label.
quarter_periods = pd.PeriodIndex(qs, freq = 'Q')
data['TimeStamp'] = quarter_periods.to_timestamp()

In [40]:
# Check column dtypes and non-null counts after adding TimeStamp.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16020 entries, 0 to 16019
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   NAME        16020 non-null  object        
 1   Time        16020 non-null  object        
 2   Industry    16020 non-null  object        
 3   Employment  16020 non-null  float64       
 4   TimeStamp   16020 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 625.9+ KB


In [47]:
# Map the verbose JobsEQ region labels to short display names.
# Labels that are not listed here are left exactly as they are.
place_short_names = {
    'Adams city, TN': 'Adams',
    'Ashland City town, TN': 'Ashland City',
    'Belle Meade city, TN': 'Belle Meade',
    'Berry Hill city, TN': 'Berry Hill',
    'Brentwood city, TN': 'Brentwood',
    'Burns town, TN': 'Burns',
    'Cedar Hill city, TN': 'Cedar Hill',
    'Charlotte town, TN': 'Charlotte',
    'Cheatham County, Tennessee': 'Cheatham County',
    'Clarksville city, TN': 'Clarksville',
    'Columbia city, TN': 'Columbia',
    'Coopertown town, TN': 'Coopertown',
    'Cross Plains city, TN': 'Cross Plains',
    'Cumberland City town, TN': 'Cumberland City',
    'Davidson County, Tennessee': 'Davidson County',
    'Dickson city, TN': 'Dickson',
    'Dickson County, Tennessee': 'Dickson County',
    'Dover city, TN': 'Dover',
    'Eagleville city, TN': 'Eagleville',
    'Erin city, TN': 'Erin',
    'Fairview city, TN': 'Fairview',
    'Forest Hills city, TN': 'Forest Hills',
    'Franklin city, TN': 'Franklin',
    'Gallatin city, TN': 'Gallatin',
    'Goodlettsville city, TN': 'Goodlettsville',
    'Greenbrier town, TN': 'Greenbrier',
    'Hartsville/Trousdale County, TN': 'Hartsville/Trousdale',
    'Hendersonville city, TN': 'Hendersonville',
    'Houston County, Tennessee': 'Houston County',
    'Humphreys County, Tennessee': 'Humphreys County',
    'Kingston Springs town, TN': 'Kingston Springs',
    'La Vergne city, TN': 'La Vergne',
    'Lebanon city, TN': 'Lebanon',
    'McEwen city, TN': 'McEwen',
    'Maury County, Tennessee': 'Maury County',
    'Millersville city, TN': 'Millersville',
    'Mitchellville city, TN': 'Mitchellville',
    'Montgomery County, Tennessee': 'Montgomery County',
    'Mount Juliet city, TN': 'Mount Juliet',
    'Mount Pleasant city, TN': 'Mount Pleasant',
    'Murfreesboro city, TN': 'Murfreesboro',
    'Nashville-Davidson metropolitan government (balance), TN': 'Nashville',
    'New Johnsonville city, TN': 'New Johnsonville',
    'Nolensville town, TN': 'Nolensville',
    'Oak Hill city, TN': 'Oak Hill',
    'Pegram town, TN': 'Pegram',
    'Pleasant View city, TN': 'Pleasant View',
    'Portland city, TN': 'Portland',
    'Ridgetop city, TN': 'Ridgetop',
    'Robertson County, Tennessee': 'Robertson County',
    'Rutherford County, Tennessee': 'Rutherford County',
    'Slayden town, TN': 'Slayden',
    'Smyrna town, TN': 'Smyrna',
    'Spring Hill city, TN': 'Spring Hill',
    'Springfield city, TN': 'Springfield',
    'Stewart County, Tennessee': 'Stewart County',
    'Sumner County, Tennessee': 'Sumner County',
    'Tennessee Ridge town, TN': 'Tennessee Ridge',
    "Thompson's Station town, TN": "Thompson's Station",
    'Trousdale County, Tennessee': 'Trousdale County',
    'Vanleer town, TN': 'Vanleer',
    'Watertown city, TN': 'Watertown',
    'Waverly city, TN': 'Waverly',
    'Westmoreland town, TN': 'Westmoreland',
    'White Bluff town, TN': 'White Bluff',
    'White House city, TN': 'White House',
    'Williamson County, Tennessee': 'Williamson County',
    'Wilson County, Tennessee': 'Wilson County',
}

# Replace the labels directly in the NAME column (exact, whole-value matches).
# The previous set_index -> transpose -> rename -> transpose -> reset_index
# round trip produced the same labels, but transposing a mixed-dtype frame casts
# every column to `object`, so Employment (float64) and TimeStamp (datetime64)
# lost their dtypes, and the frame was copied twice for no benefit.
# NOTE(review): with dtypes preserved, TimeStamp is written to CSV by pandas'
# datetime formatter rather than as stringified objects -- confirm that any
# downstream reader parses the column as a date instead of relying on an exact
# string layout.
data = data.assign(NAME=data['NAME'].replace(place_short_names))

In [48]:
# Count rows per region label. Labels that still carry a ", TN" suffix in the
# output are ones the renaming step has no entry for.
data['NAME'].value_counts()

USA                          180
Mount Pleasant               180
Tennessee Ridge              180
Spring Hill                  180
Springfield                  180
Smyrna                       180
Slayden                      180
Shackle Island CDP, TN       180
Rural Hill CDP, TN           180
Rockvale CDP, TN             180
Ridgetop                     180
Portland                     180
Pleasant View                180
Pegram                       180
Orlinda city, TN             180
Oak Hill                     180
Oak Grove CDP, TN            180
Nolensville                  180
New Johnsonville             180
New Deal CDP, TN             180
Nashville                    180
Thompson's Station           180
Vanleer                      180
Walnut Grove CDP, TN         180
Humphreys County             180
Williamson County            180
Trousdale County             180
Sumner County                180
Stewart County               180
Rutherford County            180
Robertson 

In [49]:
# Write the long-format table to the Outputs folder; index=False leaves the
# DataFrame's row index out of the file.
data.to_csv('../../Outputs/JOBSEQ_INDUSTRYQUARTERLY_COVID19.csv', index = False)