In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from functools import reduce
# Display settings: do not truncate columns or rows when a frame is rendered,
# and allow up to 150 characters of width for text output.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 150)
import datetime
# Turn off pandas' chained-assignment warning for the whole session.
pd.options.mode.chained_assignment = None  # default='warn'
from warnings import simplefilter
# Ignore pandas PerformanceWarning messages for the rest of the session.
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
def percentchange(x, y):
    """Return the percent change from the baseline ``y`` to ``x``.

    Computed as ``(x - y) * 100 / y``.

    Parameters
    ----------
    x : number or array-like
        New value(s).
    y : number or array-like
        Baseline value(s) the change is measured against.

    Returns
    -------
    number or array-like
        The percent change. When both arguments are scalars and the baseline
        is zero, ``0`` is returned instead of dividing. Array-like inputs are
        divided element-wise and are not zero-guarded here.
    """
    # NumPy scalars (e.g. single values pulled out of a DataFrame) do not raise
    # ZeroDivisionError on division by zero -- they produce inf/nan -- so the
    # zero baseline must be checked explicitly for the guard to apply to them.
    if np.isscalar(x) and np.isscalar(y) and y == 0:
        return 0
    try:
        return (x - y) * 100 / y
    except ZeroDivisionError:
        # Kept for non-scalar objects that still raise on a zero divisor.
        return 0
def realchange(x, y):
    """Return the absolute (level) change between ``x`` and ``y``, i.e. ``x - y``."""
    difference = x - y
    return difference
def percent(x, y):
    """Return ``x`` expressed as a percentage of ``y``.

    No guard is applied for a zero ``y``; the division behaves as it does
    for the argument types passed in.
    """
    share = x / y
    return share * 100

Quarterly Employment During the Heart of COVID 19 Pandemic

Woods & Poole data, which is used more heavily at the county level, is used at the place level only for total employment projections; this keeps the totals consistent. Because Woods & Poole is ultimately a land use model, JobsEQ employment is used instead to model place-level employment by place of work. JobsEQ data is derived from the QCEW and other inputs, which makes it the better option here: both sources are modeled, so at a geography this granular we use the one modeled directly from UI claims rather than the one modeled from land use.

To retrieve data:
+ Log into JobsEQ and navigate to the "Data Explorer" function  
+ Go to "Draft Mode"
+ Select the "Industry" dataset, quarterly  
+ Select the following dimensions: Region, Industry, Year: fill regions with all geographies including US, State, GNRC (13), MPO (7), GNRC Region (14) and sub all counties, all places  
+ Fill the year/quarter with 2019-current (in the future change this to relevant time period if need be)  
+ Select the total and the high-level industries listed below:  

Industries to Select:  
(NAICS Codes)  
+ Total - All Industries  
+ Construction (23)  
+ Manufacturing (31)  
+ Information (51)  
+ Public Administration (92)  
(Custom Created Groups *begin to type them as they appear here and they will appear in the search bar, if not then remake the custom group as described below*)  
+ Other/Unclassified (81: Other, Except Public Administration, 99: Unclassified)  
+ Leisure & Hospitality (71: Arts, Entertainment, & Recreation, 72: Accommodation & Food Services)   
+ Education & Health Services (61: Education, 62: Healthcare & Social Assistance)   
+ Professional & Business Services (54: Professional, Scientific, & Technical, 55: Management of Companies & Enterprises, 56: Administrative & Support & Waste Management & Remediation)   
+ Financial Activities (52: Finance & Insurance, 53: Real Estate & Rental & Leasing)   
+ Trade, Transportation & Utilities (42: Wholesale Trade, 44-45: Retail Trade, 48-49: Transportation & Warehousing, 22: Utilities)  
+ Natural Resources & Mining (11: Agricultural Activities, Forestry, Fishing, & Hunting, 21: Mining, Quarrying, Oil & Gas)  

+ Select the following values: "Employment (4Q Moving Average)"

Once the export is complete, open the file and delete the top row that says "Industry Data", then adjust the column headers so that each one reflects its industry (every column holds the same datapoint). Delete the FIPS column, rename the Region column to NAME and the quarter column to Time, and delete the data notes at the bottom. Save as:   "JobsEQ_IndustryEmploymentCOVIDQuarterly_AllGeos.csv"

In [8]:
# Read the prepared JobsEQ export and preview the first two rows.
source_csv = '../../Data Downloads/JobsEQ_IndustryEmploymentCOVIDQuarterly_AllGeos.csv'
data = pd.read_csv(source_csv)
data.head(2)

Unnamed: 0,NAME,Time,Total - All Industries,Construction (23),Manufacturing (31),Information (51),Public Administration (92),Other/Unclassified,Leisure & Hospitality,Education & Health Services,Professional & Business Services,Financial Activities,"Trade, Transportation, & Utilities",Natural Resources & Mining
0,USA,2019Q1,158031299.4,9094807.625,13015409.16,3081771.166,7426728.516,7248880.031,17441949.38,35570293.91,23193839.0,8966304.61,30156671.43,2834644.558
1,USA,2019Q2,158451024.1,9153189.405,13056876.64,3089322.21,7440921.997,7239161.091,17510564.61,35698475.54,23276916.36,8989721.82,30168449.41,2827424.982


In [32]:
# Disabled step: would strip leading/trailing whitespace from the NAME column.
# NOTE(review): re-enable if geography names arrive with stray whitespace (the
# exact-match geography rename further down would miss them); otherwise delete.
#data['NAME'] = data['NAME'].str.strip()

In [9]:
# Shorten the NAICS-coded column headers to plain industry names.
industry_labels = {
    'Total - All Industries': 'Total',
    'Construction (23)': 'Construction',
    'Manufacturing (31)': 'Manufacturing',
    'Information (51)': 'Information',
    'Public Administration (92)': 'Public Administration',
}
data = data.rename(columns=industry_labels)

In [10]:
# Reshape from wide (one column per industry) to long format
# (one row per NAME / Time / Industry) with .melt().
cols = data.columns
# Only the industry columns are values to melt. The identifier columns are
# excluded explicitly instead of being passed as both id_vars and value_vars
# and relying on melt() to drop the overlap.
id_cols = ['NAME', 'Time']
industry_cols = [col for col in cols if col not in id_cols]
data = data.melt(id_vars=id_cols, value_vars=industry_cols,
                 var_name='Industry', value_name='Employment')

In [12]:
# Preview the long-format table produced by the melt.
data.head(2)

Unnamed: 0,NAME,Time,Industry,Employment
0,USA,2019Q1,Total,158031299.4
1,USA,2019Q2,Total,158451024.1


Use pandas' `PeriodIndex` to convert the Time column to a quarterly period, and then to a timestamp marking the start of each quarter.

https://stackoverflow.com/questions/53898482/clean-way-to-convert-quarterly-periods-to-datetime-in-pandas

In [14]:
# Rewrite quarter labels of the form "Q1 2019" as "2019-Q1"; labels that do not
# match the pattern (e.g. "2019Q1") pass through unchanged.
# regex=True is passed explicitly: the pattern relies on capture groups, and
# without it pandas emits a FutureWarning on older versions and treats the
# pattern as a literal string on pandas >= 2.0.
qs = data['Time'].str.replace(r'(Q\d) (\d+)', r'\2-\1', regex=True)

  qs = data['Time'].str.replace(r'(Q\d) (\d+)', r'\2-\1')


In [15]:
# Parse the quarter labels as quarterly periods, then store each period's
# timestamp in a new TimeStamp column.
quarter_periods = pd.PeriodIndex(qs, freq = 'Q')
data['TimeStamp'] = quarter_periods.to_timestamp()

In [16]:
# Check that the conversion worked: TimeStamp should be listed with a
# datetime64 dtype and no missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16020 entries, 0 to 16019
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   NAME        16020 non-null  object        
 1   Time        16020 non-null  object        
 2   Industry    16020 non-null  object        
 3   Employment  16020 non-null  float64       
 4   TimeStamp   16020 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 625.9+ KB


In [17]:
# Replace the long JobsEQ geography labels in NAME with short place/county names.
# The lookup is applied directly to the NAME column (names without an entry are
# kept as they are). This avoids transposing the whole frame just to rename
# labels, which coerced every column to object dtype and built a very wide
# intermediate frame.
place_names = {
    'Adams city, TN': 'Adams',
    'Ashland City town, TN': 'Ashland City',
    'Belle Meade city, TN': 'Belle Meade',
    'Berry Hill city, TN': 'Berry Hill',
    'Brentwood city, TN': 'Brentwood',
    'Burns town, TN': 'Burns',
    'Cedar Hill city, TN': 'Cedar Hill',
    'Charlotte town, TN': 'Charlotte',
    'Cheatham County, Tennessee': 'Cheatham County',
    'Clarksville city, TN': 'Clarksville',
    'Columbia city, TN': 'Columbia',
    'Coopertown town, TN': 'Coopertown',
    'Cross Plains city, TN': 'Cross Plains',
    'Cumberland City town, TN': 'Cumberland City',
    'Davidson County, Tennessee': 'Davidson County',
    'Dickson city, TN': 'Dickson',
    'Dickson County, Tennessee': 'Dickson County',
    'Dover city, TN': 'Dover',
    'Eagleville city, TN': 'Eagleville',
    'Erin city, TN': 'Erin',
    'Fairview city, TN': 'Fairview',
    'Forest Hills city, TN': 'Forest Hills',
    'Franklin city, TN': 'Franklin',
    'Gallatin city, TN': 'Gallatin',
    'Goodlettsville city, TN': 'Goodlettsville',
    'Greenbrier town, TN': 'Greenbrier',
    'Hartsville/Trousdale County, TN': 'Hartsville/Trousdale',
    'Hendersonville city, TN': 'Hendersonville',
    'Houston County, Tennessee': 'Houston County',
    'Humphreys County, Tennessee': 'Humphreys County',
    'Kingston Springs town, TN': 'Kingston Springs',
    'La Vergne city, TN': 'La Vergne',
    'Lebanon city, TN': 'Lebanon',
    'McEwen city, TN': 'McEwen',
    'Maury County, Tennessee': 'Maury County',
    'Millersville city, TN': 'Millersville',
    'Mitchellville city, TN': 'Mitchellville',
    'Montgomery County, Tennessee': 'Montgomery County',
    'Mount Juliet city, TN': 'Mount Juliet',
    'Mount Pleasant city, TN': 'Mount Pleasant',
    'Murfreesboro city, TN': 'Murfreesboro',
    'Nashville-Davidson metropolitan government (balance), TN': 'Nashville',
    'New Johnsonville city, TN': 'New Johnsonville',
    'Nolensville town, TN': 'Nolensville',
    'Oak Hill city, TN': 'Oak Hill',
    'Pegram town, TN': 'Pegram',
    'Pleasant View city, TN': 'Pleasant View',
    'Portland city, TN': 'Portland',
    'Ridgetop city, TN': 'Ridgetop',
    'Robertson County, Tennessee': 'Robertson County',
    'Rutherford County, Tennessee': 'Rutherford County',
    'Slayden town, TN': 'Slayden',
    'Smyrna town, TN': 'Smyrna',
    'Spring Hill city, TN': 'Spring Hill',
    'Springfield city, TN': 'Springfield',
    'Stewart County, Tennessee': 'Stewart County',
    'Sumner County, Tennessee': 'Sumner County',
    'Tennessee Ridge town, TN': 'Tennessee Ridge',
    "Thompson's Station town, TN": "Thompson's Station",
    'Trousdale County, Tennessee': 'Trousdale County',
    'Vanleer town, TN': 'Vanleer',
    'Watertown city, TN': 'Watertown',
    'Waverly city, TN': 'Waverly',
    'Westmoreland town, TN': 'Westmoreland',
    'White Bluff town, TN': 'White Bluff',
    'White House city, TN': 'White House',
    'Williamson County, Tennessee': 'Williamson County',
    'Wilson County, Tennessee': 'Wilson County',
}
# Exact-match lookup; dict.get falls back to the original label when a
# geography (e.g. 'USA') has no short name defined.
data['NAME'] = data['NAME'].map(lambda name: place_names.get(name, name))

In [22]:
# Preview the table after the geography names have been shortened.
data.head()

Unnamed: 0,NAME,Time,Industry,Employment,TimeStamp
0,USA,2019Q1,Total,158031299.4,2019-01-01
1,USA,2019Q2,Total,158451024.1,2019-04-01
2,USA,2019Q3,Total,158927138.5,2019-07-01
3,USA,2019Q4,Total,159354215.7,2019-10-01
4,USA,2020Q1,Total,159692496.4,2020-01-01


In [21]:
# Write the long-format table to a Feather file. The path is relative to the
# notebook's working directory and is written without a file extension.
data.to_feather('../../Outputs/JobsEQ_QuarterlyData')