In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import scipy.sparse
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import numpy as np
from datetime import datetime
import tensorflow_datasets as tfds
import IPython
import IPython.display


Download the data from GitHub
- Set the start and end dates for the data
- Set the data directory where the downloaded datasets are stored

In [14]:
#def download_economic_data():
  #  dl_manager = tfds.download.DownloadManager(download_dir = '/tmp/economic_recovery', 
   #                                            extract_dir='/tmp/economic_recovery')
    #path = dl_manager.download_and_extract(
     #   'https://github.com/OpportunityInsights/EconomicTracker/tree/main.zip'
    #)
    #data_dir = path + '/EconomicTracker-main/data/'
    #return data_dir


In [18]:

# Directory holding the EconomicTracker CSV files (relative to the working directory).
#data_dir = download_economic_data()
data_dir = 'EconomyTracker/'

# Default inclusive date window; prepare_data() uses these as its default arguments.
start_date, end_date = '2020-03-07', '2021-03-06'

# State FIPS code used to filter each dataset (26 = Michigan).
state = 26

IPython.display.clear_output()
print(f'data dir: {data_dir}')

data dir: EconomyTracker/


The function below does the following:

- Reads the CSV file into a pandas DataFrame
- If a state is passed, filters the records for that state (only data for Michigan, FIPS code 26, is used here)
- The data stores the date in three columns (year, month, and day), so it combines them into a single datetime value and uses it as the index
- If a column list is passed, selects only those columns from the DataFrame; otherwise keeps all available columns
- If `interpolate` is set, reindexes the data to one row per calendar day between the start and end dates (used for the weekly datasets)
- Replaces any value that is exactly '.' with '0.0'
- Converts all columns to float
- Fills missing values by linear interpolation, followed by a forward and backward fill
- Selects only the rows within the date range passed and returns them to the caller

In [19]:
def prepare_data(csv_file, start_date=start_date, end_date=end_date, day=None, 
                 state=state, selected_cols=[], interpolate=False):
    """Load one EconomicTracker CSV and return a float DataFrame indexed by date.

    Parameters
    ----------
    csv_file : str
        File name, appended to the module-level ``data_dir``.
    start_date, end_date : str
        Inclusive bounds of the returned date range. The defaults are bound
        to the module-level values when this function is defined.
    day : str, optional
        Name of the day-of-month column; defaults to ``'day'``.
    state : int or None
        Value matched against the ``statefips`` column; ``None`` disables
        the state filter.
    selected_cols : sequence of str
        Columns to keep; an empty sequence keeps every column. The default
        list is never mutated.
    interpolate : bool
        When true, reindex to one row per calendar day between
        ``start_date`` and ``end_date`` before the gaps are filled.

    Returns
    -------
    pandas.DataFrame
        Float columns on a DatetimeIndex, sorted by date, restricted to
        ``[start_date, end_date]``.

    Raises
    ------
    KeyError
        If an expected column (``statefips``, ``year``, ``month``, the day
        column, or a selected column) is missing.
    ValueError
        From ``reindex`` if ``interpolate`` is true and the dates are not
        unique (e.g. several states per date when ``state`` is ``None``),
        or from ``astype(float)`` if a value cannot be parsed as a number.
    """
    df = pd.read_csv(data_dir + csv_file)
    if state is not None:
        # Copy so the column assignment below works on an independent frame
        # instead of a slice of the original (avoids SettingWithCopyWarning).
        df = df[df['statefips'] == state].copy()

    if day is None:
        day = 'day'

    # Assemble the timestamp from its three component columns in one
    # vectorised call; to_datetime expects the columns to be named
    # year/month/day, hence the rename of the day column.
    date_parts = df[['year', 'month', day]].rename(columns={day: 'day'})
    df['date'] = pd.to_datetime(date_parts)
    df = df.drop(columns=['year', 'month', day])
    df.index = pd.DatetimeIndex(df['date'])
    # sort_index() returns a new frame; the result must be kept, otherwise
    # the positional linear interpolation below runs on unsorted rows.
    df = df.sort_index()

    if len(selected_cols) > 0:
        df = df[list(selected_cols)]
    if interpolate:
        # Insert real missing values (the reindex default) for the new daily
        # rows, so numeric columns are not coerced to object dtype by a
        # "NaN" string placeholder.
        df = df.reindex(pd.date_range(start_date, end_date))
    df = df.drop(columns=['date'], errors='ignore')

    # NOTE(review): '.' looks like the source's missing-value marker; it is
    # mapped to 0.0 (not NaN), so such cells are not interpolated — confirm
    # that this is intended.
    str_cols = df.select_dtypes(include=['object']).columns
    if len(str_cols) > 0:
        df[str_cols] = df[str_cols].replace({'.': '0.0'})

    df = df.astype(float)
    # Linear interpolation fills interior gaps; ffill/bfill cover the edges.
    df = df.interpolate(method='linear', axis=0).ffill().bfill()
    in_window = (df.index >= start_date) & (df.index <= end_date)
    return df[in_window]

In [23]:
#03/13/2021
# NOTE(review): the date above is presumably when this data was last refreshed — confirm.

# Weekly file: dates are built from `day_endofweek`, and interpolate=True
# expands the weekly rows to one row per calendar day.
selected_cols = [
    'initclaims_count_regular',
    'contclaims_count_regular',
    'initclaims_count_combined',
    'contclaims_count_combined',
]
ui_claims = prepare_data(
    'UI Claims - State - Weekly.csv',
    day='day_endofweek',
    selected_cols=selected_cols,
    interpolate=True,
)
ui_claims.tail()

Unnamed: 0,initclaims_count_regular,contclaims_count_regular,initclaims_count_combined,contclaims_count_combined
2021-03-02,11497.0,178216.0,15530.0,0.0
2021-03-03,11497.0,178216.0,15530.0,0.0
2021-03-04,11497.0,178216.0,15530.0,0.0
2021-03-05,11497.0,178216.0,15530.0,0.0
2021-03-06,11497.0,178216.0,15530.0,0.0


In [24]:
#03/09/2021
# NOTE(review): the date above is presumably when this data was last refreshed — confirm.

# Weekly file, expanded to daily rows via interpolate=True. The window starts
# one day before the default start date; the displayed output has weekly
# observations on 2020-03-06 and 2020-03-13, so the earlier start presumably
# keeps the 03-06 point available for interpolating the following days.
selected_cols = ['bg_posts_ss60', 'bg_posts_ss70']
burning_glass = prepare_data(
    'Burning Glass - State - Weekly.csv',
    start_date='2020-03-06',
    selected_cols=selected_cols,
    day='day_endofweek',
    interpolate=True,
)
burning_glass.head(10)

Unnamed: 0,bg_posts_ss60,bg_posts_ss70
2020-03-06,0.0822,0.096
2020-03-07,0.072643,0.0837
2020-03-08,0.063086,0.0714
2020-03-09,0.053529,0.0591
2020-03-10,0.043971,0.0468
2020-03-11,0.034414,0.0345
2020-03-12,0.024857,0.0222
2020-03-13,0.0153,0.0099
2020-03-14,-0.0066,-0.0228
2020-03-15,-0.0285,-0.0555


In [25]:
#03/12/2021
# NOTE(review): the date above is presumably when this data was last refreshed — confirm.

# Daily file: default `day` column, no reindexing requested.
selected_cols = ['case_count', 'death_count']
covid_mi_daily = prepare_data(
    'COVID - State - Daily.csv',
    selected_cols=selected_cols,
)
covid_mi_daily.head()

Unnamed: 0_level_0,case_count,death_count
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-03-07,0.0,0.0
2020-03-08,0.0,0.0
2020-03-09,0.0,0.0
2020-03-10,2.0,0.0
2020-03-11,2.0,0.0


In [26]:
#selected_cols=['emp_combined_ss60', 'emp_combined_ss70']
#employment_daily = prepare_data('Employment Combined - State - Daily.csv', selected_cols=selected_cols)
#employment_daily.head()

In [27]:
#03/12/2021
# NOTE(review): the date above is presumably when this data was last refreshed — confirm.

# Daily file: default `day` column, no reindexing requested.
selected_cols = ['revenue_ss60', 'revenue_ss70']
revenue_daily = prepare_data(
    'Womply - State - Daily.csv',
    selected_cols=selected_cols,
)
revenue_daily.head()

Unnamed: 0_level_0,revenue_ss60,revenue_ss70
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-03-07,0.0546,-0.0413
2020-03-08,0.101,-0.0357
2020-03-09,0.0707,-0.037
2020-03-10,0.0271,-0.043
2020-03-11,0.0279,-0.0489


In [28]:
#03/12/2021
# NOTE(review): the date above is presumably when this data was last refreshed — confirm.

# Daily file: default `day` column, no reindexing requested.
selected_cols = ['spend_acf', 'spend_hcs']
spend_daily = prepare_data(
    'Affinity - State - Daily.csv',
    selected_cols=selected_cols,
)
spend_daily.head()

Unnamed: 0_level_0,spend_acf,spend_hcs
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-03-07,-0.031,-0.0155
2020-03-08,-0.0269,0.00977
2020-03-09,-0.0243,-0.0123
2020-03-10,-0.0102,0.19
2020-03-11,-0.0144,0.217


In [29]:
# Combine the five datasets column-wise; join='inner' keeps only the dates
# that are present in every one of them.
result = pd.concat(
    [
        ui_claims,
        burning_glass,
        covid_mi_daily,
        revenue_daily,
        spend_daily,
    ],
    axis=1,
    join='inner',
)
result.head()

Unnamed: 0,initclaims_count_regular,contclaims_count_regular,initclaims_count_combined,contclaims_count_combined,bg_posts_ss60,bg_posts_ss70,case_count,death_count,revenue_ss60,revenue_ss70,spend_acf,spend_hcs
2020-03-07,5150.0,77661.0,5150.0,77661.0,0.072643,0.0837,0.0,0.0,0.0546,-0.0413,-0.031,-0.0155
2020-03-08,5176.857143,77389.0,5176.857143,77389.0,0.063086,0.0714,0.0,0.0,0.101,-0.0357,-0.0269,0.00977
2020-03-09,5203.714286,77117.0,5203.714286,77117.0,0.053529,0.0591,0.0,0.0,0.0707,-0.037,-0.0243,-0.0123
2020-03-10,5230.571429,76845.0,5230.571429,76845.0,0.043971,0.0468,2.0,0.0,0.0271,-0.043,-0.0102,0.19
2020-03-11,5257.428571,76573.0,5257.428571,76573.0,0.034414,0.0345,2.0,0.0,0.0279,-0.0489,-0.0144,0.217


In [32]:
# Write the combined dataset relative to the working directory rather than to
# an absolute, user-specific Windows path, so the notebook also runs on other
# machines. The file name is unchanged.
result.to_csv('combinedcovid.csv')