# Prepare Data for EDA and Models

In [3]:
# Imports
from os import listdir
from os.path import join

import matplotlib.pyplot as plt
import pandas as pd


In [4]:
def combine_files(path_to_files,
                  exclude=('all_states.csv', 'most_restricted.csv', 'least_restricted.csv')):
    """Read every CSV in a directory and stack the rows into one DataFrame.

    Parameters
    ----------
    path_to_files : str
        Directory to scan. Only files whose name ends in ``.csv`` are read.
    exclude : collection of str, optional
        File names to skip. The default lists the CSVs this notebook itself
        writes into the data directory, so re-running the notebook does not
        feed its own outputs back into the combined frame.

    Returns
    -------
    pandas.DataFrame
        Rows of all matching files, concatenated in sorted file-name order.
        Each file keeps its own row index (no re-indexing). An empty
        DataFrame is returned when no file matches.
    """
    # Sort the listing: os.listdir returns names in arbitrary order, which
    # would make the row order differ between machines/runs.
    frames = [
        pd.read_csv(join(path_to_files, file))
        for file in sorted(listdir(path_to_files))
        if file.endswith('.csv') and file not in exclude
    ]
    # pd.concat raises ValueError on an empty list, so keep the
    # "nothing found -> empty frame" result explicit.
    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(frames)

In [5]:
# Combine whatever CSVs combine_files picks up from the data directory
# and preview the first rows of the result.
data_dir = '../data/'
all_states = combine_files(data_dir)
all_states.head()

Unnamed: 0,state,week,depression,anxiety,addiction,counselling,mental_health,stay_at_home,mask_mandate,gatherings_banned,business_closures,travel_restrictions
0,Alaska,2017-12-31,28,46,24,37,0,0,0,0,0,0
1,Alaska,2018-01-07,37,45,25,65,37,0,0,0,0,0
2,Alaska,2018-01-14,10,46,16,53,30,0,0,0,0,0
3,Alaska,2018-01-21,33,41,10,35,14,0,0,0,0,0
4,Alaska,2018-01-28,32,24,0,40,0,0,0,0,0,0


In [6]:
# Parse the 'week' strings (YYYY-MM-DD) into datetime values, then list
# the column dtypes to confirm the conversion took effect.
parsed_weeks = pd.to_datetime(all_states['week'], format='%Y-%m-%d')
all_states['week'] = parsed_weeks
all_states.dtypes

state                          object
week                   datetime64[ns]
depression                      int64
anxiety                         int64
addiction                       int64
counselling                     int64
mental_health                   int64
stay_at_home                    int64
mask_mandate                    int64
gatherings_banned               int64
business_closures               int64
travel_restrictions             int64
dtype: object

In [8]:
# Split the data into two groups of states: the five states named below are
# labelled 'Most Restrictions', every other state 'Least Restrictions'.
most_restricted_states = ('Washington', 'New York', 'California', 'Hawaii', 'Alaska')

all_states['covid_restrictions'] = all_states['state'].apply(
    lambda state: 'Most Restrictions' if state in most_restricted_states else 'Least Restrictions'
)

In [10]:
# Write the combined frame to disk; index=False keeps the row index out of the file.
combined_csv_path = '../data/all_states.csv'
all_states.to_csv(combined_csv_path, index=False)

In [22]:
# Weekly mean of every numeric column across the 'Most Restrictions' states.
# numeric_only=True is needed because the frame still carries the text columns
# 'state' and 'covid_restrictions'; pandas >= 2.0 raises TypeError on them
# instead of silently dropping them as older versions did.
is_most_restricted = all_states['covid_restrictions'] == 'Most Restrictions'
most_restricted = all_states[is_most_restricted].groupby('week').mean(numeric_only=True)

most_restricted.head()

Unnamed: 0_level_0,depression,anxiety,addiction,counselling,mental_health,stay_at_home,mask_mandate,gatherings_banned,business_closures,travel_restrictions
week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-12-31,53.0,65.8,26.4,32.2,23.8,0.0,0.0,0.0,0.0,0.0
2018-01-07,58.0,66.0,26.0,40.4,33.8,0.0,0.0,0.0,0.0,0.0
2018-01-14,54.2,60.6,22.8,41.0,35.2,0.0,0.0,0.0,0.0,0.0
2018-01-21,63.4,65.2,21.6,38.0,33.2,0.0,0.0,0.0,0.0,0.0
2018-01-28,66.8,63.2,23.6,36.4,30.4,0.0,0.0,0.0,0.0,0.0


In [26]:
# Write the weekly averages for the most-restricted group to disk.
# The index is kept, so 'week' is written as the first column.
most_restricted_csv_path = '../data/most_restricted.csv'
most_restricted.to_csv(most_restricted_csv_path)

In [23]:
# Weekly mean of every numeric column across the 'Least Restrictions' states.
# numeric_only=True is needed because the frame still carries the text columns
# 'state' and 'covid_restrictions'; pandas >= 2.0 raises TypeError on them
# instead of silently dropping them as older versions did.
is_least_restricted = all_states['covid_restrictions'] == 'Least Restrictions'
least_restricted = all_states[is_least_restricted].groupby('week').mean(numeric_only=True)
least_restricted.head()

Unnamed: 0_level_0,depression,anxiety,addiction,counselling,mental_health,stay_at_home,mask_mandate,gatherings_banned,business_closures,travel_restrictions
week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-12-31,47.8,56.0,21.2,27.8,13.6,0.0,0.0,0.0,0.0,0.0
2018-01-07,56.2,58.6,20.6,24.8,20.2,0.0,0.0,0.0,0.0,0.0
2018-01-14,53.2,57.6,22.2,32.2,24.0,0.0,0.0,0.0,0.0,0.0
2018-01-21,63.8,61.6,24.8,34.4,23.2,0.0,0.0,0.0,0.0,0.0
2018-01-28,55.8,61.4,23.6,35.6,20.8,0.0,0.0,0.0,0.0,0.0


In [27]:
# Write the weekly averages for the least-restricted group to disk.
# The index is kept, so 'week' is written as the first column.
least_restricted_csv_path = '../data/least_restricted.csv'
least_restricted.to_csv(least_restricted_csv_path)