In [14]:
import pandas as pd
import numpy as np
import itertools
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [15]:
 #sklearn preproc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer, make_column_transformer, make_column_selector
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.preprocessing import RobustScaler, OneHotEncoder, OrdinalEncoder
from sklearn.metrics import make_scorer

from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.ensemble import RandomForestRegressor

In [16]:
# Vacination Dataset
url_vaccination = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv'
url_ages = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations-by-age-group.csv'

# Index URLs
url_index_strigency = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/stringency_index_avg.csv'
url_index_gov_response = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/government_response_index_avg.csv'
url_index_health = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/containment_health_index_avg.csv'
url_index_economic = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/economic_support_index.csv'

# Indicators URLs
# C sub-indicators
url_c1 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c1m_school_closing.csv'
url_c2 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c2m_workplace_closing.csv'
url_c3 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c3m_cancel_public_events.csv'
url_c4 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c4m_restrictions_on_gatherings.csv'
url_c5 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c5m_close_public_transport.csv'
url_c6 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c6m_stay_at_home_requirements.csv'
url_c7 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c7m_movementrestrictions.csv'
url_c8 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c8ev_internationaltravel.csv'

# E sub-indicators
url_e1 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/e1_income_support.csv'
url_e2 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/e2_debtrelief.csv'

# H sub-indicators
url_h1 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/h1_public_information_campaigns.csv'
url_h2 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/h2_testing_policy.csv'
url_h3 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/h3_contact_tracing.csv'
url_h6 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/h6m_facial_coverings.csv'
url_h7 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/h7_vaccination_policy.csv'
url_h8 = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/h8m_protection_of_elderly_ppl.csv'

# Target URLs
url_cases = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/confirmed_cases.csv'
url_deaths = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/confirmed_deaths.csv'

In [17]:
# DataFrame Index
df_strigency_raw = pd.read_csv(url_index_strigency)
df_gov_response_raw = pd.read_csv(url_index_gov_response)
df_health_raw = pd.read_csv(url_index_health)
df_economic_raw = pd.read_csv(url_index_economic)

# DataFrame Vaccination
df_vaccination_raw = pd.read_csv(url_vaccination)
df_ages_raw = pd.read_csv(url_ages)

df_c1_raw = pd.read_csv(url_c1)
df_c2_raw = pd.read_csv(url_c2)
df_c3_raw = pd.read_csv(url_c3)
df_c4_raw = pd.read_csv(url_c4)
df_c5_raw = pd.read_csv(url_c5)
df_c6_raw = pd.read_csv(url_c6)
df_c7_raw = pd.read_csv(url_c7)
df_c8_raw = pd.read_csv(url_c8)

df_e1_raw = pd.read_csv(url_e1)
df_e2_raw = pd.read_csv(url_e2)

df_h1_raw = pd.read_csv(url_h1)
df_h2_raw = pd.read_csv(url_h2)
df_h3_raw = pd.read_csv(url_h3)
df_h6_raw = pd.read_csv(url_h6)
df_h7_raw = pd.read_csv(url_h7)
df_h8_raw = pd.read_csv(url_h8)


# Data Frame target
df_cases_raw = pd.read_csv(url_cases)
df_deaths_raw = pd.read_csv(url_deaths)

In [18]:
def data_cleaning_all_index(name_data_table):
    trans_table=name_data_table.groupby('country_code').sum().T.drop('Unnamed: 0')
    trans_table.index = pd.to_datetime(trans_table.index)
    return trans_table
    

In [19]:
def data_cleaning_all_indicator(name_data_table):
    trans_table=name_data_table.groupby('country_code').mean().round(decimals = 0).T.drop('Unnamed: 0')
    trans_table.index = pd.to_datetime( trans_table.index)
    return trans_table

In [20]:
def generate_country_code(country):
    code=df_cases_raw[df_cases_raw['country_name']==country]['country_code']
    code=code.iloc[0]
    return code
    
generate_country_code('France')

'FRA'

In [21]:
df_strigency = data_cleaning_all_index(df_strigency_raw)
df_gov_response = data_cleaning_all_index(df_gov_response_raw)
df_health= data_cleaning_all_index(df_health_raw)
df_economic= data_cleaning_all_index(df_economic_raw)


# DataFrames Indicator
df_c1 = data_cleaning_all_indicator(df_c1_raw)
df_c2 = data_cleaning_all_indicator(df_c2_raw)
df_c3 = data_cleaning_all_indicator(df_c3_raw)
df_c4 = data_cleaning_all_indicator(df_c4_raw)
df_c5 = data_cleaning_all_indicator(df_c5_raw)
df_c6 = data_cleaning_all_indicator(df_c6_raw)
df_c7 = data_cleaning_all_indicator(df_c6_raw)
df_c8 = data_cleaning_all_indicator(df_c7_raw)

df_e1 =data_cleaning_all_indicator(df_e1_raw)
df_e2 = data_cleaning_all_indicator(df_e2_raw)

df_h1 = data_cleaning_all_indicator(df_h1_raw)
df_h2 = data_cleaning_all_indicator(df_h2_raw)
df_h3 = data_cleaning_all_indicator(df_h3_raw)
df_h6 = data_cleaning_all_indicator(df_h6_raw)
df_h7 = data_cleaning_all_indicator(df_h7_raw)
df_h8 =data_cleaning_all_indicator(df_h8_raw)


# Data Frame target
df_cases =data_cleaning_all_index(df_cases_raw)
df_deaths = data_cleaning_all_index(df_deaths_raw)

# Cleaning Vaccination Features
df_vaccination = df_vaccination_raw.copy()
df_vaccination = df_vaccination[['date','iso_code','total_vaccinations', 'people_vaccinated','people_fully_vaccinated', 'total_boosters']]
df_vaccination=df_vaccination.set_index('date')
#df_vaccination = pd.to_datetime( df_vaccination.index)

#df_vaccination.rename(columns = {'iso_code': 'country_code'}, inplace = True)
df_vaccination.index = pd.to_datetime( df_vaccination.index)
df_vaccination.head()

Unnamed: 0_level_0,iso_code,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-02-22,AFG,0.0,0.0,,
2021-02-23,AFG,,,,
2021-02-24,AFG,,,,
2021-02-25,AFG,,,,
2021-02-26,AFG,,,,


In [22]:

def country_output(country):
    df_gov_response_usa=df_gov_response[country]
    country_index=df_gov_response_usa
    country_index=pd.DataFrame(country_index)
    country_index.columns = ['gov_response']
    country_index.insert(0, 'containment_and_health', df_health[country])
    country_index.insert(1, 'stringency', df_strigency[country])
    country_index.insert(2,'economics_sup',df_economic[country])
    country_index.insert(3,'total_cases',df_cases[country])
    country_index.insert(4,'new_cases',df_cases[country]-df_cases[country].shift(1))
    country_index.insert(5,'total_deaths',df_deaths[country])
    country_index.insert(6,'new_deaths',df_deaths[country] - df_deaths[country].shift(1))
    country_index.index.name='date'
    country_index['new_cases'].loc[country_index['new_cases'] < 0] = 0
    country_index['new_deaths'].loc[country_index['new_deaths'] < 0] = 0
    country_index['gov_response'] = (country_index['gov_response'] / country_index['gov_response'].sum()) * 100
    country_index['containment_and_health'] = (country_index['containment_and_health'] / country_index['containment_and_health'].sum()) * 100
    country_index['stringency'] = (country_index['stringency'] / country_index['stringency'].sum()) * 100
    country_index['economics_sup'] = (country_index['economics_sup'] / country_index['economics_sup'].sum()) * 100
    country_index['economics_sup'] = (country_index['economics_sup'] / country_index['stringency'].sum()) * 100
    
    
    
    
    #vaccination
    country_vaccination=df_vaccination[df_vaccination['iso_code']==country]
    country_vaccination=country_vaccination[['total_vaccinations', 'people_vaccinated','people_fully_vaccinated', 'total_boosters']]
    
    #indicator
    df_c2_usa=df_c2[country]
    country_indicator= df_c2_usa
    country_indicator=pd.DataFrame(country_indicator)
    country_indicator.columns = ['workplace_closing']
    country_indicator.insert(0, 'cancel_public_events', df_c3[country])
    country_indicator.insert(1, 'school_closing', df_c1[country])
    country_indicator.insert(2, 'restrictions_on_gathering', df_c4[country])
    country_indicator.insert(3,'close_public_transport',df_c5[country])
    country_indicator.insert(4,'stay_at_home_requirements',df_c6[country])
    country_indicator.insert(5,'restrictions_on_internal_movement',df_c7[country])
    country_indicator.insert(6,'international_travel_controls',df_c8[country])
    country_indicator.insert(7,'income_support',df_e1[country])
    country_indicator.insert(8,'debt/contract_relief',df_e2[country])
    country_indicator.insert(9,'public_information_campaigns',df_h1[country])
    country_indicator.insert(10,'testing_policy',df_h2[country])
    country_indicator.insert(11,'contact_tracing',df_h3[country])
    country_indicator.insert(12,'facial_coverings',df_h6[country])
    country_indicator.insert(13,'vaccination_policy',df_h7[country])
    country_indicator.insert(14,'protection_of_elderly_people',df_h8[country])
    country_indicator.insert(15,'total_cases',df_cases[country])
    country_indicator.insert(16,'new_cases',df_cases[country]-df_cases[country].shift(1))
    country_indicator.insert(17,'total_deaths',df_deaths[country])
    country_indicator.insert(18,'new_deaths',df_deaths[country] - df_deaths[country].shift(1))
    country_indicator.index.name='date'
    country_indicator['new_cases'].loc[country_indicator['new_cases'] < 0] = 0
    country_indicator['new_deaths'].loc[country_indicator['new_deaths'] < 0] = 0
    
    
    country_index = country_index.merge(country_vaccination, how = 'left' , on = 'date')
    country_indicator=country_indicator.merge(country_vaccination, how = 'left' , on = 'date')
 
    
    indicator_death=country_indicator[country_indicator['total_deaths']>0]
    first_death_date=indicator_death.index[0]
    last_death_date=indicator_death.index[-1]
    country_indicator=country_indicator[~(country_indicator.index < first_death_date)]
    country_indicator=country_indicator[~(country_indicator.index > last_death_date)]
    
    index_death=country_index[country_index['total_deaths']>0]
    first_death_date_index=index_death.index[0]
    last_death_date_index=index_death.index[-1]
    country_index=country_index[~(country_index.index < first_death_date_index)]
    country_index=country_index[~(country_index.index > last_death_date_index)]
    
    country_indicator = country_indicator.fillna(0)
    country_index = country_index.fillna(0)
    
    
    
    
    return country_index,country_indicator

In [23]:
country='United Kingdom'
code=generate_country_code(country)
index=f'index_{country}'
indicator=f'indicator_{country}'
index=pd.DataFrame(country_output(code)[0])
indicator=pd.DataFrame(country_output(code)[1])
index


Unnamed: 0_level_0,containment_and_health,stringency,economics_sup,total_cases,new_cases,total_deaths,new_deaths,gov_response,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-01-30,0.005077,0.005028,0.0,4.0,4.0,1.0,1.0,0.010315,0.0,0.0,0.0,0.0
2020-01-31,0.011937,0.006281,0.0,6.0,2.0,1.0,0.0,0.023307,0.0,0.0,0.0,0.0
2020-02-01,0.011937,0.006281,0.0,6.0,0.0,1.0,0.0,0.023307,0.0,0.0,0.0,0.0
2020-02-02,0.014224,0.010048,0.0,10.0,4.0,2.0,1.0,0.028867,0.0,0.0,0.0,0.0
2020-02-03,0.014224,0.010048,0.0,88.0,78.0,2.0,0.0,0.028867,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2022-09-07,0.000000,0.000000,0.0,83990958.0,6275.0,377715.0,0.0,0.041138,0.0,0.0,0.0,0.0
2022-09-08,0.000000,0.000000,0.0,27092511.0,0.0,378910.0,1195.0,0.041138,0.0,0.0,0.0,0.0
2022-09-09,0.000000,0.000000,0.0,27092511.0,0.0,205718.0,0.0,0.041138,0.0,0.0,0.0,0.0
2022-09-10,0.000000,0.000000,0.0,27092511.0,0.0,205718.0,0.0,0.041138,0.0,0.0,0.0,0.0


In [19]:
path=f'../data/out_csv/indicator_{country}.csv'
indicator.to_csv(path)

In [20]:
path_index=f'../data/out_csv/index_{country}.csv'
index.to_csv(path_index)

In [36]:
data_countries = {}

countries = df_cases_raw['country_name']
#countries = countries.drop(['Turkmenistan'])
save_path = f'../data/out_csv/index_{country}.csv'
countries

0             Aruba
1       Afghanistan
2            Angola
3           Albania
4           Andorra
           ...     
258         Vanuatu
259           Yemen
260    South Africa
261          Zambia
262        Zimbabwe
Name: country_name, Length: 263, dtype: object

In [37]:

for country in countries:
    code=generate_country_code(country)
    index=f'index_{country}'
    index=pd.DataFrame(country_output(code)[0])
    print(country)
    path_index=f'../data/out_csv/index_{country}.csv'
    index.to_csv(path_index)

Aruba
Afghanistan
Angola
Albania
Andorra
United Arab Emirates
Argentina
Australia
Australia
Australia
Australia
Australia
Australia
Australia
Australia
Australia
Austria
Azerbaijan
Burundi
Belgium
Benin
Burkina Faso
Bangladesh
Bulgaria
Bahrain
Bahamas
Bosnia and Herzegovina
Belarus
Belize
Bermuda
Bolivia
Brazil
Barbados
Brunei
Bhutan
Botswana
Central African Republic
Canada
Canada
Canada
Canada
Canada
Canada
Canada
Canada
Canada
Canada
Canada
Canada
Canada
Canada
Switzerland
Chile
China
Cote d'Ivoire
Cameroon
Democratic Republic of Congo
Congo
Colombia
Comoros
Cape Verde
Costa Rica
Cuba
Cyprus
Czech Republic
Germany
Djibouti
Dominica
Denmark
Dominican Republic
Algeria
Ecuador
Egypt
Eritrea
Spain
Estonia
Ethiopia
Finland
Fiji
France
Faeroe Islands
Gabon
United Kingdom
United Kingdom
United Kingdom
United Kingdom
United Kingdom
Georgia
Ghana
Guinea
Gambia
Greece
Grenada
Greenland
Guatemala
Guam
Guyana
Hong Kong
Honduras
Croatia
Haiti
Hungary
Indonesia
India
Ireland
Iran
Iraq
Iceland
Isra

IndexError: index 0 is out of bounds for axis 0 with size 0