In [35]:
import numpy as np 
import pandas as pd
from matplotlib import pyplot as plt
from datetime import datetime


In [2]:
data_ox = pd.read_csv("OxCGRT_march_21_2021.csv",low_memory=False)
data_owid = pd.read_csv("owid-covid-data_march_21_2021.csv",low_memory=False)

### data_ox include the oxford data base which include: 

In [3]:
data_ox.columns

Index(['CountryName', 'CountryCode', 'RegionName', 'RegionCode',
       'Jurisdiction', 'Date', 'C1_School closing', 'C1_Flag',
       'C2_Workplace closing', 'C2_Flag', 'C3_Cancel public events', 'C3_Flag',
       'C4_Restrictions on gatherings', 'C4_Flag', 'C5_Close public transport',
       'C5_Flag', 'C6_Stay at home requirements', 'C6_Flag',
       'C7_Restrictions on internal movement', 'C7_Flag',
       'C8_International travel controls', 'E1_Income support', 'E1_Flag',
       'E2_Debt/contract relief', 'E3_Fiscal measures',
       'E4_International support', 'H1_Public information campaigns',
       'H1_Flag', 'H2_Testing policy', 'H3_Contact tracing',
       'H4_Emergency investment in healthcare', 'H5_Investment in vaccines',
       'H6_Facial Coverings', 'H6_Flag', 'H7_Vaccination policy', 'H7_Flag',
       'H8_Protection of elderly people', 'H8_Flag', 'M1_Wildcard',
       'ConfirmedCases', 'ConfirmedDeaths', 'StringencyIndex',
       'StringencyIndexForDisplay', 'Stringenc

### Data_owid include the owid data base which include:

In [4]:
data_owid.columns

Index(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases',
       'new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
       'icu_patients_per_million', 'hosp_patients',
       'hosp_patients_per_million', 'weekly_icu_admissions',
       'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
       'weekly_hosp_admissions_per_million', 'new_tests', 'total_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'positive_rate', 'tests_per_case', 'tests_units', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated', 'new_vaccinations',
       'new_vaccinations_smoothed', 'total_vaccinations_per_hun

####  The table should be: 

#### State: (26 features)

###### Geographic and general state( 9 features): 
(1) CountryName,  <br />
(2) Date,  <br />
(3) population,  <br />
(4) population_density,  <br />
(5) median_age,  <br />
(6) gdp_per_capita,  <br />
(7) aged_65_older/population,  <br />
(8) life_expectancy,  <br />
(9)human_development_index. <br />

###### Corona state (9 features): 
(1) total_cases per million, <br />
(2)total_deaths per million, <br />
(3) new_cases_per_million, <br />
(4) new_deaths_per_million,<br />
(5) new_tests_per_thousand, <br />
(6) positive_rate, <br />
(7) people_fully_vaccinated_per_hundred, <br />
(8)icu_patients_per_million, <br />
(9) StringencyIndex(oxford) <br />

###### Health care state (6 features):  
(1) cardiovasc_death_rate, <br />
(2) diabetes_prevalence, <br />
(3) female_smokers+ male_smokers/population,<br />
(4) hospital_beds_per_thousand, <br />
(5) hosp_patients_per_million, <br />
(6) icu_patients_per_million. <br />


#### Policy: ( 23 features )
(1) C1_School closing,         + 0.5 X(2)  C1_flag <br />
(3) C2_Workplace closing,      + 0.5 X (4)  C2_flag <br />
(5) C3_Cancel public events,   + 0.5 X (6)  C3_flag <br />
(7) C4_Restrictions on gatherings,+  0.5X (8)  C4_flag <br />
(9) C5_Close public transport,    + 0.5X(10) C5_flag  <br />
(11) C6_Stay at home requirements, + 0.5X (12) C6_Flag <br />
(13) C7_Restrictions on internal movement +0.5X(14) C7_Flag <br />
(15) C8_International travel controls, +0.5X(16)C8_Flag  <br />
(17) H1_Public information campaigns, +0.5X(18)H1_Flag <br />
(19) H2_Testing policy, +0.5X (20) H2_Flag <br />
(21) H6_Facial Coverings, +0.5X(22) H6_Flag <br />
(23) C9_Vaccinate_n precetage of the population (new_vaccinations[t+1]/population) <br />


### seir models:

![image info](./images/SEIR-SEIRS.png)

####  (1)

\begin{split}\begin{aligned}
\frac{dS}{dt} & = -\frac{\beta SI}{N}\\
\frac{dE}{dt} & = \frac{\beta SI}{N} - \sigma E\\
\frac{dI}{dt} & = \sigma E - \gamma I\\
\frac{dR}{dt} & = \gamma I
\end{aligned}\end{split}

### We want to learn:
\begin{equation}
\begin{split}
& \beta - \textrm{Rate of spread, the probability of transmitting disease between a susceptible and an infectious individual } \\ 
& \sigma - \textrm{incubation rate, the rate of latent individuals becoming infectious} \\ 
& \gamma - \textrm{Recovery rate, = 1/D, is determined by the average duration, D, of infection}  \\ 
& \xi - \textrm{rate which recovered individuals return to the susceptible state} \\ 
\end{split}
\end{equation}

#### (2) SEIR with vital dynamics: (enabling vital dynamics (births and deaths) )

\begin{split}\begin{aligned}
\frac{dS}{dt} & = \mu N - \nu S - \frac{\beta SI}{N}\\
\frac{dE}{dt} & = \frac{\beta SI}{N} - \nu E - \sigma E\\
\frac{dI}{dt} & = \sigma E - \gamma I - \nu I\\
\frac{dR}{dt} & = \gamma I - \nu R
\end{aligned}\end{split}

### Addition:
\begin{equation}
\begin{split}
& \mu - \textrm{birth rate } \\ 
& \nu - \textrm{death rates} \\ 
\end{split}
\end{equation}

### The output: 
\begin{equation}
\begin{split}
& S- \textrm{susceptible population} \\
& I - \textrm{infected}, \\
& R - \textrm{removed population (either by death or recovery)}\\
& N = S+I+R
\end{split}
\end{equation}

In [5]:
data_owid.date = pd.to_datetime(data_owid['date'],format='%Y-%m-%d')
data_ox.Date = pd.to_datetime(data_ox['Date'],format='%Y%m%d')

In [6]:
data_ox = data_ox[data_ox.RegionName.isna()]

In [7]:
data_ox.columns

Index(['CountryName', 'CountryCode', 'RegionName', 'RegionCode',
       'Jurisdiction', 'Date', 'C1_School closing', 'C1_Flag',
       'C2_Workplace closing', 'C2_Flag', 'C3_Cancel public events', 'C3_Flag',
       'C4_Restrictions on gatherings', 'C4_Flag', 'C5_Close public transport',
       'C5_Flag', 'C6_Stay at home requirements', 'C6_Flag',
       'C7_Restrictions on internal movement', 'C7_Flag',
       'C8_International travel controls', 'E1_Income support', 'E1_Flag',
       'E2_Debt/contract relief', 'E3_Fiscal measures',
       'E4_International support', 'H1_Public information campaigns',
       'H1_Flag', 'H2_Testing policy', 'H3_Contact tracing',
       'H4_Emergency investment in healthcare', 'H5_Investment in vaccines',
       'H6_Facial Coverings', 'H6_Flag', 'H7_Vaccination policy', 'H7_Flag',
       'H8_Protection of elderly people', 'H8_Flag', 'M1_Wildcard',
       'ConfirmedCases', 'ConfirmedDeaths', 'StringencyIndex',
       'StringencyIndexForDisplay', 'Stringenc

### Actions data base data_ox

In [8]:
data_ox=data_ox.replace(np.nan, 0)

In [9]:
data_ox['C1_index']=(data_ox['C1_School closing']+0.5*data_ox['C1_Flag'])*2
data_ox['C2_index']=(data_ox['C2_Workplace closing']+0.5*data_ox['C2_Flag'])*2
data_ox['C3_index']=(data_ox['C3_Cancel public events']+0.5*data_ox['C3_Flag'])*2
data_ox['C4_index']=(data_ox['C4_Restrictions on gatherings']+0.5*data_ox['C4_Flag'])*2
data_ox['C5_index']=(data_ox['C5_Close public transport']+0.5*data_ox['C5_Flag'])*2
data_ox['C6_index']=(data_ox['C6_Stay at home requirements']+0.5*data_ox['C6_Flag'])*2
data_ox['C7_index']=(data_ox['C7_Restrictions on internal movement']+0.5*data_ox['C7_Flag'])*2
data_ox['C8_index']= data_ox['C8_International travel controls']
data_ox['H1_index']=(data_ox['H1_Public information campaigns']+0.5*data_ox['H1_Flag'])*2
data_ox['H6_index']=(data_ox['H6_Facial Coverings']+0.5*data_ox['H6_Flag'])*2
data_ox['H8_index']=(data_ox['H8_Protection of elderly people']+0.5*data_ox['H8_Flag'])*2


#data_ox['C9_index'] ----> need the other data base to be done...

data_ox=data_ox.drop(columns=[
            'C1_School closing','C2_Workplace closing','C3_Cancel public events','C4_Restrictions on gatherings'
            ,'C5_Close public transport', 'C6_Stay at home requirements', 'C7_Restrictions on internal movement',
            'C8_International travel controls','H1_Public information campaigns','H6_Facial Coverings', 
            'C1_Flag','C2_Flag','C3_Flag','C4_Flag','C5_Flag','C6_Flag','C7_Flag', 'H1_Flag','H6_Flag'
            ])

# Delete economic staff
data_ox=data_ox.drop(columns=[
       'E2_Debt/contract relief', 'E3_Fiscal measures',
       'E4_International support', 'H2_Testing policy', 'H3_Contact tracing',
       'H4_Emergency investment in healthcare', 'H5_Investment in vaccines',
       'H7_Vaccination policy', 'H7_Flag','H8_Protection of elderly people', 'H8_Flag',
       'M1_Wildcard', 'StringencyIndexForDisplay',
       'StringencyLegacyIndex', 'StringencyLegacyIndexForDisplay',
       'GovernmentResponseIndex', 'GovernmentResponseIndexForDisplay',
        'ContainmentHealthIndexForDisplay',
       'EconomicSupportIndex', 'EconomicSupportIndexForDisplay','Jurisdiction',
       'E1_Income support', 'E1_Flag',
        'CountryName', 'RegionName', 'RegionCode'
            ])


In [10]:
data_ox.to_csv('modified_Oxford.csv', index=False) 

In [11]:
data_ox = data_ox.rename(columns={'CountryCode': 'iso_code', 
                                  'Date':'date', 
                                  'ConfirmedCases':'total_cases',
                                  'ConfirmedDeaths':'total_deaths'})

In [12]:
data_ox.columns

Index(['iso_code', 'date', 'total_cases', 'total_deaths', 'StringencyIndex',
       'ContainmentHealthIndex', 'C1_index', 'C2_index', 'C3_index',
       'C4_index', 'C5_index', 'C6_index', 'C7_index', 'C8_index', 'H1_index',
       'H6_index', 'H8_index'],
      dtype='object')

#### Creating Geographic and General State:

In [13]:
data_owid=data_owid.drop(columns=[
        'location', 'continent', 'total_cases', 'new_cases',
       'new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed',
       'new_cases_smoothed_per_million',
       'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
        'hosp_patients',
        'weekly_icu_admissions',
       'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
       'weekly_hosp_admissions_per_million', 'new_tests', 'total_tests',
       'total_tests_per_thousand', 
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'tests_per_case', 'tests_units', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated', 'new_vaccinations',
       'total_vaccinations_per_hundred',
       'people_vaccinated_per_hundred',
       'new_vaccinations_smoothed_per_million', 'stringency_index',
       'aged_70_older', 'extreme_poverty',
        'handwashing_facilities'
            ])

In [14]:
data_owid.columns

Index(['iso_code', 'date', 'total_cases_per_million', 'new_cases_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'icu_patients_per_million', 'hosp_patients_per_million',
       'new_tests_per_thousand', 'positive_rate', 'new_vaccinations_smoothed',
       'people_fully_vaccinated_per_hundred', 'population',
       'population_density', 'median_age', 'aged_65_older', 'gdp_per_capita',
       'cardiovasc_death_rate', 'diabetes_prevalence', 'female_smokers',
       'male_smokers', 'hospital_beds_per_thousand', 'life_expectancy',
       'human_development_index'],
      dtype='object')

In [15]:
data_owid=data_owid.replace(np.nan, 0)
data_owid['smokers'] = data_owid['male_smokers']+data_owid['female_smokers']
data_owid = data_owid.drop(columns = ['male_smokers', 'female_smokers'])

In [16]:
data_owid.columns

Index(['iso_code', 'date', 'total_cases_per_million', 'new_cases_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'icu_patients_per_million', 'hosp_patients_per_million',
       'new_tests_per_thousand', 'positive_rate', 'new_vaccinations_smoothed',
       'people_fully_vaccinated_per_hundred', 'population',
       'population_density', 'median_age', 'aged_65_older', 'gdp_per_capita',
       'cardiovasc_death_rate', 'diabetes_prevalence',
       'hospital_beds_per_thousand', 'life_expectancy',
       'human_development_index', 'smokers'],
      dtype='object')

In [17]:
data_owid.to_csv('modified_owid.csv', index=False)

## combine databases

In [18]:
data_combined = pd.merge(data_owid, data_ox, how='inner', left_on=['iso_code', 'date'], right_on = ['iso_code', 'date'])

In [24]:
geographic_columns = ['iso_code', 
                     'date',
                     'population',
                     'population_density',
                     'median_age',
                     'gdp_per_capita',
                     'aged_65_older',
                     'life_expectancy',
                     'human_development_index']
covid_columns = ['iso_code',
                'date',
                'total_cases_per_million', 
                'total_cases',
                'total_deaths_per_million',
                'total_deaths',
                'people_fully_vaccinated_per_hundred',
                'hosp_patients_per_million',
                'icu_patients_per_million',
                'new_tests_per_thousand',
                'new_cases_per_million',
                'new_deaths_per_million',                
                'positive_rate',
                'StringencyIndex',
                'ContainmentHealthIndex']
health_columns = ['cardiovasc_death_rate',
                 'smokers',
                 'diabetes_prevalence',
                 'hospital_beds_per_thousand']
policies_columns = ['C1_index',
                    'C2_index',
                    'C3_index',
                    'C4_index',
                    'C5_index',
                    'C6_index',
                    'C7_index',
                    'C8_index',
                    'H1_index',
                    'H6_index',
                    'H8_index',
                    'new_vaccinations_smoothed']


In [26]:
data_geographic = data_combined[geographic_columns]
data_covid = data_combined[covid_columns]
data_health = data_combined[health_columns]
data_policies = data_combined[policies_columns]

## save all data files

In [37]:
date = datetime.date(datetime.now())
data_geographic.to_csv(f"data_geographic_{date}.csv")
data_covid.to_csv(f"data_covid_{date}.csv")
data_health.to_csv(f"data_health_{date}.csv")
data_policies.to_csv(f"data_policies_{date}.csv")

## create informaiton about distance between countries 

In [174]:
distance_matrix = pd.read_csv("distance-matrix.csv")
distance_matrix = distance_matrix.rename(columns={'Unnamed: 0':'alpha-2'})

In [175]:
country_conversion_data = pd.read_csv("country_iso_conversion.csv")

In [176]:
country_conversion_data = country_conversion_data[['alpha-2', 'alpha-3']]

In [177]:
relevant_countries = data_geographic['iso_code']
merged_geogrpahic_data = pd.merge(relevant_countries, country_conversion_data, how='left', left_on='iso_code', right_on='alpha-3')[['alpha-2', 'alpha-3']]


In [178]:
relevant_countires = merged_geogrpahic_data['alpha-2'].unique()

In [186]:
def get_nearest_countries(country_distances, country_conversion, relevant_countires, country_iso_code, n_countries=4):
    # get iso country 2 code for wanted country-
    country_iso_2 = country_conversion[country_conversion['alpha-3'] == country_iso_code]['alpha-2'].item()
    # get nearest countries index -  
    specific_country = country_distances[country_distances['alpha-2'] == country_iso_2].to_numpy()
    sorted_distances_index = np.argsort(specific_country[0, 1:], axis=0)[1:n_countries+5]
    iso_2_countries = country_distances['alpha-2'].to_numpy()[sorted_distances_index]
    print(iso_2_countries)
    iso_3_countries = []
    for s in iso_2_countries:
        if s in relevant_countires and len(iso_3_countries) < n_countries:
            iso_3_countries.append(country_conversion[country_conversion['alpha-2'] == s]['alpha-3'].item())
    return iso_3_countries

In [187]:
np.argsort(distance_matrix[distance_matrix['alpha-2'] == 'IL'].to_numpy()[0,1:],axis=0)

array([100,  91, 179, 110, 124,  51, 206,  61, 104, 219, 120, 189,   6,
        75,  85,  15,  21,  22, 140, 183,   5, 238, 105, 133,  63, 185,
       186, 137, 136, 224, 149,   1,  16, 192, 239,  97,  54,  95, 107,
       229, 198, 168, 209, 217, 196, 216, 200,  65,  12,  33,  52, 175,
       135, 126, 130,  40, 228, 132,  53,  38, 158,   2, 202,  60, 131,
         0,  58,  71, 174, 122,  19, 225, 162, 213,  55, 193, 112,  66,
        44,  64, 160, 108, 188, 113,  77,  79, 163,  23, 134, 180,  39,
        73, 101,  24,  37, 223,  84, 141,  99,  20,  72, 211,  78, 102,
       191, 204,  70, 147,  62, 164, 116, 152,  41, 242, 240,   8, 151,
        82, 197, 106, 201,  30, 128, 127, 199,  81,  18, 155,  89, 243,
       138, 103, 187, 143,  32, 142,  49, 184, 150, 156,  80,  45, 207,
       129, 123, 212, 241, 114, 154, 176, 234, 144,  92, 195, 194, 118,
        36, 119, 222,  26,  50, 173,  98,  25, 111, 210,  76,   3,  35,
        17,  83,   4, 148,  56, 146, 117, 125, 232, 230, 233, 20

In [188]:
get_nearest_countries(distance_matrix, country_conversion_data, relevant_countires, 'ISR', 4)

['GZ' 'PS' 'JO' 'LB' 'CY' 'SY' 'EG' 'IQ']


['PSE', 'JOR', 'LBN', 'CYP']