In [1]:
import pandas as pd
import os
import numpy as np
import epiweeks
import datetime
import requests
import zipfile
import csv
from contextlib import ExitStack
import progressbar as pbar
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import json
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Download and unzip covid data.
# `ids` maps the expected CSV base name to the OSF project id it is downloaded from.
ids = {'output_5': '7tnfh',}

# The temporary archive is written below 'data/', so make sure it exists first.
os.makedirs('data', exist_ok=True)
for data_name, data_id in ids.items():
    link = f'https://osf.io/{data_id}/download'
    downloaded_data = requests.get(link)
    # Fail loudly on HTTP errors instead of saving an error page that later
    # surfaces as an obscure BadZipFile.
    downloaded_data.raise_for_status()
    with open('data/tmp.zip', 'wb') as file:
        file.write(downloaded_data.content)
    with zipfile.ZipFile('data/tmp.zip', 'r') as zip_ref:
        zip_ref.extractall('data/')
    os.remove('data/tmp.zip')
    # Path where the extracted CSV is expected (not used further in this cell).
    data_file = f'data/Data/{data_name}.csv'
print('Covid data downloaded')

Covid data downloaded


In [3]:
# Download data files (population and deaths from Eurostat)
population_link = 'https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/DEMO_R_PJANGROUP/?format=SDMX-CSV&lang=en&label=both'
deaths_data_link = 'https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/DEMO_R_MWK2_05/?format=SDMX-CSV&lang=en&label=both'

# 'data/Data' is presumably created when the covid archive is extracted;
# create it here as well so this cell does not depend on that having happened.
os.makedirs('data/Data', exist_ok=True)

for link, target_path, label in (
    (population_link, 'data/Data/old_population.csv', 'Population'),
    (deaths_data_link, 'data/Data/old_deaths.csv', 'Deaths'),
):
    downloaded_data = requests.get(link)
    # Abort on HTTP errors rather than saving an error body as if it were CSV.
    downloaded_data.raise_for_status()
    with open(target_path, 'wb') as file:
        file.write(downloaded_data.content)
    print(f'{label} data downloaded')

Population data downloaded
Deaths data downloaded


In [2]:
# Output directory for this run, named after today's date (dd-mm-YYYY).
today = datetime.date.today().strftime("%d-%m-%Y")
data_dir_path = f'data/World/{today}'
# makedirs also creates the missing parents ('data', 'data/World') and is a
# no-op when the directory already exists, so the cell can be re-run safely.
os.makedirs(data_dir_path, exist_ok=True)

In [3]:
# Load the extracted covid table; the first three lines of the file are skipped
# before the header row.
# NOTE(review): the download cell builds its expected path with a lower-case
# 'output_5' while this reads 'Output_5.csv' — confirm the name inside the archive.
data = pd.read_csv(
    './data/Data/Output_5.csv',
    skiprows=3,
    encoding='latin-1',
)
# Countries to process, mapped from the name used in the covid table to a
# two-letter code (the codes look like Eurostat's, e.g. 'EL', 'UK' — TODO confirm).
countries = {
    'Albania': 'AL',
    'Austria': 'AT',
    'Belgium': 'BE',
    'Bulgaria': 'BG',
    'Croatia': 'HR',
    'Czechia': 'CZ',
    'Denmark': 'DK',
    'Estonia': 'EE',
    'Finland': 'FI',
    'France': 'FR',
    'Germany': 'DE',
    'Greece': 'EL',
    'Hungary': 'HU',
    'Ireland': 'IE',
    'Italy': 'IT',
    'Latvia': 'LV',
    'Montenegro': 'ME',
    'Netherlands': 'NL',
    'Norway': 'NO',
    'Portugal': 'PT',
    'Romania': 'RO',
    'Slovakia': 'SK',
    'Spain': 'ES',
    'Sweden': 'SE',
    'Switzerland': 'CH',
    'Turkey': 'TR',
    'United Kingdom': 'UK',
}

# Five-year bands that are folded into a single open-ended '85+' band.
OLDEST_AGE_GROUPS = ['85-89', '90-94', '95-99', '100-104']
# Column layout of the per-country output tables.
OUTPUT_COLUMNS = ['date', 'location', 'location_name', 'Age group', 'value']


def _daily_increments(country_data, age_group, column):
    """Return day-to-day, non-negative increments of `column` for one age group.

    The rows of `age_group` are summed per date, put on a daily grid
    (missing days are linearly interpolated) and differenced; negative
    differences are clipped to 0 and the first day gets 0.
    The input is presumably a cumulative count — verify against the data source.

    Returns a DataFrame with the columns 'Date' and `column`.
    """
    per_date = (
        country_data.loc[country_data['Age'] == age_group]
        .groupby('Date')[column]
        .sum()
    )
    daily = per_date.resample('D').mean().interpolate()
    increments = np.insert(np.maximum(0, np.diff(daily.values)), 0, 0)
    return pd.DataFrame({'Date': daily.index, column: increments})


def _prepare_country_table(country_data, column, country, country_code):
    """Build the long-format table (one row per date and age band) for `column`.

    Age bands are labelled '<age>-<age + 4>' with two-digit padding, which
    requires integer values in the 'Age' column. The bands listed in
    OLDEST_AGE_GROUPS are summed into one '85+' band. The result is sorted by
    date and has the columns listed in OUTPUT_COLUMNS.
    """
    frames = []
    for age_group in country_data['Age'].unique():
        frame = _daily_increments(country_data, age_group, column)
        # NOTE(review): 'location' receives the country name and 'location_name'
        # the two-letter code, exactly as before; this looks swapped — confirm
        # with the consumers of these files before changing it.
        frame['location'] = country
        frame['location_name'] = country_code
        frame['Age group'] = f'{age_group:02d}-{age_group + 4:02d}'
        frames.append(frame)

    if not frames:
        # No rows for this country: keep writing a header-only file.
        return pd.DataFrame(columns=OUTPUT_COLUMNS)

    # Concatenate once instead of appending inside the loop
    # (DataFrame.append was removed in pandas 2.0 and was quadratic anyway).
    table = (
        pd.concat(frames, ignore_index=True)
        .rename(columns={'Date': 'date', column: 'value'})
        [OUTPUT_COLUMNS]
    )

    is_oldest = table['Age group'].isin(OLDEST_AGE_GROUPS)
    if is_oldest.any():
        merged = (
            table[is_oldest]
            .groupby(['date', 'location', 'location_name'], as_index=False)['value']
            .sum()
        )
        # Label the merged band explicitly rather than via a frame-wide fillna,
        # which could also overwrite missing values in other columns.
        merged['Age group'] = '85+'
        table = pd.concat([table[~is_oldest], merged[OUTPUT_COLUMNS]], ignore_index=True)

    return table.sort_values('date').reset_index(drop=True)


for country in countries.keys():
    # Keep national totals ('All' regions), dates whose year starts with '202',
    # and rows for both sexes combined ('b').
    country_data = data[data['Country'] == country]
    country_data = country_data[country_data['Region'] == 'All']
    # .copy() so that the Date conversion below does not write into a slice of `data`.
    country_data = country_data[country_data['Date'].str[-4:-1] == '202'].copy()
    country_data['Date'] = pd.to_datetime(country_data['Date'], dayfirst=True)
    country_data = country_data[country_data['Sex'] == 'b']

    os.makedirs(f'{data_dir_path}/{country}', exist_ok=True)

    fixed_cases = _prepare_country_table(country_data, 'Cases', country, countries[country])
    fixed_cases.to_csv(f'{data_dir_path}/{country}/covid_cases.csv')

    fixed_deaths = _prepare_country_table(country_data, 'Deaths', country, countries[country])
    fixed_deaths.to_csv(f'{data_dir_path}/{country}/covid_deaths.csv')
print('Covid files prepared')

Covid files prepared


In [6]:
# Build a {country name: two-letter code} mapping from the Eurostat population
# file and store it as JSON.
# NOTE: this rebinds `countries`, replacing the hand-written mapping used for the
# covid files; the cells that split the population/deaths files use this version.
countries = {}
# The context manager closes the input file (it was previously left open);
# newline='' is what the csv module expects for files it reads.
with open('data/Data/old_population.csv', 'r', encoding='utf-8', newline='') as read:
    reader = csv.reader(read)
    next(reader, None)  # skip the header row
    for row in reader:
        # Column 6 holds '<code>:<label>'; split on the first colon only so a
        # label that itself contains ':' does not break the unpacking.
        short, separator, country = row[6].partition(':')
        # Two-character codes are taken to be countries; longer codes are
        # presumably sub-national regions — verify against the Eurostat geo list.
        if separator and len(short) == 2 and country not in countries:
            if short == 'DE':
                # Stored under the plain name instead of the label found in the file.
                countries['Germany'] = short
            else:
                countries[country] = short
with open(f'{data_dir_path}/eurostat_countries.json', 'w') as file:
    json.dump(countries, file, indent=4)

In [4]:
# Split the Eurostat population file into one CSV per country, keeping rows
# from 2015 onwards.
file_name = 'old_population.csv'
with ExitStack() as stack:
    # One open writer per country, keyed by the two-letter country code;
    # the ExitStack closes every file when the block ends.
    writer_dict = {}
    for country, country_code in countries.items():
        os.makedirs(f'{data_dir_path}/{country}', exist_ok=True)
        # newline='' is required for files handed to csv.writer, otherwise
        # extra blank lines appear on platforms that translate '\n'.
        file_list = stack.enter_context(open(f'{data_dir_path}/{country}/{file_name}', 'w', newline=''))
        writer_dict[country_code] = csv.writer(file_list, delimiter=',')
        writer_dict[country_code].writerow(['Year', 'Region', 'Age group', 'Sex', 'Value'])
    read = stack.enter_context(open('data/Data/old_population.csv', 'r', encoding='utf-8', newline=''))
    reader = csv.reader(read)
    next(reader, None)  # skip the header row
    for row in reader:
        # Assumed column layout: 4 = sex, 5 = age, 6 = geo, 7 = period, 8 = value
        # — TODO confirm against the downloaded file's header.
        # The first two characters of the geo code select the country, so
        # regional rows end up in their country's file.
        country_code = row[6][:2]
        # Plain string comparison; adequate for four-digit years.
        if row[7] >= str(2015) and country_code in writer_dict:
            writer_dict[country_code].writerow([row[7], row[6].split(':')[0], row[5].split(':')[0], row[4].split(':')[0], row[8]])
print('Population files prepared')

Population files prepared


In [5]:
# Split the Eurostat deaths file into one CSV per country, keeping rows whose
# period starts with a year from 2015 onwards.
file_name = 'old_deaths.csv'
with ExitStack() as stack:
    # One open writer per country, keyed by the two-letter country code;
    # the ExitStack closes every file when the block ends.
    writer_dict = {}
    for country, country_code in countries.items():
        # Create the country directory here too, so this cell does not depend
        # on the population cell having been run first.
        os.makedirs(f'{data_dir_path}/{country}', exist_ok=True)
        # newline='' is required for files handed to csv.writer, otherwise
        # extra blank lines appear on platforms that translate '\n'.
        file_list = stack.enter_context(open(f'{data_dir_path}/{country}/{file_name}', 'w', newline=''))
        writer_dict[country_code] = csv.writer(file_list, delimiter=',')
        writer_dict[country_code].writerow(['Year', 'Region', 'Age group', 'Sex', 'Value'])
    read = stack.enter_context(open('data/Data/old_deaths.csv', 'r', encoding='utf-8', newline=''))
    reader = csv.reader(read)
    next(reader, None)  # skip the header row
    for row in reader:
        # Assumed column layout: 3 = age, 4 = sex, 6 = geo, 7 = period, 8 = value
        # — TODO confirm against the downloaded file's header (the age column
        # index differs from the population file).
        country_code = row[6][:2]
        # Only the first four characters (the year) of the period are compared;
        # the full period string is written under the 'Year' header.
        if row[7][:4] >= str(2015) and country_code in writer_dict:
            writer_dict[country_code].writerow([row[7], row[6].split(':')[0], row[3].split(':')[0], row[4].split(':')[0], row[8]])
print('Deaths files prepared')

Deaths files prepared
