In [1]:
import pandas as pd
import os
import numpy as np
import epiweeks
import datetime
import requests
import zipfile
import csv
from contextlib import ExitStack
import progressbar as pbar
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import json
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Snapshot directory for this run, keyed by today's date (DD-MM-YYYY).
today = datetime.date.today().strftime("%d-%m-%Y")
data_dir_path = f'data/World/{today}'
# makedirs also creates missing parents (e.g. data/World) and, with
# exist_ok=True, does nothing when the directory is already there.
os.makedirs(data_dir_path, exist_ok=True)

In [3]:
# GERMANY
country = 'Germany'
# Output directory for the Germany CSVs; exist_ok=True makes the cell safe to
# re-run, and makedirs creates any missing parent directory as well.
os.makedirs(f'{data_dir_path}/{country}', exist_ok=True)

def _load_combiner(path):
    """Return the JSON mapping stored at ``path``, or ``{}`` if the file does not exist."""
    if not os.path.exists(path):
        return {}
    with open(path, 'r') as file:
        return json.load(file)


# Each combiner is checked against its OWN file. Testing for the population
# file before opening the deaths/covid files would raise FileNotFoundError
# whenever only the population file had been written so far.
population_age_group_combiner = _load_combiner(f'{data_dir_path}/population_age_group_combiner.json')
deaths_age_group_combiner = _load_combiner(f'{data_dir_path}/deaths_age_group_combiner.json')
covid_age_group_combiner = _load_combiner(f'{data_dir_path}/covid_age_group_combiner.json')
    
# Germany: each mapping is {target age band: [source age bands merged into it]}.
population_age_group_combiner.update({
    'Germany': {
        '80+': ['80-84', '85+'],
        '60-79': ['60-64', '65-69', '70-74', '75-79'],
        '00-59': ['00-04', '05-09', '10-14', '15-19', '20-24', '25-29',
                  '30-34', '35-39', '40-44', '45-49', '50-54', '55-59'],
    }
})

# NOTE(review): the deaths '00-59' band only lists '40-44'..'55-59'; confirm
# that the younger source bands are intentionally left out.
deaths_age_group_combiner.update({
    'Germany': {
        '80+': ['80-84', '85+'],
        '60-79': ['60-64', '65-69', '70-74', '75-79'],
        '00-59': ['40-44', '45-49', '50-54', '55-59'],
    }
})

covid_age_group_combiner.update({
    'Germany': {'00-59': ['00-04', '05-14', '15-34', '35-59']}
})

# Persist all three mappings next to the data snapshot.
combiner_outputs = [
    (f'{data_dir_path}/population_age_group_combiner.json', population_age_group_combiner),
    (f'{data_dir_path}/deaths_age_group_combiner.json', deaths_age_group_combiner),
    (f'{data_dir_path}/covid_age_group_combiner.json', covid_age_group_combiner),
]
for combiner_path, combiner in combiner_outputs:
    with open(combiner_path, 'w') as file:
        json.dump(combiner, file, indent=4)


# RKI age-band codes -> the 'LL-UU' labels written to the output CSVs.
RKI_AGE_LABELS = {
    'A00-A04': '00-04',
    'A05-A14': '05-14',
    'A15-A34': '15-34',
    'A35-A59': '35-59',
    'A60-A79': '60-79',
    'A80+': '80+',
}


def _tidy_rki_truth(frame):
    """Return a cleaned copy of an RKI by-age truth table.

    Steps: relabel the age bands via RKI_AGE_LABELS, drop rows whose age group
    is 'unbekannt', drop the original ``location_name`` column, rename
    ``age_group`` -> ``Age group`` and ``location`` -> ``location_name``, and
    replace the first two characters of every location code with 'DE'.
    The input frame is not modified.
    """
    # Assign the relabelled column instead of calling
    # frame['age_group'].replace(..., inplace=True): an in-place replace on a
    # column selection is not guaranteed to write back to the parent frame.
    tidy = frame.assign(age_group=frame['age_group'].replace(RKI_AGE_LABELS))
    tidy = tidy[tidy['age_group'] != 'unbekannt']
    tidy = tidy.drop(columns='location_name').rename(
        columns={'age_group': 'Age group', 'location': 'location_name'}
    )
    tidy['location_name'] = tidy['location_name'].apply(lambda code: 'DE' + code[2:])
    return tidy


cases_csv = 'https://raw.githubusercontent.com/KITmetricslab/covid19-forecast-hub-de/master/data-truth/RKI/by_age/truth_RKI-Incident%20Cases%20by%20Age_Germany.csv'
cases = _tidy_rki_truth(pd.read_csv(cases_csv))
cases.to_csv(f'{data_dir_path}/{country}/covid_cases.csv')

deaths_csv = 'https://raw.githubusercontent.com/KITmetricslab/covid19-forecast-hub-de/master/data-truth/RKI/by_age/truth_RKI-Incident%20Deaths%20by%20Age_Germany.csv'
deaths = pd.read_csv(deaths_csv)
# NOTE(review): this relabels a value in the original location_name column,
# which _tidy_rki_truth drops, so the change never reaches the CSV. Confirm
# whether the rename is still needed.
deaths.loc[deaths['location_name'] == 'Free State of Thuringia', 'location_name'] = 'Free State of Thüringia'
deaths = _tidy_rki_truth(deaths)
deaths.to_csv(f'{data_dir_path}/{country}/covid_deaths.csv')

In [4]:
# Export weekly COVID cases per age band, one CSV per country.
data = pd.read_csv('./data/Data/Covid_deaths.csv')
data = data[data['Measure'].isin(['Cases'])]
with open(f'{data_dir_path}/eurostat_countries.json', 'r') as file:
    country_translator = json.load(file)

for country in data['Country'].unique():
    # Germany is written by the dedicated RKI cell; Ukraine and
    # United Kingdom are excluded from the cases export.
    if country in ['Ukraine', 'Germany', 'United Kingdom']:
        continue

    country_rows = data[data['Country'] == country]
    sexes = set(country_rows['Sex'].unique())
    if 'b' in sexes:
        # Prefer the rows already reported for both sexes combined.
        cases = country_rows[country_rows['Sex'] == 'b']
        split_by_sex = False
    elif {'f', 'm'} <= sexes:
        # Otherwise add male and female rows together (done by the groupby below).
        cases = country_rows[country_rows['Sex'].isin(['m', 'f'])]
        split_by_sex = True
    else:
        continue

    cases = cases[cases['week'].str.startswith('202')]
    if split_by_sex:
        # NOTE(review): weeks whose label ends in '2-W52' are dropped for
        # sex-split countries only; the reason is not visible here - confirm.
        cases = cases[~cases['week'].str.endswith('2-W52')]
    cases = cases[~cases['Age'].isin(['TOT', 'UNK'])]
    if cases.empty:
        continue

    # Map each lower age bound to an 'LL-UU' label. Sorting numerically and
    # keying by the raw label makes the open-ended '<max>+' band always belong
    # to the highest age, regardless of row order or zero-padding in the file.
    ages = sorted(cases['Age'].unique(), key=int)
    age_groups = {
        age: f"{int(age):02d}-{int(next_age) - 1:02d}"
        for age, next_age in zip(ages, ages[1:])
    }
    age_groups[ages[-1]] = f"{ages[-1]}+"

    # NOTE(review): '%W' numbers weeks from the first Monday of the year, which
    # is not ISO-8601 week numbering. If 'week' holds ISO weeks the matching
    # format is '%G-W%V-%u' - TODO confirm against the data source.
    # Building a new frame avoids assigning into a slice of `data`.
    cases = pd.DataFrame({
        'date': cases['week'].apply(
            lambda week: datetime.datetime.strptime(week + '-1', "%Y-W%W-%w").strftime('%Y-%m-%d')
        ),
        'location_name': cases['Country'].replace(country_translator),
        'Age group': cases['Age'].map(age_groups),
        'value': cases['WeekSum'],
    })
    # Summing only 'value' keeps non-numeric columns out of the aggregation.
    cases = cases.groupby(['Age group', 'location_name', 'date'], as_index=False)['value'].sum()
    cases = cases[['date', 'location_name', 'Age group', 'value']]

    os.makedirs(f'{data_dir_path}/{country}', exist_ok=True)
    cases.to_csv(f'{data_dir_path}/{country}/covid_cases.csv')
    print(country, 'done')

Austria done
Belgium done
Bulgaria done
Croatia done
Czechia done
Denmark done
France done
Greece done
Ireland done
Italy done
Netherlands done
Norway done
Portugal done
Romania done
Spain done
Sweden done
Switzerland done


In [None]:
# Reload the unfiltered table and preview the first rows only, rather than
# rendering the whole frame in the notebook output.
data = pd.read_csv('./data/Data/Covid_deaths.csv')
data.head()

In [5]:
# Export weekly COVID deaths per age band, one CSV per country.
data = pd.read_csv('./data/Data/Covid_deaths.csv')
data = data[data['Measure'].isin(['Deaths'])]
with open(f'{data_dir_path}/eurostat_countries.json', 'r') as file:
    country_translator = json.load(file)

for country in data['Country'].unique():
    # Germany is written by the dedicated RKI cell; Ukraine is excluded.
    if country in ['Ukraine', 'Germany']:
        continue

    country_rows = data[data['Country'] == country]
    sexes = set(country_rows['Sex'].unique())
    if 'b' in sexes:
        # Prefer the rows already reported for both sexes combined.
        covid_deaths = country_rows[country_rows['Sex'] == 'b']
        split_by_sex = False
    elif {'f', 'm'} <= sexes:
        # Otherwise add male and female rows together (done by the groupby below).
        covid_deaths = country_rows[country_rows['Sex'].isin(['m', 'f'])]
        split_by_sex = True
    else:
        continue

    covid_deaths = covid_deaths[covid_deaths['week'].str.startswith('202')]
    if split_by_sex:
        # NOTE(review): weeks whose label ends in '2-W52' are dropped for
        # sex-split countries only; the reason is not visible here - confirm.
        covid_deaths = covid_deaths[~covid_deaths['week'].str.endswith('2-W52')]
    covid_deaths = covid_deaths[~covid_deaths['Age'].isin(['TOT', 'UNK'])]
    if covid_deaths.empty:
        continue

    # Map each lower age bound to an 'LL-UU' label. Sorting numerically and
    # keying by the raw label makes the open-ended '<max>+' band always belong
    # to the highest age, regardless of row order or zero-padding in the file.
    ages = sorted(covid_deaths['Age'].unique(), key=int)
    age_groups = {
        age: f"{int(age):02d}-{int(next_age) - 1:02d}"
        for age, next_age in zip(ages, ages[1:])
    }
    age_groups[ages[-1]] = f"{ages[-1]}+"

    # NOTE(review): '%W' numbers weeks from the first Monday of the year, which
    # is not ISO-8601 week numbering. If 'week' holds ISO weeks the matching
    # format is '%G-W%V-%u' - TODO confirm against the data source.
    # Building a new frame avoids assigning into a slice of `data`.
    covid_deaths = pd.DataFrame({
        'date': covid_deaths['week'].apply(
            lambda week: datetime.datetime.strptime(week + '-1', "%Y-W%W-%w").strftime('%Y-%m-%d')
        ),
        'location_name': covid_deaths['Country'].replace(country_translator),
        'Age group': covid_deaths['Age'].map(age_groups),
        'value': covid_deaths['WeekSum'],
    })
    # Summing only 'value' keeps non-numeric columns out of the aggregation.
    covid_deaths = covid_deaths.groupby(['Age group', 'location_name', 'date'], as_index=False)['value'].sum()
    covid_deaths = covid_deaths[['date', 'location_name', 'Age group', 'value']]

    os.makedirs(f'{data_dir_path}/{country}', exist_ok=True)
    covid_deaths.to_csv(f'{data_dir_path}/{country}/covid_deaths.csv')
    print(country, 'done')

Austria done
Belgium done
Denmark done
France done
Greece done
Hungary done
Ireland done
Italy done
Latvia done
Netherlands done
Norway done
Portugal done
Romania done
Spain done
Sweden done
Switzerland done
United Kingdom done


In [6]:
# Reload the three age-group combiner mappings written earlier, falling back
# to an empty dict when a file is absent. Each existence check targets the
# same file that is opened, so a missing deaths/covid file no longer raises
# FileNotFoundError just because the population file exists.
if os.path.exists(f'{data_dir_path}/population_age_group_combiner.json'):
    with open(f'{data_dir_path}/population_age_group_combiner.json', 'r') as file:
        population_age_group_combiner = json.load(file)
else:
    population_age_group_combiner = {}
if os.path.exists(f'{data_dir_path}/deaths_age_group_combiner.json'):
    with open(f'{data_dir_path}/deaths_age_group_combiner.json', 'r') as file:
        deaths_age_group_combiner = json.load(file)
else:
    deaths_age_group_combiner = {}
if os.path.exists(f'{data_dir_path}/covid_age_group_combiner.json'):
    with open(f'{data_dir_path}/covid_age_group_combiner.json', 'r') as file:
        covid_age_group_combiner = json.load(file)
else:
    covid_age_group_combiner = {}

In [7]:
# List the distinct 'Age group' labels in each of the four Italian input
# tables so the combiner mappings can be written against them.
country = 'Italy'
deaths = pd.read_csv(f'{data_dir_path}/{country}/covid_deaths.csv')
cases = pd.read_csv(f'{data_dir_path}/{country}/covid_cases.csv')
total_deaths = pd.read_csv(f'{data_dir_path}/{country}/old_deaths.csv')
population = pd.read_csv(f'{data_dir_path}/{country}/old_population.csv')

labelled_tables = (
    ('covid deaths', deaths),
    ('covid cases', cases),
    ('total deaths', total_deaths),
    ('population', population),
)
for table_label, table in labelled_tables:
    print(table_label, list(table['Age group'].unique()))

covid deaths ['00-09', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80-89', '90+']
covid cases ['00-09', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80-89', '90+']
total deaths ['TOTAL', 'UNK', 'Y10-14', 'Y15-19', 'Y20-24', 'Y25-29', 'Y30-34', 'Y35-39', 'Y40-44', 'Y45-49', 'Y5-9', 'Y50-54', 'Y55-59', 'Y60-64', 'Y65-69', 'Y70-74', 'Y75-79', 'Y80-84', 'Y85-89', 'Y_GE90', 'Y_LT5']
population ['TOTAL', 'UNK', 'Y10-14', 'Y15-19', 'Y20-24', 'Y25-29', 'Y30-34', 'Y35-39', 'Y40-44', 'Y45-49', 'Y5-9', 'Y50-54', 'Y55-59', 'Y60-64', 'Y65-69', 'Y70-74', 'Y75-79', 'Y80-84', 'Y_GE75', 'Y_GE80', 'Y_GE85', 'Y_LT5']


In [8]:
# Italy
country = 'Italy'

# Population and all-cause deaths use the same merge of five-year bands:
# {target age band: [source age bands merged into it]}.
_italy_five_year_merge = {
    '80+': ['80-84', '85+'],
    '70-79': ['70-74', '75-79'],
    '60-69': ['60-64', '65-69'],
    '00-59': ['00-04', '05-09', '10-14', '15-19', '20-24', '25-29',
              '30-34', '35-39', '40-44', '45-49', '50-54', '55-59'],
}

# Copy the lists so the two combiners never share list objects.
population_age_group_combiner[country] = {
    target: list(sources) for target, sources in _italy_five_year_merge.items()
}
deaths_age_group_combiner[country] = {
    target: list(sources) for target, sources in _italy_five_year_merge.items()
}

# COVID data comes in ten-year bands; only the youngest and oldest are merged.
covid_age_group_combiner[country] = {
    '00-59': ['00-09', '10-19', '20-29', '30-39', '40-49', '50-59'],
    '80+': ['80-89', '90+'],
}

In [9]:
# Austria
country = 'Austria'

# Population and all-cause deaths use the same merge of five-year bands:
# {target age band: [source age bands merged into it]}.
_austria_five_year_merge = {
    '75-84': ['75-79', '80-84'],
    '65-74': ['65-69', '70-74'],
    '55-64': ['55-59', '60-64'],
    '00-54': ['00-04', '05-09', '10-14', '15-19', '20-24', '25-29',
              '30-34', '35-39', '40-44', '45-49', '50-54'],
}

# Copy the lists so the two combiners never share list objects.
population_age_group_combiner[country] = {
    target: list(sources) for target, sources in _austria_five_year_merge.items()
}
deaths_age_group_combiner[country] = {
    target: list(sources) for target, sources in _austria_five_year_merge.items()
}

# COVID bands below 55 are collapsed into a single '00-54' band.
covid_age_group_combiner[country] = {
    '00-54': ['00-04', '05-14', '15-24', '25-34', '35-44', '45-54'],
}

In [10]:
# Write the three combiner mappings back to the snapshot directory as
# indented JSON, in the order population, deaths, covid.
combiner_outputs = [
    (f'{data_dir_path}/population_age_group_combiner.json', population_age_group_combiner),
    (f'{data_dir_path}/deaths_age_group_combiner.json', deaths_age_group_combiner),
    (f'{data_dir_path}/covid_age_group_combiner.json', covid_age_group_combiner),
]
for combiner_path, combiner in combiner_outputs:
    with open(combiner_path, 'w') as file:
        json.dump(combiner, file, indent=4)