In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from src.database import read_sql
from src.plotting import plot_multi

In [None]:
# Lookup of sickness-related duty codes: selects every short code (neu_kurz,
# exposed as `sickness_abbr`) whose long description (sdienst_lang) contains
# "krank" (German for "sick") case-insensitively, while excluding descriptions
# that contain "nicht krank" ("not sick"). All matching long descriptions of a
# code are concatenated into one comma-separated `description` value.
# The SQL is kept in a variable because a later cell embeds it as a CTE.
sql_sickness_abbr = '''
    SELECT neu_kurz as sickness_abbr, string_agg(sdienst_lang, ', ') as description FROM spxdb_archiv_2023.sdienst_mapping
    WHERE sdienst_lang ILIKE('%krank%') AND sdienst_lang NOT ILIKE('%nicht krank%')
    GROUP BY sickness_abbr
'''
# Run the query and show its result as the cell output.
# NOTE(review): read_sql is a project helper; presumably it returns a DataFrame — confirm in src.database.
read_sql(sql_sickness_abbr)

In [None]:
# Query for all plan rows that carry a sickness code: the lookup query above is
# wrapped as the CTE `mapping` and joined to spxdb_archiv_2023.monatsplan_adapted
# on the short duty code, keeping only rows with mpebene = 1.
# NOTE: the SQL ends with a `--` line comment; the newline before the closing
# quotes is what keeps anything appended afterwards (e.g. ' LIMIT 10') from
# being swallowed by that comment.
sql_sickness_plan = f'WITH mapping AS ({sql_sickness_abbr})' + '''
    SELECT *
    FROM mapping JOIN spxdb_archiv_2023.monatsplan_adapted AS plan
    ON mapping.sickness_abbr = plan.sdienst_kurz_adapted
    WHERE plan.mpebene = 1 -- filter for "ist" (vs. e.g. "soll")
'''
# Preview only: limit to 10 rows so the cell output stays small.
read_sql(sql_sickness_plan + ' LIMIT 10')

In [None]:
# Count sickness entries per calendar week and plot them as a time series.
#
# tag_f is turned into a date by adding it as a number of days to 1990-01-01
# and is then truncated to the start of its week.
# NOTE(review): this assumes tag_f is a day offset from 1990-01-01 — confirm
# against the schema.
#
# ORDER BY is required here: GROUP BY alone gives no ordering guarantee, and the
# line plot below connects points in row order, so unordered weeks would be
# drawn as a zig-zag instead of a chronological curve.
sickness_count = read_sql(f'''
    WITH plan AS ({sql_sickness_plan})
    SELECT
        DATE_TRUNC('week', (DATE '1990-01-01' + (tag_f * INTERVAL '1 day'))) AS "Kalenderwoche",
        COUNT(*) as "EVB"
    FROM plan
    GROUP BY "Kalenderwoche"
    ORDER BY "Kalenderwoche";
''')

# Explicit figure/axes instead of the pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(sickness_count['Kalenderwoche'], sickness_count['EVB'])
# The title previously contained mojibake ("WÃ¶chentliche"); use the real umlaut.
ax.set_title('Wöchentliche Eintragungen von Krankheit')
ax.set_xlabel('Kalenderwoche')
ax.set_ylabel('Anzahl');  # trailing ';' hides the Text(...) repr in the cell output

In [None]:
# Fetch the weekly GrippeWeb report (published on GitHub by the Robert Koch
# Institute) with the calendar-week label as index.
grippeweb_original = pd.read_csv(
    'https://raw.githubusercontent.com/robert-koch-institut/GrippeWeb_Daten_des_Wochenberichts/main/GrippeWeb_Daten_des_Wochenberichts.tsv',
    sep='\t',
    index_col='Kalenderwoche',
)
# Appending '-1' (ISO weekday 1 = Monday) lets the ISO year/week label be parsed
# into the timestamp of that week's Monday.
grippeweb_original.index = pd.to_datetime(
    grippeweb_original.index + '-1',
    format='%G-W%V-%u',
)

# Keep only ARE rows for the region 'Osten'. No age-group filter is applied
# (e.g. "15-34|35-59") because region 'Osten' only has the 00+ age group.
is_region_east = grippeweb_original.Region == 'Osten'
is_are = grippeweb_original.Erkrankung == 'ARE'

# Sum the incidence per week and expose it as column 'GrippeWeb'.
grippeweb = (
    grippeweb_original.loc[is_region_east & is_are]
    .groupby('Kalenderwoche')['Inzidenz']
    .sum()
    .reset_index()
    .rename(columns={'Inzidenz': 'GrippeWeb'})
)


# Fetch the weekly ARE consultation incidence (published on GitHub by the
# Robert Koch Institute) with the calendar-week label as index.
consultation_original = pd.read_csv(
    'https://raw.githubusercontent.com/robert-koch-institut/ARE-Konsultationsinzidenz/main/ARE-Konsultationsinzidenz.tsv',
    sep='\t',
    index_col='Kalenderwoche',
)
# Appending '-1' (ISO weekday 1 = Monday) lets the ISO year/week label be parsed
# into the timestamp of that week's Monday.
consultation_original.index = pd.to_datetime(
    consultation_original.index + '-1',
    format='%G-W%V-%u',
)

# Restrict to the age groups 15-34 / 35-59 and to Berlin / Brandenburg
# (alternative that was considered: the nationwide 'Bundesweit' rows).
in_age_groups = consultation_original.Altersgruppe.str.contains('15-34|35-59')
in_region = consultation_original.Bundesland.str.contains('Berlin|Brandenburg')

# Sum the incidence per week and expose it as column 'Konsultationen'.
consultations = (
    consultation_original.loc[in_age_groups & in_region]
    .groupby('Kalenderwoche')['ARE_Konsultationsinzidenz']
    .sum()
    .reset_index()
    .rename(columns={'ARE_Konsultationsinzidenz': 'Konsultationen'})
)

In [None]:
# Combine the three weekly series into one frame keyed by calendar week.
# Outer joins keep weeks that are present in only some of the sources.
merged = (
    sickness_count
    .merge(consultations, on='Kalenderwoche', how='outer')
    .merge(grippeweb, on='Kalenderwoche', how='outer')
    .sort_values('Kalenderwoche')
    .set_index('Kalenderwoche')
)
merged.head()

In [None]:
# Plot every column of the merged weekly frame over the full time range.
# NOTE(review): plot_multi is a project helper from src.plotting; presumably it
# draws each column on its own y-axis — confirm there.
plot_multi(merged, figsize=(10, 5), title='Alle Daten')

In [None]:
# Same plot, restricted to the weeks from 2020-01-01 onwards.
start_of_2020 = pd.to_datetime('2020-01-01')
plot_multi(merged.loc[start_of_2020:], figsize=(10, 5), title='Daten ab 2020')