In [None]:
import numpy as np
import pandas as pd

# Notebook display settings: show up to 100 rows and do not limit the number of
# columns (None) when a DataFrame is rendered.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)

In [None]:
def readKreisByKreisschluessel():
    """Read the Kreis lookup table from 'kreisByKreisschluessel.csv'.

    The 'Kreisschluessel' column is read as string and used as index, and
    surrounding whitespace is stripped from the 'Kreis' column.

    Returns:
        DataFrame indexed by 'Kreisschluessel', restricted to the rows whose
        key is exactly five characters long.
    """
    kreisTable = pd.read_csv(
        'kreisByKreisschluessel.csv',
        index_col = 'Kreisschluessel',
        dtype = {'Kreisschluessel': 'string'},
        low_memory = False)
    kreisTable['Kreis'] = kreisTable['Kreis'].str.strip()
    # Rows with a key of any other length are dropped.
    hasFiveCharacterKey = kreisTable.index.str.len() == 5
    return kreisTable[hasFiveCharacterKey]


In [None]:
# Load the Kreis lookup table; the bare name on the last line makes the
# notebook display it as the cell output.
kreisByKreisschluessel = readKreisByKreisschluessel()
kreisByKreisschluessel

In [None]:
class TimeseriesReader:
    """Reads the daily intensive care time series from a CSV file."""

    def readTimeseries(self, timeSeriesFile = 'zeitreihe-tagesdaten.csv'):
        """Read the time series CSV and return it sorted by date.

        Only the columns 'date', 'bundesland', 'gemeindeschluessel',
        'betten_belegt' and 'betten_frei' are loaded. 'gemeindeschluessel' and
        'bundesland' are read as strings; 'date' is converted to datetime
        using the strict format "%Y-%m-%d".

        Args:
            timeSeriesFile: path of the CSV file to read. Defaults to
                'zeitreihe-tagesdaten.csv' in the current working directory.

        Returns:
            DataFrame sorted ascending by 'date'.

        Raises:
            ValueError: if a 'date' value does not match "%Y-%m-%d".
        """
        timeseries = pd.read_csv(
            timeSeriesFile,
            low_memory = False,
            usecols = ['date', 'bundesland', 'gemeindeschluessel', 'betten_belegt', 'betten_frei'],
            dtype = {
                'gemeindeschluessel': 'string',
                'bundesland': 'string'
                })
        # Convert explicitly instead of passing read_csv's `date_parser`,
        # which is deprecated since pandas 2.0. The explicit format keeps the
        # parsing strict on every pandas version.
        timeseries['date'] = pd.to_datetime(timeseries['date'], format = "%Y-%m-%d")
        return timeseries.sort_values(by = 'date', ascending = True)


In [None]:
# Read the time series (sorted by date) and display it as the cell output.
timeSeries = TimeseriesReader().readTimeseries()
timeSeries

In [None]:
class KreisAndBundeslandColumnAdder:
    """Adds readable 'Kreis' and 'Bundesland' name columns to a DataFrame."""

    # Two-digit 'bundesland' key -> Bundesland name. Defined once on the class
    # so the mapping is not rebuilt on every call.
    _BUNDESLAND_BY_KEY = {
        '01': 'Schleswig-Holstein',
        '02': 'Hamburg',
        '03': 'Niedersachsen',
        '04': 'Bremen',
        '05': 'Nordrhein-Westfalen',
        '06': 'Hessen',
        '07': 'Rheinland-Pfalz',
        '08': 'Baden-Württemberg',
        '09': 'Bayern',
        '10': 'Saarland',
        '11': 'Berlin',
        '12': 'Brandenburg',
        '13': 'Mecklenburg-Vorpommern',
        '14': 'Sachsen',
        '15': 'Sachsen-Anhalt',
        '16': 'Thüringen'}

    def __init__(self, kreisByKreisschluessel):
        """Store the lookup table.

        Args:
            kreisByKreisschluessel: DataFrame indexed by Kreis key with a
                'Kreis' column holding the name for each key.
        """
        self.kreisByKreisschluessel = kreisByKreisschluessel

    def addKreisAndBundeslandColumn(self, dataFrame):
        """Add 'Kreis' and 'Bundesland' columns to ``dataFrame`` in place.

        'Kreis' is looked up from the 'gemeindeschluessel' column in the
        lookup table; 'Bundesland' is mapped from the 'bundesland' column.
        A 'bundesland' key that is not in the mapping yields a missing value.

        Args:
            dataFrame: DataFrame with 'gemeindeschluessel' and 'bundesland'
                columns. It is modified in place.

        Returns:
            The same ``dataFrame`` object, with the two columns added.

        Raises:
            KeyError: if a 'gemeindeschluessel' value is not in the index of
                the lookup table. ``dataFrame`` is left untouched then.
        """
        kreisByKey = self.kreisByKreisschluessel['Kreis']
        keys = dataFrame['gemeindeschluessel']

        # Fail fast on unknown keys (as the former per-row .loc lookup did)
        # rather than silently producing missing Kreis names.
        unknownKeys = keys[~keys.isin(kreisByKey.index)]
        if not unknownKeys.empty:
            raise KeyError(
                'gemeindeschluessel not found in Kreis lookup: {keys}'.format(
                    keys = unknownKeys.drop_duplicates().tolist()))

        # One vectorized lookup instead of a Python-level .loc call per row.
        dataFrame['Kreis'] = keys.map(kreisByKey)
        dataFrame['Bundesland'] = dataFrame['bundesland'].map(self._BUNDESLAND_BY_KEY)
        return dataFrame


In [None]:
# Add the 'Kreis' and 'Bundesland' name columns to the time series and
# display the result as the cell output.
timeSeries = KreisAndBundeslandColumnAdder(kreisByKreisschluessel).addKreisAndBundeslandColumn(timeSeries)
timeSeries

In [None]:
# Distinct 'Kreis' values of the time series (first occurrence of each is kept),
# used further down to export one file per Kreis.
kreisValues = timeSeries['Kreis'].drop_duplicates().values
kreisValues

In [None]:
def getIntensiveCareBeds(timeSeries, kreis = None):
    """Return occupied and free bed counts per date.

    Args:
        timeSeries: DataFrame with 'date', 'betten_belegt' and 'betten_frei'
            columns, plus a 'Kreis' column when ``kreis`` is given.
        kreis: value of the 'Kreis' column to select, or None to sum over
            all rows.

    Returns:
        DataFrame with the columns 'date', 'betten_belegt', 'betten_frei'.
        With ``kreis`` set, these are the matching rows unchanged; otherwise
        both bed columns are summed per date.
    """
    if kreis is None:
        bedsPerDate = timeSeries.groupby('date').agg(
            betten_belegt = ('betten_belegt', 'sum'),
            betten_frei = ('betten_frei', 'sum'))
        # Turn the 'date' group index back into a regular column.
        return bedsPerDate.reset_index()

    isRequestedKreis = timeSeries['Kreis'] == kreis
    return timeSeries[isRequestedKreis][['date', 'betten_belegt', 'betten_frei']]

In [None]:
def getAndPersistIntensiveCareBeds(timeSeries, kreis = None):
    """Compute the bed counts, write them to a CSV file and return them.

    Args:
        timeSeries: time series DataFrame passed on to getIntensiveCareBeds.
        kreis: Kreis to export, or None to export the sums over all rows.

    Returns:
        The DataFrame that was written (without its index) to the file
        named by _getFilename(kreis).
    """
    intensiveCareBeds = getIntensiveCareBeds(timeSeries, kreis)
    # Show which Kreis is being exported.
    display(kreis)
    targetFile = _getFilename(kreis)
    intensiveCareBeds.to_csv(targetFile, index = False)
    return intensiveCareBeds

def _getFilename(kreis):
    """Return the CSV path for ``kreis``; None selects the '-de' file."""
    suffix = '-de' if kreis is None else '-' + kreis
    return '../../docs/data/intensivstationen/intensivstationen{suffix}.csv'.format(suffix = suffix)


In [None]:
# No kreis given: export the per-date sums over all rows (the '-de' file).
getAndPersistIntensiveCareBeds(timeSeries)

In [None]:
# Export one CSV file per distinct Kreis.
for kreis in kreisValues:
    getAndPersistIntensiveCareBeds(timeSeries, kreis)