In [None]:
import numpy as np
import pandas as pd

# Notebook-wide pandas display settings: render up to 100 rows and never
# truncate the column list.
pd.options.display.max_rows = 100
pd.options.display.max_columns = None

In [None]:
# Federal state names keyed by their zero-padded two-digit id ('01' ... '16').
# The names are listed in id order, so the key is derived from the position.
bundeslandById = {
    '{:02d}'.format(bundeslandId): bundeslandName
    for bundeslandId, bundeslandName in enumerate(
        [
            'Schleswig-Holstein',
            'Freie Hansestadt Hamburg',
            'Niedersachsen',
            'Freie Hansestadt Bremen',
            'Nordrhein-Westfalen',
            'Hessen',
            'Rheinland-Pfalz',
            'Baden-Württemberg',
            'Freistaat Bayern',
            'Saarland',
            'Berlin',
            'Brandenburg',
            'Mecklenburg-Vorpommern',
            'Freistaat Sachsen',
            'Sachsen-Anhalt',
            'Freistaat Thüringen',
        ],
        start = 1)}
bundeslandById

In [None]:
def readKreisByKreisschluessel():
    """Load the Kreis lookup table from 'kreisByKreisschluessel.csv'.

    The file is read from the current working directory. The
    'Kreisschluessel' column is read as string and becomes the index,
    surrounding whitespace is stripped from the 'Kreis' names, and only
    rows whose key is exactly five characters long are kept.

    Returns:
        pandas.DataFrame indexed by 'Kreisschluessel'.
    """
    kreise = pd.read_csv(
        'kreisByKreisschluessel.csv',
        low_memory = False,
        index_col = 'Kreisschluessel',
        dtype = {'Kreisschluessel': 'string'})
    kreise['Kreis'] = kreise['Kreis'].str.strip()
    # Keys of any other length are dropped from the lookup table.
    hasFiveCharacterKey = kreise.index.str.len() == 5
    return kreise[hasFiveCharacterKey]


In [None]:
# Load the Kreis lookup table once; a later cell passes it to
# KreisAndBundeslandColumnAdder. The bare name renders it as the cell output.
kreisByKreisschluessel = readKreisByKreisschluessel()
kreisByKreisschluessel

In [None]:
class TimeseriesReader:
    """Loads the daily time series from 'zeitreihe-tagesdaten.csv'."""

    def readTimeseries(self):
        """Read the time series CSV from the current working directory.

        Only the columns 'date', 'bundesland', 'gemeindeschluessel',
        'betten_belegt' and 'betten_frei' are loaded. 'gemeindeschluessel'
        and 'bundesland' are read as strings so leading zeros survive, and
        'date' is converted to datetime using the '%Y-%m-%d' format.

        Returns:
            pandas.DataFrame with the selected columns.

        Raises:
            ValueError: if a 'date' value does not match '%Y-%m-%d'.
        """
        timeseries = pd.read_csv(
            'zeitreihe-tagesdaten.csv',
            low_memory = False,
            usecols = ['date', 'bundesland', 'gemeindeschluessel', 'betten_belegt', 'betten_frei'],
            dtype = {
                'gemeindeschluessel': 'string',
                'bundesland': 'string'
                })
        # Convert after loading rather than through read_csv's `date_parser`:
        # that argument is deprecated since pandas 2.0, while its replacement
        # `date_format` does not exist before 2.0. This form works on both.
        timeseries['date'] = pd.to_datetime(timeseries['date'], format = "%Y-%m-%d")
        return timeseries


In [None]:
# Load the raw daily time series; the bare name on the last line renders it
# as the cell output.
timeSeries = TimeseriesReader().readTimeseries()
timeSeries

In [None]:
class KreisAndBundeslandColumnAdder:
    """Adds readable 'Kreis' and 'Bundesland' name columns to a data frame."""

    def __init__(self, kreisByKreisschluessel, bundeslandById):
        """Store the two lookup tables.

        Args:
            kreisByKreisschluessel: DataFrame indexed by Kreisschluessel with a 'Kreis' column.
            bundeslandById: mapping from the values of the 'bundesland' column to a name.
        """
        self.bundeslandById = bundeslandById
        self.kreisByKreisschluessel = kreisByKreisschluessel

    def addKreisAndBundeslandColumn(self, dataFrame):
        """Assign the 'Kreis' and 'Bundesland' columns on dataFrame and return it.

        The frame is modified in place; the returned object is the frame
        that was passed in. A 'gemeindeschluessel' or 'bundesland' value
        missing from its lookup table raises KeyError.
        """
        dataFrame['Kreis'] = dataFrame['gemeindeschluessel'].map(self._lookupKreis)
        dataFrame['Bundesland'] = dataFrame['bundesland'].map(self._lookupBundesland)
        return dataFrame

    def _lookupKreis(self, kreisschluessel):
        """Return the 'Kreis' entry stored under the given key."""
        return self.kreisByKreisschluessel.loc[kreisschluessel, 'Kreis']

    def _lookupBundesland(self, bundeslandId):
        """Return the name stored under the given id."""
        return self.bundeslandById[bundeslandId]

In [None]:
# Add the 'Kreis' and 'Bundesland' name columns. The adder assigns them on the
# frame it is given and returns that same frame, so re-running this cell only
# overwrites the two columns.
timeSeries = KreisAndBundeslandColumnAdder(kreisByKreisschluessel, bundeslandById).addKreisAndBundeslandColumn(timeSeries)
timeSeries

In [None]:
def readTimeseries(bundesland = None):
    """Load the daily time series, for one federal state or for all of Germany.

    The freshly loaded frame and its info() summary are shown as a side
    effect.

    Args:
        bundesland: name of a federal state as it appears in the values of
            bundeslandById, or None (default) for the nationwide sums.

    Returns:
        pandas.DataFrame with the columns 'date', 'betten_belegt' and
        'betten_frei'. With a bundesland these are the rows of that state;
        without one they are the sums per date over all rows.
    """
    dataFrame = TimeseriesReader().readTimeseries()
    display(dataFrame)
    # info() prints its summary and returns None, so it is called directly;
    # wrapping it in display() would render a stray "None".
    dataFrame.info()
    if bundesland is not None:
        # The frame read above has only the 'bundesland' id column and no
        # 'Bundesland' name column, so the names are derived here from the
        # ids. Ids missing from bundeslandById map to NaN and match nothing.
        bundeslandNames = dataFrame['bundesland'].map(bundeslandById)
        return dataFrame[bundeslandNames == bundesland][['date', 'betten_belegt', 'betten_frei']]
    else:
        return dataFrame.groupby('date').agg(**{
                        'betten_belegt': pd.NamedAgg(column = 'betten_belegt', aggfunc = 'sum'),
                        'betten_frei':   pd.NamedAgg(column = 'betten_frei',   aggfunc = 'sum')
                    }).reset_index()

In [None]:
def readAndPersistTimeseries(bundesland = None):
    """Read the time series and write it as CSV without the index column.

    Args:
        bundesland: passed on to readTimeseries and _getFilename; None
            (default) selects the nationwide data and the '-de' file.

    Returns:
        The DataFrame that was written.
    """
    timeseries = readTimeseries(bundesland)
    targetFile = _getFilename(bundesland)
    timeseries.to_csv(targetFile, index = False)
    return timeseries

def _getFilename(bundesland):
    return '../../docs/data/intensivstationen/intensivstationen{suffix}.csv'.format(suffix = '-' + bundesland if bundesland is not None else '-de')


In [None]:
# readAndPersistTimeseries(bundesland = 'BADEN_WUERTTEMBERG')

In [None]:
# No bundesland given: sum the beds per date over all rows and write the
# result to the '-de' CSV file.
dataFrame = readAndPersistTimeseries()
dataFrame

In [None]:
# Print the column dtypes and non-null counts of the persisted frame.
dataFrame.info()