In [None]:
import html

import numpy as np
import pandas as pd

# Notebook-wide pandas display settings: show up to 100 rows and never
# truncate the column list (None means "no limit" for display.max_columns).
for option, value in {'display.max_rows': 100, 'display.max_columns': None}.items():
    pd.set_option(option, value)

In [None]:
# Load the reference table from the sheet at position 1 (zero-based, i.e. the
# second sheet) of 04-kreise.xlsx. The row at zero-based position 5 supplies
# the column labels and the first column becomes the index.
# NOTE(review): presumably the index holds the regional keys that ColumnsAdder
# looks up via kreise.loc[...] -- verify against the workbook.
kreise = pd.read_excel('04-kreise.xlsx', sheet_name = 1, header = 5, index_col = 0)
kreise

In [None]:
# Show the column labels of the reference table; ColumnsAdder selects columns
# by the labels 3, '2' and 6.
kreise.columns

In [None]:
class TimeseriesReader:
    """Reads the daily intensive-care time series from a CSV file."""

    def readTimeseries(self, timeSeriesFile = 'zeitreihe-tagesdaten.csv'):
        """Load the time series and return it sorted by date (ascending).

        Only the columns 'date', 'bundesland', 'gemeindeschluessel',
        'betten_belegt' and 'betten_frei' are read. The two key columns are
        kept as strings; 'date' is converted to datetimes.

        Parameters
        ----------
        timeSeriesFile : str or path-like, optional
            CSV file to read. Defaults to 'zeitreihe-tagesdaten.csv' in the
            current working directory.

        Returns
        -------
        pandas.DataFrame
            The selected columns, ordered by 'date'. The original row labels
            are kept (the index is not reset).

        Raises
        ------
        ValueError
            If a 'date' value does not match the format YYYY-MM-DD.
        """
        timeseries = pd.read_csv(
            timeSeriesFile,
            low_memory = False,
            usecols = ['date', 'bundesland', 'gemeindeschluessel', 'betten_belegt', 'betten_frei'],
            dtype = {
                'gemeindeschluessel': 'string',
                'bundesland': 'string'
                })
        # Parse the dates after loading instead of through read_csv's
        # `date_parser` argument, which is deprecated since pandas 2.0.
        timeseries['date'] = pd.to_datetime(timeseries['date'], format = "%Y-%m-%d")
        return timeseries.sort_values(by = 'date', ascending = True)


In [None]:
# Load the time series (sorted by date) and display it.
timeSeries = TimeseriesReader().readTimeseries()
timeSeries

In [None]:
class ColumnsAdder:
    """Adds 'Kreis', 'Bundesland' and 'Einwohnerzahl' columns to a data frame.

    The values are looked up in the `kreise` reference table, whose index is
    matched against the 'gemeindeschluessel' and 'bundesland' keys.
    """

    def __init__(self, kreise):
        """Remember the reference table that is queried via `kreise.loc[key, column]`."""
        self.kreise = kreise

    def addKreisAndBundeslandAndEinwohnerzahlColumns(self, dataFrame):
        """Add the three derived columns to `dataFrame` in place and return it.

        - 'Kreis':         column 3 of the `kreise` row keyed by 'gemeindeschluessel'
        - 'Bundesland':    column '2' of the `kreise` row keyed by 'bundesland'
        - 'Einwohnerzahl': column 6 of the `kreise` row keyed by
                           'gemeindeschluessel', converted with int()

        Parameters
        ----------
        dataFrame : pandas.DataFrame
            Must contain the columns 'gemeindeschluessel' and 'bundesland'.

        Returns
        -------
        pandas.DataFrame
            The same object that was passed in, with the columns added.

        Raises
        ------
        KeyError
            If a key of `dataFrame` is not present in the index of `kreise`.
        """
        dataFrame['Kreis'] = self._lookupOncePerKey(dataFrame['gemeindeschluessel'], 3)
        dataFrame['Bundesland'] = self._lookupOncePerKey(dataFrame['bundesland'], '2')
        dataFrame['Einwohnerzahl'] = self._lookupOncePerKey(dataFrame['gemeindeschluessel'], 6, convert = int)
        return dataFrame

    def _lookupOncePerKey(self, keys, column, convert = None):
        """Map every entry of `keys` to `kreise.loc[key, column]`.

        The comparatively slow `.loc` lookup runs once per distinct key
        instead of once per row; the rows are then mapped through the cached
        results.
        """
        valueByKey = {}
        for key in keys.unique():
            value = self.kreise.loc[key, column]
            valueByKey[key] = value if convert is None else convert(value)
        return keys.map(lambda key: valueByKey[key])


In [None]:
# Add the 'Kreis', 'Bundesland' and 'Einwohnerzahl' columns looked up in
# `kreise`, then display the result. The method adds the columns to the frame
# it is given and returns that same frame.
timeSeries = ColumnsAdder(kreise).addKreisAndBundeslandAndEinwohnerzahlColumns(timeSeries)
timeSeries

In [None]:
# Distinct values of the 'Kreis' column (each value kept at its first
# occurrence), displayed as the cell output.
kreisValues = timeSeries['Kreis'].drop_duplicates().values
kreisValues

In [None]:
def printKreisOptions(kreisValues):
    """Print one HTML <option> element per entry of `kreisValues`, in iteration order."""
    for kreisName in kreisValues:
        printKreisOption(kreisName)

def printKreisOption(kreis):
    """Print an HTML <option> element whose value and label are both `kreis`.

    The name is HTML-escaped, so characters such as '&', '<', '>' or quotes
    cannot break the generated markup. Names without such characters are
    printed unchanged.
    """
    escapedKreis = html.escape(str(kreis))
    print('<option value="{kreis}">{kreis}</option>'.format(kreis = escapedKreis))

In [None]:
# Sort the Kreis names (this rebinds kreisValues to a sorted list) and print
# one <option> element per name.
kreisValues = sorted(kreisValues)
printKreisOptions(kreisValues)

In [None]:
def getIntensiveCareBeds(timeSeries, kreis = None):
    """Return the columns 'date', 'betten_belegt' and 'betten_frei'.

    With `kreis` given, the rows whose 'Kreis' column equals `kreis` are
    returned unchanged (original row labels kept). Without it, both bed
    columns are summed per 'date' over all rows and the result gets a fresh
    0..n-1 index.
    """
    bedColumns = ['betten_belegt', 'betten_frei']
    if kreis is None:
        # No Kreis requested: total both bed counts for every date.
        return timeSeries.groupby('date')[bedColumns].sum().reset_index()

    isRequestedKreis = timeSeries['Kreis'] == kreis
    return timeSeries.loc[isRequestedKreis, ['date'] + bedColumns]

In [None]:
def getAndPersistIntensiveCareBeds(timeSeries, kreis = None):
    """Compute the bed figures for `kreis`, write them to CSV and return them.

    The data comes from getIntensiveCareBeds(timeSeries, kreis). `kreis` is
    shown via display() as a progress indication, and the frame is written
    without its index to the path given by _getFilename(kreis).
    """
    beds = getIntensiveCareBeds(timeSeries, kreis)
    display(kreis)
    targetFile = _getFilename(kreis)
    beds.to_csv(targetFile, index = False)
    return beds

def _getFilename(kreis):
    """Build the relative CSV path for `kreis`, using the suffix from _getSuffix(kreis)."""
    suffix = _getSuffix(kreis)
    return f'../../docs/data/intensivstationen/intensivstationen-{suffix}.csv'

def _getSuffix(kreis):
    return kreis if kreis is not None else 'de'


In [None]:
# No Kreis given: sum the bed figures per date over all rows and write them to
# .../intensivstationen-de.csv. The returned frame is shown as the cell output.
getAndPersistIntensiveCareBeds(timeSeries)

In [None]:
# Write one CSV file per Kreis; each call also displays the Kreis name.
for kreis in kreisValues:
    getAndPersistIntensiveCareBeds(timeSeries, kreis)