## Scrape Data from Robert Koch Institut
https://experience.arcgis.com/experience/478220a4c454480e823b17327b2bf1d4/page/page_0/

In [None]:
import datetime
import json
import urllib
import urllib.parse
import urllib.request

import pandas

In [None]:
# Name of the CSV file that receives the aggregated result.
file_name = "Neuinfektionen_pro_land_pro_tag.csv"
# Switch for the export step at the end: the CSV is only written when truthy.
safe_file = True

In [None]:
def fetch_infection_data_from_rki(bundesland: str = "Hamburg") -> pandas.DataFrame:
    """
    Fetch the Covid-19 cases of one federal state from the ArcGIS feature
    service behind
    https://experience.arcgis.com/experience/478220a4c454480e823b17327b2bf1d4/page/page_0/

    A single request returns at most ``resultRecordCount`` records, so the
    query is repeated with an increasing ``resultOffset`` for as long as the
    response reports ``exceededTransferLimit``.

    Args:
        bundesland: name of the federal state, written like displayed on the
            website (e.g. "Bayern").
    Returns:
        a DataFrame with one row per returned record and the columns
        Meldedatum, Neuinfektionen, Geschlecht, Altersgruppe and Bundesland.
    Raises:
        urllib.error.URLError: if a request fails.
        KeyError: if a response has no 'features' entry.
    """

    url_endpoint = "https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/RKI_COVID19/FeatureServer/0/query"
    page_size = 2000

    # Double any single quote so the name cannot terminate the string literal
    # of the where clause.
    escaped_name = bundesland.replace("'", "''")
    params = {
        'f': 'json',
        'where': f"Bundesland='{escaped_name}'",
        'returnGeometry': 'false',
        'spatialRel': 'esriSpatialRelIntersects',
        'outFields': 'ObjectId,AnzahlFall,Meldedatum,Geschlecht,Altersgruppe',
        # ObjectId breaks ties between records of the same day so that the
        # order, and therefore the paging, is deterministic.
        'orderByFields': 'Meldedatum asc,ObjectId asc',
        'resultOffset': 0,
        'resultRecordCount': page_size,
        'cacheHint': "true",
    }

    features = []
    while True:
        # Continue right after the records collected so far.
        params['resultOffset'] = len(features)
        url_query = f"{url_endpoint}?{urllib.parse.urlencode(params)}"

        with urllib.request.urlopen(url_query) as response:
            payload = json.loads(response.read().decode())

        page = payload['features']
        features.extend(page)

        # Stop on the last page. The empty-page check prevents an endless
        # loop should the server keep reporting the flag without data.
        if not page or not payload.get('exceededTransferLimit'):
            break

    # Meldedatum is divided by 1e3 before being read as a POSIX timestamp,
    # i.e. it is treated as milliseconds.
    # NOTE(review): fromtimestamp() converts to the local timezone of the
    # machine running this code, so the time of day depends on where it runs.
    data_list = [
        (
            datetime.datetime.fromtimestamp(attributes['Meldedatum'] / 1e3),
            attributes['AnzahlFall'],
            attributes['Geschlecht'],
            attributes['Altersgruppe'],
            bundesland,
        )
        for attributes in (feature['attributes'] for feature in features)
    ]

    df = pandas.DataFrame(data_list, columns=['Meldedatum', 'Neuinfektionen', 'Geschlecht','Altersgruppe','Bundesland'])

    return df

In [None]:
# Try the fetch on a single federal state and preview the first rows.
df = fetch_infection_data_from_rki(bundesland="Bayern")
df.head(n=5)

In [None]:
# Names of the 16 German federal states (Bundeslaender), spelled the way the
# fetch function passes them to the service.
bundeslaender = [
    "Baden-Württemberg",
    "Nordrhein-Westfalen",
    "Bayern",
    "Hessen",
    "Berlin",
    "Niedersachsen",
    "Sachsen",
    "Rheinland-Pfalz",
    "Brandenburg",
    "Hamburg",
    "Schleswig-Holstein",
    "Thüringen",
    "Mecklenburg-Vorpommern",
    "Bremen",
    "Saarland",
    "Sachsen-Anhalt",
]

In [None]:
%%time
# Fetch every federal state and stack the per-state frames in one step.
# DataFrame.append was removed in pandas 2.0 and copied the accumulated data
# on every iteration; pandas.concat does a single concatenation. As with
# append, each frame keeps its own row index.
all_country_data = pandas.concat(
    [fetch_infection_data_from_rki(bland) for bland in bundeslaender]
)

# Print a summary of the combined frame; info has to be called to produce it.
all_country_data.info()

In [None]:
# Sum the remaining columns per federal state, report date, sex and age group.
# The named reduction .sum() replaces aggregate(sum): passing the builtin sum
# is deprecated in recent pandas versions, while .sum() is the stable spelling.
aggregated_country_data = all_country_data.groupby(
    ["Bundesland", "Meldedatum", "Geschlecht", "Altersgruppe"]
).sum()

In [None]:
# Show the aggregated table as the output of this cell.
aggregated_country_data

In [None]:
# Export the aggregated table as CSV when the save switch is on.
if safe_file:
    aggregated_country_data.to_csv(path_or_buf=file_name)