#### This notebook adds `total_recovered` and `new_recovered` properties to the documents stored in the `berlin_covid_district` CouchDB database and removes the old `recovered` property where present.

In [128]:
import datetime
import os

import pandas as pd
import pycouchdb
import urllib3

In [108]:
# Load the per-district case table (columns include Bundesland / Landkreis)
# from the ArcGIS open-data portal.
CASES_CSV_URL = "https://opendata.arcgis.com/datasets/9644cad183f042e79fb6ad00eadc4ecf_0.csv"
df = pd.read_csv(CASES_CSV_URL)
df

Unnamed: 0,ObjectId,AnzahlFall,AnzahlTodesfall,SummeFall,SummeTodesfall,Datenstand,Meldedatum,Bundesland,IdBundesland,Landkreis,IdLandkreis,AnzahlGenesen,SummeGenesen
0,1,0,0,0,0,"20.11.2020, 00:00 Uhr",2020/01/03 00:00:00,Schleswig-Holstein,1,SK Flensburg,1001,0,0
1,2,0,0,0,0,"20.11.2020, 00:00 Uhr",2020/01/04 00:00:00,Schleswig-Holstein,1,SK Flensburg,1001,0,0
2,3,0,0,0,0,"20.11.2020, 00:00 Uhr",2020/01/05 00:00:00,Schleswig-Holstein,1,SK Flensburg,1001,0,0
3,4,0,0,0,0,"20.11.2020, 00:00 Uhr",2020/01/06 00:00:00,Schleswig-Holstein,1,SK Flensburg,1001,0,0
4,5,0,0,0,0,"20.11.2020, 00:00 Uhr",2020/01/07 00:00:00,Schleswig-Holstein,1,SK Flensburg,1001,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
132659,132660,13,0,579,4,"20.11.2020, 00:00 Uhr",2020/11/15 00:00:00,Thüringen,16,LK Altenburger Land,16077,2,392
132660,132661,15,0,594,4,"20.11.2020, 00:00 Uhr",2020/11/16 00:00:00,Thüringen,16,LK Altenburger Land,16077,0,392
132661,132662,23,1,617,5,"20.11.2020, 00:00 Uhr",2020/11/17 00:00:00,Thüringen,16,LK Altenburger Land,16077,0,392
132662,132663,19,0,636,5,"20.11.2020, 00:00 Uhr",2020/11/18 00:00:00,Thüringen,16,LK Altenburger Land,16077,0,392


In [109]:
# Drop the identifier and data-status columns that the following cells never read
unused_columns = ["ObjectId", "IdBundesland", "IdLandkreis", "Datenstand"]
df = df.drop(columns=unused_columns)

In [110]:
# Keep only the Berlin rows. The explicit copy makes `data` an independent
# frame, so assigning to its columns in later cells does not raise
# SettingWithCopyWarning or depend on view-vs-copy behaviour.
data = df[df["Bundesland"].str.contains("Berlin")].copy()

In [111]:
# Order rows newest reporting date first, then preview the top of the frame
data = data.sort_values("Meldedatum", ascending=False)
data.head()

Unnamed: 0,AnzahlFall,AnzahlTodesfall,SummeFall,SummeTodesfall,Meldedatum,Bundesland,Landkreis,AnzahlGenesen,SummeGenesen
103720,98,0,7987,38,2020/11/19 00:00:00,Berlin,SK Berlin Mitte,0,4562
104042,0,0,5015,29,2020/11/19 00:00:00,Berlin,SK Berlin Friedrichshain-Kreuzberg,0,3669
107293,44,0,4207,51,2020/11/19 00:00:00,Berlin,SK Berlin Pankow,1,2947
105441,0,0,3190,19,2020/11/19 00:00:00,Berlin,SK Berlin Spandau,0,2395
106781,47,0,3561,22,2020/11/19 00:00:00,Berlin,SK Berlin Steglitz-Zehlendorf,0,1611


In [112]:
# Remove the leading "SK " prefix from the Landkreis strings.
# The pattern is anchored and `regex=True` is explicit, so only the prefix is
# stripped and the result does not depend on the pandas default for `regex`.
# `assign` builds a new frame instead of writing into a filtered slice.
data = data.assign(Landkreis=data["Landkreis"].str.replace(r"^SK ", "", regex=True))
data["Landkreis"].unique()

array(['Berlin Mitte', 'Berlin Friedrichshain-Kreuzberg', 'Berlin Pankow',
       'Berlin Spandau', 'Berlin Steglitz-Zehlendorf', 'Berlin Neukölln',
       'Berlin Tempelhof-Schöneberg', 'Berlin Treptow-Köpenick',
       'Berlin Charlottenburg-Wilmersdorf', 'Berlin Marzahn-Hellersdorf',
       'Berlin Lichtenberg', 'Berlin Reinickendorf'], dtype=object)

In [113]:
# Convert date strings to datetime.
# The source values look like "2020/11/19 00:00:00", so the format spells out
# the slashes and the time part; every value must match it exactly.
data = data.assign(
    Meldedatum=pd.to_datetime(data["Meldedatum"], format="%Y/%m/%d %H:%M:%S")
)
data

Unnamed: 0,AnzahlFall,AnzahlTodesfall,SummeFall,SummeTodesfall,Meldedatum,Bundesland,Landkreis,AnzahlGenesen,SummeGenesen
103720,98,0,7987,38,2020-11-19,Berlin,Berlin Mitte,0,4562
104042,0,0,5015,29,2020-11-19,Berlin,Berlin Friedrichshain-Kreuzberg,0,3669
107293,44,0,4207,51,2020-11-19,Berlin,Berlin Pankow,1,2947
105441,0,0,3190,19,2020-11-19,Berlin,Berlin Spandau,0,2395
106781,47,0,3561,22,2020-11-19,Berlin,Berlin Steglitz-Zehlendorf,0,1611
...,...,...,...,...,...,...,...,...,...
103372,0,0,0,0,2020-01-03,Berlin,Berlin Mitte,0,0
105590,0,0,0,0,2020-01-03,Berlin,Berlin Neukölln,0,0
109870,0,0,0,0,2020-01-03,Berlin,Berlin Reinickendorf,0,0
105442,0,0,0,0,2020-01-03,Berlin,Berlin Steglitz-Zehlendorf,0,0


In [114]:
# Connect to couchdb
# Silence urllib3's warnings (e.g. about unverified HTTPS requests) for this session.
urllib3.disable_warnings()

# Read the server URL from the COUCHDB_URL environment variable so the address
# and any credentials stay out of the notebook. Falls back to the empty
# placeholder when the variable is not set.
url = os.environ.get("COUCHDB_URL", "")
server = pycouchdb.Server(url)
db = server.database("berlin_covid_district")

In [115]:
# Sanity check: show the version reported by the CouchDB server
server_info = server.info()
server_info["version"]

'3.1.1'

In [127]:
# Get all features from db, add the recovered counts for the document's date
# to each matching district, and write back only the documents that changed.
for feature in db.all():
    doc = feature["doc"]

    # Design documents carry no case data and have no "date" field.
    if doc.get("_id", "").startswith("_design/"):
        continue

    date = datetime.datetime.strptime(doc["date"], "%d.%m.%Y")
    by_date = data.loc[data["Meldedatum"] == date]

    # District name -> (total recovered, newly recovered) for this date.
    # Built once per document instead of re-filtering the frame per district;
    # plain int() on scalars avoids converting a one-element Series.
    recovered = {
        district: (int(total), int(new))
        for district, total, new in zip(
            by_date["Landkreis"], by_date["SummeGenesen"], by_date["AnzahlGenesen"]
        )
    }
    if not recovered:
        # No data for this date: leave the stored document untouched.
        continue

    modified = False
    for item in doc["data"]["features"]:
        properties = item["properties"]
        counts = recovered.get(properties.get("GEN"))
        if counts is None:
            continue

        # Replace the old combined "recovered" property with the two new ones.
        properties.pop("recovered", None)
        properties["total_recovered"], properties["new_recovered"] = counts
        modified = True

    # Saving an unchanged document would only create a needless new revision.
    if modified:
        db.save(doc)