# Erste Schritte
+ Nbstripout sollte initialisiert werden, damit beim Commit Ausgaben und Metadaten aus den Notebooks entfernt werden. Dafür einfach in der nächsten Zelle das Kommentarzeichen vor dem Befehl entfernen.
+ Im Ordner Data liegen die aktuellen Daten des Johns Hopkins CSSE zu 2019-nCoV. Diese wurden als Git-Submodul eingebunden; dafür in der entsprechenden Zelle den Befehl aktivieren.
+ In der 3. Zelle werden die aktuellen Daten aus dem Johns-Hopkins-Repository gepullt.

In [None]:
# Install nbstripout as a git filter for this repository (uncomment to run once)
# !cd ~/corona_jupyternotebooks && nbstripout --install

In [None]:
# Initialize and fetch the git submodule with the data (uncomment to run once)
# !cd ~/corona_jupyternotebooks/COVID-19 && git submodule init && git submodule update

In [None]:
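# Update data: pull the latest commits of the data repository
# NOTE(review): this path differs from the submodule path used in the cell above
# (~/corona_jupyternotebooks/COVID-19) - confirm it points at the data checkout.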
!cd ~/corona/Data && git pull

In [None]:
# Imports
import os
import glob
import math
import pandas as pd
import numpy as np
# Libraries for visualization
import cufflinks as cf
from ipywidgets import interactive
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)

# Read data from Johns Hopkins into a dataframe
For better visualization it is helpful to build a new dataframe, because the source data lists overseas territories such as Guadeloupe as separate rows under their country (e.g. France), with naturally much smaller numbers.
In addition, a clean dataframe simplifies the further data processing.

In [None]:
# Build `df_cleaned`: one column per selected country, one row per reporting
# date, holding the cumulative number of confirmed cases.

# Alternative (European) selection, kept for reference:
# countries = ['Germany', 'France', 'United Kingdom', 'Austria', 'Switzerland', 'Netherlands', 'Belgium', 'Ireland', 'Spain', 'Greece', 'Portugal', 'Sweden', 'Denmark', 'Finland', 'Norway', 'Poland', 'Ukraine', 'Romania', 'Hungary']
countries = ['Germany', 'Italy', 'US', 'France', 'United Kingdom', 'Netherlands', 'Spain']
# Countries for which only the row with an empty 'Province/State' is used, so
# that separately listed territories are not mixed into the national series.
colony = ['France', 'United Kingdom', 'Australia', 'Netherlands']
df_time = pd.read_csv('COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv', sep=',')

# The axis must be named explicitly: the positional form `drop(labels, 1)`
# was removed in pandas 2.0.
df_series_filter = df_time.drop(columns=['Lat', 'Long'])

df_cleaned = pd.DataFrame()
for c in countries:
    rows = df_series_filter[df_series_filter['Country/Region'] == c]
    if c in colony:
        # Exact list membership (not a substring match) selects this branch.
        rows = rows[rows['Province/State'].isnull()]
    if rows.empty:
        raise KeyError('No rows found for country {!r} in the time series'.format(c))
    # Summing leaves a single-row country unchanged, yields a numeric column
    # and also works when a country is reported as several rows.
    df_cleaned[c] = rows.drop(columns=['Province/State', 'Country/Region']).sum()

# The remaining column headers of the CSV are the reporting dates; they are
# assumed to be month/day/two-digit-year (e.g. 1/22/20), hence the explicit format.
df_cleaned.index = pd.to_datetime(df_cleaned.index, format='%m/%d/%y')
df_cleaned

# Visualization of infected persons in absolute values

In [None]:
# Plot the cumulative confirmed cases of all columns in df_cleaned.
# n is a positional row offset: rows before position n are left out of the
# plot (modify n for a shifted start date).
n=0
df_cleaned.iloc[n:].iplot()

# Decadic logarithm visualisation

In [None]:
# Base-10 logarithm of the cumulative case counts per country.
clean_log10 = df_cleaned.copy()
# modify n for shifted start date
n=0
values = df_cleaned[countries].astype(float)
# Zero counts are replaced by 1 before taking the logarithm, so they map to 0
# (log10(1) == 0) instead of being undefined.
clean_log10[countries] = np.log10(values.where(values != 0, 1))
# Apply the start offset n here as well, like the other plot cells do.
clean_log10.iloc[n:].iplot()

# New infections compared to the previous day

In [None]:
# Daily new cases: difference between consecutive cumulative counts.
# modify n for shifted start date
n=0
counts = df_cleaned[countries].apply(pd.to_numeric)
growth = df_cleaned.copy()
# diff() leaves NaN in the first row because it has no predecessor; that row
# keeps its cumulative value instead.
growth[countries] = counts.diff().fillna(counts)
growth.iloc[n:].iplot()

# Percentage of infected persons in the total population

In [None]:
# Share of confirmed cases in the total population of each country.
# NOTE: the values are fractions of the population (cases / population); they
# are not multiplied by 100.

# Population figures, data from 2017
population = {
    'Germany': 82114224,
    'Italy': 59359900,
    'US': 327200000,
    'France': 64979548,
    'Spain': 46354321,
    'United Kingdom': 66181585,
    'Netherlands': 17035938,
}
df_percentage = df_cleaned.copy()

for c in countries:
    # Plain division already yields 0 for a zero count, so no special case is needed.
    df_percentage[c] = pd.to_numeric(df_cleaned[c]) / population[c]
# iplot must be called; the bare attribute access did not draw anything.
df_percentage.iplot()