# 1 Update all data

In [2]:
import os
if os.path.split(os.getcwd())[-1] == 'notebooks':
    os.chdir("../")

'Your base path is at: ' + os.path.split(os.getcwd())[-1]

'Your base path is at: ads_covid-19'

In [3]:
# %load ../src/data/get_data.py
import subprocess
import os

import pandas as pd
import numpy as np

from datetime import datetime

# Access websites
import requests
# Access json files
import json

def get_john_hopkins():
   # Running a process using 'git' to get the data from 'cwd'
   # shell == True the specified command will be executed through the shell.
   # stdin, stdout and stderr specify the executed program’s standard input, standard output and standard error file handles, respectively.
   # .communicate() writes input, reads all output, and waits for the subprocess to exit.
    git_pull = subprocess.Popen('/usr/bin/git pull' ,
    cwd = os.path.dirname('data/raw/COVID-19/'),
    shell = True,
    stdout = subprocess.PIPE,
    stderr = subprocess.PIPE)

    (out, error) = git_pull.communicate()

    print("Error: " + str(error))
    print("out: " + str(out))


def get_current_data_germany():
    data = requests.get('https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/RKI_Landkreisdaten/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json')

    json_object = json.loads(data.content)
    full_list = []
    for pos, each_dict in enumerate(json_object['features'][:]):
        full_list.append(each_dict['attributes'])

    pd_full_list = pd.DataFrame(full_list)
    pd_full_list.to_csv('data/raw/NPGEO/GER_state_data.csv',sep=';')
    print('Number of region rows:' +str(pd_full_list.shape[0]))

if __name__ == '__main__':
    get_john_hopkins()
    get_current_data_germany()


Error: b'The system cannot find the path specified.\r\n'
out: b''
Number of region rows:412


# 2 Process Pipeline

In [4]:
# %load ../src/data/process_JH_data.py
import pandas as pd
import numpy as np

from datetime import datetime


def store_relational_JH_data():
    ''' Transformes the COVID data in a relational data set

    '''

    data_path='data/raw/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
    pd_raw=pd.read_csv(data_path)

    pd_data_base=pd_raw.rename(columns={'Country/Region':'country',
                      'Province/State':'state'})

    pd_data_base['state']=pd_data_base['state'].fillna('no')

    pd_data_base=pd_data_base.drop(['Lat','Long'],axis=1)


    pd_relational_model=pd_data_base.set_index(['state','country']) \
                                .T                              \
                                .stack(level=[0,1])             \
                                .reset_index()                  \
                                .rename(columns={'level_0':'date',
                                                   0:'confirmed'},
                                                  )

    pd_relational_model['date']=pd_relational_model.date.astype('datetime64[ns]')

    pd_relational_model.to_csv('data/processed/COVID_relational_confirmed.csv',sep=';',index=False)
    print(' Number of rows stored: '+str(pd_relational_model.shape[0]))

if __name__ == '__main__':

    store_relational_JH_data()


 Number of rows stored: 54530


# 3 Filtering and Doubling Rate Calculation

In [5]:
# %load ../src/features/build_features_sir.py
import pandas as pd
import numpy as np
from datetime import datetime
from scipy import optimize
from scipy import integrate
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

def SIR_model(SIR,beta,gamma,N0):
    '''Here's the simple SIR model
        S - susceptible population
        I - infected population
        R - recovered population
        beta - infection rate
        gamma - recovery rate
        N0 - Total population
        And then the overall condition is as below
        overall condition is that the sum of changes (differnces) sum up to 0
        dS+dI+dR=0
        S+I+R= N (constant size of population)

     Parameters:
        SIR - numpy.ndarray
        beta - float
        gamma - float
    '''

    S,I,R = SIR
    dS_dt=-beta*S*I/N0
    dI_dt=beta*S*I/N0-gamma*I
    dR_dt=gamma*I
    return(dS_dt,dI_dt,dR_dt)

if __name__ == '__main__':
    pd_JH_data=pd.read_csv('data/processed/COVID_relational_confirmed.csv',sep=';',parse_dates=[0])
    pd_JH_data=pd_JH_data.sort_values('date',ascending=True).copy()


# 4 Visual Board

In [None]:
# %load ../src/visualization/visualize_sir.py
import pandas as pd
import numpy as np

import dash
dash.__version__
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output,State

import plotly.graph_objects as go

import os
print(os.getcwd())

df_ip_big=pd.read_csv('data/processed/COVID_final_set.csv',sep=';')
fig = go.Figure()
app = dash.Dash()
app.layout = html.Div([

    dcc.Markdown('''
    #  Task 2 of the Applied Data Science project on the COVID-19 data

    Here we will be able to select and control the following SIR parameters and be able to adjust the graph according to our requirements.
    It is a full walkthrough of the SIR model.
    '''),

    dcc.Markdown('''
    ## Multi-Select Country for visualization

    Please select the countries which you wish to view it.
    '''),

    dcc.Dropdown(
        id='country_drop_down',
        options=[ {'label': each,'value':each} for each in df_ip_big['country'].unique()],
        value=['US'], # which are pre-selected
        multi=True
    ),

    dcc.Markdown('''
        ## The SIR Parameters
        '''),

    dcc.Markdown('''
    ## Starting period(in Days)
    '''),
    dcc.Input(
            id="t_start", type="number", placeholder="number",
            value=7,min=0, max=1000,
            step=1,debounce=True
    ),

     dcc.Markdown('''
    ## Intro period(in Days)
    '''),
    dcc.Input(
            id="t_intr", type="number", placeholder="number",
            value=40,min=0, max=1000,
            step=1,debounce=True
    ),

     dcc.Markdown('''
    ## Hold period(in Days)
    '''),
    dcc.Input(
            id="t_halt", type="number", placeholder="number",
            value=43,min=0, max=1000,
            step=1,debounce=True
    ),

     dcc.Markdown('''
    ## Relaxing period(in Days)
    '''),
    dcc.Input(
            id="t_relx", type="number", placeholder="number",
            value=70,min=0, max=1000,
            step=1,debounce=True
    ),

     dcc.Markdown('''
    ## Max infection rate
    '''),
     dcc.Input(
             id="b_max", type="number", placeholder="number",
             value=0.35,min=0, max=100,
             debounce=True

    ),

     dcc.Markdown('''
    ## Min infection rate
    '''),
     dcc.Input(
             id="b_min", type="number", placeholder="number",
             value=0.13,min=0, max=100,
             debounce=True
    ),

     dcc.Markdown('''
    ## Recovering rate
    '''),
     dcc.Input(
             id="gamma", type="number", placeholder="number",
             value=0.09,min=0, max=100,
             debounce=True
    ),

    dcc.Graph(figure=fig, id='main_window_slope')
])

@app.callback(
    Output('main_window_slope', 'figure'),
    [Input('country_drop_down', 'value'),
    Input('t_start', component_property='value'),
    Input('t_intr',component_property= 'value'),
    Input('t_halt',component_property= 'value'),
    Input('t_relx',component_property= 'value'),
    Input('b_max',component_property= 'value'),
    Input('b_min', component_property='value'),
    Input('gamma', component_property='value')])

def update_figure(country_list,t_start,t_intr,t_halt,t_relx,bmax,bmin,gamma):

    traces =[]

    for each in country_list:

        df_plot=df_ip_big[df_ip_big['country']==each]
        df_plot=df_plot[['state','country','confirmed','confirmed_filtered','confirmed_DR','confirmed_filtered_DR','date']].groupby(['country','date']).agg(np.mean).reset_index()
        df_plot=df_plot['confirmed'][df_plot['confirmed']>35].reset_index(drop=True)


        ydata=df_plot
        xdata= np.arange(len(df_plot))

        traces.append(dict(
                                x=xdata,
                                y=ydata,
                                type='bar',
                                opacity=0.9,
                                visible=True,
                                name=each+'_Confirmed'
                          )
                     )

        pd_beta=np.concatenate((np.array(t_start*[bmax]),
                               np.linspace(bmax,bmin,t_intr),
                               np.array(t_halt*[bmin]),
                               np.linspace(bmin,bmax,t_relx),
                               ))

        I0=df_plot[0]
        N0=np.array(df_plot)[-1]/0.04
        S0=N0-I0
        R0=0


        SIR=np.array([S0,I0,R0])


        propagation_rates=pd.DataFrame(columns={'susceptible':S0,
                                            'infected':I0,
                                            'recoverd':R0})

        for each_beta in pd_beta:

            new_delta_vec=SIR_model(SIR,each_beta,gamma,N0)

            SIR=SIR+new_delta_vec

            propagation_rates=propagation_rates.append({'susceptible':SIR[0],
                                                        'infected':SIR[1],
                                                        'recovered':SIR[2]}, ignore_index=True)

        traces.append(dict(
                                x=propagation_rates.index,
                                y=propagation_rates.infected,
                                mode='markers+lines',
                                legend_title="Legend Title",
                                opacity=0.9,
                                visible=True,
                                name=each+'_Infected'

                          )
                     )

    return {
                        'data': traces,
                        'layout': dict (
                            width=1280,
                            height=720,
                            title= 'Scenario SIR simulations  (demonstration purposes only)',

                            xaxis={'title':'Time in days',
                                    #'tickangle':-45,
                                   # 'nticks':20,
                                    'tickfont':dict(size=14,color="#7f7f7f"),
                                  },
                            yaxis={'title':'Confirmed infected people (Source: John Hopkins,log-scale)',
                                   'type':"log",
                                    #'tickangle':-45,
                                   #'nticks':20,
                                    'tickfont':dict(size=14,color="#7f7f7f"),
                                  },

                    )
        }

if __name__ == '__main__':
    app.run_server(debug=True, use_reloader=False, port=8051)


C:\Users\Joshua Saffrine\ads_covid-19
Dash is running on http://127.0.0.1:8051/

 in production, use a production WSGI server like gunicorn instead.



 * Tip: There are .env or .flaskenv files present. Do "pip install python-dotenv" to use them.


 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on
