# HackOnAzure Analysis:

### Importing Libraries:

In [12]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import scipy as sp
from scipy.optimize import curve_fit
from sklearn.cluster import KMeans 
from sklearn.datasets.samples_generator import make_blobs 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score,silhouette_samples
import folium
import os
import requests
import json
import matplotlib.pyplot as plt
import plotly.express as ex
import plotly.graph_objs as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input,Output,State
import os
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore")


### Load Dataset:

In [13]:
# Load Data

# COVID-19 policy tracker (curated parquet file hosted on the pandemicdatalake blob store).
df_ox = pd.read_parquet("https://pandemicdatalake.blob.core.windows.net/public/curated/covid-19/covid_policy_tracker/latest/covid_policy_tracker.parquet")
# Align the key column names with the ones used by the other frames so they can be merged later.
df_ox = df_ox.rename(columns={'countryname': 'Country/Region', 'date': 'Date'})
df_ox['Date'] = pd.to_datetime(df_ox['Date'])
# The country names are stored in the 'Country/Region' column, not in the index, so a
# rename(index=...) never matches anything. Relabel the column values instead, so that
# 'United States' becomes 'US' (the same relabelling the notebook applies to the OWID frame).
df_ox['Country/Region'] = df_ox['Country/Region'].replace({'United States': 'US'})


# Raw global time-series CSVs published by Johns Hopkins University (CSSE) on GitHub
url_cases = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
url_deaths = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
url_recovered = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'

# Download the three wide-format tables (one column per calendar day)
df_cases, df_deaths, df_recovered = (
    pd.read_csv(source_url) for source_url in (url_cases, url_deaths, url_recovered)
)

# The first four columns identify the location; every remaining column is a date
id_columns = ['Province/State', 'Country/Region', 'Lat', 'Long']
dates = df_cases.columns[4:]

# Reshape each table from wide to long: one row per (location, date)
df_cases_melt = df_cases.melt(
    id_vars=id_columns, value_vars=dates, var_name='Date', value_name='Confirmed'
)
df_deaths_melt = df_deaths.melt(
    id_vars=id_columns, value_vars=dates, var_name='Date', value_name='Deaths'
)
df_recovered_melt = df_recovered.melt(
    id_vars=id_columns, value_vars=dates, var_name='Date', value_name='Recovered'
)

# Left-join deaths and recoveries onto the confirmed-cases rows
merge_keys = ['Province/State', 'Country/Region', 'Date', 'Lat', 'Long']
df_data = (
    df_cases_melt
    .merge(right=df_deaths_melt, how='left', on=merge_keys)
    .merge(right=df_recovered_melt, how='left', on=merge_keys)
)


# Converting the column Date to a DateTime object
df_data['Date'] = pd.to_datetime(df_data['Date'])
# A missing Recovered value is treated as "no recoveries reported", i.e. 0
df_data['Recovered'] = df_data['Recovered'].fillna(0)
# Rows without a Province/State (countries not reporting state-wise data) reuse the country name
df_data['Province/State'] = df_data['Province/State'].fillna(df_data['Country/Region'])

# Removing the ships' data from the table as this project is focused on a country-level dataset
filter_1 = df_data['Country/Region'].str.contains('MS Zaandam')
filter_2 = df_data['Country/Region'].str.contains('Diamond Princess')
filter_3 = df_data['Province/State'].str.contains('Diamond Princess')
filter_4 = df_data['Province/State'].str.contains('Grand Princess')

# .copy() makes the filtered frame independent, so adding the 'Active' column below
# does not write into a slice of the unfiltered frame (SettingWithCopyWarning).
df_data = df_data[~(filter_1 | filter_2 | filter_3 | filter_4)].copy()

# Active Case = confirmed - (deaths + recovered)
df_data['Active'] = df_data['Confirmed'] - (df_data['Deaths'] + df_data['Recovered'])

# We don't need State data for country-wise analysis.
# Lat and Long are dropped as well, because they differ between the provinces of a country.
# NOTE: the columns are selected with a list ([[...]]); selecting several groupby columns
# with a bare tuple was deprecated in pandas 1.0 and is rejected by current pandas.
df = (
    df_data
    .groupby(['Date', 'Country/Region'])[['Confirmed', 'Deaths', 'Recovered', 'Active']]
    .sum()
    .reset_index()
)

# Daily (day-over-day) figures, used later for exploration and visualization.
df_temp = df.groupby(['Country/Region', 'Date'])[['Confirmed', 'Deaths', 'Recovered']]
df_temp = df_temp.sum().diff().reset_index()
# The first row of every country was diffed against the last row of the previous country,
# so that difference is meaningless: blank it out.
df_new = df_temp['Country/Region'] != df_temp['Country/Region'].shift(1)
df_temp.loc[df_new, ['Confirmed', 'Deaths', 'Recovered']] = np.nan
# renaming columns
df_temp.columns = ['Country/Region', 'Date', 'New_Cases', 'New_Deaths', 'New_Recovered']
# merging new values
df = pd.merge(df, df_temp, on=['Country/Region', 'Date'])
# filling na with 0
df = df.fillna(0)
# fixing data types
cols = ['New_Cases', 'New_Deaths', 'New_Recovered']
df[cols] = df[cols].astype('int')
# Negative daily case counts are floored at 0
df['New_Cases'] = df['New_Cases'].clip(lower=0)


# Country-level indicators taken from the Our World in Data (OWID) COVID-19 dataset.
# `parse_dates=True` is not passed: it only affects the index, and the 'Date' column
# is converted explicitly below.
df_owid = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv')

# Drop the aggregate pseudo-locations; only individual countries are wanted.
filt_d = df_owid['location'].isin(['World', 'International'])
df_owid = df_owid.loc[~filt_d].copy()
df_owid['combined_smokers'] = df_owid['female_smokers'] + df_owid['male_smokers']

# Align the key columns with the case data so the frames can be merged on them.
df_owid = df_owid.rename(columns={'location': 'Country/Region', 'date': 'Date'})
# 'Country/Region' is kept as a regular column. Previously it was moved into the index
# and the result of reset_index() was discarded, so it was never moved back.
df_owid['Country/Region'] = df_owid['Country/Region'].replace({'United States': 'US'})
df_owid['Date'] = pd.to_datetime(df_owid['Date'])

# These figures are already computed from the case data, so drop the OWID copies.
df_owid = df_owid.drop(columns=['iso_code', 'total_cases', 'new_cases',
       'total_deaths', 'new_deaths', 'total_cases_per_million',
       'new_cases_per_million', 'total_deaths_per_million',
       'new_deaths_per_million'])

# Enrich the per-country daily case table with the OWID indicators, then with the policy data.
dfs = df.merge(right=df_owid, how='left', on=['Country/Region', 'Date'])
df1 = dfs.merge(right=df_ox, how='left', on=['Country/Region', 'Date'])
df = df1

# The results of the earlier fillna(0) calls were thrown away, so they had no effect.
# Fill missing values in place on the final frame, restricted to numeric columns so that
# text columns are not polluted with the integer 0.
# NOTE(review): confirm that 0 is the intended placeholder for missing indicators.
numeric_cols = df.select_dtypes(include='number').columns
df[numeric_cols] = df[numeric_cols].fillna(0)
df['Date'] = pd.to_datetime(df['Date'])




In [14]:
# Colour assigned to each cumulative metric and to its daily counterpart
color_code = {
    'Confirmed': 'blue',
    'Deaths': 'red',
    'Recovered': 'green',
    'New_Cases': 'blue',
    'New_Deaths': 'red',
    'New_Recovered': 'green',
}

# Stylesheet loaded by the Dash app
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# Dash application object; the layout and the callbacks are attached to it in later cells
app = dash.Dash('Covid_19_Dashboard', external_stylesheets=external_stylesheets)

# Theme colours and the contact line displayed in the page header
colors = dict(background='#111111', text='#7FDBFF')
markdown_text = 'Contact: priteshjha27@gmail.com'



In [15]:
def sigmoid(x, a, b, c):
    """Evaluate a logistic curve shifted up by one: ``a / (1 + exp(-b * (x - c))) + 1``.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which the curve is evaluated.
    a : float
        Height of the logistic term (the curve tends to ``a + 1`` for large ``x`` when ``b > 0``).
    b : float
        Steepness of the curve.
    c : float
        Position of the midpoint along ``x``.

    Returns
    -------
    Same shape as ``x``: the curve value at each point.
    """
    decay = np.exp(-b * (x - c))
    logistic = a / (1 + decay)
    return logistic + 1

### Web App Layout:

In [16]:
# Layout - Tabs >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

# Page skeleton: title, contact line, the tab selector and an empty container
# ('tabs-content') that a callback fills depending on the selected tab.
page_title = html.H3(children='COVID-19 Tracker', style={'textAlign': 'center'})
contact_line = html.Div(children=markdown_text, style={'textAlign': 'right'})
tab_selector = dcc.Tabs(
    id="tabs",
    value='tab-1',
    children=[
        dcc.Tab(label='World Statistics', value='tab-1'),
        dcc.Tab(label='India - Predictive Model', value='tab-2'),
    ],
)
tab_container = html.Div(id='tabs-content')

app.layout = html.Div([page_title, contact_line, tab_selector, tab_container])





In [17]:
# Tabs output >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

@app.callback(Output('tabs-content', 'children'),
              [Input('tabs', 'value')])
def render_content(tab):
    """Build the content shown below the tab bar.

    Parameters
    ----------
    tab : str
        Value of the selected tab ('tab-1' or 'tab-2').

    Returns
    -------
    html.Div or None
        The components of the selected tab; ``None`` for any other value.
    """
    # World tab: country text box, submit button and the statistics graph
    if tab == 'tab-1':
        country_box = dcc.Input(
            id='input_text',
            placeholder='Enter the Country Name: ',
            type='text',
            value='India',
        )
        submit_button = html.Button('Submit', id='button')
        stats_graph = dcc.Graph(id='Stats')
        return html.Div(
            [country_box, submit_button, stats_graph],
            style={'width': '500'},
        )

    # India predictive-model tab: metric selector and the prediction graph
    if tab == 'tab-2':
        metric_selector = dcc.RadioItems(
            id='radio',
            options=[
                {'label': 'Predict Confirmed Cases', 'value': 'Confirmed'},
                {'label': 'Predict Death Count', 'value': 'Deaths'},
            ],
            value='Confirmed',
            labelStyle={'display': 'inline-block'},
        )
        prediction_graph = dcc.Graph(id='test_graph_radio')
        return html.Div(
            [metric_selector, prediction_graph],
            style={'padding': '0px 10px 10px 10px'},
        )

    # Any other tab value: nothing to render
    return None
#app.css.append_css({'external_url': 'https://codepen.io/chriddyp/pen/bWLwgP.css'})




### Web app Callbacks:

In [18]:
# World Tab Callback >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

@app.callback(Output('Stats', 'figure'), 
              [Input('button', 'n_clicks')],
              [State('input_text', 'value')])
def update_graph(click,name):
    """Draw the 2x2 bar-chart overview for the country typed into the text box.

    Parameters
    ----------
    click : int or None
        Click count of the submit button; only used to trigger the callback.
    name : str
        Country name as entered by the user; surrounding whitespace is ignored.

    Returns
    -------
    plotly.graph_objects.Figure
        One bar chart per metric (Confirmed, Active, Deaths, Recovered) over time.
        The charts are empty when no row matches ``name``.
    """
    country = str(name).strip()
    # Only keep the days on which the country already had confirmed cases
    selected = df.loc[(df['Country/Region'] == country) & (df['Confirmed'] > 0)]

    # (metric column, subplot row, subplot col). The top-right panel used to repeat
    # 'Confirmed'; it now shows 'Active' so that every panel carries a distinct series.
    panels = [
        ('Confirmed', 1, 1),
        ('Active', 1, 2),
        ('Deaths', 2, 1),
        ('Recovered', 2, 2),
    ]

    fig = make_subplots(
        rows=2, cols=2,
        column_widths=[0.5, 0.5],
        row_heights=[0.5, 0.5],
        specs=[[{"type": "bar"}, {"type": "bar"}],
               [{"type": "bar"}, {"type": "bar"}]],
        subplot_titles=[metric for metric, _, _ in panels])

    for metric, panel_row, panel_col in panels:
        fig.add_trace(
            go.Bar(x=selected['Date'], y=selected[metric], name=metric),
            row=panel_row, col=panel_col)

    fig.update_layout(
        template="plotly_dark",
        margin=dict(r=10, t=25, b=40, l=60),
    )
    # add_annotation appends to the existing annotations; passing `annotations=[...]`
    # to update_layout would replace the subplot titles, which are annotations too.
    fig.add_annotation(
        text="Source: JHU CSSE",
        showarrow=False,
        xref="paper",
        yref="paper",
        x=0,
        y=0)

    return fig


In [19]:
# India Stats Tab Callback >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

@app.callback(Output('test_graph_radio', 'figure'), [Input('radio', 'value')])
def update_case(case_type):
    """Fit a sigmoid to India's cumulative series and plot it with a short projection.

    Parameters
    ----------
    case_type : str
        Column of ``df`` to model ('Confirmed' or 'Deaths', as offered by the radio buttons).

    Returns
    -------
    plotly.graph_objects.Figure
        The observed series, followed by a 'Predicted' trace for the next 29 days.
        Only the observed series is returned when there are too few data points
        or when the curve fit does not converge.
    """
    country = 'India'

    # Observed series: the country's rows from the first day the metric is positive
    selected = (df['Country/Region'] == country) & (df[case_type] > 0)
    dfg = df.loc[selected].reset_index(drop=True)
    dfg['Day'] = dfg.index + 1          # 1-based day counter since the first positive value
    t_df = dfg[['Day', case_type]]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=t_df['Day'], y=t_df[case_type],
                             mode='lines+markers', name=case_type))
    fig.update_layout(xaxis_title='Day', yaxis_title=case_type)

    # The sigmoid has three free parameters, so at least three observations are required
    if len(dfg) < 3:
        return fig

    train_x = dfg['Day']
    train_y = dfg[case_type]

    # Initial guess: plateau at the current maximum, growth rate 0.3, inflexion at day 100
    p0 = (train_y.max(), 0.3, 100)
    try:
        popt, _ = curve_fit(sigmoid, train_x, train_y, p0=p0)
    except RuntimeError:
        # curve_fit raises RuntimeError when the least-squares minimisation fails;
        # show the observed data rather than crashing the callback.
        return fig

    # The fitted plateau and growth rate are inflated (+10% / +20%) before projecting.
    # NOTE(review): presumably a deliberate pessimistic adjustment; confirm the factors.
    popt[0] = 1.10 * popt[0]
    popt[1] = 1.2 * popt[1]

    # Project the days following the last observation: last_day + 1 .. last_day + 29
    last_day = int(train_x.max())
    p_df = pd.DataFrame({'Day': range(last_day + 1, last_day + 30)})
    p_df[case_type] = sigmoid(p_df['Day'], *popt)

    fig.add_trace(go.Scatter(x=p_df['Day'], y=p_df[case_type],
                             mode='lines+markers', name='Predicted'))

    return fig



### Run Local Server:

In [20]:
# Run_Server >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

# Start the local Dash server, but only when this code is executed as the main module
# (which is the case inside a notebook kernel) and not when it is imported.
if __name__ == "__main__":
    app.run_server()


Dash is running on http://127.0.0.1:8050/

 in production, use a production WSGI server like gunicorn instead.

 * Serving Flask app "Covid_19_Dashboard" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
