# The Uganda Covid-19 Dashboard Project

In [1]:
#Import the required dependencies
import io
from datetime import date, timedelta

import pandas as pd
import requests

import plotly.graph_objects as go
from plotly.offline import iplot, init_notebook_mode

init_notebook_mode(connected=True)

In [2]:
#Load Data from CSSE Repository and carry out some ETL
# Build the list of report dates, from the first date of interest up to today.
file_date = date(2020, 3, 21)
dates = []

while file_date <= date.today():
    dates.append(file_date)
    file_date += timedelta(days=1)

# Download one CSV per date. A date whose report cannot be fetched (typically
# today's, which may not be published yet) is skipped instead of having the
# HTTP error page parsed as if it were CSV data.
files = []
for report_date in dates:
    file = report_date.strftime("%m-%d-%Y")
    print(file)
    url = r'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/{}.csv'.format(
        file)
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        print('  skipped (HTTP {})'.format(response.status_code))
        continue
    # 'utf-8-sig' decodes plain UTF-8 and additionally strips a leading BOM
    # if one is present, so the first column name stays clean.
    daily_report = pd.read_csv(io.StringIO(response.content.decode('utf-8-sig')))
    daily_report['date'] = pd.to_datetime(file, format="%m-%d-%Y")
    # The drop list below shows that both header spellings occur across
    # reports (e.g. 'Province/State' and 'Province_State'); normalise the
    # country column for either spelling so no report is lost when filtering.
    daily_report = daily_report.rename(columns={'Country_Region': 'Country',
                                                'Country/Region': 'Country'})
    files.append(daily_report)

if not files:
    raise RuntimeError('No daily report could be downloaded from the CSSE repository')

df = pd.concat(files, axis=0, ignore_index=True, sort=False)

# errors='ignore' because not every listed column exists in every run: the
# set of columns depends on which report schemas were actually downloaded.
df = df.drop(columns=['Province/State', 'Province_State', 'Last Update', 'Last_Update',
                      'FIPS', 'Admin2', 'Combined_Key', '404: Not Found', 'Lat', 'Long_',
                      'Latitude', 'Longitude'],
             errors='ignore')

02-10-2020
02-11-2020
02-12-2020
02-13-2020
02-14-2020
02-15-2020
02-16-2020
02-17-2020
02-18-2020
02-19-2020
02-20-2020
02-21-2020
02-22-2020
02-23-2020
02-24-2020
02-25-2020
02-26-2020
02-27-2020
02-28-2020
02-29-2020
03-01-2020
03-02-2020
03-03-2020
03-04-2020
03-05-2020
03-06-2020
03-07-2020
03-08-2020
03-09-2020
03-10-2020
03-11-2020
03-12-2020
03-13-2020
03-14-2020
03-15-2020
03-16-2020
03-17-2020
03-18-2020
03-19-2020
03-20-2020
03-21-2020
03-22-2020
03-23-2020
03-24-2020
03-25-2020
03-26-2020
03-27-2020
03-28-2020
03-29-2020
03-30-2020
03-31-2020
04-01-2020
04-02-2020
04-03-2020
04-04-2020
04-05-2020
04-06-2020
04-07-2020
04-08-2020
04-09-2020
04-10-2020
04-11-2020
04-12-2020
04-13-2020
04-14-2020
04-15-2020
04-16-2020
04-17-2020
04-18-2020
04-19-2020
04-20-2020
04-21-2020
04-22-2020
04-23-2020
04-24-2020
04-25-2020
04-26-2020
04-27-2020
04-28-2020
04-29-2020
04-30-2020
05-01-2020
05-02-2020


In [4]:
#Then extract only data related to Uganda
# .copy() gives an independent frame; assigning columns on a boolean-filtered
# slice of `df` is chained assignment and triggers SettingWithCopyWarning.
df_uganda = df[df['Country'] == 'Uganda'].copy()

# Missing counts are treated as zero so the columns can be stored as integers.
df_uganda['Confirmed'] = df_uganda['Confirmed'].fillna(0).astype(int)
df_uganda['Deaths'] = df_uganda['Deaths'].fillna(0).astype(int)
df_uganda['Recovered'] = df_uganda['Recovered'].fillna(0).astype(int)
# Active cases are derived here: confirmed minus deaths minus recoveries.
df_uganda['Active'] = df_uganda['Confirmed'] - \
    df_uganda['Deaths'] - df_uganda['Recovered']

# Keep only the columns used by the rest of the notebook.
df_uganda = df_uganda[['date',
                       'Country',
                       'Confirmed',
                       'Deaths',
                       'Recovered',
                       'Active',
                       ]]

# Persist the cleaned subset. to_csv returns None when given a path, so
# `ugandan_data` is always None; the name is kept only for compatibility.
ugandan_data = df_uganda.to_csv('uganda_data.csv', index=False)

## Data Exploration

In [5]:
# Preview the first rows of the Uganda subset.
df_uganda.head()

Unnamed: 0,date,Country,Confirmed,Deaths,Recovered,Active,Latitude,Longitude
9908,2020-03-22,Uganda,1,0,0,1,,
13314,2020-03-23,Uganda,9,0,0,9,,
16730,2020-03-24,Uganda,9,0,0,9,,
20150,2020-03-25,Uganda,14,0,0,14,,
23570,2020-03-26,Uganda,14,0,0,14,,


In [6]:
# Preview the last rows of the Uganda subset.
df_uganda.tail()

Unnamed: 0,date,Country,Confirmed,Deaths,Recovered,Active,Latitude,Longitude
116239,2020-04-26,Uganda,79,0,46,33,,
119391,2020-04-27,Uganda,79,0,47,32,,
122562,2020-04-28,Uganda,79,0,52,27,,
125730,2020-04-29,Uganda,81,0,52,29,,
128907,2020-04-30,Uganda,83,0,52,31,,


In [7]:
# Show five randomly chosen rows; no random_state is passed, so the rows
# shown differ from run to run.
df_uganda.sample(5)

Unnamed: 0,date,Country,Confirmed,Deaths,Recovered,Active,Latitude,Longitude
85282,2020-04-16,Uganda,55,0,20,35,,
82241,2020-04-15,Uganda,55,0,12,43,,
55659,2020-04-06,Uganda,52,0,0,52,,
106835,2020-04-23,Uganda,74,0,46,28,,
42219,2020-04-01,Uganda,44,0,0,44,,


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) of the subset.
df_uganda.describe()

Unnamed: 0,Confirmed,Deaths,Recovered,Active,Latitude,Longitude
count,40.0,40.0,40.0,40.0,0.0,0.0
mean,50.175,0.0,15.825,34.35,,
std,20.958919,0.0,20.278683,14.144946,,
min,1.0,0.0,0.0,1.0,,
25%,44.0,0.0,0.0,27.0,,
50%,53.0,0.0,2.0,33.0,,
75%,57.25,0.0,38.0,47.25,,
max,83.0,0.0,52.0,53.0,,


In [9]:
# Print column dtypes, non-null counts and memory usage of the subset.
df_uganda.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 40 entries, 9908 to 128907
Data columns (total 8 columns):
date         40 non-null datetime64[ns]
Country      40 non-null object
Confirmed    40 non-null int32
Deaths       40 non-null int32
Recovered    40 non-null int32
Active       40 non-null int32
Latitude     0 non-null float64
Longitude    0 non-null float64
dtypes: datetime64[ns](1), float64(2), int32(4), object(1)
memory usage: 2.2+ KB


## Statistics

In [11]:
# Confirmed cases on the date carried by the last row of the frame
is_latest = df_uganda['date'] == df_uganda['date'].iloc[-1]
confirmed = df_uganda.loc[is_latest, 'Confirmed'].sum()

print('There have been a total of {} confirmed cases in Uganda'.format(confirmed))

There have been a total of 83 confirmed cases in Uganda


In [12]:
# Recoveries on the date carried by the last row of the frame
is_latest = df_uganda['date'] == df_uganda['date'].iloc[-1]
recovered = df_uganda.loc[is_latest, 'Recovered'].sum()

print('There have been a total of {} recoveries in Uganda'.format(recovered))

There have been a total of 52 recoveries in Uganda


In [13]:
# Deaths on the date carried by the last row of the frame
is_latest = df_uganda['date'] == df_uganda['date'].iloc[-1]
deaths = df_uganda.loc[is_latest, 'Deaths'].sum()

print('There have been a total of {} death cases in Uganda'.format(deaths))

There have been a total of 0 death cases in Uganda


In [14]:
# Active cases on the date carried by the last row of the frame
is_latest = df_uganda['date'] == df_uganda['date'].iloc[-1]
active = df_uganda.loc[is_latest, 'Active'].sum()

print('There are currently a total of {} active cases in Uganda'.format(active))

There are currently a total of 31 active cases in Uganda


In [16]:
# Fatality rate in percent: deaths divided by confirmed cases, both taken
# from the row(s) whose date equals the date of the frame's last row.
latest_rows = df_uganda[df_uganda['date'] == df_uganda['date'].iloc[-1]]
ug_fatality = 100 * latest_rows['Deaths'].sum() / latest_rows['Confirmed'].sum()

print('Uganda currently has a fatality rate of {:.2f}%'.format(ug_fatality))

Uganda currently has a fatality rate of 0.00%


## Visualisations

In [17]:
# Sum each count per date once, instead of re-running the groupby per trace.
daily_totals = df_uganda.groupby('date')[
    ['Confirmed', 'Active', 'Recovered', 'Deaths']].sum()

fig = go.Figure()

# Active, Recovered and Deaths are the components of Confirmed (Active is
# computed as Confirmed - Deaths - Recovered), so only these three share a
# stack group; their stacked height then equals the confirmed total.
for status in ['Active', 'Recovered', 'Deaths']:
    fig.add_trace(go.Scatter(
        x=daily_totals.index,
        y=daily_totals[status],
        name=status, stackgroup='one',
        mode='lines', hovertemplate='%{y:,g}'))

# Confirmed is drawn as a plain line outside the stack. Putting it in the
# same stack group would add it on top of its own components and make the
# chart peak at roughly twice the real number of individuals.
fig.add_trace(go.Scatter(
    x=daily_totals.index,
    y=daily_totals['Confirmed'],
    name="Confirmed",
    mode='lines', hovertemplate='%{y:,g}'))

fig.update_layout(title="COVID-19 infections in Uganda",
                  xaxis_title="Date",
                  yaxis_title="Number of Individuals")

fig.show()