In [4]:
# Import packages
import pandas as pd
import requests
import os
import io
import map_data

import json
import plotly
import plotly.express as px
import plotly.graph_objects as go

import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output



# Function for retrieving CSVs from online source
def retrieve_data(url_str, timeout=30):
    """Download a CSV file over HTTP and parse it into a DataFrame.

    Parameters
    ----------
    url_str : str
        URL of the CSV file to download.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout a stalled connection would block indefinitely.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the downloaded CSV.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    """
    response = requests.get(url_str, timeout=timeout)
    # Fail loudly on HTTP errors instead of feeding an error page to read_csv
    response.raise_for_status()
    csv_text = response.content.decode("utf-8")
    return pd.read_csv(io.StringIO(csv_text))


### COVID line graphs

In [5]:
# Lookup table: province abbreviation -> full province name
# (the 'total' key maps the national total column to 'Total')
prov_name_dict = dict(
    EC='Eastern Cape',
    FS='Free State',
    GP='Gauteng',
    KZN='KwaZulu-Natal',
    LP='Limpopo',
    MP='Mpumalanga',
    NC='Northern Cape',
    NW='North West',
    WC='Western Cape',
    total='Total',
)

In [21]:
# retrieve data from github
cases_df = retrieve_data("https://raw.githubusercontent.com/dsfsi/covid19za/master/data/covid19za_provincial_cumulative_timeline_confirmed.csv")
deaths_df = retrieve_data("https://raw.githubusercontent.com/dsfsi/covid19za/master/data/covid19za_provincial_cumulative_timeline_deaths.csv")
recoveries_df = retrieve_data("https://raw.githubusercontent.com/dsfsi/covid19za/master/data/covid19za_provincial_cumulative_timeline_recoveries.csv")

# remove columns that aren't needed
unused_cols = ['YYYYMMDD','UNKNOWN','source']
cases_clean_df = cases_df.drop(columns=unused_cols)
death_clean_df = deaths_df.drop(columns=unused_cols)
recov_clean_df = recoveries_df.drop(columns=unused_cols)

# melt dataframes from wide (one column per province) to long (one row per date/province)
covid_df = cases_clean_df.melt('date', var_name='province', value_name='Cumulative Cases')
death_long_df = death_clean_df.melt('date', var_name='province', value_name='Cumulative Deaths')
recov_long_df = recov_clean_df.melt('date', var_name='province', value_name='Cumulative Recoveries')

# Merge dataframes into one; left joins keep every case row, leaving NaN where
# the deaths/recoveries files have no matching date/province
covid_df = pd.merge(covid_df, death_long_df, how='left', on=['date','province'])
covid_df = pd.merge(covid_df, recov_long_df, how='left', on=['date','province'])

# replace province short names with full ones
covid_df = covid_df.replace({'province':prov_name_dict})
covid_df['province'] = pd.Categorical(
    covid_df['province'],
    categories=['Eastern Cape','Free State','Gauteng','KwaZulu-Natal','Limpopo','Mpumalanga','North West','Northern Cape','Western Cape','Total'],
    ordered=True)

# Calculate more columns of information
covid_df['Daily Active Cases'] = covid_df['Cumulative Cases']-(covid_df['Cumulative Deaths']+covid_df['Cumulative Recoveries'])

# Daily figures are differences of the cumulative series WITHIN each province.
# The frame is stacked province after province, so a plain .diff() would subtract
# the last row of one province from the first row of the next; grouping by
# province keeps every difference inside its own series.
# Assumes rows within a province are in chronological order (melt keeps the
# row order of the source file).
by_province = covid_df.groupby('province', observed=True)
covid_df['Daily New Cases'] = by_province['Cumulative Cases'].diff()
covid_df['Daily Deaths'] = by_province['Cumulative Deaths'].diff()
covid_df['Daily Recoveries'] = by_province['Cumulative Recoveries'].diff()

# remove negatives (a drop in a cumulative count) from the daily new cases;
# NaN values are left untouched
covid_df['Daily New Cases'] = covid_df['Daily New Cases'].clip(lower=0)

covid_df

Unnamed: 0,date,province,Cumulative Cases,Cumulative Deaths,Cumulative Recoveries,Daily Active Cases,Daily New Cases,Daily Deaths,Daily Recoveries
0,05-03-2020,Eastern Cape,0.0,,,,,,
1,07-03-2020,Eastern Cape,0.0,,,,0.0,,
2,08-03-2020,Eastern Cape,0.0,,,,0.0,,
3,09-03-2020,Eastern Cape,0.0,,,,0.0,,
4,11-03-2020,Eastern Cape,0.0,,,,0.0,,
...,...,...,...,...,...,...,...,...,...
3865,24-03-2021,Total,1540009.0,52372.0,1466595.0,21042.0,1048.0,121.0,1391.0
3866,25-03-2021,Total,1541563.0,52535.0,1467254.0,21774.0,1554.0,163.0,659.0
3867,26-03-2021,Total,1543079.0,52602.0,1469565.0,20912.0,1516.0,67.0,2311.0
3868,27-03-2021,Total,1544466.0,52648.0,1471164.0,20654.0,1387.0,46.0,1599.0


In [23]:
# floor the daily new-case counts at zero (missing values stay missing)
covid_df['Daily New Cases'] = covid_df['Daily New Cases'].clip(lower=0)

In [134]:
# plot cumulative cases over time, one coloured line per province
cov_fig = px.line(
    data_frame=covid_df,
    x="date",
    y="Cumulative Cases",
    color='province',
)

# render the figure to a standalone HTML file
plotly.offline.plot(cov_fig, filename='test.html')

'test.html'

In [24]:
# sanity check: smallest daily new-case value in the frame
covid_df.loc[:, 'Daily New Cases'].min()

0.0

### Mobility line graphs

In [25]:
# base directory for local data files: the current working directory (use for Jupyter)
working_path = os.getcwd()
# when running as a .py script, use the script's own directory instead:
#working_path = os.path.dirname(os.path.abspath(__file__))

# Function for retrieving CSVs from online source
def retrieve_data(url_str, timeout=30):
    """Download a CSV file over HTTP and parse it into a DataFrame.

    Parameters
    ----------
    url_str : str
        URL of the CSV file to download.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout a stalled connection would block indefinitely.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the downloaded CSV.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    """
    response = requests.get(url_str, timeout=timeout)
    # Fail loudly on HTTP errors instead of feeding an error page to read_csv
    response.raise_for_status()
    csv_text = response.content.decode("utf-8")
    return pd.read_csv(io.StringIO(csv_text))

# Import necessary data
# Local lookup table, indexed by its 'province' column; os.path.join builds the
# path with the platform's own separator instead of a hard-coded '/'
prov_keys_df = pd.read_csv(os.path.join(working_path, 'province_pop.csv'), index_col='province')
# Provincial cumulative timelines (confirmed cases, deaths, recoveries)
cases_df = retrieve_data("https://raw.githubusercontent.com/dsfsi/covid19za/master/data/covid19za_provincial_cumulative_timeline_confirmed.csv")
deaths_df = retrieve_data("https://raw.githubusercontent.com/dsfsi/covid19za/master/data/covid19za_provincial_cumulative_timeline_deaths.csv")
recoveries_df = retrieve_data("https://raw.githubusercontent.com/dsfsi/covid19za/master/data/covid19za_provincial_cumulative_timeline_recoveries.csv")
# Google mobility report for South Africa
mobility_df = retrieve_data("https://raw.githubusercontent.com/dsfsi/covid19za/master/data/mobility/google_mobility/mobility_report_ZA.csv")

In [52]:
# fetch a fresh copy of the Google mobility report
mobility_df = retrieve_data(
    "https://raw.githubusercontent.com/dsfsi/covid19za/master/data/mobility/google_mobility/mobility_report_ZA.csv"
)

# Province name -> plot colour; every colour is currently an empty-string placeholder
prov_col_dict = dict.fromkeys(
    [
        'Eastern Cape',
        'Free State',
        'Gauteng',
        'KwaZulu-Natal',
        'Limpopo',
        'Mpumalanga',
        'North West',
        'Northern Cape',
        'Western Cape',
    ],
    '',
)

# plot the "retail and recreation" series over time, one coloured line per province
mob_fig = px.line(
    data_frame=mobility_df,
    x="date",
    y="retail and recreation",
    color='province',
)

# render the figure to a standalone HTML file
plotly.offline.plot(mob_fig, filename='test.html')

'test.html'

In [26]:
# reload the raw mobility report so the rescale below always starts from fresh values
mobility_df = retrieve_data(
    "https://raw.githubusercontent.com/dsfsi/covid19za/master/data/mobility/google_mobility/mobility_report_ZA.csv"
)

# rescale every column from the third onward with (x + 100) / 200
value_cols = mobility_df.columns[2:]
mobility_df[value_cols] = mobility_df[value_cols].add(100).div(200)

# show the frame ordered by province
mobility_df.sort_values(by='province')

Unnamed: 0,province,date,retail and recreation,grocery and pharmacy,parks,transit stations,workplaces,residential
805,Eastern Cape,2021-03-23,0.410,0.485,0.325,0.405,0.440,0.540
531,Eastern Cape,2020-06-22,0.335,0.405,0.445,0.200,0.335,0.580
532,Eastern Cape,2020-06-23,0.325,0.405,0.455,0.200,0.335,0.580
533,Eastern Cape,2020-06-24,0.340,0.415,0.465,0.200,0.340,0.585
534,Eastern Cape,2020-06-25,0.410,0.505,0.460,0.220,0.340,0.580
...,...,...,...,...,...,...,...,...
3755,Western Cape,2020-06-22,0.285,0.350,0.260,0.185,0.290,0.605
3754,Western Cape,2020-06-21,0.205,0.330,0.160,0.140,0.420,0.570
3753,Western Cape,2020-06-20,0.280,0.405,0.245,0.195,0.415,0.580
3763,Western Cape,2020-06-30,0.345,0.430,0.315,0.200,0.305,0.590


In [None]:
# create line graph for each province
# NOTE: mobility_df is deliberately NOT rescaled here. It is already rescaled with
# (x + 100) / 200 right after it is loaded; repeating that rescale inside the loop
# re-applied it to the whole frame on every iteration, so each province was sliced
# from a differently scaled frame.
prov_mob_figs = {}  # province name -> figure, so no province's figure is discarded
for prov_name in prov_col_dict:
    # extract all data on selected province
    prov_mob_df = mobility_df[mobility_df['province'] == prov_name]

    # create line graph of province
    mob_fig = px.line(prov_mob_df, y="retail and recreation", x="date", title=prov_name)
    prov_mob_figs[prov_name] = mob_fig

# As before, only the figure of the last province in prov_col_dict is written to
# test.html; the other figures are available in prov_mob_figs.
plotly.offline.plot(mob_fig, filename='test.html')
#mob_fig.to_html("test.html")