# General Stats
___

In [1]:
# Enable Intellisense
%config IPCompleter.greedy=True

import pandas as pd
import numpy as np
import io
import requests
import seaborn as sns
import plotly.express as px
import plotly.offline
sns.set()

In [2]:
confirm_data_url = "https://raw.githubusercontent.com/dsfsi/covid19za/master/data/covid19za_timeline_confirmed.csv"

confirm_data_req = requests.get(confirm_data_url).content

confirm_data = pd.read_csv(io.StringIO(confirm_data_req.decode('utf-8')), delimiter = ',')

In [3]:
confirm_data.tail()

Unnamed: 0,case_id,date,YYYYMMDD,country,province,geo_subdivision,age,gender,transmission_type,type
235,236,21-03-2020,20200321,South Africa,WC,ZA-WC,36.0,male,with pending travel history,pending
236,237,21-03-2020,20200321,South Africa,WC,ZA-WC,55.0,female,with pending travel history,pending
237,238,21-03-2020,20200321,South Africa,WC,ZA-WC,45.0,male,Travelled to Germany and Austria,travel
238,239,21-03-2020,20200321,South Africa,WC,ZA-WC,65.0,female,Travelled to UK,travel
239,240,21-03-2020,20200321,South Africa,WC,ZA-WC,34.0,male,with pending travel history,pending


In [4]:
confirm_data.set_index('case_id',inplace= True)

In [5]:
confirm_data.drop(['YYYYMMDD','country','geo_subdivision'], axis = 1, inplace=True)

In [6]:
confirm_data.tail()

Unnamed: 0_level_0,date,province,age,gender,transmission_type,type
case_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
236,21-03-2020,WC,36.0,male,with pending travel history,pending
237,21-03-2020,WC,55.0,female,with pending travel history,pending
238,21-03-2020,WC,45.0,male,Travelled to Germany and Austria,travel
239,21-03-2020,WC,65.0,female,Travelled to UK,travel
240,21-03-2020,WC,34.0,male,with pending travel history,pending


In [7]:
confirm_data.describe()

Unnamed: 0,age
count,239.0
mean,41.732218
std,16.205205
min,2.0
25%,31.0
50%,39.0
75%,54.0
max,85.0


## Cases by Province

In [30]:
province_cases = confirm_data.groupby('province').count()[['date']]
province_cases.reset_index(inplace=True)
province_cases.rename(columns = {"date":"count"}, inplace=True)
province_cases = province_cases.append(pd.DataFrame({"province":["NW","NC"],"count":[0,0]}))
province_cases

Unnamed: 0,province,count
0,EC,1
1,FS,7
2,GP,125
3,KZN,26
4,LP,1
5,MP,5
6,WC,75
0,NW,0
1,NC,0


In [31]:
province_names = {"EC":"Eastern Cape",
                     "FS" : "Free State",
                     "GP" : "Gauteng",
                     "KZN" : "KwaZula-Natal",
                     "LP" : "Limpopo",
                     "MP" : "Mpumalanga",
                     "NW" : "North West",
                     "NC" : "Northern Cape",
                     "WC" : "Western Cape"}
province_cases['province'] = province_cases['province'].map(province_names)
province_cases

Unnamed: 0,province,count
0,Eastern Cape,1
1,Free State,7
2,Gauteng,125
3,KwaZula-Natal,26
4,Limpopo,1
5,Mpumalanga,5
6,Western Cape,75
0,North West,0
1,Northern Cape,0


In [32]:
fig_province = px.pie(province_cases, values='count', names='province')
fig_province.update_layout(legend=dict(x=0, y=1))
# hov_template = ''
fig_province.update_traces(hoverinfo= 'label+percent', 
                  hovertemplate = '%{label}<br>%{value}',
                  textinfo='value+label',
                  textposition='inside')
fig_province.show()

**Save HTML**

In [11]:
plotly.offline.plot(fig_province, filename = 'tot_cases_per_province.html', auto_open=True, config = dict(displayModeBar=False))

'tot_cases_per_province.html'

## Cases by Gender

In [34]:
gender_cases = confirm_data.groupby('gender').count()[['date']]
gender_cases.reset_index(inplace=True)
gender_cases.rename(columns = {"date":"count"}, inplace=True)
gender_cases

Unnamed: 0,gender,count
0,female,88
1,male,147
2,not specified,5


In [13]:
upper_dict = {"female":"Female", "male":"Male", "not specified":"Not Specified"}
gender_cases['gender'] = gender_cases['gender'].map(upper_dict) 
gender_cases

Unnamed: 0,gender,count
0,Female,88
1,Male,147
2,Not Specified,5


In [28]:
fig_gender = px.pie(province_cases, values='count', names='gender')
fig_gender.update_layout(legend=dict(x=0, y=1))
# hov_template = ''
fig_gender.update_traces(hoverinfo= 'label+percent', 
                  hovertemplate = '%{label}<br>%{value}',
                  textinfo='value+label',
                  textposition='inside')
fig_gender.show()

**Save HTML**

In [15]:
plotly.offline.plot(fig_gender, filename = 'tot_cases_per_gender.html', auto_open=True, config = dict(displayModeBar=False))

'tot_cases_per_gender.html'

### Cases Per Travel Type

In [16]:
# confirm_data.fillna('not specified', inplace=True)
travel_cases = confirm_data.groupby('type').count()[['date']]
travel_cases.reset_index(inplace=True)
travel_cases.rename(columns = {"date":"count"}, inplace=True)
travel_cases

Unnamed: 0,type,count
0,local,15
1,pending,46
2,travel,179


In [17]:
travel_cases['type'] = travel_cases['type'].apply(lambda x: x.capitalize()) 
travel_cases

Unnamed: 0,type,count
0,Local,15
1,Pending,46
2,Travel,179


In [27]:
fig_travel = px.pie(travel_cases, values='count', names='type')
fig_travel.update_layout(legend=dict(x=0, y=1))
# hov_template = ''
fig_travel.update_traces(hoverinfo= 'label+percent', 
                  hovertemplate = '%{label}<br>%{value}',
                  textinfo='value+label',
                  textposition='inside')
fig_travel.show()

**Save HTML**

In [19]:
plotly.offline.plot(fig_travel, filename = 'tot_cases_per_travel.html', auto_open=True, config = dict(displayModeBar=False))

'tot_cases_per_travel.html'

### Cases by Age Range

#### Age ranges
* 0 - 12
* 13 - 19
* 20 - 29
* 30 - 39
* 40 - 49
* 50 - 59
* 60 - 69
* 70 - 79
* 80 - ...

In [20]:
# confirm_data.head()

In [21]:
def get_age_range(age):
    if age == np.NaN:
        return "Not specified"
    if age < 13:
        return "0-12"
    elif age < 20:
        return "13-19"
    elif age < 30:
        return "20 - 29"
    elif age < 40:
        return "30 - 39"
    elif age < 50:
        return "40 - 49"
    elif age < 60:
        return "50 - 59"
    elif age < 70:
        return "60 - 69"
    elif age < 80:
        return "70 - 79"
    else:
        return "80 or over"
    
confirm_data['age_range'] = confirm_data['age'].apply(get_age_range)
confirm_data.head()

Unnamed: 0_level_0,date,province,age,gender,transmission_type,type,age_range
case_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,05-03-2020,KZN,38.0,male,Travelled to Italy,travel,30 - 39
2,07-03-2020,GP,39.0,female,Travelled to Italy,travel,30 - 39
3,08-03-2020,KZN,38.0,female,Travelled to Italy,travel,30 - 39
4,09-03-2020,KZN,38.0,male,Travelled to Italy,travel,30 - 39
5,09-03-2020,KZN,38.0,female,Travelled to Italy,travel,30 - 39


In [22]:
# confirm_data.fillna('not specified', inplace=True)
age_cases = confirm_data.groupby('age_range').count()[['date']]
age_cases.reset_index(inplace=True)
age_cases.rename(columns = {"date":"count"}, inplace=True)
age_cases

Unnamed: 0,age_range,count
0,0-12,10
1,13-19,3
2,20 - 29,37
3,30 - 39,72
4,40 - 49,32
5,50 - 59,53
6,60 - 69,20
7,70 - 79,11
8,80 or over,2


In [26]:
fig_age = px.pie(age_cases, values='count', names='age_range')
fig_age.update_layout(legend=dict(x=0, y=1))
# hov_template = ''
fig_age.update_traces(hoverinfo= 'label+percent', 
                  hovertemplate = '%{label}<br>%{value}',
                  textinfo='value+label',
                  textposition='inside')
fig_age.show()

In [24]:
plotly.offline.plot(fig_age, filename = 'tot_cases_per_age.html', auto_open=True, config = dict(displayModeBar=False))

'tot_cases_per_age.html'