# General Stats
___

In [1]:
# Enable Intellisense
%config IPCompleter.greedy=True

import pandas as pd
import numpy as np
import io
import requests
import seaborn as sns
import plotly.express as px
import plotly.offline
sns.set()

In [2]:
confirm_data_url = "https://raw.githubusercontent.com/dsfsi/covid19za/master/data/covid19za_timeline_confirmed.csv"

confirm_data_req = requests.get(confirm_data_url).content

confirm_data = pd.read_csv(io.StringIO(confirm_data_req.decode('utf-8')), delimiter = ',')

In [3]:
confirm_data.tail()

Unnamed: 0,case_id,date,YYYYMMDD,country,province,geo_subdivision,age,gender,transmission_type,type
269,270,22-03-2020,20200322,South Africa,WC,ZA-WC,71.0,male,Travelled to Portugal,travel
270,271,22-03-2020,20200322,South Africa,WC,ZA-WC,47.0,female,with no international travel history,local
271,272,22-03-2020,20200322,South Africa,WC,ZA-WC,57.0,male,with no international travel history,local
272,273,22-03-2020,20200322,South Africa,WC,ZA-WC,54.0,male,Travelled to Italy,travel
273,274,22-03-2020,20200322,South Africa,WC,ZA-WC,49.0,female,Travelled to Italy,travel


In [4]:
confirm_data.set_index('case_id',inplace= True)

In [5]:
confirm_data.drop(['YYYYMMDD','country','geo_subdivision'], axis = 1, inplace=True)

In [6]:
confirm_data.tail()

Unnamed: 0_level_0,date,province,age,gender,transmission_type,type
case_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
270,22-03-2020,WC,71.0,male,Travelled to Portugal,travel
271,22-03-2020,WC,47.0,female,with no international travel history,local
272,22-03-2020,WC,57.0,male,with no international travel history,local
273,22-03-2020,WC,54.0,male,Travelled to Italy,travel
274,22-03-2020,WC,49.0,female,Travelled to Italy,travel


In [7]:
confirm_data.describe()

Unnamed: 0,age
count,273.0
mean,41.787546
std,15.901544
min,2.0
25%,31.0
50%,40.0
75%,54.0
max,85.0


## Cases by Province

In [8]:
province_cases = confirm_data.groupby('province').count()[['date']]
province_cases.reset_index(inplace=True)
province_cases.rename(columns = {"date":"count"}, inplace=True)
province_cases = province_cases.append(pd.DataFrame({"province":["NW","NC"],"count":[0,0]}))
province_cases

Unnamed: 0,province,count
0,EC,2
1,FS,9
2,GP,132
3,KZN,35
4,LP,2
5,MP,5
6,WC,89
0,NW,0
1,NC,0


In [9]:
province_names = {"EC":"Eastern Cape",
                     "FS" : "Free State",
                     "GP" : "Gauteng",
                     "KZN" : "KwaZula-Natal",
                     "LP" : "Limpopo",
                     "MP" : "Mpumalanga",
                     "NW" : "North West",
                     "NC" : "Northern Cape",
                     "WC" : "Western Cape"}
province_cases['province'] = province_cases['province'].map(province_names)
province_cases

Unnamed: 0,province,count
0,Eastern Cape,2
1,Free State,9
2,Gauteng,132
3,KwaZula-Natal,35
4,Limpopo,2
5,Mpumalanga,5
6,Western Cape,89
0,North West,0
1,Northern Cape,0


In [7]:
province_cases = pd.read_csv('data/tot_provinces.csv')
province_cases

Unnamed: 0,province,total
0,Eastern Cape,2
1,Free State,13
2,Gauteng,207
3,KwaZula-Natal,60
4,Limpopo,4
5,Mpumalanga,9
6,Northern Cape,2
7,North West,4
8,Western Cape,100
9,Unknown,1


In [9]:
fig_province = px.pie(province_cases, values='total', names='province')
fig_province.update_layout(legend=dict(x=0, y=1))
# hov_template = ''
fig_province.update_traces(hoverinfo= 'label+percent', 
                  hovertemplate = '%{label}<br>%{value}',
                  textinfo='value+label',
                  textposition='inside')
fig_province.show()

**Save HTML**

In [11]:
plotly.offline.plot(fig_province, filename = 'tot_cases_per_province.html', auto_open=True, config = dict(displayModeBar=False))

'tot_cases_per_province.html'

## Cases by Gender

In [11]:
gender_cases = confirm_data.groupby('gender').count()[['date']]
gender_cases.reset_index(inplace=True)
gender_cases.rename(columns = {"date":"count"}, inplace=True)
gender_cases

Unnamed: 0,gender,count
0,female,100
1,male,169
2,not specified,5


In [12]:
upper_dict = {"female":"Female", "male":"Male", "not specified":"Not Specified"}
gender_cases['gender'] = gender_cases['gender'].map(upper_dict) 
gender_cases

Unnamed: 0,gender,count
0,Female,100
1,Male,169
2,Not Specified,5


In [13]:
fig_gender = px.pie(gender_cases, values='count', names='gender')
fig_gender.update_layout(legend=dict(x=0, y=1))
# hov_template = ''
fig_gender.update_traces(hoverinfo= 'label+percent', 
                  hovertemplate = '%{label}<br>%{value}',
                  textinfo='value+label',
                  textposition='inside')
fig_gender.show()

**Save HTML**

In [29]:
plotly.offline.plot(fig_gender, filename = 'tot_cases_per_gender.html', auto_open=True, config = dict(displayModeBar=False))

'tot_cases_per_gender.html'

### Cases Per Travel Type

In [14]:
# confirm_data.fillna('not specified', inplace=True)
travel_cases = confirm_data.groupby('type').count()[['date']]
travel_cases.reset_index(inplace=True)
travel_cases.rename(columns = {"date":"count"}, inplace=True)
travel_cases

Unnamed: 0,type,count
0,local,26
1,pending,49
2,travel,199


In [15]:
travel_cases['type'] = travel_cases['type'].apply(lambda x: x.capitalize()) 
travel_cases

Unnamed: 0,type,count
0,Local,26
1,Pending,49
2,Travel,199


In [16]:
fig_travel = px.pie(travel_cases, values='count', names='type')
fig_travel.update_layout(legend=dict(x=0, y=1))
# hov_template = ''
fig_travel.update_traces(hoverinfo= 'label+percent', 
                  hovertemplate = '%{label}<br>%{value}',
                  textinfo='value+label',
                  textposition='inside')
fig_travel.show()

**Save HTML**

In [28]:
plotly.offline.plot(fig_travel, filename = 'tot_cases_per_travel.html', auto_open=True, config = dict(displayModeBar=False))

'tot_cases_per_travel.html'

### Cases by Age Range

#### Age ranges
* 0 - 12
* 13 - 19
* 20 - 29
* 30 - 39
* 40 - 49
* 50 - 59
* 60 - 69
* 70 - 79
* 80 - ...

In [34]:
# confirm_data.head()

In [17]:
def get_age_range(age):
    if age == np.NaN:
        return "Not specified"
    if age < 13:
        return "0-12"
    elif age < 20:
        return "13-19"
    elif age < 30:
        return "20 - 29"
    elif age < 40:
        return "30 - 39"
    elif age < 50:
        return "40 - 49"
    elif age < 60:
        return "50 - 59"
    elif age < 70:
        return "60 - 69"
    elif age < 80:
        return "70 - 79"
    else:
        return "80 or over"
    
confirm_data['age_range'] = confirm_data['age'].apply(get_age_range)
confirm_data.head()

Unnamed: 0_level_0,date,province,age,gender,transmission_type,type,age_range
case_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,05-03-2020,KZN,38.0,male,Travelled to Italy,travel,30 - 39
2,07-03-2020,GP,39.0,female,Travelled to Italy,travel,30 - 39
3,08-03-2020,KZN,38.0,female,Travelled to Italy,travel,30 - 39
4,09-03-2020,KZN,38.0,male,Travelled to Italy,travel,30 - 39
5,09-03-2020,KZN,38.0,female,Travelled to Italy,travel,30 - 39


In [18]:
# confirm_data.fillna('not specified', inplace=True)
age_cases = confirm_data.groupby('age_range').count()[['date']]
age_cases.reset_index(inplace=True)
age_cases.rename(columns = {"date":"count"}, inplace=True)
age_cases

Unnamed: 0,age_range,count
0,0-12,10
1,13-19,3
2,20 - 29,45
3,30 - 39,78
4,40 - 49,43
5,50 - 59,59
6,60 - 69,22
7,70 - 79,12
8,80 or over,2


In [19]:
fig_age = px.pie(age_cases, values='count', names='age_range')
fig_age.update_layout(legend=dict(x=0, y=1))
# hov_template = ''
fig_age.update_traces(hoverinfo= 'label+percent', 
                  hovertemplate = '%{label}<br>%{value}',
                  textinfo='value+label',
                  textposition='inside')
fig_age.show()

In [38]:
plotly.offline.plot(fig_age, filename = 'tot_cases_per_age_pie.html', auto_open=True, config = dict(displayModeBar=False))

'tot_cases_per_age.html'

### Bar graph for age vs count
Pie chart is confusing

In [5]:
tot_age_data = pd.read_csv('data/tot_ages.csv')
tot_age_data

Unnamed: 0,age,count
0,Unknown,129
1,1-10,9
2,11-20,6
3,21-30,52
4,31-40,69
5,41-50,42
6,51-60,63
7,61-70,20
8,71-80,11
9,81-90,1


In [4]:
fig_age_bar = px.bar(tot_age_data, x='age', y='count')
fig_age_bar.update_layout(
    title="",
    xaxis_title="Age Range",
    yaxis_title="Total Cases",
    xaxis=dict(fixedrange=True),
    yaxis=dict(fixedrange=True),
)

fig_age_bar.update_traces(hovertemplate = '%{y}',)
fig_age_bar.show(config = dict(displayModeBar=False))

In [10]:
plotly.offline.plot(fig_age_bar, filename = 'tot_cases_per_age.html', auto_open=True, config = dict(displayModeBar=False))

'tot_cases_per_age.html'