In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go 
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import json

In [2]:
px

<module 'plotly.express' from 'C:\\Users\\HP\\anaconda\\lib\\site-packages\\plotly\\express\\__init__.py'>

In [3]:
#import your data: 
base_url1 = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
base_url2 = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
base_url3 = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'

confirm_df = pd.read_csv(base_url1)
death_df=pd.read_csv(base_url2)
recv_df=pd.read_csv(base_url3)


In [4]:
#confirm_df.head()

In [5]:
#death_df.head()

In [6]:
#recv_df.head()

In [7]:
#drop col
confirm_df =confirm_df.drop(columns=['Lat','Long','Province/State'])
death_df = death_df.drop(columns=['Lat','Long','Province/State'])
recv_df =recv_df.drop(columns=['Lat','Long','Province/State'])


In [8]:
#1> Group by country:
confirm_df = confirm_df.groupby(by='Country/Region').aggregate(np.sum).T
confirm_df.index.name='Date'
confirm_df = confirm_df.reset_index()
death_df = death_df.groupby(by='Country/Region').aggregate(np.sum).T
death_df.index.name='Date'
death_df = death_df.reset_index()
recv_df = recv_df.groupby(by='Country/Region').aggregate(np.sum).T
recv_df.index.name='Date'
recv_df = recv_df.reset_index()


In [9]:
#confirm_df

In [10]:
#format data to long form:
confirm_melt= confirm_df.melt(id_vars='Date').copy()
confirm_melt.rename(columns={'value':'Confirmed'}, inplace=True)

death_melt= death_df.melt(id_vars='Date').copy()
death_melt.rename(columns={'value':'Deaths'}, inplace=True)

recv_melt= recv_df.melt(id_vars='Date').copy()
recv_melt.rename(columns={'value':'Recovered'}, inplace=True)

#confirm_melt
#death_melt
#recv_melt

In [11]:
#2> Fix the date format:
confirm_melt['Date']= pd.to_datetime(confirm_melt['Date'])
confirm_melt['Date'] = confirm_melt['Date'].dt.strftime('%d/%m/%Y')

death_melt['Date']= pd.to_datetime(death_melt['Date'])
death_melt['Date'] = death_melt['Date'].dt.strftime('%d/%m/%Y')

recv_melt['Date']= pd.to_datetime(recv_melt['Date'])
recv_melt['Date'] = recv_melt['Date'].dt.strftime('%d/%m/%Y')

In [12]:
#confirm_melt

In [13]:
max_date = confirm_melt['Date'].max()
max_date

'31/12/2020'

In [14]:
#3> Total of Confirmed, deaths, recovered, and active cases:
total_confirm = confirm_melt[confirm_melt['Date'] == max_date]
total_confirm

Unnamed: 0,Date,Country/Region,Confirmed
344,31/12/2020,Afghanistan,52330
961,31/12/2020,Albania,58316
1578,31/12/2020,Algeria,99610
2195,31/12/2020,Andorra,8049
2812,31/12/2020,Angola,17553
...,...,...,...
117574,31/12/2020,Vietnam,1465
118191,31/12/2020,West Bank and Gaza,138004
118808,31/12/2020,Yemen,2099
119425,31/12/2020,Zambia,20725


In [15]:
totalConfir = total_confirm['Confirmed'].sum()
print(totalConfir)

83637784


In [16]:
total_death = death_melt[death_melt['Date'] == max_date]
total_death

Unnamed: 0,Date,Country/Region,Deaths
344,31/12/2020,Afghanistan,2189
961,31/12/2020,Albania,1181
1578,31/12/2020,Algeria,2756
2195,31/12/2020,Andorra,84
2812,31/12/2020,Angola,405
...,...,...,...
117574,31/12/2020,Vietnam,35
118191,31/12/2020,West Bank and Gaza,1400
118808,31/12/2020,Yemen,610
119425,31/12/2020,Zambia,388


In [17]:
totalDeaths = total_death['Deaths'].sum()
print(totalDeaths)

1880709


In [18]:
total_recv = recv_melt[recv_melt['Date'] == max_date]
totalRecv = total_recv['Recovered'].sum()
print(totalRecv)

47055807


In [19]:
#tong ca nhiem hien tai

totalActive = totalConfir - totalDeaths - totalRecv
print(totalActive)

34701268


In [20]:
#4> create a Keys Performence Indicator(KPI) of total confirmed, 
# deaths, recovered, and active cases:
fig = go.Figure()
fig.add_trace(go.Indicator(
    mode = 'number', value= int(totalConfir),
    number= {"valueformat":"0,f"}, 
    title={"text":"Total Confirmed Cases"}, 
    domain= {'row':0, 'column':0}
))
fig.add_trace(go.Indicator(
    mode='number', value= int(totalDeaths), 
    number= {"valueformat":"0,f"}, 
    title= {'text':'Total Deaths Cases'},
    domain= {'row':0, 'column':1}
))
fig.add_trace(go.Indicator(
    mode = 'number', value= int(totalRecv),
    number= {"valueformat":"0,f"},
    title={"text":"Total Recovered Cases"}, 
    domain= {'row':1, 'column':0}
))
fig.add_trace(go.Indicator(
    mode = 'number', value= int(totalActive),
    number= {"valueformat":"0,f"},
    title={"text":"Total Active Cases"}, 
    domain= {'row':1, 'column':1}
))

fig.update_layout(grid = {'rows': 2, 'columns': 2, 'pattern': 'independent'})
fig.show()

In [21]:
#5> Plot a scatter of the total confirmed, total deaths, and death race in different countries
A = total_confirm.copy()
A['Deaths'] = total_death['Deaths']
A['DeathRace'] = (A['Deaths']/ A['Confirmed'] * 100).round(2)
A

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,DeathRace
344,31/12/2020,Afghanistan,52330,2189,4.18
961,31/12/2020,Albania,58316,1181,2.03
1578,31/12/2020,Algeria,99610,2756,2.77
2195,31/12/2020,Andorra,8049,84,1.04
2812,31/12/2020,Angola,17553,405,2.31
...,...,...,...,...,...
117574,31/12/2020,Vietnam,1465,35,2.39
118191,31/12/2020,West Bank and Gaza,138004,1400,1.01
118808,31/12/2020,Yemen,2099,610,29.06
119425,31/12/2020,Zambia,20725,388,1.87


In [22]:
fig = px.scatter(
    A.sort_values(by= 'Confirmed', ascending=False).head(20), 
    x = 'Confirmed', y='Deaths', 
    color='Country/Region', 
    size='DeathRace'
)
fig.show()

In [23]:
#6>create a graph of the combination(line + bar chart) of the total number of deaths and the level of deaths by country.
B = A.copy()
B = B.sort_values(by= 'Deaths', ascending=False).head(20)
B

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,DeathRace
112021,31/12/2020,US,20153428,352001,1.75
14535,31/12/2020,Brazil,7675973,194949,2.54
49087,31/12/2020,India,10286709,148994,1.45
71299,31/12/2020,Mexico,1426094,125807,8.82
85490,31/12/2020,Peru,1015137,93070,9.17
52789,31/12/2020,Italy,2107166,74159,3.52
114489,31/12/2020,United Kingdom,2496235,73622,2.95
38598,31/12/2020,France,2677660,64758,2.42
89192,31/12/2020,Russia,3127347,56271,1.8
50321,31/12/2020,Iran,1225142,55223,4.51


In [24]:
fig = make_subplots(specs=[[{'secondary_y' : True}]])
fig.add_trace(go.Bar(
    x = B['Country/Region'], y = B['Deaths'], text= B['Deaths'], name = 'Deaths', textposition = 'auto'
), secondary_y=False)

fig.add_trace(go.Scatter(
    x = B['Country/Region'], y = B['DeathRace'], text= B['DeathRace'], name = 'Death Race', mode = 'markers + lines'
), secondary_y=True)


fig.show()