# COVID DEATHS AND HOSPITAL BEDS

Imports and set magics:

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from pandas_datareader import wb

plt.rcParams.update({"axes.grid":True,"grid.color":"black","grid.alpha":"0.25","grid.linestyle":"--"})
plt.rcParams.update({'font.size': 14})

# autoreload modules when code is run
%load_ext autoreload
%autoreload 2

# user written modules
from dataproject import *


In this project I read and cleaned two datasets. The first contains datas about covid-19 death by country from Jenuary 2020 to March 2023. The second dataset contains datas about the number of hospital beds per 1000 people by country from 1960 to 2017.
The goal of the project is to merge the datasets in order to analyze an eventual correlation between the datas.

# Read and clean data

## First DataSet : covid death by country

The first dataset is contained in the data.csv file in the dataproject folder.

In [2]:
cov = pd.read_csv('data.csv') #reading the csv file
cov.head(15)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,2/28/23,3/1/23,3/2/23,3/3/23,3/4/23,3/5/23,3/6/23,3/7/23,3/8/23,3/9/23
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,7896,7896,7896,7896,7896,7896,7896,7896,7896,7896
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,3598,3598,3598,3598,3598,3598,3598,3598,3598,3598
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,6881,6881,6881,6881,6881,6881,6881,6881,6881,6881
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,165,165,165,165,165,165,165,165,165,165
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,1933,1933,1933,1933,1933,1933,1933,1933,1933,1933
5,,Antarctica,-71.9499,23.347,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,,Antigua and Barbuda,17.0608,-61.7964,0,0,0,0,0,0,...,146,146,146,146,146,146,146,146,146,146
7,,Argentina,-38.4161,-63.6167,0,0,0,0,0,0,...,130463,130463,130463,130463,130463,130463,130472,130472,130472,130472
8,,Armenia,40.0691,45.0382,0,0,0,0,0,0,...,8721,8721,8721,8721,8721,8721,8721,8721,8727,8727
9,Australian Capital Territory,Australia,-35.4735,149.0124,0,0,0,0,0,0,...,224,224,228,228,228,228,228,228,228,228


In [3]:
drop_these = ['Lat', 'Long','Province/State'] #dropping latitude, longitude and provinces columns
cov.drop(drop_these, axis=1, inplace=True) 
cov2 = cov.groupby(cov['Country/Region']).aggregate(np.sum) #summing the values in those countries who were divided into provinces in the initial dataset
cov.head(15)

Unnamed: 0,Country/Region,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,...,2/28/23,3/1/23,3/2/23,3/3/23,3/4/23,3/5/23,3/6/23,3/7/23,3/8/23,3/9/23
0,Afghanistan,0,0,0,0,0,0,0,0,0,...,7896,7896,7896,7896,7896,7896,7896,7896,7896,7896
1,Albania,0,0,0,0,0,0,0,0,0,...,3598,3598,3598,3598,3598,3598,3598,3598,3598,3598
2,Algeria,0,0,0,0,0,0,0,0,0,...,6881,6881,6881,6881,6881,6881,6881,6881,6881,6881
3,Andorra,0,0,0,0,0,0,0,0,0,...,165,165,165,165,165,165,165,165,165,165
4,Angola,0,0,0,0,0,0,0,0,0,...,1933,1933,1933,1933,1933,1933,1933,1933,1933,1933
5,Antarctica,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Antigua and Barbuda,0,0,0,0,0,0,0,0,0,...,146,146,146,146,146,146,146,146,146,146
7,Argentina,0,0,0,0,0,0,0,0,0,...,130463,130463,130463,130463,130463,130463,130472,130472,130472,130472
8,Armenia,0,0,0,0,0,0,0,0,0,...,8721,8721,8721,8721,8721,8721,8721,8721,8727,8727
9,Australia,0,0,0,0,0,0,0,0,0,...,224,224,228,228,228,228,228,228,228,228


In [4]:
cov2.columns = pd.to_datetime(cov2.columns.astype(str), format='%m/%d/%y') #converting columns's type from objects to datatime, this will help melting the dataset
cov2.head(10)

Unnamed: 0_level_0,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,2020-01-28,2020-01-29,2020-01-30,2020-01-31,...,2023-02-28,2023-03-01,2023-03-02,2023-03-03,2023-03-04,2023-03-05,2023-03-06,2023-03-07,2023-03-08,2023-03-09
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,7896,7896,7896,7896,7896,7896,7896,7896,7896,7896
Albania,0,0,0,0,0,0,0,0,0,0,...,3598,3598,3598,3598,3598,3598,3598,3598,3598,3598
Algeria,0,0,0,0,0,0,0,0,0,0,...,6881,6881,6881,6881,6881,6881,6881,6881,6881,6881
Andorra,0,0,0,0,0,0,0,0,0,0,...,165,165,165,165,165,165,165,165,165,165
Angola,0,0,0,0,0,0,0,0,0,0,...,1933,1933,1933,1933,1933,1933,1933,1933,1933,1933
Antarctica,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Antigua and Barbuda,0,0,0,0,0,0,0,0,0,0,...,146,146,146,146,146,146,146,146,146,146
Argentina,0,0,0,0,0,0,0,0,0,0,...,130463,130463,130463,130463,130463,130463,130472,130472,130472,130472
Armenia,0,0,0,0,0,0,0,0,0,0,...,8721,8721,8721,8721,8721,8721,8721,8721,8727,8727
Australia,0,0,0,0,0,0,0,0,0,0,...,19373,19373,19458,19459,19459,19459,19459,19459,19459,19574


In [5]:
cov2.reset_index(inplace=True) #resetting index
cov2.head(10)

Unnamed: 0,Country/Region,2020-01-22 00:00:00,2020-01-23 00:00:00,2020-01-24 00:00:00,2020-01-25 00:00:00,2020-01-26 00:00:00,2020-01-27 00:00:00,2020-01-28 00:00:00,2020-01-29 00:00:00,2020-01-30 00:00:00,...,2023-02-28 00:00:00,2023-03-01 00:00:00,2023-03-02 00:00:00,2023-03-03 00:00:00,2023-03-04 00:00:00,2023-03-05 00:00:00,2023-03-06 00:00:00,2023-03-07 00:00:00,2023-03-08 00:00:00,2023-03-09 00:00:00
0,Afghanistan,0,0,0,0,0,0,0,0,0,...,7896,7896,7896,7896,7896,7896,7896,7896,7896,7896
1,Albania,0,0,0,0,0,0,0,0,0,...,3598,3598,3598,3598,3598,3598,3598,3598,3598,3598
2,Algeria,0,0,0,0,0,0,0,0,0,...,6881,6881,6881,6881,6881,6881,6881,6881,6881,6881
3,Andorra,0,0,0,0,0,0,0,0,0,...,165,165,165,165,165,165,165,165,165,165
4,Angola,0,0,0,0,0,0,0,0,0,...,1933,1933,1933,1933,1933,1933,1933,1933,1933,1933
5,Antarctica,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Antigua and Barbuda,0,0,0,0,0,0,0,0,0,...,146,146,146,146,146,146,146,146,146,146
7,Argentina,0,0,0,0,0,0,0,0,0,...,130463,130463,130463,130463,130463,130463,130472,130472,130472,130472
8,Armenia,0,0,0,0,0,0,0,0,0,...,8721,8721,8721,8721,8721,8721,8721,8721,8727,8727
9,Australia,0,0,0,0,0,0,0,0,0,...,19373,19373,19458,19459,19459,19459,19459,19459,19459,19574


In [6]:
cov2.rename(columns = {'Country/Region':'country'}, inplace=True) #renaming country column
cov2.head(15)

Unnamed: 0,country,2020-01-22 00:00:00,2020-01-23 00:00:00,2020-01-24 00:00:00,2020-01-25 00:00:00,2020-01-26 00:00:00,2020-01-27 00:00:00,2020-01-28 00:00:00,2020-01-29 00:00:00,2020-01-30 00:00:00,...,2023-02-28 00:00:00,2023-03-01 00:00:00,2023-03-02 00:00:00,2023-03-03 00:00:00,2023-03-04 00:00:00,2023-03-05 00:00:00,2023-03-06 00:00:00,2023-03-07 00:00:00,2023-03-08 00:00:00,2023-03-09 00:00:00
0,Afghanistan,0,0,0,0,0,0,0,0,0,...,7896,7896,7896,7896,7896,7896,7896,7896,7896,7896
1,Albania,0,0,0,0,0,0,0,0,0,...,3598,3598,3598,3598,3598,3598,3598,3598,3598,3598
2,Algeria,0,0,0,0,0,0,0,0,0,...,6881,6881,6881,6881,6881,6881,6881,6881,6881,6881
3,Andorra,0,0,0,0,0,0,0,0,0,...,165,165,165,165,165,165,165,165,165,165
4,Angola,0,0,0,0,0,0,0,0,0,...,1933,1933,1933,1933,1933,1933,1933,1933,1933,1933
5,Antarctica,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Antigua and Barbuda,0,0,0,0,0,0,0,0,0,...,146,146,146,146,146,146,146,146,146,146
7,Argentina,0,0,0,0,0,0,0,0,0,...,130463,130463,130463,130463,130463,130463,130472,130472,130472,130472
8,Armenia,0,0,0,0,0,0,0,0,0,...,8721,8721,8721,8721,8721,8721,8721,8721,8727,8727
9,Australia,0,0,0,0,0,0,0,0,0,...,19373,19373,19458,19459,19459,19459,19459,19459,19459,19574


## Second DataSet : Hospital beds per 1000 people by country

The second dataset is downloaded from world bank. In order to download it I imported wb from pandas_datareader

In [7]:
wb_beds = wb.download(indicator='SH.MED.BEDS.ZS', start=2000, end=2017, country=['all']) #downloading the dataset
wb_beds = wb_beds.rename(columns = {'SH.MED.BEDS.ZS':'beds'}) #changing value's column name
wb_beds = wb_beds.reset_index() #reset index
wb_beds.head(10)

Unnamed: 0,country,year,beds
0,Africa Eastern and Southern,2017,
1,Africa Eastern and Southern,2016,
2,Africa Eastern and Southern,2015,
3,Africa Eastern and Southern,2014,
4,Africa Eastern and Southern,2013,
5,Africa Eastern and Southern,2012,
6,Africa Eastern and Southern,2011,
7,Africa Eastern and Southern,2010,
8,Africa Eastern and Southern,2009,
9,Africa Eastern and Southern,2008,


In [8]:
wb_beds.year = wb_beds.year.astype(int) # convert year's column type
wb_beds.country = wb_beds.country.astype('string') # convert country's column type
wb_beds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4788 entries, 0 to 4787
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   country  4788 non-null   string 
 1   year     4788 non-null   int64  
 2   beds     2612 non-null   float64
dtypes: float64(1), int64(1), string(1)
memory usage: 112.3 KB


In [9]:
wb_beds2 = wb_beds.pivot_table('beds','country','year') #shaping the dataset from long to wide
wb_beds2

year,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Afghanistan,0.300000,0.390000,0.390000,0.390000,0.39,0.42000,0.420000,0.420000,0.420000,0.420000,0.430000,0.440000,0.530000,0.530000,0.500000,0.500000,0.500000,0.390000
Africa Eastern and Southern,,,,,,,0.910851,,,,,,,,,,,
Albania,3.260000,3.260000,3.140000,3.070000,3.01,3.08000,3.120000,3.090000,,3.010000,2.990000,2.880000,2.880000,2.890000,,,,
Algeria,,,,,1.70,,,,,,,,,,,1.900000,,
Andorra,3.200000,2.590000,,3.300000,,2.70000,2.600000,2.600000,,2.500000,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Vietnam,2.340000,2.400000,1.400000,,2.80,2.34000,2.660000,,2.900000,3.100000,2.910000,,2.500000,3.180000,2.600000,,,
World,2.900104,2.768852,2.669515,2.891211,,2.63149,2.671389,2.676496,2.653164,2.607105,2.620025,2.684055,2.778869,2.816225,2.767624,2.693565,2.725949,2.881085
"Yemen, Rep.",0.590000,0.590000,0.590000,0.590000,0.59,0.61000,0.700000,0.700000,0.700000,0.700000,0.720000,0.700000,0.720000,0.670000,0.710000,0.710000,0.710000,0.710000
Zambia,,,,,2.00,,,,1.900000,,2.000000,,,,,,,


In [10]:
wb_beds3=wb_beds2.dropna(how='all') #dropping countries without datas
wb_beds3

year,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Afghanistan,0.300000,0.390000,0.390000,0.390000,0.39,0.42000,0.420000,0.420000,0.420000,0.420000,0.430000,0.440000,0.530000,0.530000,0.500000,0.500000,0.500000,0.390000
Africa Eastern and Southern,,,,,,,0.910851,,,,,,,,,,,
Albania,3.260000,3.260000,3.140000,3.070000,3.01,3.08000,3.120000,3.090000,,3.010000,2.990000,2.880000,2.880000,2.890000,,,,
Algeria,,,,,1.70,,,,,,,,,,,1.900000,,
Andorra,3.200000,2.590000,,3.300000,,2.70000,2.600000,2.600000,,2.500000,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Vietnam,2.340000,2.400000,1.400000,,2.80,2.34000,2.660000,,2.900000,3.100000,2.910000,,2.500000,3.180000,2.600000,,,
World,2.900104,2.768852,2.669515,2.891211,,2.63149,2.671389,2.676496,2.653164,2.607105,2.620025,2.684055,2.778869,2.816225,2.767624,2.693565,2.725949,2.881085
"Yemen, Rep.",0.590000,0.590000,0.590000,0.590000,0.59,0.61000,0.700000,0.700000,0.700000,0.700000,0.720000,0.700000,0.720000,0.670000,0.710000,0.710000,0.710000,0.710000
Zambia,,,,,2.00,,,,1.900000,,2.000000,,,,,,,


In [11]:
wb_beds3['mean'] = wb_beds.groupby(['country'])['beds'].mean() #adding mean's column
wb_beds3


year,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,mean
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Afghanistan,0.300000,0.390000,0.390000,0.390000,0.39,0.42000,0.420000,0.420000,0.420000,0.420000,0.430000,0.440000,0.530000,0.530000,0.500000,0.500000,0.500000,0.390000,0.432222
Africa Eastern and Southern,,,,,,,0.910851,,,,,,,,,,,,0.910851
Albania,3.260000,3.260000,3.140000,3.070000,3.01,3.08000,3.120000,3.090000,,3.010000,2.990000,2.880000,2.880000,2.890000,,,,,3.052308
Algeria,,,,,1.70,,,,,,,,,,,1.900000,,,1.800000
Andorra,3.200000,2.590000,,3.300000,,2.70000,2.600000,2.600000,,2.500000,,,,,,,,,2.784286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Vietnam,2.340000,2.400000,1.400000,,2.80,2.34000,2.660000,,2.900000,3.100000,2.910000,,2.500000,3.180000,2.600000,,,,2.594167
World,2.900104,2.768852,2.669515,2.891211,,2.63149,2.671389,2.676496,2.653164,2.607105,2.620025,2.684055,2.778869,2.816225,2.767624,2.693565,2.725949,2.881085,2.731572
"Yemen, Rep.",0.590000,0.590000,0.590000,0.590000,0.59,0.61000,0.700000,0.700000,0.700000,0.700000,0.720000,0.700000,0.720000,0.670000,0.710000,0.710000,0.710000,0.710000,0.667222
Zambia,,,,,2.00,,,,1.900000,,2.000000,,,,,,,,1.966667


## Explore each data set

In [12]:
cov_long = pd.melt(cov2,id_vars='country',var_name='day',value_name='death') #shaping first dataset from wide to long in order to interactive plotting
cov_long.head(20)

Unnamed: 0,country,day,death
0,Afghanistan,2020-01-22,0
1,Albania,2020-01-22,0
2,Algeria,2020-01-22,0
3,Andorra,2020-01-22,0
4,Angola,2020-01-22,0
5,Antarctica,2020-01-22,0
6,Antigua and Barbuda,2020-01-22,0
7,Argentina,2020-01-22,0
8,Armenia,2020-01-22,0
9,Australia,2020-01-22,0


In [13]:
#plotting, plot() function can be found in .py file
widgets.interact(plot, 
    cov = widgets.fixed(cov_long),
    country = widgets.Dropdown(description='country', 
                                    options=cov_long.country.unique(), 
                                    value='Afghanistan')
); 

interactive(children=(Dropdown(description='country', options=('Afghanistan', 'Albania', 'Algeria', 'Andorra',…

Changing country you can see progression of covid-19 deaths during the past 3 years

In [14]:
#plotting, plot2() function can be found in .py file
widgets.interact(plot2, 
    bed = widgets.fixed(wb_beds),
    country = widgets.Dropdown(description='country', 
                                    options=wb_beds.country.unique(), 
                                    value='Afghanistan')
); 

interactive(children=(Dropdown(description='country', index=49, options=('Africa Eastern and Southern', 'Afric…

Changing country you can see the amount of hospital beds per 1000 people from 1960 to 2017

# Merge data sets

In [15]:
wb_beds4 = wb_beds3.loc[:,'mean'] #dropping years' columns
wb_beds4

country
Afghanistan                    0.432222
Africa Eastern and Southern    0.910851
Albania                        3.052308
Algeria                        1.800000
Andorra                        2.784286
                                 ...   
Vietnam                        2.594167
World                          2.731572
Yemen, Rep.                    0.667222
Zambia                         1.966667
Zimbabwe                       2.350000
Name: mean, Length: 227, dtype: float64

In [16]:
inner = pd.merge(cov2,wb_beds4,how='inner',on=['country']) #inner merging
inner.head(10)

Unnamed: 0,country,2020-01-22 00:00:00,2020-01-23 00:00:00,2020-01-24 00:00:00,2020-01-25 00:00:00,2020-01-26 00:00:00,2020-01-27 00:00:00,2020-01-28 00:00:00,2020-01-29 00:00:00,2020-01-30 00:00:00,...,2023-03-01 00:00:00,2023-03-02 00:00:00,2023-03-03 00:00:00,2023-03-04 00:00:00,2023-03-05 00:00:00,2023-03-06 00:00:00,2023-03-07 00:00:00,2023-03-08 00:00:00,2023-03-09 00:00:00,mean
0,Afghanistan,0,0,0,0,0,0,0,0,0,...,7896,7896,7896,7896,7896,7896,7896,7896,7896,0.432222
1,Albania,0,0,0,0,0,0,0,0,0,...,3598,3598,3598,3598,3598,3598,3598,3598,3598,3.052308
2,Algeria,0,0,0,0,0,0,0,0,0,...,6881,6881,6881,6881,6881,6881,6881,6881,6881,1.8
3,Andorra,0,0,0,0,0,0,0,0,0,...,165,165,165,165,165,165,165,165,165,2.784286
4,Angola,0,0,0,0,0,0,0,0,0,...,1933,1933,1933,1933,1933,1933,1933,1933,1933,0.8
5,Antigua and Barbuda,0,0,0,0,0,0,0,0,0,...,146,146,146,146,146,146,146,146,146,2.624
6,Argentina,0,0,0,0,0,0,0,0,0,...,130463,130463,130463,130463,130463,130472,130472,130472,130472,4.61
7,Armenia,0,0,0,0,0,0,0,0,0,...,8721,8721,8721,8721,8721,8721,8721,8727,8727,4.319375
8,Australia,0,0,0,0,0,0,0,0,0,...,19373,19458,19459,19459,19459,19459,19459,19459,19574,3.86375
9,Austria,0,0,0,0,0,0,0,0,0,...,21891,21899,21907,21921,21922,21923,21941,21949,21970,7.671667


In [17]:
inner['tot_death'] = inner.iloc[:,1143] #adding more columns. 1143 is the last but one column whcih contains the total deaths
inner

Unnamed: 0,country,2020-01-22 00:00:00,2020-01-23 00:00:00,2020-01-24 00:00:00,2020-01-25 00:00:00,2020-01-26 00:00:00,2020-01-27 00:00:00,2020-01-28 00:00:00,2020-01-29 00:00:00,2020-01-30 00:00:00,...,2023-03-02 00:00:00,2023-03-03 00:00:00,2023-03-04 00:00:00,2023-03-05 00:00:00,2023-03-06 00:00:00,2023-03-07 00:00:00,2023-03-08 00:00:00,2023-03-09 00:00:00,mean,tot_death
0,Afghanistan,0,0,0,0,0,0,0,0,0,...,7896,7896,7896,7896,7896,7896,7896,7896,0.432222,7896
1,Albania,0,0,0,0,0,0,0,0,0,...,3598,3598,3598,3598,3598,3598,3598,3598,3.052308,3598
2,Algeria,0,0,0,0,0,0,0,0,0,...,6881,6881,6881,6881,6881,6881,6881,6881,1.800000,6881
3,Andorra,0,0,0,0,0,0,0,0,0,...,165,165,165,165,165,165,165,165,2.784286,165
4,Angola,0,0,0,0,0,0,0,0,0,...,1933,1933,1933,1933,1933,1933,1933,1933,0.800000,1933
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,Uzbekistan,0,0,0,0,0,0,0,0,0,...,1637,1637,1637,1637,1637,1637,1637,1637,4.817333,1637
162,Vanuatu,0,0,0,0,0,0,0,0,0,...,14,14,14,14,14,14,14,14,3.150000,14
163,Vietnam,0,0,0,0,0,0,0,0,0,...,43186,43186,43186,43186,43186,43186,43186,43186,2.594167,43186
164,Zambia,0,0,0,0,0,0,0,0,0,...,4057,4057,4057,4057,4057,4057,4057,4057,1.966667,4057


In [18]:
#plotting, the red_country() function can be found in the .py file
widgets.interact(red_country, 
    inner = widgets.fixed(inner),
    country = widgets.Dropdown(description='country', 
                                    options=inner.country.unique(), 
                                    value='Afghanistan')
); 

interactive(children=(Dropdown(description='country', options=('Afghanistan', 'Albania', 'Algeria', 'Andorra',…

This graph is hard to analyze, I'm going to drop some outliers considering those countries which have experienced a number of deaths between 2000 and 150000.

In [19]:
inner2= inner.loc[inner['tot_death']>2000] #dropping outliners
inner2= inner2.loc[inner['tot_death']<150000]

In [20]:
#plotting, the red_country() function can be found in the .py file
widgets.interact(red_country, 
    inner = widgets.fixed(inner2),
    country = widgets.Dropdown(description='country', 
                                    options=inner.country.unique(), 
                                    value='Afghanistan')
);

interactive(children=(Dropdown(description='country', options=('Afghanistan', 'Albania', 'Algeria', 'Andorra',…

**Without outliners, we can see a positive correlation between covid-19 deaths and hospital beds per 1000 people. This is not what I was expecting. Supposing that results can be more precise if we consider more factors, I'm going to download a third dataset containing datas about population density by country**

## Third DataSet : Population density by country (people per sq. km of land area)

This third dataset id downloaded from worldbank.

In [21]:
den = wb.download(indicator='EN.POP.DNST', start=1960, end=2020, country=['all']) #downloading dataset
den = den.rename(columns = {'EN.POP.DNST':'density'}) #rename values' column's name
den = den.reset_index() #reset index
den

Unnamed: 0,country,year,density
0,Africa Eastern and Southern,2020,46.150692
1,Africa Eastern and Southern,2019,44.946811
2,Africa Eastern and Southern,2018,43.768966
3,Africa Eastern and Southern,2017,42.623142
4,Africa Eastern and Southern,2016,41.520460
...,...,...,...
16221,Zimbabwe,1964,11.142127
16222,Zimbabwe,1963,10.799873
16223,Zimbabwe,1962,10.468600
16224,Zimbabwe,1961,10.148512


In [22]:
den.year = den.year.astype(int) # converting year type
den.country = den.country.astype('string') # converting country type
den.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16226 entries, 0 to 16225
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   country  16226 non-null  string 
 1   year     16226 non-null  int64  
 2   density  15579 non-null  float64
dtypes: float64(1), int64(1), string(1)
memory usage: 380.4 KB


In [23]:
den.sort_values(by=['country', 'year'], inplace=True) #sorting
den.head(5)

Unnamed: 0,country,year,density
3049,Afghanistan,1960,
3048,Afghanistan,1961,13.477056
3047,Afghanistan,1962,13.751356
3046,Afghanistan,1963,14.040239
3045,Afghanistan,1964,14.343888


In [24]:
den2 = den.pivot_table('density','country','year') #piovting from long to wide
den2

year,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,13.477056,13.751356,14.040239,14.343888,14.665298,14.999535,15.347393,15.711911,16.090166,16.486471,...,44.844851,46.711251,48.359028,50.160542,51.750915,53.104284,54.648541,56.248231,57.908252,59.752281
Africa Eastern and Southern,9.207577,9.459187,9.719622,9.992443,10.276307,10.565442,10.864498,11.177568,11.501553,11.836424,...,36.531770,37.219557,38.254417,39.316097,40.417819,41.520460,42.623142,43.768966,44.946811,46.150692
Africa Western and Central,10.978074,11.213633,11.459305,11.712711,11.975377,12.247555,12.526275,12.813873,13.111667,13.422177,...,40.514937,41.654561,42.804993,43.982443,45.180225,46.405991,47.661860,48.934069,50.222984,51.536679
Albania,60.576642,62.456898,64.329234,66.209307,68.058066,69.874927,71.737153,73.805547,75.974270,77.937190,...,106.029015,105.854051,105.660292,105.441752,105.135146,104.967190,104.870693,104.612263,104.167555,103.571131
Algeria,4.869804,4.945233,5.025507,5.113530,5.198408,5.295872,5.414997,5.538377,5.664353,5.792368,...,15.343212,15.644261,15.954985,16.273887,16.602626,16.936908,17.271629,17.603512,17.930316,18.243657
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
West Bank and Gaza,,,,,,,,,,,...,645.014286,661.129236,677.194020,693.255482,709.317608,725.429900,740.000831,758.984551,778.290033,797.885216
World,23.702333,24.122182,24.635755,25.152683,25.675460,26.217599,26.756801,27.310673,27.884725,28.468226,...,54.319804,54.940119,55.620147,56.299967,56.970835,57.636017,58.302608,58.945607,59.568080,60.170734
"Yemen, Rep.",10.695055,10.897184,11.099489,11.314664,11.548569,11.796939,12.061318,12.341428,12.640834,12.962113,...,48.252003,49.668335,51.108968,52.566062,54.011677,55.446336,56.886545,58.318679,59.750916,61.147501
Zambia,4.330770,4.470637,4.615856,4.765687,4.920733,5.081693,5.247969,5.419999,5.594650,5.759656,...,19.190215,19.834351,20.493921,21.170305,21.856939,22.555807,23.269151,23.992646,24.725214,25.461353


In [25]:
#plotting, plot3() function can be find in the .py file
widgets.interact(plot3, 
    den = widgets.fixed(den),
    country = widgets.Dropdown(description='country', 
                                    options=den.country.unique(), 
                                    value='Afghanistan')
); 

interactive(children=(Dropdown(description='country', options=('Afghanistan', 'Africa Eastern and Southern', '…

Changing country you can see progression of population density from 1960 to 2020

# merging

In [26]:
inner2 = pd.merge(den,wb_beds,how='inner',on=['country','year']) #merging
inner2.head(10)

Unnamed: 0,country,year,density,beds
0,Afghanistan,2000,29.963329,0.3
1,Afghanistan,2001,30.18664,0.39
2,Afghanistan,2002,32.197624,0.39
3,Afghanistan,2003,34.719547,0.39
4,Afghanistan,2004,36.112339,0.39
5,Afghanistan,2005,37.427274,0.42
6,Afghanistan,2006,39.009159,0.42
7,Afghanistan,2007,39.714979,0.42
8,Afghanistan,2008,40.518221,0.42
9,Afghanistan,2009,41.987193,0.42


In [27]:
#plotting, plot4() function can be find in the .py file
widgets.interact(plot4, 
    inner2 = widgets.fixed(inner2),
    country = widgets.Dropdown(description='country', 
                                    options=inner2.country.unique(), 
                                    value='Afghanistan')
); 

interactive(children=(Dropdown(description='country', options=('Afghanistan', 'Africa Eastern and Southern', '…

In [28]:
den2['last']=den2.loc[:,2020] #new column with density's values in 2020
inner3 = pd.merge(den2,wb_beds4,how='inner',on=['country']) #merging
inner3 = inner3.reset_index()
inner3

Unnamed: 0,country,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2013,2014,2015,2016,2017,2018,2019,2020,last,mean
0,Afghanistan,13.477056,13.751356,14.040239,14.343888,14.665298,14.999535,15.347393,15.711911,16.090166,...,48.359028,50.160542,51.750915,53.104284,54.648541,56.248231,57.908252,59.752281,59.752281,0.432222
1,Africa Eastern and Southern,9.207577,9.459187,9.719622,9.992443,10.276307,10.565442,10.864498,11.177568,11.501553,...,38.254417,39.316097,40.417819,41.520460,42.623142,43.768966,44.946811,46.150692,46.150692,0.910851
2,Albania,60.576642,62.456898,64.329234,66.209307,68.058066,69.874927,71.737153,73.805547,75.974270,...,105.660292,105.441752,105.135146,104.967190,104.870693,104.612263,104.167555,103.571131,103.571131,3.052308
3,Algeria,4.869804,4.945233,5.025507,5.113530,5.198408,5.295872,5.414997,5.538377,5.664353,...,15.954985,16.273887,16.602626,16.936908,17.271629,17.603512,17.930316,18.243657,18.243657,1.800000
4,Andorra,21.736170,23.434043,25.189362,27.000000,28.857447,30.948936,33.500000,36.338298,39.253191,...,151.844681,152.385106,152.651064,154.340426,157.100000,159.602128,162.431915,165.319149,165.319149,2.784286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
222,Vietnam,103.296513,106.098157,109.148444,112.166782,115.106691,117.939752,120.687468,123.338004,126.012673,...,288.388579,291.142141,294.181235,297.161430,300.038762,302.825616,305.577072,308.359102,308.359102,2.594167
223,World,23.702333,24.122182,24.635755,25.152683,25.675460,26.217599,26.756801,27.310673,27.884725,...,55.620147,56.299967,56.970835,57.636017,58.302608,58.945607,59.568080,60.170734,60.170734,2.731572
224,"Yemen, Rep.",10.695055,10.897184,11.099489,11.314664,11.548569,11.796939,12.061318,12.341428,12.640834,...,51.108968,52.566062,54.011677,55.446336,56.886545,58.318679,59.750916,61.147501,61.147501,0.667222
225,Zambia,4.330770,4.470637,4.615856,4.765687,4.920733,5.081693,5.247969,5.419999,5.594650,...,20.493921,21.170305,21.856939,22.555807,23.269151,23.992646,24.725214,25.461353,25.461353,1.966667


In [29]:
#plotting, red_country2() function can be find in the .py file
widgets.interact(red_country2, 
    inner = widgets.fixed(inner3),
    country = widgets.Dropdown(description='country', 
                                    options=inner3.country.unique(), 
                                    value='Afghanistan')
); 

interactive(children=(Dropdown(description='country', options=('Afghanistan', 'Africa Eastern and Southern', '…

Again, this graph can be affected and distorted by outliers.

In [30]:
inner4 = inner3.loc[inner3['last']>0] #dropping outliers
inner4 = inner4.loc[inner4['last']<5000]

In [31]:
#plotting, red_country2() function can be find in the .py file
widgets.interact(red_country2, 
    inner = widgets.fixed(inner4),
    country = widgets.Dropdown(description='country', 
                                    options=inner4.country.unique(), 
                                    value='Afghanistan')
); 

interactive(children=(Dropdown(description='country', options=('Afghanistan', 'Africa Eastern and Southern', '…

# Analysis

In this last graph we can see a slightly inverse correlation between density and hospital beds. This could be another factor to analyze to study correlation between deaths and the number of hospital beds.

# Conclusion

datasets links:

First dataset: https://github.com/CSSEGISandData/COVID-19/blob/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv

Second dataset: https://data.worldbank.org/indicator/SH.MED.BEDS.ZS

Third dataset: https://data.worldbank.org/indicator/EN.POP.DNST