# Project - COVID-19 Analysis [EDA & Visualization]

- You can find the full project at: https://ourworldindata.org/coronavirus
- Daily-updated dataset link: https://covid.ourworldindata.org/data/owid-covid-data.csv
- We'll be exploring the dataset from: https://covid.ourworldindata.org
- Dashboard Example: https://coronavirus.jhu.edu/map.html

### Importing libraries & data


In [1]:
import numpy as np
import pandas as pd 
import matplotlib .pyplot as plt 
import seaborn as sns
%matplotlib inline
sns.set()
import plotly.express as px 

In [4]:
# Load the Our World in Data COVID-19 dataset directly from its published CSV.
OWID_CSV_URL = r'https://covid.ourworldindata.org/data/owid-covid-data.csv'
df = pd.read_csv(OWID_CSV_URL)

### EDA 

In [5]:
# Preview the first five rows to see the column layout and early-date values.
df.head(n=5)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2020-01-03,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
1,AFG,Asia,Afghanistan,2020-01-04,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
2,AFG,Asia,Afghanistan,2020-01-05,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
3,AFG,Asia,Afghanistan,2020-01-06,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
4,AFG,Asia,Afghanistan,2020-01-07,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,


In [6]:
# Preview the last five rows to see the most recent dates in the table.
df.tail(n=5)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
358641,ZWE,Africa,Zimbabwe,2023-11-18,265890.0,0.0,0.0,5725.0,0.0,0.0,...,30.7,36.791,1.7,61.49,0.571,16320539.0,,,,
358642,ZWE,Africa,Zimbabwe,2023-11-19,265890.0,0.0,0.0,5725.0,0.0,0.0,...,30.7,36.791,1.7,61.49,0.571,16320539.0,,,,
358643,ZWE,Africa,Zimbabwe,2023-11-20,265890.0,0.0,0.0,5725.0,0.0,0.0,...,30.7,36.791,1.7,61.49,0.571,16320539.0,,,,
358644,ZWE,Africa,Zimbabwe,2023-11-21,265890.0,0.0,0.0,5725.0,0.0,0.0,...,30.7,36.791,1.7,61.49,0.571,16320539.0,,,,
358645,ZWE,Africa,Zimbabwe,2023-11-22,265890.0,0.0,0.0,5725.0,0.0,0.0,...,30.7,36.791,1.7,61.49,0.571,16320539.0,,,,


In [7]:
# List every column name available in the loaded table.
df.columns

Index(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases',
       'new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
       'icu_patients_per_million', 'hosp_patients',
       'hosp_patients_per_million', 'weekly_icu_admissions',
       'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
       'weekly_hosp_admissions_per_million', 'total_tests', 'new_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'positive_rate', 'tests_per_case', 'tests_units', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated', 'total_boosters',
       'new_vaccinations', 'new_vaccinations_smoothed',
       't

In [8]:
# (number of rows, number of columns) of the loaded table.
df.shape

(358646, 67)

In [9]:
# Per-column non-null counts and dtypes; useful for spotting sparse columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 358646 entries, 0 to 358645
Data columns (total 67 columns):
 #   Column                                      Non-Null Count   Dtype  
---  ------                                      --------------   -----  
 0   iso_code                                    358646 non-null  object 
 1   continent                                   341606 non-null  object 
 2   location                                    358646 non-null  object 
 3   date                                        358646 non-null  object 
 4   total_cases                                 320628 non-null  float64
 5   new_cases                                   349032 non-null  float64
 6   new_cases_smoothed                          347773 non-null  float64
 7   total_deaths                                299086 non-null  float64
 8   new_deaths                                  349089 non-null  float64
 9   new_deaths_smoothed                         347859 non-null  float64
 

In [10]:
# Summary statistics of the numeric columns, transposed so that each column
# becomes one row (easier to read with this many columns).
numeric_summary = df.describe()
numeric_summary.transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
total_cases,320628.0,6.862419e+06,4.155251e+07,1.0000,8355.000000,7.271900e+04,7.844830e+05,7.721658e+08
new_cases,349032.0,9.375506e+03,1.089256e+05,0.0000,0.000000,1.000000e+00,2.440000e+02,8.401963e+06
new_cases_smoothed,347773.0,9.409256e+03,9.331850e+04,0.0000,0.143000,2.285700e+01,4.737140e+02,6.402036e+06
total_deaths,299086.0,8.695437e+04,4.439683e+05,1.0000,129.000000,1.349000e+03,1.201900e+04,6.981250e+06
new_deaths,349089.0,8.351554e+01,6.290777e+02,0.0000,0.000000,0.000000e+00,2.000000e+00,5.788900e+04
...,...,...,...,...,...,...,...,...
population,358646.0,1.282212e+08,6.599107e+08,47.0000,449002.000000,5.882259e+06,2.830170e+07,7.975105e+09
excess_mortality_cumulative_absolute,12211.0,5.312168e+04,1.494480e+05,-37726.0980,121.649992,5.968999e+03,3.770731e+04,1.289776e+06
excess_mortality_cumulative,12211.0,9.760347e+00,1.229389e+01,-44.2300,1.430000,8.100000e+00,1.538000e+01,7.662000e+01
excess_mortality,12211.0,1.133770e+01,2.516691e+01,-95.9200,-1.590000,5.720000e+00,1.630000e+01,3.776300e+02


In [11]:
# dtype of each column; note that `date` is loaded as a plain object (string) column.
df.dtypes

iso_code                                    object
continent                                   object
location                                    object
date                                        object
total_cases                                float64
                                            ...   
population                                 float64
excess_mortality_cumulative_absolute       float64
excess_mortality_cumulative                float64
excess_mortality                           float64
excess_mortality_cumulative_per_million    float64
Length: 67, dtype: object

### Exploring World Data

In [12]:
# Distinct values of the `location` column, in order of first appearance.
pd.unique(df['location'])

array(['Afghanistan', 'Africa', 'Albania', 'Algeria', 'American Samoa',
       'Andorra', 'Angola', 'Anguilla', 'Antigua and Barbuda',
       'Argentina', 'Armenia', 'Aruba', 'Asia', 'Australia', 'Austria',
       'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados',
       'Belarus', 'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan',
       'Bolivia', 'Bonaire Sint Eustatius and Saba',
       'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'British Virgin Islands', 'Brunei', 'Bulgaria', 'Burkina Faso',
       'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde',
       'Cayman Islands', 'Central African Republic', 'Chad', 'Chile',
       'China', 'Colombia', 'Comoros', 'Congo', 'Cook Islands',
       'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Curacao',
       'Cyprus', 'Czechia', 'Democratic Republic of Congo', 'Denmark',
       'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt',
       'El Salvador', 'England', 'Equatorial Guinea', 'Eritrea',

In [13]:
# Number of distinct locations. This counts aggregate entries such as
# 'Africa', 'Asia' and 'World' too, so it is not a count of countries.
df['location'].nunique(dropna=True)

255

In [14]:
# Keep only the rows whose location is the 'World' aggregate.
df_world = df.loc[df['location'].eq('World')]
# Display-only: the renumbered frame is shown but not stored, so df_world
# itself keeps the row labels it had in df.
df_world.reset_index()

Unnamed: 0,index,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,352966,OWID_WRL,,World,2020-01-03,,0.0,,,0.0,...,34.635,60.13,2.705,72.58,0.737,7.975105e+09,,,,
1,352967,OWID_WRL,,World,2020-01-04,3.0,3.0,,,0.0,...,34.635,60.13,2.705,72.58,0.737,7.975105e+09,,,,
2,352968,OWID_WRL,,World,2020-01-05,2.0,0.0,,3.0,3.0,...,34.635,60.13,2.705,72.58,0.737,7.975105e+09,,,,
3,352969,OWID_WRL,,World,2020-01-06,5.0,3.0,,3.0,0.0,...,34.635,60.13,2.705,72.58,0.737,7.975105e+09,,,,
4,352970,OWID_WRL,,World,2020-01-07,5.0,0.0,,3.0,0.0,...,34.635,60.13,2.705,72.58,0.737,7.975105e+09,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1415,354381,OWID_WRL,,World,2023-11-18,772164882.0,2748.0,19522.429,6981218.0,15.0,...,34.635,60.13,2.705,72.58,0.737,7.975105e+09,,,,
1416,354382,OWID_WRL,,World,2023-11-19,772165753.0,871.0,18297.429,6981250.0,32.0,...,34.635,60.13,2.705,72.58,0.737,7.975105e+09,,,,
1417,354383,OWID_WRL,,World,2023-11-20,772165753.0,0.0,12107.143,6981250.0,0.0,...,34.635,60.13,2.705,72.58,0.737,7.975105e+09,,,,
1418,354384,OWID_WRL,,World,2023-11-21,772165753.0,0.0,4359.000,6981250.0,0.0,...,34.635,60.13,2.705,72.58,0.737,7.975105e+09,,,,


On which date did the world record its highest number of new deaths?

In [15]:
# Preview the first five world-level rows.
df_world.head(n=5)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
352966,OWID_WRL,,World,2020-01-03,,0.0,,,0.0,,...,34.635,60.13,2.705,72.58,0.737,7975105000.0,,,,
352967,OWID_WRL,,World,2020-01-04,3.0,3.0,,,0.0,,...,34.635,60.13,2.705,72.58,0.737,7975105000.0,,,,
352968,OWID_WRL,,World,2020-01-05,2.0,0.0,,3.0,3.0,,...,34.635,60.13,2.705,72.58,0.737,7975105000.0,,,,
352969,OWID_WRL,,World,2020-01-06,5.0,3.0,,3.0,0.0,,...,34.635,60.13,2.705,72.58,0.737,7975105000.0,,,,
352970,OWID_WRL,,World,2020-01-07,5.0,0.0,,3.0,0.0,,...,34.635,60.13,2.705,72.58,0.737,7975105000.0,,,,


In [16]:
# Row(s) on which the world-level `new_deaths` value reaches its maximum.
peak_new_deaths = df_world['new_deaths'].max()
df_world[df_world['new_deaths'].eq(peak_new_deaths)]

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
354091,OWID_WRL,,World,2023-02-01,752663406.0,176142.0,208955.857,6839545.0,57889.0,9392.429,...,34.635,60.13,2.705,72.58,0.737,7975105000.0,,,,


In [17]:
# Date(s) on which the world-level `new_deaths` value reaches its maximum.
# A single .loc call selects the rows and the column together, instead of
# building the full-width filtered frame first and indexing it a second time.
df_world.loc[df_world['new_deaths'] == df_world['new_deaths'].max(), 'date']


354091    2023-02-01
Name: date, dtype: object

In [18]:
# World-level daily new deaths over time. The title and axis labels let the
# figure stand on its own when the notebook is skimmed.
px.line(
    df_world,
    x='date',
    y='new_deaths',
    title='World: daily new COVID-19 deaths',
    labels={'date': 'Date', 'new_deaths': 'New deaths'},
)

In [19]:
# Look up China's row on the date of the world's peak daily deaths.
# The date is derived from df_world rather than typed in by hand, so the cell
# stays correct when the daily-updated dataset moves the peak.
# idxmax() gives the row label of the first maximum of `new_deaths`.
peak_date = df_world.loc[df_world['new_deaths'].idxmax(), 'date']
df[(df['date'] == peak_date) & (df['location'] == 'China')]

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
63607,CHN,Asia,China,2023-02-01,98527660.0,55271.0,24618.286,113926.0,44047.0,6292.429,...,48.4,,4.34,76.91,0.761,1425887000.0,,,,


### Calculating the share of the world population with confirmed cases

In [21]:
# Largest `total_cases` value recorded for the 'World' rows.
df_world.loc[:, 'total_cases'].max()

772165753.0

In [24]:
# Largest `population` value recorded for the 'World' rows.
df_world.loc[:, 'population'].max()

7975105024.0

In [26]:
# Share of the world population recorded as a confirmed case:
# maximum world `total_cases` divided by maximum world `population`.
world_total_cases = df_world['total_cases'].max()
world_population = df_world['population'].max()
df_world_ratio = world_total_cases / world_population
df_world_ratio

0.09682201684821348

In [28]:
# Print the share rounded to two decimals. This is a fraction of 1
# (0.10 is roughly 10 %), not a percentage value.
print(f'{df_world_ratio:.2f}')

0.10
