In [16]:
import pandas as pd
import numpy as np
import plotly.express as px


In [2]:
data = pd.read_csv('owid-covid-data.csv')
data.head(2)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
0,AFG,Asia,Afghanistan,24-02-2020,1.0,1.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
1,AFG,Asia,Afghanistan,25-02-2020,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511


In [5]:
data['date']=pd.to_datetime(data['date'])

In [6]:
data['Day'] = data['date'].dt.day

In [7]:
data['year'] = data['date'].dt.year

In [11]:
data['total_cases'].value_counts()

1.0           1066
3.0            447
4.0            435
721.0          423
2.0            418
              ... 
15970949.0       1
7813.0           1
302480.0         1
53527.0          1
3400296.0        1
Name: total_cases, Length: 47894, dtype: int64

In [14]:
continent = data.groupby(['continent','year']).agg({'total_cases':'sum'}).reset_index(drop=False)

In [42]:
con_2020 = continent[continent['year']==2020]
con_2021 = continent[continent['year']==2021]

In [44]:
con_2021.rename({'year':'year_2021','total_cases':'total_cases_2021'},axis=1,inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [46]:
yearly = pd.merge(con_2021,con_2020,how = 'left',on = 'continent')

In [47]:
yearly

Unnamed: 0,continent,year_2021,total_cases_2021,year,total_cases
0,Africa,2021,564722900.0,2020,293258500.0
1,Asia,2021,4163363000.0,2020,2067622000.0
2,Europe,2021,5190629000.0,2020,1740126000.0
3,North America,2021,4712076000.0,2020,2027108000.0
4,Oceania,2021,5201765.0,2020,5656213.0
5,South America,2021,2797961000.0,2020,1505064000.0


In [51]:
px.bar(yearly, x="continent", y="total_cases",title = 'Total Cases in 2020')

In [80]:
fig = px.bar(yearly, x="continent", y="total_cases_2021",title = 'Total Cases in 2021',text = 'total_cases_2021')
fig.update_traces(texttemplate='%{text:.3s}', textposition='outside')

In [56]:
yearly.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6 entries, 0 to 5
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   continent         6 non-null      object 
 1   year_2021         6 non-null      int64  
 2   total_cases_2021  6 non-null      float64
 3   year              6 non-null      int64  
 4   total_cases       6 non-null      float64
dtypes: float64(2), int64(2), object(1)
memory usage: 288.0+ bytes


In [59]:
yearly['% change'] = ((yearly['total_cases_2021'] - yearly['total_cases'])/yearly['total_cases'])*100

In [62]:
yearly

Unnamed: 0,continent,year_2021,total_cases_2021,year,total_cases,% change
0,Africa,2021,564722900.0,2020,293258500.0,92.568274
1,Asia,2021,4163363000.0,2020,2067622000.0,101.359935
2,Europe,2021,5190629000.0,2020,1740126000.0,198.290478
3,North America,2021,4712076000.0,2020,2027108000.0,132.453156
4,Oceania,2021,5201765.0,2020,5656213.0,-8.034492
5,South America,2021,2797961000.0,2020,1505064000.0,85.903047


In [118]:
fig1 = px.bar(yearly, x="continent", y="% change",title = 'Percentage Change in Total Cases from 2020 to 2021',text = '% change')
fig1.update_traces(texttemplate='%{text:.3s}', textposition='outside')

In [90]:
Asia = data[data['continent']=='Asia']

In [94]:
Asia.columns

Index(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases',
       'new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
       'icu_patients_per_million', 'hosp_patients',
       'hosp_patients_per_million', 'weekly_icu_admissions',
       'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
       'weekly_hosp_admissions_per_million', 'new_tests', 'total_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'positive_rate', 'tests_per_case', 'tests_units', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated', 'new_vaccinations',
       'new_vaccinations_smoothed', 'total_vaccinations_per_hun

In [112]:
Asia[['male_smokers','location']]

Unnamed: 0,male_smokers,location
0,,Afghanistan
1,,Afghanistan
2,,Afghanistan
3,,Afghanistan
4,,Afghanistan
...,...,...
89654,29.2,Yemen
89655,29.2,Yemen
89656,29.2,Yemen
89657,29.2,Yemen


In [114]:
Asia.groupby(['location']).agg({'total_cases':'sum','gdp_per_capita':'max','population_density':'max','handwashing_facilities':'max','male_smokers':'max','female_smokers':'max'}).reset_index(drop=False).sort_values(by=['total_cases'],ascending=False)

Unnamed: 0,location,total_cases,gdp_per_capita,population_density,handwashing_facilities,male_smokers,female_smokers
11,India,3035546000.0,6426.674,450.419,59.55,20.6,1.9
44,Turkey,582460100.0,25129.341,104.914,,41.1,14.1
13,Iran,380857300.0,19082.62,49.831,,21.1,0.8
12,Indonesia,252624700.0,11188.744,145.725,64.204,76.1,2.8
14,Iraq,179041700.0,15663.986,88.125,94.576,,
31,Pakistan,159302900.0,5034.708,255.573,59.607,36.7,2.8
33,Philippines,156047100.0,7599.188,351.873,78.463,40.8,7.8
4,Bangladesh,155940600.0,3523.984,1265.036,34.808,44.7,1.0
15,Israel,147385300.0,33132.32,402.606,,35.4,15.4
35,Saudi Arabia,119998500.0,49045.411,15.322,,25.4,1.8
