In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Load the CSV into a DataFrame; the path is relative to the
# notebook's working directory.
data = pd.read_csv(filepath_or_buffer='owid-covid-data.csv')

In [3]:
# Narrow `data` to a fixed list of columns (order preserved).
# A KeyError is raised if any listed column is absent from the CSV.
data = data.loc[:, [
    # identifiers
    'iso_code', 'continent', 'location',
    # case / death figures
    'new_cases', 'new_deaths',
    'new_cases_per_million', 'new_deaths_per_million',
    # hospital load and testing
    'icu_patients_per_million', 'hosp_patients_per_million',
    'new_tests_per_thousand', 'positive_rate',
    # vaccination figures
    'total_vaccinations', 'new_vaccinations',
    'people_vaccinated_per_hundred',
    'people_fully_vaccinated_per_hundred',
    'new_vaccinations_smoothed_per_million',
    # per-location attributes
    'population', 'population_density', 'median_age',
    'aged_65_older', 'aged_70_older',
    'gdp_per_capita', 'extreme_poverty',
    'cardiovasc_death_rate', 'diabetes_prevalence',
    'female_smokers', 'male_smokers',
    'handwashing_facilities', 'hospital_beds_per_thousand',
    'life_expectancy', 'human_development_index',
]]

In [4]:
# Sum the `new_*` columns into one row per (iso_code, location, continent).
# - min_count=1 keeps a group's sum as NaN when every value in it is
#   missing, instead of reporting 0.
# - NOTE(review): groupby's default dropna=True excludes rows whose key is
#   NaN, so entries without a continent presumably do not appear in df_sum.
# - The summed columns are then relabelled by replacing the substring
#   'new' with 'total' in every column name.
df_sum = (
    data
    .groupby(['iso_code', 'location', 'continent'])[[
        'new_cases',
        'new_deaths',
        'new_cases_per_million',
        'new_deaths_per_million',
        'new_vaccinations',
    ]]
    .sum(min_count=1)
    .reset_index()
    .rename(columns=lambda col_name: col_name.replace('new', 'total'))
)
df_sum

Unnamed: 0,iso_code,location,continent,total_cases,total_deaths,total_cases_per_million,total_deaths_per_million,total_vaccinations
0,ABW,Aruba,North America,,,,,103093.0
1,AFG,Afghanistan,Asia,156363.0,7284.0,3925.231,182.831,6874.0
2,AGO,Angola,Africa,64583.0,1718.0,1903.207,50.564,
3,AIA,Anguilla,North America,,,,,1421.0
4,ALB,Albania,Europe,187363.0,2944.0,65216.607,1024.684,1269877.0
...,...,...,...,...,...,...,...,...
219,WSM,Samoa,Oceania,3.0,,14.988,,
220,YEM,Yemen,Asia,9831.0,1901.0,322.451,62.371,
221,ZAF,South Africa,Africa,2923054.0,89251.0,48683.488,1486.486,9175952.0
222,ZMB,Zambia,Africa,209815.0,3662.0,11089.220,193.577,553717.0


In [5]:
# Build a table of the per-location attribute columns: select the
# identifier and attribute columns, drop fully identical rows, and
# renumber the index from 0.
# NOTE(review): duplicates are removed across ALL selected columns, so a
# location whose attributes vary between rows would keep several rows here
# -- confirm each iso_code appears once before relying on this as a lookup.
df_unique = (
    data
    .loc[:, [
        'iso_code', 'location', 'continent',
        'population', 'population_density',
        'median_age', 'aged_65_older', 'aged_70_older',
        'gdp_per_capita', 'extreme_poverty',
        'cardiovasc_death_rate', 'diabetes_prevalence',
        'female_smokers', 'male_smokers',
        'handwashing_facilities', 'hospital_beds_per_thousand',
        'life_expectancy', 'human_development_index',
    ]]
    .drop_duplicates()
    .reset_index(drop=True)
)

In [6]:
# Display the de-duplicated attribute table as the cell output.
df_unique

Unnamed: 0,iso_code,location,continent,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
0,AFG,Afghanistan,Asia,3.983543e+07,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.500,64.83,0.511
1,OWID_AFR,Africa,,1.373486e+09,,,,,,,,,,,,,,
2,ALB,Albania,Europe,2.872934e+06,104.871,38.0,13.188,8.643,11803.431,1.1,304.195,10.08,7.100,51.200,,2.890,78.57,0.795
3,DZA,Algeria,Africa,4.461663e+07,17.348,29.1,6.211,3.857,13913.839,0.5,278.364,6.73,0.700,30.400,83.741,1.900,76.88,0.748
4,AND,Andorra,Europe,7.735400e+04,163.755,,,,,,109.135,7.97,29.000,37.800,,,83.73,0.868
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
232,WLF,Wallis and Futuna,Oceania,1.109400e+04,,,,,,,,,,,,,79.94,
233,OWID_WRL,World,,7.874966e+09,58.045,30.9,8.696,5.355,15469.207,10.0,233.070,8.51,6.434,34.635,60.130,2.705,72.58,0.737
234,YEM,Yemen,Asia,3.049064e+07,53.508,20.3,2.922,1.583,1479.147,18.8,495.003,5.35,7.600,29.200,49.542,0.700,66.12,0.470
235,ZMB,Zambia,Africa,1.892066e+07,22.995,17.7,2.480,1.542,3689.251,57.5,234.499,3.94,3.100,24.700,13.938,2.000,63.89,0.584


In [7]:
# Join the summed figures with the attribute table on the three identifier
# columns. This is an inner join (the pd.merge default), so only key
# combinations present in both frames are kept.
df = df_sum.merge(
    df_unique,
    how='inner',
    on=['iso_code', 'location', 'continent'],
)

In [8]:
# Write the merged table to disk; index=False leaves the row index out of
# the file.
df.to_csv(path_or_buf='covid_df.csv', index=False)

In [9]:
# List the column names of the merged frame.
df.columns

Index(['iso_code', 'location', 'continent', 'total_cases', 'total_deaths',
       'total_cases_per_million', 'total_deaths_per_million',
       'total_vaccinations', 'population', 'population_density', 'median_age',
       'aged_65_older', 'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
       'cardiovasc_death_rate', 'diabetes_prevalence', 'female_smokers',
       'male_smokers', 'handwashing_facilities', 'hospital_beds_per_thousand',
       'life_expectancy', 'human_development_index'],
      dtype='object')

In [10]:
# Scatter plot of gdp_per_capita (x) against total_deaths_per_million (y),
# one point per row of df, coloured by continent, with the location name
# added to the hover tooltip.
px.scatter(
    data_frame=df,
    x='gdp_per_capita',
    y='total_deaths_per_million',
    color='continent',
    hover_data=['location'],
)

In [11]:
# Pairwise correlation matrix of the numeric columns.
# df still holds the string columns iso_code / location / continent. Older
# pandas dropped them silently, but pandas >= 2.0 defaults to
# numeric_only=False and raises when it meets them. Selecting the numeric
# columns explicitly gives the same matrix on every pandas version.
df.select_dtypes(include='number').corr()

Unnamed: 0,total_cases,total_deaths,total_cases_per_million,total_deaths_per_million,total_vaccinations,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
total_cases,1.0,0.934057,0.174822,0.212464,0.36611,0.512368,-0.030605,0.160332,0.154883,0.148134,0.128784,-0.127453,-0.122584,0.031206,0.09274,-0.077176,0.105829,0.002238,0.128173,0.168201
total_deaths,0.934057,1.0,0.156999,0.330795,0.314907,0.435482,-0.042651,0.156947,0.143235,0.138055,0.092471,-0.157093,-0.125241,0.055372,0.093874,-0.066347,0.163881,0.000313,0.129136,0.168342
total_cases_per_million,0.174822,0.156999,1.0,0.690915,-0.080792,-0.103222,0.038617,0.633754,0.570406,0.56595,0.448004,-0.535287,-0.255387,0.032692,0.56465,0.172107,0.609856,0.357363,0.583977,0.647801
total_deaths_per_million,0.212464,0.330795,0.690915,1.0,-0.053281,-0.071585,-0.052514,0.533792,0.522953,0.517476,0.18329,-0.468993,-0.146859,0.056988,0.542774,0.161592,0.553514,0.293848,0.445225,0.502736
total_vaccinations,0.36611,0.314907,-0.080792,-0.053281,1.0,0.925241,-0.020541,0.097777,0.05177,0.035962,-0.006356,-0.06416,-0.033702,0.050063,-0.084589,0.066587,0.062989,0.036388,0.054703,0.041009
population,0.512368,0.435482,-0.103222,-0.071585,0.925241,1.0,-0.028066,0.032107,-0.001784,-0.01384,-0.051248,-0.037456,-0.008687,-0.004276,-0.124605,0.021783,0.033667,-0.04182,-0.024398,-0.01698
population_density,-0.030605,-0.042651,0.038617,-0.052514,-0.020541,-0.028066,1.0,0.148996,0.063253,0.031729,0.409115,-0.033639,-0.176599,0.011056,-0.046626,0.000888,0.091626,0.309588,0.231069,0.179682
median_age,0.160332,0.156947,0.633754,0.533792,0.097777,0.032107,0.148996,1.0,0.913454,0.89765,0.643436,-0.69729,-0.340794,0.137148,0.635594,0.174119,0.781097,0.649012,0.84554,0.897594
aged_65_older,0.154883,0.143235,0.570406,0.522953,0.05177,-0.001784,0.063253,0.913454,1.0,0.994416,0.492855,-0.574047,-0.34043,-0.063584,0.733061,0.084517,0.628682,0.635134,0.729937,0.780512
aged_70_older,0.148134,0.138055,0.56595,0.517476,0.035962,-0.01384,0.031729,0.89765,0.994416,1.0,0.477828,-0.557994,-0.354783,-0.091368,0.733614,0.083907,0.624506,0.635943,0.716108,0.768762
