In [1]:
import pandas as pd
import numpy as np
from plotly.offline import init_notebook_mode, iplot, plot
import plotly as py
import plotly.express as px
import plotly.graph_objs as go
import matplotlib.pyplot as plt
# Set up Plotly's offline notebook mode once, before any figure below is shown.
init_notebook_mode(connected=True)

In [2]:
# Load the dataset from a CSV file located in the notebook's working directory.
# NOTE(review): the loaded frame carries an 'Unnamed: 0' column (visible in the
# df.columns output) — presumably a saved index; confirm whether index_col=0 is wanted.
df = pd.read_csv('owid-covid-data.csv')

In [3]:
# Add a parsed 'datetime' column next to the raw 'date' strings
# (later cells still slice the raw strings, so 'date' is kept as is).
df = df.assign(datetime=pd.to_datetime(df['date']))

In [61]:
# List every column name of the loaded frame.
df.columns

Index(['Unnamed: 0', 'iso_code', 'continent', 'location', 'date',
       'total_cases', 'new_cases', 'new_cases_smoothed', 'total_deaths',
       'new_deaths', 'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
       'icu_patients_per_million', 'hosp_patients',
       'hosp_patients_per_million', 'weekly_icu_admissions',
       'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
       'weekly_hosp_admissions_per_million', 'total_tests', 'new_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'tests_per_case', 'positive_rate', 'tests_units', 'stringency_index',
       'population', 'population_density', 'median_age', 'aged_65_older',
       'aged_70_older', 'gdp_per_capita', 'extreme_p

In [4]:
# Series.replace returns a new Series and leaves the frame untouched, so the
# result has to be assigned back for the +inf -> NaN clean-up to take effect.
df['human_development_index'] = df['human_development_index'].replace(np.inf, np.nan)
# Echo the cleaned column as the cell output.
df['human_development_index']

0       NaN
1       NaN
2       NaN
3       NaN
4       NaN
         ..
58693   NaN
58694   NaN
58695   NaN
58696   NaN
58697   NaN
Name: human_development_index, Length: 58698, dtype: float64

In [5]:
# Distinct human_development_index values present in the frame (NaN included).
pd.unique(df['human_development_index'])

array([  nan, 0.498, 0.581, 0.785, 0.858, 0.863, 0.825, 0.755, 0.78 ,
       0.939, 0.908, 0.757, 0.417, 0.916, 0.515, 0.423, 0.608, 0.813,
       0.846, 0.807, 0.768, 0.808, 0.708, 0.693, 0.759, 0.8  , 0.853,
       0.612, 0.717, 0.367, 0.926, 0.944, 0.843, 0.752, 0.492, 0.556,
       0.457, 0.606, 0.747, 0.503, 0.654, 0.794, 0.777, 0.869, 0.888,
       0.936, 0.476, 0.715, 0.929, 0.736, 0.754, 0.696, 0.44 , 0.891,
       0.871, 0.463, 0.92 , 0.741, 0.901, 0.702, 0.922, 0.592, 0.459,
       0.46 , 0.455, 0.591, 0.87 , 0.772, 0.65 , 0.933, 0.617, 0.831,
       0.838, 0.694, 0.64 , 0.938, 0.798, 0.685, 0.935, 0.903, 0.88 ,
       0.732, 0.735, 0.909, 0.59 , 0.672, 0.582, 0.778, 0.803, 0.601,
       0.435, 0.706, 0.77 , 0.52 , 0.904, 0.847, 0.667, 0.7  , 0.519,
       0.774, 0.427, 0.878, 0.578, 0.814, 0.437, 0.79 , 0.477, 0.802,
       0.647, 0.354, 0.532, 0.658, 0.931, 0.953, 0.574, 0.917, 0.821,
       0.562, 0.789, 0.75 , 0.699, 0.544, 0.865, 0.686, 0.856, 0.811,
       0.816, 0.524,

In [6]:
# Distinct location names present in the frame.
pd.unique(df['location'])

array(['Aruba', 'Afghanistan', 'Angola', 'Anguilla', 'Albania', 'Andorra',
       'United Arab Emirates', 'Argentina', 'Armenia',
       'Antigua and Barbuda', 'Australia', 'Austria', 'Azerbaijan',
       'Burundi', 'Belgium', 'Benin', 'Bonaire Sint Eustatius and Saba',
       'Burkina Faso', 'Bangladesh', 'Bulgaria', 'Bahrain', 'Bahamas',
       'Bosnia and Herzegovina', 'Belarus', 'Belize', 'Bermuda',
       'Bolivia', 'Brazil', 'Barbados', 'Brunei', 'Bhutan', 'Botswana',
       'Central African Republic', 'Canada', 'Switzerland', 'Chile',
       'China', "Cote d'Ivoire", 'Cameroon',
       'Democratic Republic of Congo', 'Congo', 'Colombia', 'Comoros',
       'Cape Verde', 'Costa Rica', 'Cuba', 'Curacao', 'Cayman Islands',
       'Cyprus', 'Czech Republic', 'Germany', 'Djibouti', 'Dominica',
       'Denmark', 'Dominican Republic', 'Algeria', 'Ecuador', 'Egypt',
       'Eritrea', 'Western Sahara', 'Spain', 'Estonia', 'Ethiopia',
       'Finland', 'Fiji', 'Falkland Islands', 'France',

In [7]:
# Per-country summary: one row per (location, continent).
# life_expectancy and human_development_index are averaged rather than summed:
# a sum grows with the number of rows in the group and turns an all-missing
# group into 0, so the missing-value filter below could never remove it.
# String aggregator names are used instead of NumPy callables, which recent
# pandas versions deprecate inside agg().
df_grouped = (
    df.groupby(['location', 'continent'])
      .agg({
          'life_expectancy': 'mean',
          'total_cases': 'mean',
          'human_development_index': 'mean',
      })
      .reset_index()
)
# Keep only rows where every field of interest is present.
df_grouped = df_grouped.dropna(
    subset=['life_expectancy', 'total_cases', 'human_development_index', 'continent']
)

In [8]:
# Month-level aggregation: rows are bucketed by the first seven characters of
# the date string (e.g. '2020-04'). The helper column lives only on a temporary
# copy, so `df` itself ends up unchanged.
# NOTE(review): life_expectancy and human_development_index are summed over the
# rows of each month here; the hard-coded axis ranges of the faceted scatter that
# follows appear to be tuned to those summed values — confirm before changing.
df_grouped = (
    df.assign(year_month=df['date'].map(lambda stamp: stamp[:7]))
      .groupby(['location', 'year_month'])
      .agg({
          'life_expectancy': np.sum,
          'total_cases': np.mean,
          'human_development_index': np.sum,
      })
      .reset_index()
)
# Drop rows with a missing value in any of the aggregated fields or the location.
df_grouped = df_grouped.dropna(
    subset=['life_expectancy', 'total_cases', 'human_development_index', 'location']
)

In [9]:
# Restrict the monthly summary to April through November 2020.
df_grouped = df_grouped.loc[
    df_grouped['year_month'].isin([
        '2020-04', '2020-05', '2020-06', '2020-07',
        '2020-08', '2020-09', '2020-10', '2020-11',
    ])
]

In [10]:
# Animated scatter for four selected countries, one facet column per country,
# stepping through the months in `year_month`.
fig = px.scatter(
    df_grouped.loc[df_grouped['location'].isin(['Denmark', 'Iceland', 'Malta', 'Italy'])],
    x="life_expectancy",
    y="total_cases",
    animation_frame="year_month",
    animation_group="location",
    size="human_development_index",
    color="location",
    hover_name="location",
    facet_col="location",
    log_x=True,
    size_max=60,
    # Fixed axis ranges so the frames of the animation share one scale.
    range_x=[500, 100000],
    range_y=[10, 70000],
)
fig.show()

In [11]:
# Per-country summary: one row per (location, continent).
# total_cases is a running total in this dataset (daily counts live in the
# separate new_cases column), so adding its rows together counts the same cases
# many times over. The group maximum is used as the country's cumulative figure
# instead; unlike a sum it also stays NaN for groups with no data, so the
# missing-value filter below works for it.
df_grouped = (
    df.groupby(['location', 'continent'])
      .agg({
          'life_expectancy': 'mean',
          'total_cases': 'max',
          'human_development_index': 'mean',
      })
      .reset_index()
)
# Keep only rows where every plotted field is present.
df_grouped = df_grouped.dropna(
    subset=['life_expectancy', 'total_cases', 'human_development_index', 'continent']
)

In [12]:
# Static bubble chart of the per-country summary: cases (log scale) against
# life expectancy, bubble size from the development index, coloured by continent.
fig = px.scatter(
    df_grouped,
    x="total_cases",
    y="life_expectancy",
    size="human_development_index",
    color="continent",
    hover_name="location",
    log_x=True,
    size_max=60,
)
fig.show()

In [13]:
# Month-level summary: one row per (location, 'YYYY-MM').
# The month key is derived on a temporary copy, so `df` is never mutated and
# no clean-up `del` is needed.
# total_cases is a running total in this dataset (daily counts live in
# new_cases), so summing its rows inflates the value by roughly the number of
# rows in the month. The monthly maximum is used instead; it also stays NaN for
# months with no data, which lets the missing-value filter below remove them.
df_grouped = (
    df.assign(year_month=df['date'].str.slice(0, 7))
      .groupby(['location', 'year_month'])
      .agg({
          'life_expectancy': 'mean',
          'total_cases': 'max',
          'human_development_index': 'mean',
      })
      .reset_index()
)
# Keep only rows where every plotted field is present.
df_grouped = df_grouped.dropna(
    subset=['life_expectancy', 'total_cases', 'human_development_index', 'location']
)

In [14]:
# Keep April–November 2020 and drop the "World" / "International" rows in a
# single pass.
df_grouped = df_grouped[
    df_grouped['year_month'].isin([
        '2020-04', '2020-05', '2020-06', '2020-07',
        '2020-08', '2020-09', '2020-10', '2020-11',
    ])
    & ~df_grouped['location'].isin(["World", "International"])
]

In [15]:
# Animated bubble chart: monthly cases (log scale) against life expectancy,
# one bubble per location, one animation frame per month.
fig = px.scatter(
    df_grouped,
    x="total_cases",
    y="life_expectancy",
    animation_frame="year_month",
    animation_group="location",
    size="human_development_index",
    # "continent" cannot be used for colour here: df_grouped was grouped by
    # location and year_month only, so it has no continent column.
    color="location",
    hover_name="location",
    log_x=True,
    size_max=60,
)
# Set the frame duration used by the first animation button to 2000.
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000
# fig.show() renders the figure and returns None, so wrapping it in print()
# only added a stray "None" line to the cell output.
fig.show()

None


In [16]:
# Month-level summary of cases and deaths: one row per (location, 'YYYY-MM').
# The month key is derived on a temporary copy, so `df` is never mutated.
# total_cases and total_deaths are running totals in this dataset (daily counts
# live in new_cases / new_deaths), so summing their rows counts the same events
# repeatedly. The monthly maximum is used instead; it also stays NaN for months
# with no data, which lets the missing-value filter below remove them.
df_grouped = (
    df.assign(year_month=df['date'].str.slice(0, 7))
      .groupby(['location', 'year_month'])
      .agg({
          'total_cases': 'max',
          'total_deaths': 'max',
          'human_development_index': 'mean',
      })
      .reset_index()
)
# Keep only rows where every plotted field is present.
df_grouped = df_grouped.dropna(
    subset=['total_cases', 'total_deaths', 'human_development_index', 'location']
)

In [17]:
# Same narrowing as for the previous chart: April–November 2020 only, without
# the "World" and "International" rows.
df_grouped = (
    df_grouped
    .loc[lambda frame: frame['year_month'].isin([
        '2020-04', '2020-05', '2020-06', '2020-07',
        '2020-08', '2020-09', '2020-10', '2020-11',
    ])]
    .loc[lambda frame: ~frame['location'].isin(["World", "International"])]
)

In [None]:
# Animated bubble chart: monthly deaths (log scale) against cases,
# one bubble per location, one animation frame per month.
fig = px.scatter(
    df_grouped,
    x="total_deaths",
    y="total_cases",
    animation_frame="year_month",
    animation_group="location",
    size="human_development_index",
    # "continent" cannot be used for colour here: df_grouped was grouped by
    # location and year_month only, so it has no continent column.
    color="location",
    hover_name="location",
    log_x=True,
    size_max=60,
)
# Set the frame duration used by the first animation button to 2000.
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000
# Call fig.show() directly: it returns None, so print(fig.show()) would only
# add a stray "None" line to the cell output.
fig.show()

In [86]:
# Month-level summary: one row per (location, 'YYYY-MM').
# The month key is derived on a temporary copy, so `df` is never mutated.
# life_expectancy and human_development_index are averaged rather than summed:
# a sum grows with the number of rows in the month (so the plotted value would
# drift with month length) and turns an all-missing group into 0, which the
# missing-value filter below could never remove.
# total_deaths is a running total in this dataset (daily counts live in
# new_deaths), so its monthly maximum is used instead of a sum of its rows.
df_grouped = (
    df.assign(year_month=df['date'].str.slice(0, 7))
      .groupby(['location', 'year_month'])
      .agg({
          'life_expectancy': 'mean',
          'human_development_index': 'mean',
          'total_deaths': 'max',
      })
      .reset_index()
)
# Keep only rows where every plotted field is present.
df_grouped = df_grouped.dropna(
    subset=['life_expectancy', 'human_development_index', 'total_deaths', 'location']
)

In [87]:
# Narrow to April–November 2020 and exclude the "World" / "International" rows.
df_grouped = df_grouped.loc[
    ~df_grouped['location'].isin(["World", "International"])
    & df_grouped['year_month'].isin([
        '2020-04', '2020-05', '2020-06', '2020-07',
        '2020-08', '2020-09', '2020-10', '2020-11',
    ])
]

In [88]:
# Animated bubble chart: development index (log scale) against life expectancy,
# bubble size from total_deaths, one animation frame per month.
fig = px.scatter(
    df_grouped,
    x="human_development_index",
    y="life_expectancy",
    animation_frame="year_month",
    animation_group="location",
    size="total_deaths",
    # "continent" cannot be used for colour here: df_grouped was grouped by
    # location and year_month only, so it has no continent column.
    color="location",
    hover_name="location",
    log_x=True,
    size_max=60,
)
# Set the frame duration used by the first animation button to 2000.
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000
# fig.show() renders the figure and returns None, so wrapping it in print()
# only added a stray "None" line to the cell output.
fig.show()

None
