# 10_01: COVID-19 data

In [1]:
import math
import collections
import dataclasses
import datetime

import numpy as np
import pandas as pd

import plotly.express as px

In [2]:
# Read the compressed COVID-19 table; columns use Arrow-backed dtypes and
# the 'date' column is parsed into timestamps.
covid19 = pd.read_csv(
    'covid19.csv.gz',
    parse_dates=['date'],
    dtype_backend='pyarrow',
)

In [3]:
# Summarize the loaded frame: column names, non-null counts, and dtypes.
covid19.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156710 entries, 0 to 156709
Data columns (total 15 columns):
 #   Column                      Non-Null Count   Dtype          
---  ------                      --------------   -----          
 0   country                     156710 non-null  string[pyarrow]
 1   date                        156710 non-null  datetime64[ns] 
 2   continent                   156710 non-null  string[pyarrow]
 3   population                  156710 non-null  int64[pyarrow] 
 4   life_expectancy             151672 non-null  double[pyarrow]
 5   gdp_per_capita              148190 non-null  int64[pyarrow] 
 6   population_density          156710 non-null  double[pyarrow]
 7   median_age                  156710 non-null  double[pyarrow]
 8   extreme_poverty             137966 non-null  double[pyarrow]
 9   human_development_index     156710 non-null  double[pyarrow]
 10  hospital_beds_per_thousand  122630 non-null  double[pyarrow]
 11  percent_fully_vaccinated  

In [4]:
# Keep only the rows for European countries dated 2020-12-31.
europe2020 = covid19.loc[
    covid19['continent'].eq('Europe')
    & covid19['date'].eq(pd.to_datetime('2020-12-31'))
]

In [5]:
# Preview the first rows of the Europe / 2020-12-31 subset.
europe2020.head()

Unnamed: 0,country,date,continent,population,life_expectancy,gdp_per_capita,population_density,median_age,extreme_poverty,human_development_index,hospital_beds_per_thousand,percent_fully_vaccinated,total_cases,total_deaths,total_excess
12284,Belgium,2020-12-31,Europe,11641813,76.6138,53287,384.47202,41.151,0.029965,0.942,5.58,1.7e-05,635180,19374,
37844,Czechia,2020-12-31,Europe,10673216,81.8787,41052,138.27768,42.645,0.059851,0.895,6.62,9e-06,661412,11367,
48068,France,2020-12-31,Europe,66277412,75.0859,45904,121.04203,41.567,0.056357,0.91,5.91,3e-06,2314290,63534,
49338,Germany,2020-12-31,Europe,84086228,75.0859,53969,240.66582,45.024,0.0,0.95,8.0,0.0,1620512,47009,
52320,Greece,2020-12-31,Europe,10412481,61.1409,31704,80.779526,45.391,0.567095,0.893,4.2,0.0,138467,4682,


In [6]:
# Choropleth of Europe on 2020-12-31, one region per row of europe2020.
fig = px.choropleth(
    data_frame=europe2020,
    # geography: rows are matched to map regions by country name
    locations='country',
    locationmode='country names',
    # color: total_deaths mapped onto the sequential Blues scale
    color='total_deaths',
    color_continuous_scale=px.colors.sequential.Blues,
    # labels
    hover_name='country',
    title='COVID-19 in Europe, Dec 31 2020',
)

# Fix the figure size, then limit the map window to
# longitude -25..50 and latitude 30..70.
fig.update_layout(height=500, width=800)
fig.update_geos(lataxis_range=[30, 70], lonaxis_range=[-25, 50])

fig.show()