# Plotting vaccines vs covid cases

## Setup

In [1]:
import pandas as pd 
import numpy as np 
import plotly.express as px
from LeafPlotlyTools import *
import datetime
from tqdm import tqdm
import math

In [2]:
grapher = Graph()

## Loading and preparing the data

In [3]:
df = pd.read_csv('data/country_covid_data.csv')

In [4]:
def interpret_date(value):
    year, month, day = value.split('-')

    return datetime.date(int(year), int(month), int(day))

In [5]:
df['date'] = df['date'].apply(interpret_date)

df = df.sort_values('date')

# df = df[df['date'] >= datetime.date(2020, 12, 8)]

df

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,gdp_per_cap_percentile
0,ARG,South America,Argentina,2020-01-01,,,,,,,...,0.6,191.032,5.50,16.2,27.7,,5.00,76.67,0.845,2.0
1,MEX,North America,Mexico,2020-01-01,,,,,,,...,2.5,152.783,13.06,6.9,21.4,87.847,1.38,75.05,0.779,2.0
2,ARG,South America,Argentina,2020-01-02,,,,,,,...,0.6,191.032,5.50,16.2,27.7,,5.00,76.67,0.845,2.0
3,MEX,North America,Mexico,2020-01-02,,,,,,,...,2.5,152.783,13.06,6.9,21.4,87.847,1.38,75.05,0.779,2.0
4,ARG,South America,Argentina,2020-01-03,,,,,,,...,0.6,191.032,5.50,16.2,27.7,,5.00,76.67,0.845,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80221,AND,Europe,Andorra,2021-04-29,13198.0,50.0,36.571,125.0,0.0,0.286,...,,109.135,7.97,29.0,37.8,,,83.73,0.868,
80222,ZAF,Africa,South Africa,2021-04-29,1579536.0,1086.0,1169.714,54331.0,46.0,48.000,...,18.9,200.380,5.52,8.1,33.2,43.993,2.32,64.13,0.709,2.0
80223,SOM,Africa,Somalia,2021-04-29,13915.0,0.0,65.143,713.0,0.0,3.429,...,,365.769,6.05,,,9.831,0.90,57.40,,
80225,SVN,Europe,Slovenia,2021-04-29,239339.0,918.0,646.857,4243.0,7.0,8.000,...,,153.493,7.25,20.1,25.0,,4.50,81.32,0.917,1.0


In [6]:
def days_since_start(date):
    start = datetime.date(2020, 12, 8)
    difference = date - start

    return difference.days

In [7]:
df['num_days'] = df['date'].apply(days_since_start)

In [8]:
def gdp_stringifyer(value):
    option_dict = {
        1: "High",
        2: "Upper middle",
        3: "Lower middle",
        4: "Low"
    }

    try: return option_dict[int(value)]
    except: return np.nan

In [9]:
df['Country wealth'] = df['gdp_per_cap_percentile'].apply(gdp_stringifyer)

df.dropna(subset = ['Country wealth'])

df

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,gdp_per_cap_percentile,num_days,Country wealth
0,ARG,South America,Argentina,2020-01-01,,,,,,,...,5.50,16.2,27.7,,5.00,76.67,0.845,2.0,-342,Upper middle
1,MEX,North America,Mexico,2020-01-01,,,,,,,...,13.06,6.9,21.4,87.847,1.38,75.05,0.779,2.0,-342,Upper middle
2,ARG,South America,Argentina,2020-01-02,,,,,,,...,5.50,16.2,27.7,,5.00,76.67,0.845,2.0,-341,Upper middle
3,MEX,North America,Mexico,2020-01-02,,,,,,,...,13.06,6.9,21.4,87.847,1.38,75.05,0.779,2.0,-341,Upper middle
4,ARG,South America,Argentina,2020-01-03,,,,,,,...,5.50,16.2,27.7,,5.00,76.67,0.845,2.0,-340,Upper middle
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80221,AND,Europe,Andorra,2021-04-29,13198.0,50.0,36.571,125.0,0.0,0.286,...,7.97,29.0,37.8,,,83.73,0.868,,142,
80222,ZAF,Africa,South Africa,2021-04-29,1579536.0,1086.0,1169.714,54331.0,46.0,48.000,...,5.52,8.1,33.2,43.993,2.32,64.13,0.709,2.0,142,Upper middle
80223,SOM,Africa,Somalia,2021-04-29,13915.0,0.0,65.143,713.0,0.0,3.429,...,6.05,,,9.831,0.90,57.40,,,142,
80225,SVN,Europe,Slovenia,2021-04-29,239339.0,918.0,646.857,4243.0,7.0,8.000,...,7.25,20.1,25.0,,4.50,81.32,0.917,1.0,142,High


In [10]:
plot_df = df[['num_days',
'total_vaccinations_per_hundred',
'new_cases_per_million',
'location',
'population',
'Country wealth'
]]#.dropna()

In [11]:
# plot_df['total_vaccinations_per_hundred'] = plot_df['total_vaccinations_per_hundred'].fillna(0)

# plot_df = plot_df.dropna()

In [12]:
plot_df#[plot_df['Country wealth']!='High']

Unnamed: 0,num_days,total_vaccinations_per_hundred,new_cases_per_million,location,population,Country wealth
0,-342,,,Argentina,45195777.0,Upper middle
1,-342,,,Mexico,128932753.0,Upper middle
2,-341,,,Argentina,45195777.0,Upper middle
3,-341,,,Mexico,128932753.0,Upper middle
4,-340,,,Argentina,45195777.0,Upper middle
...,...,...,...,...,...,...
80221,142,,647.124,Andorra,77265.0,
80222,142,0.52,18.311,South Africa,59308690.0,Upper middle
80223,142,,0.000,Somalia,15893219.0,
80225,142,29.38,441.573,Slovenia,2078932.0,High


In [13]:
plot_df['Country wealth'].drop_duplicates()

0      Upper middle
12     Lower middle
54              NaN
80             High
115             Low
Name: Country wealth, dtype: object

In [51]:
new_data = []
countries = plot_df['location'].sort_values().drop_duplicates()
for country in tqdm(countries):
    df_temp = plot_df[plot_df['location'] == country].sort_values('num_days')
    data_start_date = df_temp['num_days'].min()
    data_end_date = df_temp['num_days'].max()
    day = data_start_date.copy()
    vaccines_started = False
    while day <= data_end_date:
        try:
            row = df_temp[df_temp['num_days']==day].iloc[0] # I know there is a better way to do this but cba to find it

            if row['total_vaccinations_per_hundred'] > 0:
                vaccines_started = True
                
            if  math.isnan(row['total_vaccinations_per_hundred']):
                if vaccines_started:
                    row['total_vaccinations_per_hundred'] = last_row['total_vaccinations_per_hundred']
                else:
                    row['total_vaccinations_per_hundred'] = 0

                

        except:
            row = last_row.copy()
            row['num_days'] = day

        new_data.append(list(row))

        day += 1
        last_row = row.copy()
        
plot_df_filled = pd.DataFrame(new_data, columns = list(plot_df)).sort_values('num_days')

plot_df_filled

 19%|█▉        | 35/181 [00:04<00:19,  7.57it/s]

## Creating the figure

Figuring out the colour order

In [46]:
color_dict = {
    "High": "#73a2ab",
    "Upper middle": "#ccdee0",
    "Lower middle": "#e2b6c2",
    "Low": "#bc586e"
}

# label_order = plot_df_filled['Country wealth'].drop_duplicates()

# colour_list = []
# for i in label_order:
#     colour_list.append(color_dict[i])

# colour_list

In [47]:
fig = px.scatter(plot_df_filled, 
            x="total_vaccinations_per_hundred", 
            y="new_cases_per_million", 
            labels={
                     "total_vaccinations_per_hundred": "Total vaccinations per hundred",
                     "new_cases_per_million": "New cases per million",
                     "num_days": "Days since first vaccine",
                 }, 
            color_discrete_map=color_dict,
            animation_frame="num_days", 
            animation_group="location",
           size="population", 
           color="Country wealth", 
           hover_name="location",
        #    log_y=True, 
           size_max=200, 
        #    size_min=5, 
           range_x=[-5,plot_df['total_vaccinations_per_hundred'].max()], 
           range_y=[0,1100],
           )

fig

In [48]:
plot_df_filled['new_cases_per_million'].max()

3216.5690000000004

In [49]:
# Using the style_graph function and passing in varibles
fig = grapher.style_graph(fig,
                        title=False,
                        subtitle='',
                        x_axis_title="Total vaccinations per hundred",
                        y_axis_title='New cases per million',
                        data_source="",
                        data_source_position=[0,-0.16], # [x, y]
                        watermark=None, # "left" or "right"
                        watermark_position = [0,-0.16], # [x, y]
                        legend_position = [0,1], # [x, y]
                        ytozero = True, # Y-xis to zero True or False
                        xhovermode = False, # x-axis hover mode. This means y value hover text always appears regardless if your mouse is over the value or not
                        xaxis_tickangle = 0 # xaxis text angle in degrees of rotation clockwise
                       )
# fig.update_layout(
#     margin=dict(t=30, b=150),
# )

# fig.update_layout(yaxis_range=[0, 4])

# fig.update_layout(
#     yaxis = dict(
#         tickmode = 'array',
#         tickvals = [10,50, 100, 500, 1000, 3000, 5000],
#     )
# )

# fig.update_yaxes(type="log")

fig.update_layout(annotations=[])

fig.update_layout(legend=dict(
    orientation="v",
    yanchor="top",
    y=1,
    xanchor="right",
    x=1
))

# Showing figure and passing in the config setting from the grapher object. 
# Config settings determine the elements of the interactive options in the top right bar
fig.show(config = grapher.get_config())

In [50]:
grapher.save_graph_html(fig, 'figures/animated_plot.html')