# Plotting vaccines vs covid cases

## Setup

In [51]:
import pandas as pd 
import numpy as np 
import plotly.express as px
from LeafPlotlyTools import *
import datetime
from tqdm import tqdm
import math

In [52]:
grapher = Graph()

## Loading and preparing the data

In [53]:
df = pd.read_csv('data/country_covid_data.csv')

In [54]:
def interpret_date(value):
    year, month, day = value.split('-')

    return datetime.date(int(year), int(month), int(day))

In [55]:
df['date'] = df['date'].apply(interpret_date)

df = df.sort_values('date')

# df = df[df['date'] >= datetime.date(2020, 12, 8)]

df

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality,gdp_per_cap_percentile
0,MEX,North America,Mexico,2020-01-01,,,,,,,...,152.783,13.06,6.9,21.4,87.847,1.38,75.05,0.779,,2.0
1,ARG,South America,Argentina,2020-01-01,,,,,,,...,191.032,5.50,16.2,27.7,,5.00,76.67,0.845,,2.0
2,ARG,South America,Argentina,2020-01-02,,,,,,,...,191.032,5.50,16.2,27.7,,5.00,76.67,0.845,,2.0
3,MEX,North America,Mexico,2020-01-02,,,,,,,...,152.783,13.06,6.9,21.4,87.847,1.38,75.05,0.779,,2.0
4,ARG,South America,Argentina,2020-01-03,,,,,,,...,191.032,5.50,16.2,27.7,,5.00,76.67,0.845,,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90252,TCD,Africa,Chad,2021-06-13,4942.0,0.0,0.429,174.0,0.0,0.000,...,280.995,6.10,,,5.818,,54.24,0.398,,4.0
90253,CHL,South America,Chile,2021-06-13,1476473.0,7481.0,6931.000,30707.0,128.0,110.000,...,127.993,8.46,34.2,41.5,,2.11,80.18,0.851,,2.0
90254,AND,Europe,Andorra,2021-06-13,13813.0,0.0,7.857,127.0,0.0,0.000,...,109.135,7.97,29.0,37.8,,,83.73,0.868,,
90256,COL,South America,Colombia,2021-06-13,3753224.0,28519.0,26022.429,95778.0,586.0,545.286,...,124.240,7.44,4.7,13.5,65.386,1.71,77.29,0.767,,2.0


In [56]:
def days_since_start(date):
    start = datetime.date(2020, 12, 8)
    difference = date - start

    return difference.days

In [57]:
df['num_days'] = df['date'].apply(days_since_start)

In [58]:
def gdp_stringifyer(value):
    option_dict = {
        1: "High",
        2: "Upper middle",
        3: "Lower middle",
        4: "Low"
    }

    try: return option_dict[int(value)]
    except: return np.nan

In [59]:
df['Country wealth'] = df['gdp_per_cap_percentile'].apply(gdp_stringifyer)

df.dropna(subset = ['Country wealth'])

df

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality,gdp_per_cap_percentile,num_days,Country wealth
0,MEX,North America,Mexico,2020-01-01,,,,,,,...,6.9,21.4,87.847,1.38,75.05,0.779,,2.0,-342,Upper middle
1,ARG,South America,Argentina,2020-01-01,,,,,,,...,16.2,27.7,,5.00,76.67,0.845,,2.0,-342,Upper middle
2,ARG,South America,Argentina,2020-01-02,,,,,,,...,16.2,27.7,,5.00,76.67,0.845,,2.0,-341,Upper middle
3,MEX,North America,Mexico,2020-01-02,,,,,,,...,6.9,21.4,87.847,1.38,75.05,0.779,,2.0,-341,Upper middle
4,ARG,South America,Argentina,2020-01-03,,,,,,,...,16.2,27.7,,5.00,76.67,0.845,,2.0,-340,Upper middle
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90252,TCD,Africa,Chad,2021-06-13,4942.0,0.0,0.429,174.0,0.0,0.000,...,,,5.818,,54.24,0.398,,4.0,187,Low
90253,CHL,South America,Chile,2021-06-13,1476473.0,7481.0,6931.000,30707.0,128.0,110.000,...,34.2,41.5,,2.11,80.18,0.851,,2.0,187,Upper middle
90254,AND,Europe,Andorra,2021-06-13,13813.0,0.0,7.857,127.0,0.0,0.000,...,29.0,37.8,,,83.73,0.868,,,187,
90256,COL,South America,Colombia,2021-06-13,3753224.0,28519.0,26022.429,95778.0,586.0,545.286,...,4.7,13.5,65.386,1.71,77.29,0.767,,2.0,187,Upper middle


In [60]:
list(df)

['iso_code',
 'continent',
 'location',
 'date',
 'total_cases',
 'new_cases',
 'new_cases_smoothed',
 'total_deaths',
 'new_deaths',
 'new_deaths_smoothed',
 'total_cases_per_million',
 'new_cases_per_million',
 'new_cases_smoothed_per_million',
 'total_deaths_per_million',
 'new_deaths_per_million',
 'new_deaths_smoothed_per_million',
 'reproduction_rate',
 'icu_patients',
 'icu_patients_per_million',
 'hosp_patients',
 'hosp_patients_per_million',
 'weekly_icu_admissions',
 'weekly_icu_admissions_per_million',
 'weekly_hosp_admissions',
 'weekly_hosp_admissions_per_million',
 'new_tests',
 'total_tests',
 'total_tests_per_thousand',
 'new_tests_per_thousand',
 'new_tests_smoothed',
 'new_tests_smoothed_per_thousand',
 'positive_rate',
 'tests_per_case',
 'tests_units',
 'total_vaccinations',
 'people_vaccinated',
 'people_fully_vaccinated',
 'new_vaccinations',
 'new_vaccinations_smoothed',
 'total_vaccinations_per_hundred',
 'people_vaccinated_per_hundred',
 'people_fully_vaccinate

In [61]:
# plot_df['total_vaccinations_per_hundred'] = plot_df['total_vaccinations_per_hundred'].fillna(0)

# plot_df = plot_df.dropna()

In [62]:
plot_df = df.copy()#[plot_df['Country wealth']!='High']

In [63]:
plot_df['Country wealth'].drop_duplicates()

0      Upper middle
12     Lower middle
13             High
59              NaN
141             Low
Name: Country wealth, dtype: object

In [64]:
plot_df = df[['num_days',
'total_vaccinations_per_hundred',
'new_cases_per_million',
'location',
'population',
'Country wealth'
]]#.dropna()

This code goes through the countries and fills in a any days where the vaccines might not be reported and puts the number at 0 if it is nan

In [65]:
new_data = []
countries = plot_df['location'].sort_values().drop_duplicates()
data_start_date = plot_df['num_days'].min()
data_end_date = plot_df['num_days'].max()
for country in tqdm(countries):
    df_temp = plot_df[plot_df['location'] == country].sort_values('num_days')

    day = data_start_date.copy()
    vaccines_started = False

    # print(day)
    while day <= data_end_date:
        try:
            row = df_temp[df_temp['num_days']==day].iloc[0] # I know there is a better way to do this but cba to find it

            if row['total_vaccinations_per_hundred'] > 0:
                vaccines_started = True
            
            if math.isnan(row['total_vaccinations_per_hundred']):
                if vaccines_started:
                    row['total_vaccinations_per_hundred'] = last_row['total_vaccinations_per_hundred']
                else:
                    row['total_vaccinations_per_hundred'] = 0

        except:
            if vaccines_started == False: 
                day+=1
                continue 
            row = last_row.copy()
            row['num_days'] = day

        
        
        new_data.append(list(row))
        last_row = row.copy()
        day+=1

        
        
plot_df_filled = pd.DataFrame(new_data, columns = list(plot_df)).sort_values('num_days')

plot_df_filled

100%|██████████| 218/218 [01:13<00:00,  2.96it/s]


Unnamed: 0,num_days,total_vaccinations_per_hundred,new_cases_per_million,location,population,Country wealth
2941,-342,0.00,,Argentina,45195777.0,Upper middle
52901,-342,0.00,,Mexico,128932753.0,Upper middle
52902,-341,0.00,,Mexico,128932753.0,Upper middle
2942,-341,0.00,,Argentina,45195777.0,Upper middle
2943,-340,0.00,,Argentina,45195777.0,Upper middle
...,...,...,...,...,...,...
28658,188,24.92,116.014,Fiji,896444.0,Lower middle
28156,188,126.53,,Falkland Islands,3483.0,
476,188,1.65,28.797,Afghanistan,38928341.0,Low
29802,188,41.20,,French Polynesia,280904.0,


In [66]:
day

189

In [67]:
plot_df_filled = plot_df_filled.dropna()

plot_df_filled = plot_df_filled[plot_df_filled['num_days'] >= 0]

plot_df_filled

Unnamed: 0,num_days,total_vaccinations_per_hundred,new_cases_per_million,location,population,Country wealth
67878,0,0.00,3.552,Rwanda,12952209.0,Low
59322,0,0.00,7.397,Nicaragua,6624554.0,Lower middle
57214,0,0.00,31.091,Namibia,2540916.0,Lower middle
57789,0,0.00,47.431,Nepal,29136808.0,Low
288,0,0.00,5.138,Afghanistan,38928341.0,Low
...,...,...,...,...,...,...
29161,188,64.18,8.483,Finland,5540718.0,High
79551,188,66.84,0.000,Switzerland,8654618.0,High
28658,188,24.92,116.014,Fiji,896444.0,Lower middle
476,188,1.65,28.797,Afghanistan,38928341.0,Low


## Creating the figure

Figuring out the colour order

In [68]:
color_dict = {
    "High": "#73a2ab",
    "Upper middle": "#ccdee0",
    "Lower middle": "#e2b6c2",
    "Low": "#bc586e"
}

# label_order = plot_df_filled['Country wealth'].drop_duplicates()

# colour_list = []
# for i in label_order:
#     colour_list.append(color_dict[i])

# colour_list

In [69]:
plot_df_filled['total_vaccinations_per_hundred'].max()

139.61

In [70]:
fig = px.scatter(plot_df_filled, 
            x="total_vaccinations_per_hundred", 
            y="new_cases_per_million", 
            labels={
                     "total_vaccinations_per_hundred": "Total vaccinations per hundred",
                     "new_cases_per_million": "New cases per million",
                     "num_days": "Days since first vaccine",
                 }, 
            color_discrete_map=color_dict,
            animation_frame="num_days", 
            animation_group="location",
           size="population", 
           color="Country wealth", 
           hover_name="location",
         #   log_y=True, 
           size_max=200, 
        #    size_min=5, 
           range_x=[-5,plot_df_filled['total_vaccinations_per_hundred'].max()], 
           range_y=[0,1100],
           )

fig

In [71]:
plot_df_filled['new_cases_per_million'].max()

18293.675

In [72]:
# Using the style_graph function and passing in varibles
fig = grapher.style_graph(fig,
                        title=False,
                        subtitle='',
                        x_axis_title="Total vaccinations per hundred",
                        y_axis_title='New cases per million',
                        data_source="",
                        data_source_position=[0,-0.16], # [x, y]
                        watermark=None, # "left" or "right"
                        watermark_position = [0,-0.16], # [x, y]
                        legend_position = [0,1], # [x, y]
                        ytozero = True, # Y-xis to zero True or False
                        xhovermode = False, # x-axis hover mode. This means y value hover text always appears regardless if your mouse is over the value or not
                        xaxis_tickangle = 0 # xaxis text angle in degrees of rotation clockwise
                       )
# fig.update_layout(
#     margin=dict(t=30, b=150),
# )

# fig.update_layout(yaxis_range=[0, 4])

# fig.update_layout(
#     yaxis = dict(
#         tickmode = 'array',
#         tickvals = [10,50, 100, 500, 1000, 3000, 5000],
#     )
# )

# fig.update_yaxes(type="log")

fig.update_layout(annotations=[])

fig.update_layout(legend=dict(
    orientation="v",
    yanchor="top",
    y=1,
    xanchor="right",
    x=1
))

# Showing figure and passing in the config setting from the grapher object. 
# Config settings determine the elements of the interactive options in the top right bar
fig.show(config = grapher.get_config())

In [73]:
grapher.save_graph_html(fig, 'figures/animated_plot.html')