In [144]:
import os

import numpy as np
import pandas as pd

import chart_studio
import chart_studio.plotly as py
import plotly.express as px

In [145]:
# Chart Studio credentials are taken from the environment so that the API key
# is never stored in (or committed with) the notebook.
#   CHART_STUDIO_USERNAME  - optional, falls back to the account used so far
#   CHART_STUDIO_API_KEY   - required
# NOTE(review): the key that was previously hard-coded in this cell has been
# exposed and should be regenerated in the Chart Studio account settings.
username = os.environ.get('CHART_STUDIO_USERNAME', 'leguela')
api_key = os.environ.get('CHART_STUDIO_API_KEY')
if not api_key:
    raise RuntimeError(
        'Set the CHART_STUDIO_API_KEY environment variable before running this notebook.'
    )
chart_studio.tools.set_credentials_file(username=username, api_key=api_key)

In [2]:
# Notebook-wide pandas display settings: no limit on columns, total width or
# cell text width, and up to 300 rows before a frame is truncated.
for option_name, option_value in {
    'display.max_columns': None,
    'display.max_rows': 300,
    'display.width': None,
    'display.max_colwidth': None,
}.items():
    pd.set_option(option_name, option_value)

## 1- Get total vaccination rate over time:

In [3]:
# Load the per-country vaccination time series (one CSV per country).
cdc_fr, cdc_us = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/vaccines_numbers/France.csv',
        '../data/vaccines_numbers/United States.csv',
    )
)

**NB: population of France = 68,147,687; population of the USA = 331,002,647**

In [4]:
# "total_vaccinations" scaled to a percentage of each country's population
# (France: 68147687, United States: 331002647).
# NOTE(review): presumably "total_vaccinations" counts doses rather than
# people, in which case this is doses per 100 inhabitants - confirm.
cdc_fr['perc_tot_vac'] = cdc_fr['total_vaccinations'].mul(100).div(68147687)
cdc_us['perc_tot_vac'] = cdc_us['total_vaccinations'].mul(100).div(331002647)

In [5]:
# "people_vaccinated" as a percentage of each country's population; this is
# the column plotted as "at least one dose" further down.
cdc_fr = cdc_fr.assign(
    perc_vac=lambda frame: frame['people_vaccinated'] * 100 / 68147687
)
cdc_us = cdc_us.assign(
    perc_vac=lambda frame: frame['people_vaccinated'] * 100 / 331002647
)

In [6]:
# "people_fully_vaccinated" as a percentage of each country's population
# (same population figures as the other percentage columns).
cdc_fr['perc_full_vac'] = 100 * cdc_fr['people_fully_vaccinated'] / 68147687
cdc_us['perc_full_vac'] = 100 * cdc_us['people_fully_vaccinated'] / 331002647

In [7]:
# Stack the French and US tables into one long frame, renumbering the rows
# so the combined index runs 0..n-1.
cdc_us_fr = pd.concat(objs=[cdc_fr, cdc_us], ignore_index=True)

In [8]:
# Parse the "date" strings into real datetimes so plotly draws a time axis.
# pd.to_datetime is used instead of .astype('Datetime64'): the capitalised,
# unit-less dtype alias is rejected by current numpy/pandas releases.
cdc_us_fr['date'] = pd.to_datetime(cdc_us_fr['date'])

In [465]:
# Line chart of "perc_vac" over time, one coloured line per country.
fig1 = px.line(
    cdc_us_fr,
    x="date",
    y="perc_vac",
    color='location',
    title="Share of people who received at least one dose of COVID-19 vaccine",
    labels={"perc_vac": "Population", "date": "", "location": "Country"},
    # fixed colour per country instead of the default palette
    color_discrete_map={"France": "#17becf", "United States": "#d62728"},
    template="simple_white",
)

# y axis: "%" suffix and gridlines, no axis line, zero-width white tick marks
fig1.update_yaxes(
    ticksuffix="%",
    showgrid=True,
    showline=False,
    tickwidth=0,
    tickcolor='white',
)
fig1.update_xaxes(showline=True, zeroline=True)

# The legend is hidden; each country name is written on the chart instead.
fig1.update_layout(showlegend=False)
for country_label, label_y in (("France", 39), ("United States", 52)):
    fig1.add_annotation(
        text=country_label,
        x="2021-05-29",
        y=label_y,
        arrowhead=1,
        showarrow=False,
    )

fig1.show()

In [466]:
# Upload fig1 to Chart Studio without opening a browser tab; the call's
# return value (the chart URL) is shown as the cell output.
py.plot(
    fig1,
    filename='covid_vaccination_us_fr_time',
    auto_open=False,
)

'https://plotly.com/~leguela/24/'

In [148]:
# Line chart of "perc_full_vac" over time, one coloured line per country.
fig2 = px.line(
    cdc_us_fr,
    x="date",
    y="perc_full_vac",
    color='location',
    title="Share of people who received two doses of COVID-19 vaccine",
    labels={"perc_full_vac": "", "date": "", "location": "Country"},
    # fixed colour per country instead of the default palette
    color_discrete_map={"France": "#17becf", "United States": "#d62728"},
    template="simple_white",
)

# y axis: "%" suffix and gridlines, no axis line, zero-width white tick marks
fig2.update_yaxes(
    ticksuffix="%",
    showgrid=True,
    showline=False,
    tickwidth=0,
    tickcolor='white',
)
fig2.update_xaxes(showline=True, zeroline=True)

# The legend is hidden; each country name is written on the chart instead.
fig2.update_layout(showlegend=False)
for country_label, label_y in (("France", 18), ("United States", 42)):
    fig2.add_annotation(
        text=country_label,
        x="2021-05-29",
        y=label_y,
        arrowhead=1,
        showarrow=False,
    )

fig2.show()

In [159]:
#py.plot(fig2, filename = 'covid_vaccination_us_two_doses', auto_open=False)

'https://plotly.com/~leguela/29/'

## 2- Get vaccination rate per vaccine per country:

### 1-USA:

In [428]:
# Load the US vaccination table that has one row per date and jurisdiction.
vaccination_us = pd.read_csv(
    filepath_or_buffer='../data/vaccines_numbers/COVID-19_Vaccinations_in_the_United_States_Jurisdiction.csv'
)

In [429]:
# Lowercase every column name so later cells can use lowercase keys.
vaccination_us = vaccination_us.rename(columns=str.lower)

In [430]:
# Parse the "date" strings into real datetimes.
# pd.to_datetime is used instead of .astype('Datetime64'): the capitalised,
# unit-less dtype alias is rejected by current numpy/pandas releases.
vaccination_us['date'] = pd.to_datetime(vaccination_us['date'])

In [431]:
# Keep only the 05/31/2021 snapshot (described by the author as the last
# date) and the columns needed for the per-manufacturer comparison below.
snapshot_day = '05/31/2021'
snapshot_columns = [
    'date', 'location',
    'distributed', 'distributed_janssen', 'distributed_moderna',
    'distributed_pfizer', 'distributed_unk_manuf',
    'administered', 'administered_janssen', 'administered_moderna',
    'administered_pfizer', 'administered_unk_manuf',
    'administered_dose1_pop_pct', 'series_complete_pop_pct',
]
vaccination_us = vaccination_us.loc[
    vaccination_us['date'] == snapshot_day, snapshot_columns
]

In [432]:
# Two-letter codes of the states that make up each region in this analysis.
northeast_states = ['NY', 'PA', 'NJ', 'DE', 'VT', 'NH', 'RI', 'MA', 'CT']
southeast_states = ['TN', 'AL', 'MS', 'GA', 'LA', 'NC', 'SC', 'AR', 'FL', 'KY']

# Rows of the snapshot whose "location" belongs to each region.
vac_ne = vaccination_us.loc[vaccination_us['location'].isin(northeast_states)]
vac_se = vaccination_us.loc[vaccination_us['location'].isin(southeast_states)]

In [433]:
# Northeast: doses distributed, with the per-manufacturer columns renamed to
# the vaccine names used in the charts and a "status" tag added.
ne_distri = (
    vac_ne[['distributed', 'distributed_pfizer', 'distributed_moderna',
            'distributed_janssen', 'distributed_unk_manuf']]
    .rename(columns={
        'distributed_pfizer': 'Pfizer',
        'distributed_moderna': 'Moderna',
        'distributed_janssen': 'Johnson&Johnson',
    })
    .assign(status='distributed')
)

# Northeast: doses administered, same renaming, tagged "administered".
ne_admin = (
    vac_ne[['administered', 'administered_janssen', 'administered_moderna',
            'administered_pfizer', 'administered_unk_manuf']]
    .rename(columns={
        'administered_pfizer': 'Pfizer',
        'administered_moderna': 'Moderna',
        'administered_janssen': 'Johnson&Johnson',
    })
    .assign(status='administered')
)

In [434]:
# Southeast: doses distributed per manufacturer, renamed to the vaccine names
# used in the charts and tagged with status "distributed".
# (A stray bare `se_distri` expression in the middle of this cell was removed;
# it was not the last statement, so it displayed nothing.)
se_distri = (
    vac_se[['distributed_pfizer', 'distributed_moderna', 'distributed_janssen']]
    .rename(columns={
        'distributed_pfizer': 'Pfizer',
        'distributed_moderna': 'Moderna',
        'distributed_janssen': 'Johnson&Johnson',
    })
    .assign(status='distributed')
)

# Southeast: doses administered per manufacturer, tagged "administered".
se_admin = (
    vac_se[['administered_pfizer', 'administered_moderna', 'administered_janssen']]
    .rename(columns={
        'administered_pfizer': 'Pfizer',
        'administered_moderna': 'Moderna',
        'administered_janssen': 'Johnson&Johnson',
    })
    .assign(status='administered')
)

In [435]:
# For each region, stack the "distributed" rows on top of the "administered"
# rows and renumber the index.
ne_all = pd.concat(objs=[ne_distri, ne_admin], ignore_index=True)
se_all = pd.concat(objs=[se_distri, se_admin], ignore_index=True)

In [463]:
# Total doses per vaccine for each status (distributed / administered).
# The columns are selected with a list: selecting with a bare tuple of keys
# is deprecated (see the warning this cell used to emit) and is an error in
# pandas 2.
vaccine_columns = ['Pfizer', 'Moderna', 'Johnson&Johnson']
ne_all_gp = ne_all.groupby('status')[vaccine_columns].sum().reset_index()
se_all_gp = se_all.groupby('status')[vaccine_columns].sum().reset_index()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [464]:
# Display the Northeast totals: one row per status, one column per vaccine.
ne_all_gp

Unnamed: 0,status,Pfizer,Moderna,Johnson&Johnson
0,administered,31441005,24032735,2240650
1,distributed,36000705,28002400,3861500


In [468]:
# Reshape wide -> long: one row per (status, vaccine) with the dose total in
# "sum", which is the layout the grouped bar charts expect.
# NOTE: this overwrites the wide frames, so the cell cannot be re-run on its
# own - the per-vaccine columns no longer exist after the first run.
ne_all_gp = ne_all_gp.melt(
    id_vars=['status'],
    value_vars=['Pfizer', 'Moderna', 'Johnson&Johnson'],
    var_name='vaccine',
    value_name='sum',
)
se_all_gp = se_all_gp.melt(
    id_vars=['status'],
    value_vars=['Pfizer', 'Moderna', 'Johnson&Johnson'],
    var_name='vaccine',
    value_name='sum',
)

In [469]:
# Grouped bar chart for the Northeast: doses distributed vs administered,
# one group of bars per vaccine.
fig4 = px.bar(
    ne_all_gp,
    x='vaccine',
    y='sum',
    color='status',
    barmode='group',
    title='Cumulative COVID-19 doses received and used in Northeast States',
    labels={"sum": "Number of doses", "vaccine": "", "status": ""},
    # fixed colour per status instead of the default palette
    color_discrete_map={"distributed": "darkgreen", "administered": "yellowgreen"},
    # The key must be the column name of this frame ("vaccine"); the previous
    # key "type_de_vaccin" matched no column, so the order was ignored.
    category_orders={
        "vaccine": ["Pfizer", "Moderna", "Johnson&Johnson"],
        "status": ["distributed", "administered"],
    },
    template="simple_white",
)
fig4.update_yaxes(showgrid=True, showline=False, tickwidth=0, tickcolor='white')
fig4.update_xaxes(showline=True, zeroline=True)

fig4.show()

In [470]:
# Upload fig4 to Chart Studio without opening a browser tab; the call's
# return value (the chart URL) is shown as the cell output.
py.plot(
    fig4,
    filename='covid_vaccination_northeast',
    auto_open=False,
)

'https://plotly.com/~leguela/33/'

In [471]:
# Grouped bar chart for the Southeast: doses distributed vs administered,
# one group of bars per vaccine.
fig5 = px.bar(
    se_all_gp,
    x='vaccine',
    y='sum',
    color='status',
    barmode='group',
    title='Cumulative COVID-19 doses received and used in Southeast States',
    labels={"sum": "Number of doses", "vaccine": "", "status": ""},
    # fixed colour per status instead of the default palette
    color_discrete_map={"distributed": "maroon", "administered": "indianred"},
    # The key must be the column name of this frame ("vaccine"); the previous
    # key "type_de_vaccin" matched no column, so the order was ignored.
    # "AstraZeneca" is left out because this frame only holds the three
    # vaccines melted above.
    category_orders={
        "vaccine": ["Pfizer", "Moderna", "Johnson&Johnson"],
        "status": ["distributed", "administered"],
    },
    template="simple_white",
)
fig5.update_yaxes(showgrid=True, showline=False, tickwidth=0, tickcolor='white')
fig5.update_xaxes(showline=True, zeroline=True)

fig5.show()

In [472]:
# Upload fig5 to Chart Studio without opening a browser tab; the call's
# return value (the chart URL) is shown as the cell output.
py.plot(
    fig5,
    filename='covid_vaccination_southeast',
    auto_open=False,
)

'https://plotly.com/~leguela/35/'

In [215]:
# Per-state percentage columns for each region: "administered_dose1_pop_pct"
# and "series_complete_pop_pct".
pop_pct_columns = ['administered_dose1_pop_pct', 'series_complete_pop_pct']
ne_pop_perc = vac_ne.loc[:, pop_pct_columns]
se_pop_perc = vac_se.loc[:, pop_pct_columns]

In [216]:
# Average of the per-state percentages for the Southeast, reshaped into one
# row per measure with columns "vaccinated" / "percentage" / "states".
# .mean() replaces the former `sum() / 8`: the Southeast list holds 10 states,
# so dividing by 8 overstated the regional figure by 25%.
# NOTE: this is an unweighted mean of state percentages, not a
# population-weighted regional rate.
se_tot_pop_perc = (
    se_pop_perc.mean()
    .to_frame()
    .reset_index()
    .rename(columns={'index': 'vaccinated', 0: 'percentage'})
)
se_tot_pop_perc['states'] = 'Southeast'

In [217]:
# Average of the per-state percentages for the Northeast, reshaped into one
# row per measure with columns "vaccinated" / "percentage" / "states".
# .mean() replaces the former `sum() / 9` so the divisor always follows the
# number of states actually present instead of a hard-coded count.
# NOTE: this is an unweighted mean of state percentages, not a
# population-weighted regional rate.
ne_tot_pop_perc = (
    ne_pop_perc.mean()
    .to_frame()
    .reset_index()
    .rename(columns={'index': 'vaccinated', 0: 'percentage'})
)
ne_tot_pop_perc['states'] = 'Northeast'

In [224]:
# Combine the Southeast and Northeast averages into one frame (Southeast rows
# first) with a fresh 0..n-1 index.
pop_perc_all_gp = pd.concat(
    objs=[se_tot_pop_perc, ne_tot_pop_perc],
    ignore_index=True,
)

In [226]:
# Replace the raw column names stored in "vaccinated" with the labels shown
# on the chart's x axis.
pop_perc_all_gp['vaccinated'] = pop_perc_all_gp['vaccinated'].replace({
    'administered_dose1_pop_pct': 'One dose',
    'series_complete_pop_pct': 'Two doses',
})

In [473]:
# Grouped bar chart comparing the two regions' average percentages for
# "One dose" and "Two doses".
fig3 = px.bar(
    pop_perc_all_gp,
    x='vaccinated',
    y='percentage',
    color='states',
    barmode='group',
    title='Percentage of people half and fully vaccinated in the United States',
    labels={"percentage": "Population", "vaccinated": "", "states": ""},
    # fixed colour per region instead of the default palette
    color_discrete_map={"Southeast": "indianred", "Northeast": "yellowgreen"},
    # draw the Northeast bar before the Southeast bar in each group
    category_orders={"states": ["Northeast", "Southeast"]},
    template="simple_white",
)
fig3.update_yaxes(
    ticksuffix="%",
    showgrid=True,
    showline=False,
    tickwidth=0,
    tickcolor='white',
)
fig3.update_xaxes(showline=True, zeroline=True)
fig3.show()

In [474]:
# Upload fig3 to Chart Studio without opening a browser tab; the call's
# return value (the chart URL) is shown as the cell output.
py.plot(
    fig3,
    filename='perc_vaccination_us',
    auto_open=False,
)

'https://plotly.com/~leguela/27/'

### 2- France:

**Data until May 30, 2021:**

In [455]:
# French source files: cumulative deliveries (comma-separated) and
# vaccinations per vaccine (semicolon-separated).
livraison_fr = pd.read_csv(
    filepath_or_buffer='../data/vaccines_numbers/livraisons-realisees-cumul-nat.csv'
)
vaccination_fr = pd.read_csv(
    filepath_or_buffer='../data/vaccines_numbers/vacsi-v-fra-2021-06-06-19h05.csv',
    sep=';',
)

In [456]:
# Deliveries for the week ending 2021-05-30, reduced to vaccine name and dose
# count, renamed to the shared "vaccin" / "n_cum_doses" schema, with
# "Janssen" relabelled and every row tagged as "distributed".
livraison_fr = (
    livraison_fr.loc[
        livraison_fr['date_fin_semaine'] == '2021-05-30',
        ['type_de_vaccin', 'nb_doses'],
    ]
    .rename(columns={'type_de_vaccin': 'vaccin', 'nb_doses': 'n_cum_doses'})
    .assign(
        vaccin=lambda frame: frame['vaccin'].replace("Janssen", "Johnson&Johnson"),
        status='distributed',
    )
)

In [457]:
# Administered doses per vaccine on 2021-05-30, in the same
# "vaccin" / "n_cum_doses" / "status" schema as the deliveries table.
# Built as one chain on a fresh frame: the earlier version assigned columns
# onto a filtered slice, which triggers pandas' SettingWithCopyWarning.
vaccination_fr = (
    vaccination_fr.loc[vaccination_fr['jour'] == '2021-05-30']
    .assign(
        # first and second doses together
        n_cum_doses=lambda frame: frame['n_cum_dose1'] + frame['n_cum_dose2'],
        vaccin=lambda frame: frame['vaccin'].astype('int64'),
    )
    # Code 0 is dropped while the column is still numeric.
    # NOTE(review): presumably 0 is the all-vaccines total - confirm against
    # the data dictionary.
    .loc[lambda frame: frame['vaccin'] != 0]
    .assign(
        vaccin=lambda frame: frame['vaccin'].replace(
            {1: "Pfizer", 2: "Moderna", 3: "AstraZeneca", 4: "Johnson&Johnson"}
        ),
    )
    [["vaccin", "n_cum_doses"]]
    # NOTE(review): 'adminitered' is misspelled, but downstream plotting code
    # refers to this exact spelling, so it is left unchanged here.
    .assign(status='adminitered')
)

In [458]:
# Stack deliveries on top of administered doses into one long frame with a
# fresh 0..n-1 index.
tot_vaccin_fr = pd.concat(
    objs=[livraison_fr, vaccination_fr],
    ignore_index=True,
)

In [475]:
# Grouped bar chart for France: doses distributed vs administered, one group
# of bars per vaccine.
# The status label is normalised first: the data carries the misspelling
# 'adminitered', which showed up in the legend and did not match the
# "administered" entry in category_orders. The replace is a no-op if the
# label is already spelled correctly.
fig6_data = tot_vaccin_fr.assign(
    status=tot_vaccin_fr['status'].replace('adminitered', 'administered')
)
fig6 = px.bar(
    fig6_data,
    x='vaccin',
    y='n_cum_doses',
    color='status',
    barmode='group',
    title='Cumulative COVID-19 doses received and used in France',
    labels={"n_cum_doses": "Number of doses", "vaccin": "", "status": ""},
    # fixed colour per status instead of the default palette
    color_discrete_map={"distributed": "steelblue", "administered": "lightsteelblue"},
    category_orders={
        "vaccin": ["Pfizer", "Moderna", "AstraZeneca", "Johnson&Johnson"],
        "status": ["distributed", "administered"],
    },
    template="simple_white",
)
fig6.update_yaxes(showgrid=True, showline=False, tickwidth=0, tickcolor='white')
fig6.update_xaxes(showline=True, zeroline=True)

fig6.show()

In [476]:
# Upload fig6 to Chart Studio without opening a browser tab; the call's
# return value (the chart URL) is shown as the cell output.
py.plot(
    fig6,
    filename='covid_vaccination_france',
    auto_open=False,
)

'https://plotly.com/~leguela/37/'