In [1]:
import pandas as pd
import numpy as np

import plotly.express as px

In [2]:
# Notebook-wide pandas display settings: never truncate columns, line width
# or cell contents, and show up to 300 rows before eliding.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 300)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

## 1- Get total vaccination rate over time:

In [3]:
# Load the per-country vaccination time series (one CSV per country).
cdc_fr, cdc_us = map(
    pd.read_csv,
    (
        '../data/vaccines_numbers/France.csv',
        '../data/vaccines_numbers/United States.csv',
    ),
)

**NB: population of France = 68,147,687 and population of the USA = 331,002,647**

In [4]:
# Scale `total_vaccinations` to a per-100-inhabitants figure for each country.
# NOTE(review): `total_vaccinations` presumably counts doses rather than
# people, in which case this value can exceed 100 - confirm against the data.
cdc_fr['perc_tot_vac'] = cdc_fr['total_vaccinations'].mul(100).div(68147687)
cdc_us['perc_tot_vac'] = cdc_us['total_vaccinations'].mul(100).div(331002647)

In [5]:
# Share of each country's population listed in `people_vaccinated`,
# expressed as a percentage of the population constants (France, then USA).
cdc_fr['perc_vac'] = cdc_fr['people_vaccinated'].mul(100).div(68147687)
cdc_us['perc_vac'] = cdc_us['people_vaccinated'].mul(100).div(331002647)

In [6]:
# Share of each country's population listed in `people_fully_vaccinated`,
# expressed as a percentage of the population constants (France, then USA).
cdc_fr['perc_full_vac'] = cdc_fr['people_fully_vaccinated'].mul(100).div(68147687)
cdc_us['perc_full_vac'] = cdc_us['people_fully_vaccinated'].mul(100).div(331002647)

In [7]:
# Stack the French and US series into one long frame (France rows first)
# with a fresh 0..n-1 index, so both countries can share a single chart.
cdc_us_fr = pd.concat(
    [cdc_fr, cdc_us],
    axis='index',
    ignore_index=True,
)

In [8]:
# Parse the "date" strings into real timestamps so plotly draws a time axis.
# `pd.to_datetime` replaces `.astype('Datetime64')`: the unit-less,
# capitalised dtype alias is rejected by current pandas/numpy releases.
cdc_us_fr['date'] = pd.to_datetime(cdc_us_fr['date'])

In [142]:
# Line chart of `perc_vac` over time, one curve per country.
fig = px.line(
    cdc_us_fr,
    x="date",
    y="perc_vac",
    color='location',
    title="Share of people who received at least one dose of COVID-19 vaccine",
    # Axis titles are blanked: the chart title and "%" tick suffix carry the meaning
    labels=dict(perc_vac="", date="", location="Country"),
    # Fixed colours so France / United States look the same in every chart
    color_discrete_map={"France": "#17becf", "United States": "#d62728"},
    template="simple_white",
)
fig.update_yaxes(
    ticksuffix="%",
    showgrid=True,
    showline=False,
    tickwidth=0,
    tickcolor='white',
)
fig.update_xaxes(showline=True, zeroline=True)

# The legend is hidden; each curve is labelled by a text annotation instead.
# The x/y anchors are hand-placed positions for the two labels.
fig.update_layout(showlegend=False)
fig.add_annotation(text="France", x="2021-05-29", y=39, arrowhead=1, showarrow=False)
fig.add_annotation(text="United States", x="2021-05-29", y=52, arrowhead=1, showarrow=False)

fig.show()

In [143]:
# Line chart of `perc_full_vac` over time, one curve per country.
fig = px.line(
    cdc_us_fr,
    x="date",
    y="perc_full_vac",
    color='location',
    title="Share of people who received two doses of COVID-19 vaccine",
    # Axis titles are blanked: the chart title and "%" tick suffix carry the meaning
    labels=dict(perc_full_vac="", date="", location="Country"),
    # Fixed colours so France / United States look the same in every chart
    color_discrete_map={"France": "#17becf", "United States": "#d62728"},
    template="simple_white",
)
fig.update_yaxes(
    ticksuffix="%",
    showgrid=True,
    showline=False,
    tickwidth=0,
    tickcolor='white',
)
fig.update_xaxes(showline=True, zeroline=True)

# The legend is hidden; each curve is labelled by a text annotation instead.
# The x/y anchors are hand-placed positions for the two labels.
fig.update_layout(showlegend=False)
fig.add_annotation(text="France", x="2021-05-29", y=18, arrowhead=1, showarrow=False)
fig.add_annotation(text="United States", x="2021-05-29", y=42, arrowhead=1, showarrow=False)

fig.show()

## 2- Get vaccination rate per vaccine per country:

### 1- USA:

In [11]:
# Load the jurisdiction-level US vaccination export.
vaccination_us = pd.read_csv(
    filepath_or_buffer='../data/vaccines_numbers/COVID-19_Vaccinations_in_the_United_States_Jurisdiction.csv',
)

In [12]:
# Lowercase every column name so later cells can use lowercase keys.
vaccination_us.columns = [column_name.lower() for column_name in vaccination_us.columns]

In [13]:
# Parse the "date" strings into real timestamps so the date filter below
# compares dates rather than raw text.
# `pd.to_datetime` replaces `.astype('Datetime64')`: the unit-less,
# capitalised dtype alias is rejected by current pandas/numpy releases.
vaccination_us['date'] = pd.to_datetime(vaccination_us['date'])

In [14]:
# Keep only the 2021-05-31 snapshot (the original note calls this the last
# date) and the columns the regional comparison needs: totals plus the
# per-manufacturer distributed / administered counts and the two population
# percentages.
vaccination_us = vaccination_us.loc[
    vaccination_us['date'] == '05/31/2021',
    [
        'date',
        'location',
        'distributed',
        'distributed_janssen',
        'distributed_moderna',
        'distributed_pfizer',
        'distributed_unk_manuf',
        'administered',
        'administered_janssen',
        'administered_moderna',
        'administered_pfizer',
        'administered_unk_manuf',
        'administered_dose1_pop_pct',
        'series_complete_pop_pct',
    ],
]

In [15]:
# Two hand-picked groups of jurisdictions, identified by the codes found in
# the `location` column, used for the North / South comparison.
north_states = ['NY', 'PA', 'NJ', 'DE', 'VT', 'NH', 'RI', 'MA', 'CT']
south_states = ['TN', 'AL', 'MS', 'GA', 'LA', 'NC', 'SC', 'AR']

# One sub-frame per group: rows whose location belongs to that group.
vac_north = vaccination_us.loc[vaccination_us['location'].isin(north_states)]
vac_south = vaccination_us.loc[vaccination_us['location'].isin(south_states)]

In [123]:
# Southern states: per-manufacturer doses distributed, with columns renamed
# to the vaccine name and tagged with a `status` column.
south_distri = (
    vac_south
    .loc[:, ['distributed_pfizer', 'distributed_moderna', 'distributed_janssen']]
    .rename(columns={
        'distributed_pfizer': 'Pfizer',
        'distributed_moderna': 'Moderna',
        'distributed_janssen': 'Janssen',
    })
    .assign(status='distribution')
)

# Same shape for doses administered, so both frames can be stacked later.
south_admin = (
    vac_south
    .loc[:, ['administered_pfizer', 'administered_moderna', 'administered_janssen']]
    .rename(columns={
        'administered_pfizer': 'Pfizer',
        'administered_moderna': 'Moderna',
        'administered_janssen': 'Janssen',
    })
    .assign(status='vaccinated')
)

In [124]:
# Northern states: doses distributed. The total and unknown-manufacturer
# columns are carried along under their original names; only the three named
# manufacturers are renamed to the vaccine name.
north_distri = (
    vac_north
    .loc[:, [
        'distributed',
        'distributed_pfizer',
        'distributed_moderna',
        'distributed_janssen',
        'distributed_unk_manuf',
    ]]
    .rename(columns={
        'distributed_pfizer': 'Pfizer',
        'distributed_moderna': 'Moderna',
        'distributed_janssen': 'Janssen',
    })
    .assign(status='distribution')
)

# Same treatment for doses administered.
north_admin = (
    vac_north
    .loc[:, [
        'administered',
        'administered_janssen',
        'administered_moderna',
        'administered_pfizer',
        'administered_unk_manuf',
    ]]
    .rename(columns={
        'administered_pfizer': 'Pfizer',
        'administered_moderna': 'Moderna',
        'administered_janssen': 'Janssen',
    })
    .assign(status='vaccinated')
)

In [125]:
# Stack the "distribution" rows on top of the "vaccinated" rows for each
# region, renumbering the index from 0.
south_all = pd.concat(
    [south_distri, south_admin],
    axis='index',
    ignore_index=True,
)
north_all = pd.concat(
    [north_distri, north_admin],
    axis='index',
    ignore_index=True,
)

In [129]:
# Total doses per vaccine for each status ("distribution" / "vaccinated"),
# summed over the states of the region.
# The columns are selected with a list (double brackets): selecting with a
# bare tuple of keys is deprecated and raises on recent pandas versions.
north_all_gp = (
    north_all
    .groupby('status')[['Pfizer', 'Moderna', 'Janssen']]
    .sum()
    .reset_index()
)
south_all_gp = (
    south_all
    .groupby('status')[['Pfizer', 'Moderna', 'Janssen']]
    .sum()
    .reset_index()
)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [130]:
# Reshape wide -> long: one row per (status, vaccine) pair with its dose
# total in `sum`, which is the layout the grouped bar charts consume.
north_all_gp = north_all_gp.melt(
    id_vars=['status'],
    value_vars=['Pfizer', 'Moderna', 'Janssen'],
    var_name='vaccine',
    value_name='sum',
)
south_all_gp = south_all_gp.melt(
    id_vars=['status'],
    value_vars=['Pfizer', 'Moderna', 'Janssen'],
    var_name='vaccine',
    value_name='sum',
)

In [132]:
# Grouped bars: doses distributed vs. administered per vaccine, Northern states.
fig = px.bar(
    north_all_gp,
    x='vaccine',
    y='sum',
    color='status',
    barmode='group',
    title='Cumulative COVID-19 doses received and injected in Northern States',
    # Axis and legend titles are blanked; the chart title carries the meaning
    labels={"sum": "", "vaccine": "", "status": ""},
    color_discrete_map={"distribution": "#17becf", "vaccinated": "#d62728"},
    # The ordering must be keyed on the column actually plotted on x
    # ("vaccine"); the previous "type_de_vaccin" key does not exist in this
    # frame, so the requested order was silently ignored.
    category_orders={"vaccine": ["Pfizer", "Moderna", "Janssen"]},
    template="simple_white",
)
fig.update_yaxes(showgrid=True, showline=False, tickwidth=0, tickcolor='white')
fig.update_xaxes(showline=True, zeroline=True)

fig.show()

In [133]:
# Grouped bars: doses distributed vs. administered per vaccine, Southern states.
fig = px.bar(
    south_all_gp,
    x='vaccine',
    y='sum',
    color='status',
    barmode='group',
    title='Cumulative COVID-19 doses received and injected in Southern States',
    # Axis and legend titles are blanked; the chart title carries the meaning
    labels={"sum": "", "vaccine": "", "status": ""},
    color_discrete_map={"distribution": "#17becf", "vaccinated": "#d62728"},
    # The ordering must be keyed on the column actually plotted on x
    # ("vaccine"); the previous "type_de_vaccin" key does not exist in this
    # frame, so the requested order was silently ignored.
    category_orders={"vaccine": ["Pfizer", "Moderna", "Janssen"]},
    template="simple_white",
)
fig.update_yaxes(showgrid=True, showline=False, tickwidth=0, tickcolor='white')
fig.update_xaxes(showline=True, zeroline=True)

fig.show()

In [109]:
# Keep only the two population-percentage columns for each region.
south_pop_perc = vac_south.loc[:, ['administered_dose1_pop_pct', 'series_complete_pop_pct']]
north_pop_perc = vac_north.loc[:, ['administered_dose1_pop_pct', 'series_complete_pop_pct']]

In [110]:
# Average each percentage column over the Southern states present in the data.
# `.mean()` replaces the hard-coded `sum() / 8`, which silently gave a wrong
# average whenever a state was missing from the filtered frame.
# NOTE: this is an unweighted mean of state-level percentages, not a
# population-weighted regional rate.
south_tot_pop_perc = (
    south_pop_perc
    .mean()
    .rename('percentage')
    .rename_axis('vaccinated')
    .reset_index()
    .assign(states='Southern states')
)

In [111]:
# Average each percentage column over the Northern states present in the data.
# `.mean()` replaces the hard-coded `sum() / 9`, which silently gave a wrong
# average whenever a state was missing from the filtered frame.
# NOTE: this is an unweighted mean of state-level percentages, not a
# population-weighted regional rate.
north_tot_pop_perc = (
    north_pop_perc
    .mean()
    .rename('percentage')
    .rename_axis('vaccinated')
    .reset_index()
    .assign(states='Northern states')
)

In [112]:
# Stack the Southern rows above the Northern rows into one frame for the
# regional bar chart, renumbering the index from 0.
pop_perc_all_gp = pd.concat(
    [south_tot_pop_perc, north_tot_pop_perc],
    axis='index',
    ignore_index=True,
)

In [113]:
# Display the combined regional averages (one row per metric and region)
pop_perc_all_gp

Unnamed: 0,vaccinated,percentage,states
0,administered_dose1_pop_pct,38.5625,Southern states
1,series_complete_pop_pct,31.525,Southern states
2,administered_dose1_pop_pct,61.144444,Northern states
3,series_complete_pop_pct,49.577778,Northern states


In [116]:
# Swap the raw column names for reader-facing labels.
# The dose-1 percentage is labelled "At least one dose" rather than
# "Only one dose": in both regions it is larger than the fully-vaccinated
# percentage, so it includes people who completed the series.
# NOTE(review): confirm against the CDC data dictionary for
# `administered_dose1_pop_pct`.
pop_perc_all_gp['vaccinated'] = pop_perc_all_gp['vaccinated'].replace({
    'administered_dose1_pop_pct': 'At least one dose',
    'series_complete_pop_pct': 'Fully vaccinated',
})

In [134]:
# Grouped bars: average vaccination percentages, Northern vs. Southern states.
fig = px.bar(
    pop_perc_all_gp,
    x='vaccinated',
    y='percentage',
    color='states',
    barmode='group',
    title='Percentage of people half and fully vaccinated in the United States',
    # Axis and legend titles are blanked; the chart title carries the meaning
    labels=dict(percentage="", vaccinated="", states=""),
    color_discrete_map={"Southern states": "#17becf", "Northern states": "#d62728"},
    # Draw the Northern bar before the Southern one within each group
    category_orders={"states": ["Northern states", "Southern states"]},
    template="simple_white",
)
fig.update_yaxes(
    ticksuffix="%",
    showgrid=True,
    showline=False,
    tickwidth=0,
    tickcolor='white',
)
fig.update_xaxes(showline=True, zeroline=True)
fig.show()

### 2- France:

In [74]:
# Load the French cumulative vaccine-delivery export.
livraison_fr = pd.read_csv(
    filepath_or_buffer='../data/vaccines_numbers/livraisons-realisees-cumul-nat.csv',
)

In [75]:
# Keep the rows for the week ending 2021-05-30, with only the vaccine type
# and dose count, and tag them as "distribution".
# A single `.loc` selection followed by `.assign` builds a fresh frame; the
# previous version assigned a column onto a doubly-sliced frame, which is the
# chained-assignment pattern behind pandas' SettingWithCopyWarning.
livraison_fr = (
    livraison_fr
    .loc[
        livraison_fr['date_fin_semaine'] == '2021-05-30',
        ['type_de_vaccin', 'nb_doses'],
    ]
    .assign(status='distribution')
)

In [139]:
# Hand-entered dose counts per vaccine for France, tagged "vaccinated" so
# they can be stacked with the delivery figures.
# NOTE(review): the source and cut-off date of these four numbers are not
# recorded in the notebook - confirm and document their provenance.
data = dict(
    type_de_vaccin=['Pfizer', 'Moderna', 'AstraZeneca', 'Janssen'],
    nb_doses=[21848732, 2555043, 4364726, 156333],
)
vaccin_fr = (
    pd.DataFrame(data, columns=['type_de_vaccin', 'nb_doses'])
    .assign(status='vaccinated')
)

In [140]:
# Stack the delivery rows above the administered rows, renumbering the
# index from 0, to feed the French grouped bar chart.
tot_vaccin_fr = pd.concat(
    [livraison_fr, vaccin_fr],
    axis='index',
    ignore_index=True,
)

In [141]:
# Grouped bars: doses delivered vs. administered per vaccine, France.
fig = px.bar(
    tot_vaccin_fr,
    x='type_de_vaccin',
    y='nb_doses',
    color='status',
    barmode='group',
    title='Cumulative COVID-19 doses received and injected in France',
    # Axis and legend titles are blanked; the chart title carries the meaning
    labels=dict(nb_doses="", type_de_vaccin="", status=""),
    color_discrete_map={"distribution": "#17becf", "vaccinated": "#d62728"},
    # Fixed left-to-right order of the vaccines on the x axis
    category_orders={"type_de_vaccin": ["Pfizer", "Moderna", "AstraZeneca", "Janssen"]},
    template="simple_white",
)
fig.update_yaxes(showgrid=True, showline=False, tickwidth=0, tickcolor='white')
fig.update_xaxes(showline=True, zeroline=True)

fig.show()