# Analysing COVID-19 vaccine distribution

In [24]:
import plotly.express as px
import pandas as pd
import numpy as np

In [25]:
# Download the OWID vaccination time series and order the rows by date, then location.
VACCINATIONS_CSV_URL = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv'
df = pd.read_csv(VACCINATIONS_CSV_URL).sort_values(by=['date', 'location'])
df.head()

Unnamed: 0,location,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million
1828,England,,2020-12-13,55576.0,55576.0,,,,0.1,0.1,,
4235,Northern Ireland,,2020-12-13,3623.0,3623.0,,,,0.19,0.19,,
5141,Scotland,,2020-12-13,19009.0,19009.0,,,,0.35,0.35,,
6072,United Kingdom,GBR,2020-12-13,86465.0,86465.0,,,,0.13,0.13,,
6287,Wales,,2020-12-13,8257.0,8257.0,,,,0.26,0.26,,


## Data preprocessing

### Align all countries to the same date range

In [26]:
# Observed date span. 'date' has not been parsed yet at this point (that happens
# further down), so min()/max() operate on the values as read from the CSV.
date_min = df.date.min()
date_max = df.date.max()

# one row per calendar day and one row per location, each tagged with a constant join key
df_dates = pd.DataFrame({'date': pd.date_range(date_min, date_max)}).assign(_tmpkey=1)
df_states = pd.DataFrame({'location': df.location.unique()}).assign(_tmpkey=1)

# merging on the constant key pairs every day with every location (cartesian product)
df_dates_states = (
    df_dates
    .merge(df_states, on='_tmpkey')
    .drop(columns='_tmpkey')
    .sort_values(by=['date', 'location'])
)

# parse the dates so both frames share a datetime 'date' key
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

# the outer join keeps every original row and adds the (date, location) pairs that had none
df = df.merge(df_dates_states, on=['date', 'location'], how='outer')
df.head()

Unnamed: 0,location,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million
0,England,,2020-12-13,55576.0,55576.0,,,,0.1,0.1,,
1,Northern Ireland,,2020-12-13,3623.0,3623.0,,,,0.19,0.19,,
2,Scotland,,2020-12-13,19009.0,19009.0,,,,0.35,0.35,,
3,United Kingdom,GBR,2020-12-13,86465.0,86465.0,,,,0.13,0.13,,
4,Wales,,2020-12-13,8257.0,8257.0,,,,0.26,0.26,,


### Change date format

In [27]:
# save a copy as string
# keep a plain-string rendering of each timestamp next to the datetime column
df['date_str'] = df['date'].map(str)

In [28]:
# Order rows by date, then location; the per-location forward fill that follows
# relies on each location's rows being in chronological order.
df = df.sort_values(by=['date', 'location'])

### Fill gaps in cumulative columns

In [29]:
# fill missing values
# Columns whose gaps are filled with the last value reported for the same location.
# NOTE(review): 'daily_vaccinations_per_million' is named like a daily rate rather
# than a running total -- confirm that carrying it forward is intended.
col_ffill = [
    'total_vaccinations', 'people_vaccinated', 'people_fully_vaccinated',
    'total_vaccinations_per_hundred', 'people_vaccinated_per_hundred',
    'people_fully_vaccinated_per_hundred', 'daily_vaccinations_per_million'
]

# GroupBy.ffill() replaces the deprecated `fillna(method='ffill')` spelling.
# The fill follows row order within each location, so the frame must already be
# sorted by date for "last value" to mean "most recent earlier day".
for c in col_ffill:
    df[c] = df.groupby('location')[c].ffill()

# fill the iso_code: forward first, then backward so that rows preceding a
# location's first known code are covered as well
df['iso_code'] = df.groupby(by='location')['iso_code'].ffill()
df['iso_code'] = df.groupby(by='location')['iso_code'].bfill()

### Include population information

In [30]:
# include most recent population information
# Load the population table and keep only the rows belonging to its latest year.
# NOTE(review): a country with no row for that year drops out of `pop` here --
# confirm that this is acceptable.
pop = pd.read_csv('https://raw.githubusercontent.com/datasets/population/master/data/population.csv')
latest_year = pop['Year'].max()
pop = pop.loc[pop['Year'].eq(latest_year)]
pop.head()

Unnamed: 0,Country Name,Country Code,Year,Value
58,Arab World,ARB,2018,419790588
117,Caribbean small states,CSS,2018,7358965
176,Central Europe and the Baltics,CEB,2018,102511922
235,Early-demographic dividend,EAR,2018,3249140605
294,East Asia & Pacific,EAS,2018,2328220870


In [31]:
# Attach the population figure ('Value') to every vaccination row by matching the
# row's ISO code against the population table's 'Country Code'.
# The left join keeps all vaccination rows; codes without a match get NaN.
# validate='many_to_one' raises a MergeError if a country code occurs more than
# once in `pop`, instead of silently duplicating vaccination rows.
df = pd.merge(
    df, pop,
    left_on='iso_code', right_on='Country Code',
    how='left', validate='many_to_one',
)
# only the population value is used afterwards; drop the other joined columns
df = df.drop(columns=['Country Name', 'Country Code', 'Year'])
df.head()

Unnamed: 0,location,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,date_str,Value
0,Albania,ALB,2020-12-13,,,,,,,,,,2020-12-13 00:00:00,2866376.0
1,Algeria,DZA,2020-12-13,,,,,,,,,,2020-12-13 00:00:00,42228429.0
2,Andorra,AND,2020-12-13,,,,,,,,,,2020-12-13 00:00:00,77006.0
3,Angola,AGO,2020-12-13,,,,,,,,,,2020-12-13 00:00:00,30809762.0
4,Anguilla,AIA,2020-12-13,,,,,,,,,,2020-12-13 00:00:00,


In [32]:
# Normalise the cumulative counts by population ('Value').
# The two people_* results are relabelled as "% ..." columns further down, so they
# are scaled to percentages (0-100) here; without the factor the "%" columns
# would hold 0-1 fractions. Total doses per inhabitant stays a plain ratio.
scale_by_column = {
    'total_vaccinations': 1,
    'people_vaccinated': 100,
    'people_fully_vaccinated': 100,
}
col = list(scale_by_column)
for c in col:
    df[c+'_pop'] = df[c] / df['Value'] * scale_by_column[c]

In [33]:
# give the per-population columns the human-readable labels used in the figure
display_names = {
    'total_vaccinations_pop': 'Total doses distributed/population',
    'people_vaccinated_pop': '% vaccinated (>=1 dose)',
    'people_fully_vaccinated_pop': '% fully vaccinated',
}
df = df.rename(columns=display_names)


In [38]:
# Animated map: one frame per 'date_str' value, locations coloured by `feat`.
feat = 'Total doses distributed/population'

# The colour scale runs from the smallest value to 30% of the largest one,
# so everything above that upper bound is drawn in the top colour.
color_limits = [df[feat].min(), df[feat].max() * 0.3]

fig = px.choropleth(
    df,
    locations="iso_code",
    color=feat,
    hover_name="location",
    hover_data=[feat, '% fully vaccinated'],
    color_continuous_scale=px.colors.sequential.Plasma,
    range_color=color_limits,
    animation_frame='date_str',
    animation_group='location',
)

# narrow side margins, no bottom margin, 50px reserved at the top
fig.update_layout(margin=dict(t=50, b=0, r=3, l=3));

In [39]:
# render the choropleth figure created in the previous cell
fig.show()