# Data visualization for the percentage of population fully vaccinated against COVID-19

__Author: Shubham Abhay Deshpande__

__Matrikelnummer : 419665__

__Notebook Description:__

- This notebook will show a graph of percentage of population fully vaccinated against COVID-19 for selected countries
- The data set used for this analysis is available on the Johns Hopkins COVID-19 vaccination GitHub page. 
- To find the percentage of the population that is fully vaccinated, the number of fully vaccinated people on a specific date is divided by the population of that country and multiplied by 100.
- The population of each country is taken from a separate csv file (`world_pop_by_country.csv`), which is available in the same Johns Hopkins GitHub repository as the vaccination data.
- It is possible that the path from which the data is taken in this notebook does not match exactly with the path on the user's computer. In such an event, my sincere apologies. Please change the path to the correct one in that case. 

In [2]:
import pandas as pd 
import numpy as np

%matplotlib inline

import matplotlib as mpl 
import matplotlib.pyplot as plt

import seaborn as sns

import plotly
import plotly.graph_objects as go 

In [3]:
# Notebook-wide display defaults: large matplotlib figures, up to 500 rows
# shown per DataFrame, and seaborn's dark grid theme.
mpl.rcParams.update({'figure.figsize': (20, 16)})
pd.options.display.max_rows = 500

sns.set(style='darkgrid')

# Johns Hopkins Data For Vaccination 

In [4]:
# Global vaccination time series, read straight from the govex/COVID-19 GitHub repository.
path = (
    'https://raw.githubusercontent.com/govex/COVID-19/master/'
    'data_tables/vaccine_data/global_data/time_series_covid19_vaccine_global.csv'
)
pd_raw_vaccine = pd.read_csv(filepath_or_buffer=path)

In [5]:
# Preview the first five rows of the raw vaccination table.
pd_raw_vaccine.head(n=5)

Unnamed: 0,Country_Region,Date,Doses_admin,People_partially_vaccinated,People_fully_vaccinated,Report_Date_String,UID,Province_State
0,Canada,2020-12-14,5.0,0.0,0.0,2020-12-14,124.0,
1,World,2020-12-14,5.0,0.0,0.0,2020-12-14,,
2,Canada,2020-12-15,723.0,0.0,0.0,2020-12-15,124.0,
3,China,2020-12-15,1500000.0,0.0,0.0,2020-12-15,156.0,
4,Russia,2020-12-15,28500.0,28500.0,0.0,2020-12-15,643.0,


In [6]:
# Compare the two date columns row by row; rows where they are not equal
# are labelled 'False'.
dates_match = pd_raw_vaccine['Date'] == pd_raw_vaccine['Report_Date_String']
df_compare = np.where(dates_match, 'True', 'False')

# Positions of the rows whose 'Date' and 'Report_Date_String' disagree.
print(np.where(df_compare == 'False'))

# Data types of all columns of the raw table.
pd_raw_vaccine.dtypes


(array([ 13943,  13944,  13945, ..., 261267, 261268, 261269]),)


Country_Region                  object
Date                            object
Doses_admin                    float64
People_partially_vaccinated    float64
People_fully_vaccinated        float64
Report_Date_String              object
UID                            float64
Province_State                  object
dtype: object

The output above shows that the two date columns differ for a number of rows. For this analysis the reporting date of the vaccination ('Report_Date_String') is sufficient, 
so the other column named 'Date' can be dropped completely. 

In [7]:
# Shorter column names; 'Report_Date_String' takes over the name 'Date'
# once the original 'Date' column has been dropped.
column_names = {
    'Country_Region': 'Country',
    'People_partially_vaccinated': 'partially_vaccinated',
    'People_fully_vaccinated': 'fully_vaccinated',
    'Report_Date_String': 'Date',
}

# Drop the first 'Date' column and 'Province_State' (to keep the data
# manageable), then apply the simpler column names.
pd_raw = (
    pd_raw_vaccine
    .drop(columns=['Date', 'Province_State'])
    .rename(columns=column_names)
)

pd_raw.head()


Unnamed: 0,Country,Doses_admin,partially_vaccinated,fully_vaccinated,Date,UID
0,Canada,5.0,0.0,0.0,2020-12-14,124.0
1,World,5.0,0.0,0.0,2020-12-14,
2,Canada,723.0,0.0,0.0,2020-12-15,124.0
3,China,1500000.0,0.0,0.0,2020-12-15,156.0
4,Russia,28500.0,28500.0,0.0,2020-12-15,643.0


In [8]:
# Parsing the 'Date' strings into pandas datetime values with 'pd.to_datetime'.
pd_raw['Date'] = pd.to_datetime(pd_raw['Date'])

In [9]:
# Rearrange the data into wide format with a pivot table: one row per date,
# one column per country, cells holding the 'fully_vaccinated' count.
# Duplicate (Date, Country) pairs are combined by pivot_table's default
# aggregation (mean).
pivot = pd.pivot_table(
    data=pd_raw,
    index='Date',
    columns='Country',
    values='fully_vaccinated',
)

# Replace the NaN values (date/country pairs without a report) with 0.
# The result is assigned back to `pivot`; the previous `piot` name was a typo
# that discarded the filled table.
pivot = pivot.fillna(0)


In [10]:
# Replace NaN values with 0 and turn the 'Date' index into a regular column.
pivot = pivot.fillna(0)

# Reset only while 'Date' is still the index, so that re-running this cell
# does not insert a spurious 'index' column into the table.
if pivot.index.name == 'Date':
    pivot = pivot.reset_index()

pivot.head()

Country,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,World,Yemen,Zambia,Zimbabwe
0,2020-12-14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-12-15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2020-12-16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020-12-17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020-12-18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Start the plotting table with the dates only; the country columns are
# added to it later.
time_idx = pivot["Date"]
df_plot = time_idx.to_frame(name='Date')
df_plot.head()

Unnamed: 0,Date
0,2020-12-14
1,2020-12-15
2,2020-12-16
3,2020-12-17
4,2020-12-18


In [12]:
# Countries to analyse. Each name is later used as a column of `pivot` and
# looked up in the "Country Name" column of the population table.
country_list = ['India', 'US', 'Italy']

In [13]:
# World population per country, read from the govex/COVID-19 GitHub repository.
path_wp = (
    'https://raw.githubusercontent.com/govex/COVID-19/master/'
    'data_tables/world_pop_by_country.csv'
)
wp = pd.read_csv(filepath_or_buffer=path_wp)
wp.head(n=5)

Unnamed: 0,Country Name,Country Code,2018
0,Aruba,ABW,105845.0
1,Afghanistan,AFG,37172386.0
2,Angola,AGO,30809762.0
3,Albania,ALB,2866376.0
4,Andorra,AND,77006.0


In [14]:
# Percentage of fully vaccinated people in each selected country:
# fully vaccinated count / population (column "2018" of `wp`) * 100.

# Population lookup keyed by country name; if a name occurs more than once
# in `wp`, the first row is used.
population = wp.drop_duplicates("Country Name").set_index("Country Name")["2018"]

# Fail with a readable message when a country has no population entry,
# rather than an IndexError from indexing an empty array.
missing = [name for name in country_list if name not in population.index]
if missing:
    raise KeyError(
        "No population entry in the 'Country Name' column for: " + ", ".join(missing)
    )

for each in country_list:
    df_plot[each] = pivot[each] / population[each] * 100

df_plot.head()




Unnamed: 0,Date,India,US,Italy
0,2020-12-14,0.0,0.0,0.0
1,2020-12-15,0.0,0.0,0.0
2,2020-12-16,0.0,0.0,0.0
3,2020-12-17,0.0,0.0,0.0
4,2020-12-18,0.0,0.0,0.0


In [19]:
# Interactive line chart of the percentages over time, one trace per country.
fig = go.Figure()

# Styling shared by every country trace.
trace_style = dict(mode='markers+lines', opacity=0.9, line_width=1, marker_size=2)

for country in country_list:
    fig.add_trace(
        go.Scatter(x=df_plot.Date, y=df_plot[country], name=country, **trace_style)
    )

# Figure size, axis titles and a range slider below the time axis.
fig.update_layout(
    width=900,
    height=700,
    xaxis_title='Time',
    yaxis_title="Percentage of total population fully-vaccinated(source: John Hopkins vaccinations, linear-scale)",
    xaxis_rangeslider_visible=True,
)

# Linear percentage axis with a fixed range of 0 to 90.
fig.update_yaxes(type='linear', range=[0, 90])

# Choose the browser according to your preference by passing a renderer:

#fig.show(renderer='chrome')
#fig.show(renderer='firefox')

fig.show()

In [16]:
import dash
dash.__version__
import dash_core_components as dcc
import dash_html_components as html

# Dash application: a multi-select country dropdown above the graph.
app = dash.Dash()
app.layout = html.Div([

    html.Label('Multi-Select Country'),

    dcc.Dropdown(
        id='country_drop_down',
        # Entries are generated from country_list so they cannot drift from
        # the country columns that were added to df_plot.
        options=[{'label': name, 'value': name} for name in country_list],
        value=['US'],  # which are pre-selected
        multi=True
    ),

    # Starts out with the static figure built earlier in the notebook.
    dcc.Graph(figure=fig, id='main_window_slope')
])


In [17]:
from dash.dependencies import Input, Output

@app.callback(
    Output('main_window_slope', 'figure'),
    [Input('country_drop_down', 'value')])
def update_figure(country_list):
    """Rebuild the graph for the countries selected in the dropdown.

    Parameters
    ----------
    country_list : list of str or None
        Current value of the 'country_drop_down' component. Each entry is
        used as a column name of ``df_plot``. An empty or missing selection
        produces a figure without traces.

    Returns
    -------
    dict
        Figure description with the keys 'data' (one trace dict per selected
        country) and 'layout', assigned to the 'figure' property of
        'main_window_slope'.
    """
    # A cleared dropdown may deliver None; treat it like an empty selection.
    selected = country_list or []

    # The figure is returned as plain dicts, so nested attribute dicts
    # (line.width, marker.size, axis title) are spelled out explicitly instead
    # of relying on plotly.py's underscore shorthand such as `line_width`.
    traces = [
        dict(x=df_plot.Date,
             y=df_plot[each],
             mode='markers+lines',
             opacity=0.9,
             line={'width': 1},
             marker={'size': 2},
             name=each)
        for each in selected
    ]

    return {
        'data': traces,
        'layout': dict(
            width=1280,
            height=720,
            xaxis={'title': {'text': "Time"},
                   'tickangle': -45,
                   'nticks': 20,
                   'tickfont': dict(size=14, color="#7f7f7f"),
                   },
            yaxis={'title': {'text': "Percentage of total population fully-vaccinated(source johns hopkins csse, linear-scale)"},
                   'type': "linear",
                   # Numeric list (not a string), matching the 0-90 percent
                   # range of the static figure.
                   'range': [0, 90]
                   }
        )
    }

In [18]:
# Start the Dash server for `app` with debug mode switched on and the
# reloader switched off.
app.run_server(debug = True, use_reloader = False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
