## Import Dependencies

In [1]:
import sys
sys.path.append("..")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt

# from helpers import utils, plots
import utils
import plots

## Load Dataset

In [2]:
# Load the COVID vaccination dataset through the project helper and display it.
covid_data = utils.get_covid_data()
covid_data

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population
0,Afghanistan,2020-02-24,,,,39835428.0
1,Afghanistan,2020-02-25,,,,39835428.0
2,Afghanistan,2020-02-26,,,,39835428.0
3,Afghanistan,2020-02-27,,,,39835428.0
4,Afghanistan,2020-02-28,,,,39835428.0
...,...,...,...,...,...,...
123979,Zimbabwe,2021-10-11,5594808.0,3176445.0,2418363.0,15092171.0
123980,Zimbabwe,2021-10-12,5612476.0,3183015.0,2429461.0,15092171.0
123981,Zimbabwe,2021-10-13,5632534.0,3190977.0,2441557.0,15092171.0
123982,Zimbabwe,2021-10-14,5654267.0,3200122.0,2454145.0,15092171.0


### Get list of countries

In [3]:
# Fetch the list of country names from the project helper and print it.
# `countries` is reused later to filter the financial and GNI tables.
countries = utils.get_country_list()
print(countries)

['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Anguilla', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bolivia', 'Bonaire Sint Eustatius and Saba', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'British Virgin Islands', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands', 'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo', 'Cook Islands', 'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Curacao', 'Cyprus', 'Czechia', 'Democratic Republic of Congo', 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'England', 'Equatorial Guinea', 'Estonia', 'Eswatini', 'Ethiopia', 'Faeroe Islands', 'Falkland Islands', 'Fiji', 'Finland', 'France', 'French Polynesia', 'Gabon', 'Gambia', 'Georgia', 'Germany'

## Print Complete DataFrame as HTML

In [4]:
# Render the first five rows of the raw data with the project display helper.
utils.print_complete_dataframe(covid_data.iloc[:5])

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population
0,Afghanistan,2020-02-24,,,,39835428.0
1,Afghanistan,2020-02-25,,,,39835428.0
2,Afghanistan,2020-02-26,,,,39835428.0
3,Afghanistan,2020-02-27,,,,39835428.0
4,Afghanistan,2020-02-28,,,,39835428.0


### Preprocess data

Drop NaNs (Total Vaccinations, People Vaccinated, People Fully Vaccinated, Population), 

Drop Duplicates (Based on Location)

Add Column to represent Percentage Fully Vaccinated 

    Percentage Fully Vaccinated = People Fully Vaccinated / Population * 100

In [5]:
# Apply the preprocessing described in the notes above (NaN removal, one row
# per Location, added "Percentage Fully Vaccinated" column) via the helper.
dataset = utils.preprocess_covid_data(covid_data)
dataset

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
543,Afghanistan,2021-08-20,1.201286e+06,7.705420e+05,4.307440e+05,3.983543e+07,1.081309
1210,Africa,2021-10-15,1.710956e+08,1.048424e+08,6.883170e+07,1.373486e+09,5.011458
1807,Albania,2021-10-13,1.816580e+06,9.707030e+05,8.458770e+05,2.872934e+06,29.442967
2388,Algeria,2021-09-25,1.408292e+07,6.017036e+06,4.032942e+06,4.461663e+07,9.039101
2982,Andorra,2021-09-26,1.020320e+05,5.431200e+04,4.772000e+04,7.735400e+04,61.690410
...,...,...,...,...,...,...,...
121644,Wallis and Futuna,2021-10-12,1.100900e+04,5.749000e+03,5.260000e+03,1.109400e+04,47.413016
122277,World,2021-10-15,6.638294e+09,3.735482e+09,2.823759e+09,7.874966e+09,35.857418
122813,Yemen,2021-09-27,3.561730e+05,3.080250e+05,4.814800e+04,3.049064e+07,0.157911
123370,Zambia,2021-09-07,6.029960e+05,3.110490e+05,2.919470e+05,1.892066e+07,1.543007


## Country-Continental split

Split data for countries and continents into two dataframes

In [6]:
# The helper returns two frames: one for countries, one for continents/aggregates.
countries_data, continents_data = utils.country_continental_split(dataset)
countries_data.head()

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
543,Afghanistan,2021-08-20,1201286.0,770542.0,430744.0,39835428.0,1.081309
1807,Albania,2021-10-13,1816580.0,970703.0,845877.0,2872934.0,29.442967
2388,Algeria,2021-09-25,14082920.0,6017036.0,4032942.0,44616626.0,9.039101
2982,Andorra,2021-09-26,102032.0,54312.0,47720.0,77354.0,61.69041
3575,Angola,2021-10-14,4448653.0,3158501.0,1290152.0,33933611.0,3.801989


In [7]:
# Preview the continent-level frame.
continents_data.head()

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
1210,Africa,2021-10-15,171095600.0,104842400.0,68831700.0,1373486000.0,5.011458
6494,Asia,2021-10-15,4480253000.0,2541010000.0,1841308000.0,4678445000.0,39.357259
37831,Europe,2021-10-15,822524800.0,432536300.0,401660900.0,748963000.0,53.628945
38463,European Union,2021-10-15,578727800.0,304701400.0,286558700.0,447189900.0,64.079864
82449,North America,2021-10-15,649153000.0,353953300.0,294724400.0,596581300.0,49.402225


## Total Vaccinations Administered

### Top 10 countries

In [8]:
# No sort_by is passed, so the helper's default sort column applies.
# NOTE(review): the output looks ordered by "Total Vaccinations" - confirm the default.
top_10_vaccinations_by_countries = utils.sort_and_return_top_k(countries_data)
# .head() displays only the first five rows of the result.
top_10_vaccinations_by_countries.head()

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
23923,China,2021-09-18,2174043000.0,1100842000.0,1022207000.0,1444216000.0,70.779366
51824,India,2021-10-15,971163400.0,693416100.0,277747300.0,1393409000.0,19.932935
118136,United States,2021-10-15,406570900.0,218318100.0,188655200.0,332915100.0,56.667664
16383,Brazil,2021-10-15,254484300.0,155512800.0,103754900.0,213993400.0,48.485072
57545,Japan,2021-10-14,178255500.0,94599320.0,83656180.0,126050800.0,66.367041


In [9]:
# Bar chart of total vaccinations per country.
# NOTE(review): plot_type is FULLY_VACCINATED although the x field is
# "Total Vaccinations" - confirm this is the intended plot type.
plots.plot_bars(top_10_vaccinations_by_countries, 'Total Vaccinations:Q', 'Location:O', plot_type = plots.PLOT_TYPES.FULLY_VACCINATED)

### Total Vaccinations administered per continent

In [10]:
# Rank the continent-level rows by "Total Vaccinations" and display the result.
total_vaccinations_by_continents = utils.sort_and_return_top_k(continents_data, sort_by=['Total Vaccinations'])
total_vaccinations_by_continents

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
6494,Asia,2021-10-15,4480253000.0,2541010000.0,1841308000.0,4678445000.0,39.357259
37831,Europe,2021-10-15,822524800.0,432536300.0,401660900.0,748963000.0,53.628945
82449,North America,2021-10-15,649153000.0,353953300.0,294724400.0,596581300.0,49.402225
38463,European Union,2021-10-15,578727800.0,304701400.0,286558700.0,447189900.0,64.079864
103487,South America,2021-10-15,474395000.0,279974700.0,199527400.0,434260100.0,45.946517
1210,Africa,2021-10-15,171095600.0,104842400.0,68831700.0,1373486000.0,5.011458
84531,Oceania,2021-10-15,40872580.0,23165350.0,17707220.0,43219950.0,40.970016


In [11]:
# Bar chart of total vaccinations per continent.
# NOTE(review): plot_type is FULLY_VACCINATED although the x field is
# "Total Vaccinations" - confirm this is the intended plot type.
plots.plot_bars(total_vaccinations_by_continents, 'Total Vaccinations:Q', 'Location:O', plot_type = plots.PLOT_TYPES.FULLY_VACCINATED)

## Fully Vaccinated Visualization

### Fully Vaccinated Info for Top X Countries 

In [162]:
# Rank countries by "Percentage Fully Vaccinated".
# NOTE(review): the displayed result contains a value above 100 (Gibraltar);
# presumably doses given to non-residents exceed the recorded population - confirm.
full_vaccinations_by_countries = utils.sort_and_return_top_k(countries_data, sort_by=['Percentage Fully Vaccinated'])
full_vaccinations_by_countries

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
44173,Gibraltar,2021-10-15,81966.0,39970.0,39751.0,33691.0,117.986999
89488,Pitcairn,2021-09-07,94.0,47.0,47.0,47.0,100.0
90680,Portugal,2021-10-11,16104710.0,8975593.0,8782671.0,10167923.0,86.376254
116879,United Arab Emirates,2021-10-15,20619734.0,9494889.0,8505838.0,9991083.0,85.134294
21560,Cayman Islands,2021-10-14,109919.0,55780.0,55780.0,66498.0,83.882222
70443,Malta,2021-10-14,848445.0,425218.0,424538.0,514564.0,82.504412
51198,Iceland,2021-10-14,562426.0,282338.0,277837.0,343360.0,80.917113
99953,Singapore,2021-10-14,9787176.0,4730374.0,4674723.0,5896684.0,79.27715
105302,Spain,2021-10-14,70982052.0,37868453.0,37029165.0,46745211.0,79.214885
91245,Qatar,2021-09-11,4578600.0,2360308.0,2218292.0,2930524.0,75.696087


In [13]:
# Bar chart of the fully-vaccinated percentage for the ranked countries.
plots.plot_bars(full_vaccinations_by_countries, 'Percentage Fully Vaccinated:Q', 'Location:O', plot_type = plots.PLOT_TYPES.FULLY_VACCINATED)


### Fully Vaccinated Statistics by Continent

In [14]:
# Rank the continent-level rows by "Percentage Fully Vaccinated".
full_vaccinations_by_continents = utils.sort_and_return_top_k(continents_data, sort_by=['Percentage Fully Vaccinated'])
full_vaccinations_by_continents

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
38463,European Union,2021-10-15,578727800.0,304701400.0,286558700.0,447189900.0,64.079864
37831,Europe,2021-10-15,822524800.0,432536300.0,401660900.0,748963000.0,53.628945
82449,North America,2021-10-15,649153000.0,353953300.0,294724400.0,596581300.0,49.402225
103487,South America,2021-10-15,474395000.0,279974700.0,199527400.0,434260100.0,45.946517
84531,Oceania,2021-10-15,40872580.0,23165350.0,17707220.0,43219950.0,40.970016
6494,Asia,2021-10-15,4480253000.0,2541010000.0,1841308000.0,4678445000.0,39.357259
1210,Africa,2021-10-15,171095600.0,104842400.0,68831700.0,1373486000.0,5.011458


In [15]:
# Bar chart of the fully-vaccinated percentage per continent.
plots.plot_bars(full_vaccinations_by_continents, 'Percentage Fully Vaccinated:Q', 'Location:O', plot_type = plots.PLOT_TYPES.FULLY_VACCINATED)

## At least One Dose Visualization

### At least One Dose Info for Top X Countries


In [16]:
# Rank countries by the absolute "People Vaccinated" count (not a percentage).
people_vaccinated_by_countries = utils.sort_and_return_top_k(countries_data, sort_by=['People Vaccinated'])
people_vaccinated_by_countries

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
23923,China,2021-09-18,2174043000.0,1100842000.0,1022207000.0,1444216000.0,70.779366
51824,India,2021-10-15,971163400.0,693416100.0,277747300.0,1393409000.0,19.932935
118136,United States,2021-10-15,406570900.0,218318100.0,188655200.0,332915100.0,56.667664
16383,Brazil,2021-10-15,254484300.0,155512800.0,103754900.0,213993400.0,48.485072
52417,Indonesia,2021-10-15,166861700.0,105464700.0,61397060.0,276361800.0,22.216188
57545,Japan,2021-10-14,178255500.0,94599320.0,83656180.0,126050800.0,66.367041
72608,Mexico,2021-10-14,110574500.0,67957150.0,50772620.0,130262200.0,38.977242
85727,Pakistan,2021-10-12,93551190.0,64947700.0,34809850.0,225199900.0,15.457309
43313,Germany,2021-10-15,109892000.0,57231090.0,54658270.0,83900470.0,65.146564
114495,Turkey,2021-10-15,113709500.0,54799550.0,47119100.0,85042740.0,55.406377


In [17]:
# Bar chart of people with at least one dose for the ranked countries.
plots.plot_bars(people_vaccinated_by_countries, 'People Vaccinated:Q', 'Location:O', plot_type = plots.PLOT_TYPES.PEOPLE_VACCINATED)


In [21]:
# Load the financial indicators via the project helper; work on a copy so the
# loaded frame stays untouched by the cleaning steps below.
financial_data = utils.get_financial_data()
financial_df = financial_data.copy()

In [22]:
# Inspect the raw column names before renaming them.
financial_df.columns

Index(['Location', 'Population', 'Polulation Density', 'GDP per capita',
       'Extreme Poverty', 'HDI'],
      dtype='object')

In [23]:
# Rename by explicit mapping: fixes the "Polulation Density" typo and shortens or
# expands the indicator names; columns not listed keep their names.
financial_df = financial_df.rename(columns={
    'Polulation Density': 'Population Density',
    'GDP per capita': 'GDP',
    'HDI': 'Human Development Index',
})
financial_df

Unnamed: 0,Location,Population,Population Density,GDP,Extreme Poverty,Human Development Index
0,Afghanistan,39835428.0,54.422,1803.987,,0.511
1,Afghanistan,39835428.0,54.422,1803.987,,0.511
2,Afghanistan,39835428.0,54.422,1803.987,,0.511
3,Afghanistan,39835428.0,54.422,1803.987,,0.511
4,Afghanistan,39835428.0,54.422,1803.987,,0.511
...,...,...,...,...,...,...
123979,Zimbabwe,15092171.0,42.729,1899.775,21.4,0.571
123980,Zimbabwe,15092171.0,42.729,1899.775,21.4,0.571
123981,Zimbabwe,15092171.0,42.729,1899.775,21.4,0.571
123982,Zimbabwe,15092171.0,42.729,1899.775,21.4,0.571


In [24]:
# Summarise dtypes and non-null counts per column.
financial_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 123984 entries, 0 to 123983
Data columns (total 6 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   Location                 123984 non-null  object 
 1   Population               123111 non-null  float64
 2   Population Density       114518 non-null  float64
 3   GDP                      109948 non-null  float64
 4   Extreme Poverty          73606 non-null   float64
 5   Human Development Index  109663 non-null  float64
dtypes: float64(5), object(1)
memory usage: 5.7+ MB


In [25]:
# Count missing values per column (before de-duplicating by Location).
financial_df.isnull().sum()

Location                       0
Population                   873
Population Density          9466
GDP                        14036
Extreme Poverty            50378
Human Development Index    14321
dtype: int64

In [26]:
# Collapse to one row per Location, keeping the last occurrence of each.
financial_df = financial_df.drop_duplicates(subset='Location', keep='last')
financial_df

Unnamed: 0,Location,Population,Population Density,GDP,Extreme Poverty,Human Development Index
599,Afghanistan,3.983543e+07,54.422,1803.987,,0.511
1210,Africa,1.373486e+09,,,,
1809,Albania,2.872934e+06,104.871,11803.431,1.1,0.795
2408,Algeria,4.461663e+07,17.348,13913.839,0.5,0.748
3001,Andorra,7.735400e+04,163.755,,,0.868
...,...,...,...,...,...,...
121644,Wallis and Futuna,1.109400e+04,,,,
122277,World,7.874966e+09,58.045,15469.207,10.0,0.737
122831,Yemen,3.049064e+07,53.508,1479.147,18.8,0.470
123408,Zambia,1.892066e+07,22.995,3689.251,57.5,0.584


In [27]:
# Count missing values per column again, now with one row per Location.
financial_df.isnull().sum()

Location                     0
Population                   2
Population Density          25
GDP                         38
Extreme Poverty            107
Human Development Index     43
dtype: int64

## Discussion
29 countries do not have GDP information. These countries cannot be used in the analysis of access to vaccines by GDP, so they are dropped from the dataset.

In [28]:
# Keep only the rows whose Location is one of the names in `countries`.
countries_financial_df = financial_df.loc[financial_df['Location'].isin(countries)]
countries_financial_df

Unnamed: 0,Location,Population,Population Density,GDP,Extreme Poverty,Human Development Index
599,Afghanistan,39835428.0,54.422,1803.987,,0.511
1809,Albania,2872934.0,104.871,11803.431,1.1,0.795
2408,Algeria,44616626.0,17.348,13913.839,0.5,0.748
3001,Andorra,77354.0,163.755,,,0.868
3576,Angola,33933611.0,23.890,5819.495,,0.581
...,...,...,...,...,...,...
121440,Vietnam,98168829.0,308.127,6171.884,2.0,0.704
121644,Wallis and Futuna,11094.0,,,,
122831,Yemen,30490639.0,53.508,1479.147,18.8,0.470
123408,Zambia,18920657.0,22.995,3689.251,57.5,0.584


In [29]:
# Preview the countries that have a GDP value (nothing is assigned here).
countries_financial_df.loc[countries_financial_df['GDP'].notna()]

Unnamed: 0,Location,Population,Population Density,GDP,Extreme Poverty,Human Development Index
599,Afghanistan,39835428.0,54.422,1803.987,,0.511
1809,Albania,2872934.0,104.871,11803.431,1.1,0.795
2408,Algeria,44616626.0,17.348,13913.839,0.5,0.748
3576,Angola,33933611.0,23.890,5819.495,,0.581
4412,Antigua and Barbuda,98728.0,231.845,21490.943,,0.778
...,...,...,...,...,...,...
120808,Venezuela,28704947.0,36.253,16745.022,,0.711
121440,Vietnam,98168829.0,308.127,6171.884,2.0,0.704
122831,Yemen,30490639.0,53.508,1479.147,18.8,0.470
123408,Zambia,18920657.0,22.995,3689.251,57.5,0.584


In [30]:
# Drop the countries without GDP information.
countries_financial_df = countries_financial_df.loc[countries_financial_df['GDP'].notna()]

In [31]:
# Re-display the per-country vaccination frame before joining it with the financial data.
countries_data

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
543,Afghanistan,2021-08-20,1201286.0,770542.0,430744.0,39835428.0,1.081309
1807,Albania,2021-10-13,1816580.0,970703.0,845877.0,2872934.0,29.442967
2388,Algeria,2021-09-25,14082920.0,6017036.0,4032942.0,44616626.0,9.039101
2982,Andorra,2021-09-26,102032.0,54312.0,47720.0,77354.0,61.690410
3575,Angola,2021-10-14,4448653.0,3158501.0,1290152.0,33933611.0,3.801989
...,...,...,...,...,...,...,...
121439,Vietnam,2021-10-14,59003239.0,41811429.0,17191810.0,98168829.0,17.512494
121644,Wallis and Futuna,2021-10-12,11009.0,5749.0,5260.0,11094.0,47.413016
122813,Yemen,2021-09-27,356173.0,308025.0,48148.0,30490639.0,0.157911
123370,Zambia,2021-09-07,602996.0,311049.0,291947.0,18920657.0,1.543007


In [32]:
# Restrict the vaccination data to the countries that still have a GDP value.
countries_with_valid_gdp = countries_financial_df['Location'].tolist()
access_vaccine_data = countries_data.loc[countries_data['Location'].isin(countries_with_valid_gdp)]
access_vaccine_data

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
543,Afghanistan,2021-08-20,1201286.0,770542.0,430744.0,39835428.0,1.081309
1807,Albania,2021-10-13,1816580.0,970703.0,845877.0,2872934.0,29.442967
2388,Algeria,2021-09-25,14082920.0,6017036.0,4032942.0,44616626.0,9.039101
3575,Angola,2021-10-14,4448653.0,3158501.0,1290152.0,33933611.0,3.801989
4410,Antigua and Barbuda,2021-10-13,99846.0,54758.0,45088.0,98728.0,45.668909
...,...,...,...,...,...,...,...
120801,Venezuela,2021-10-08,16127242.0,9926613.0,6190629.0,28704947.0,21.566418
121439,Vietnam,2021-10-14,59003239.0,41811429.0,17191810.0,98168829.0,17.512494
122813,Yemen,2021-09-27,356173.0,308025.0,48148.0,30490639.0,0.157911
123370,Zambia,2021-09-07,602996.0,311049.0,291947.0,18920657.0,1.543007


In [181]:
# Combine vaccination and financial indicators per Location.
# "Population" exists in both frames; the financial copy is dropped before the
# merge so the vaccination frame's column is the one kept, without suffixes.
# how='outer' keeps locations that appear in only one of the two frames.
countries_vaccines_financial_data = pd.merge(
    access_vaccine_data,
    countries_financial_df.drop(columns=['Population']),
    on='Location',
    how='outer',
)

In [182]:
# Spot-check the merged row for a single country.
countries_vaccines_financial_data[countries_vaccines_financial_data['Location'] == 'Belgium']

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated,Population Density,GDP,Extreme Poverty,Human Development Index
16,Belgium,2021-10-14,16741664.0,8645665.0,8489064.0,11632334.0,72.978166,375.564,42658.576,0.2,0.931


In [183]:
# List the distinct GDP values present after the merge.
countries_vaccines_financial_data['GDP'].unique()

array([  1803.987,  11803.431,  13913.839,   5819.495,  21490.943,
        18933.907,   8787.58 ,  35973.781,  44648.71 ,  45436.686,
        15847.419,  27717.847,  43290.705,   3523.984,  16978.068,
        17167.967,  42658.576,   7824.362,   2064.236,  50669.315,
         8708.597,   6885.829,  11713.895,  15807.374,  14103.452,
        71809.251,  18563.307,   1703.102,   3645.07 ,   3364.926,
        44017.591,   6222.554,  49903.029,    661.24 ,   1768.153,
        22767.037,  15308.712,  13254.949,   1413.89 ,   4881.406,
        15524.995,   3601.006,  22669.797,  32415.132,  32605.906,
          808.133,  46682.515,   2705.406,   9673.367,  14600.861,
        10581.936,  10550.206,   7292.458,  22604.873,  29481.252,
         7738.975,   1729.927,   8702.975,  40585.721,  38605.671,
        16562.413,   1561.767,   9745.079,  45229.245,   4227.63 ,
        24574.382,  13593.877,   7423.808,   1998.926,   1548.675,
         7435.047,   1653.173,   4541.795,  56054.92 ,  26777.

### Countries GNI Classification
Classification is based on the criteria defined by the World Bank, as described here - https://datahelpdesk.worldbank.org/knowledgebase/articles/906519


In [184]:
# Read the GNI-per-capita CSV, keeping the country name and the three most recent years.
# skiprows=3 presumably skips metadata lines above the header row - confirm against the file.
gni_data = pd.read_csv('gni_per_capita.csv', skiprows=3, usecols=['Country Name', '2018', '2019', '2020'])
gni_data

Unnamed: 0,Country Name,2018,2019,2020
0,Aruba,,,
1,Africa Eastern and Southern,3540.132713,3589.956043,3454.236460
2,Afghanistan,2100.000000,2190.000000,2110.000000
3,Africa Western and Central,4040.129229,4167.624990,4066.878688
4,Angola,6550.000000,6370.000000,6020.000000
...,...,...,...,...
261,Kosovo,11340.000000,12240.000000,11650.000000
262,"Yemen, Rep.",,,
263,South Africa,12520.000000,12640.000000,11870.000000
264,Zambia,3550.000000,3560.000000,3360.000000


In [185]:
# Align the key column name with the other frames, which use "Location".
gni_data = gni_data.rename(columns={'Country Name': 'Location'})
gni_data

Unnamed: 0,Location,2018,2019,2020
0,Aruba,,,
1,Africa Eastern and Southern,3540.132713,3589.956043,3454.236460
2,Afghanistan,2100.000000,2190.000000,2110.000000
3,Africa Western and Central,4040.129229,4167.624990,4066.878688
4,Angola,6550.000000,6370.000000,6020.000000
...,...,...,...,...
261,Kosovo,11340.000000,12240.000000,11650.000000
262,"Yemen, Rep.",,,
263,South Africa,12520.000000,12640.000000,11870.000000
264,Zambia,3550.000000,3560.000000,3360.000000


In [186]:
# Preview only the yearly GNI columns.
gni_data[['2018', '2019', '2020']]

Unnamed: 0,2018,2019,2020
0,,,
1,3540.132713,3589.956043,3454.236460
2,2100.000000,2190.000000,2110.000000
3,4040.129229,4167.624990,4066.878688
4,6550.000000,6370.000000,6020.000000
...,...,...,...
261,11340.000000,12240.000000,11650.000000
262,,,
263,12520.000000,12640.000000,11870.000000
264,3550.000000,3560.000000,3360.000000


In [187]:
# Work on an independent copy of the yearly columns so the cleaning below
# does not modify gni_data.
gni_df = gni_data[['2018', '2019', '2020']].copy()
gni_df

Unnamed: 0,2018,2019,2020
0,,,
1,3540.132713,3589.956043,3454.236460
2,2100.000000,2190.000000,2110.000000
3,4040.129229,4167.624990,4066.878688
4,6550.000000,6370.000000,6020.000000
...,...,...,...
261,11340.000000,12240.000000,11650.000000
262,,,
263,12520.000000,12640.000000,11870.000000
264,3550.000000,3560.000000,3360.000000


In [188]:
# Check the dtype of each yearly column.
gni_df.dtypes.to_dict()

{'2018': dtype('float64'), '2019': dtype('float64'), '2020': dtype('float64')}

In [189]:
# Count missing values per year before any cleaning.
gni_df.isnull().sum()

2018    26
2019    28
2020    73
dtype: int64

In [190]:
# Display the yearly GNI values before dropping empty rows.
gni_df

Unnamed: 0,2018,2019,2020
0,,,
1,3540.132713,3589.956043,3454.236460
2,2100.000000,2190.000000,2110.000000
3,4040.129229,4167.624990,4066.878688
4,6550.000000,6370.000000,6020.000000
...,...,...,...
261,11340.000000,12240.000000,11650.000000
262,,,
263,12520.000000,12640.000000,11870.000000
264,3550.000000,3560.000000,3360.000000


In [191]:
# Remove the rows that have no GNI value in any of the three years.
gni_df = gni_df.dropna(how='all')

In [192]:
# Display the frame after dropping the all-NaN rows.
gni_df

Unnamed: 0,2018,2019,2020
1,3540.132713,3589.956043,3454.236460
2,2100.000000,2190.000000,2110.000000
3,4040.129229,4167.624990,4066.878688
4,6550.000000,6370.000000,6020.000000
5,13530.000000,14040.000000,13580.000000
...,...,...,...
260,6230.000000,6480.000000,
261,11340.000000,12240.000000,11650.000000
263,12520.000000,12640.000000,11870.000000
264,3550.000000,3560.000000,3360.000000


In [193]:
# Count the remaining missing values per year.
gni_df.isnull().sum()

2018     0
2019     2
2020    47
dtype: int64

In [194]:
# Display the frame once more before the missing 2020 values are filled.
gni_df

Unnamed: 0,2018,2019,2020
1,3540.132713,3589.956043,3454.236460
2,2100.000000,2190.000000,2110.000000
3,4040.129229,4167.624990,4066.878688
4,6550.000000,6370.000000,6020.000000
5,13530.000000,14040.000000,13580.000000
...,...,...,...
260,6230.000000,6480.000000,
261,11340.000000,12240.000000,11650.000000
263,12520.000000,12640.000000,11870.000000
264,3550.000000,3560.000000,3360.000000


In [195]:
# Fill missing 2020 GNI values with the 2019 value of the same row.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: that chained in-place form operates on a temporary under pandas
# Copy-on-Write and would leave gni_df unchanged.
gni_df['2020'] = gni_df['2020'].fillna(gni_df['2019'])

In [196]:
# Fill any 2020 GNI values that are still missing with the 2018 value of the same row.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: that chained in-place form operates on a temporary under pandas
# Copy-on-Write and would leave gni_df unchanged.
gni_df['2020'] = gni_df['2020'].fillna(gni_df['2018'])

In [197]:
# Display the frame after back-filling the 2020 column.
gni_df

Unnamed: 0,2018,2019,2020
1,3540.132713,3589.956043,3454.236460
2,2100.000000,2190.000000,2110.000000
3,4040.129229,4167.624990,4066.878688
4,6550.000000,6370.000000,6020.000000
5,13530.000000,14040.000000,13580.000000
...,...,...,...
260,6230.000000,6480.000000,6480.000000
261,11340.000000,12240.000000,11650.000000
263,12520.000000,12640.000000,11870.000000
264,3550.000000,3560.000000,3360.000000


In [198]:
# Take the Location column from the full table; it shares the row index with gni_df.
gni_location = gni_data['Location']
gni_location

0                            Aruba
1      Africa Eastern and Southern
2                      Afghanistan
3       Africa Western and Central
4                           Angola
                  ...             
261                         Kosovo
262                    Yemen, Rep.
263                   South Africa
264                         Zambia
265                       Zimbabwe
Name: Location, Length: 266, dtype: object

In [199]:
# Re-attach the names to the cleaned yearly values. join aligns on the row
# index, so only the rows still present in gni_df are kept.
gni = gni_df.join(gni_location)
gni

Unnamed: 0,2018,2019,2020,Location
1,3540.132713,3589.956043,3454.236460,Africa Eastern and Southern
2,2100.000000,2190.000000,2110.000000,Afghanistan
3,4040.129229,4167.624990,4066.878688,Africa Western and Central
4,6550.000000,6370.000000,6020.000000,Angola
5,13530.000000,14040.000000,13580.000000,Albania
...,...,...,...,...
260,6230.000000,6480.000000,6480.000000,Samoa
261,11340.000000,12240.000000,11650.000000,Kosovo
263,12520.000000,12640.000000,11870.000000,South Africa
264,3550.000000,3560.000000,3360.000000,Zambia


In [200]:
# Confirm that the 2020 column has no missing values left.
gni.isnull().sum()

2018        0
2019        2
2020        0
Location    0
dtype: int64

In [201]:
# Keep only the rows whose Location exactly matches a name in `countries`.
# NOTE(review): World Bank spellings such as "Yemen, Rep." presumably differ from
# the names in `countries`, so such rows would be dropped here - confirm.
gni = gni[gni['Location'].isin(countries)]
gni

Unnamed: 0,2018,2019,2020,Location
2,2100.0,2190.0,2110.0,Afghanistan
4,6550.0,6370.0,6020.0,Angola
5,13530.0,14040.0,13580.0,Albania
8,68810.0,70300.0,70300.0,United Arab Emirates
9,22470.0,22080.0,20210.0,Argentina
...,...,...,...,...
260,6230.0,6480.0,6480.0,Samoa
261,11340.0,12240.0,11650.0,Kosovo
263,12520.0,12640.0,11870.0,South Africa
264,3550.0,3560.0,3360.0,Zambia


In [202]:
# Preview with Location as the first column (display only; gni is not modified).
gni[['Location', '2018', '2019', '2020']]

Unnamed: 0,Location,2018,2019,2020
2,Afghanistan,2100.0,2190.0,2110.0
4,Angola,6550.0,6370.0,6020.0
5,Albania,13530.0,14040.0,13580.0
8,United Arab Emirates,68810.0,70300.0,70300.0
9,Argentina,22470.0,22080.0,20210.0
...,...,...,...,...
260,Samoa,6230.0,6480.0,6480.0
261,Kosovo,11340.0,12240.0,11650.0
263,South Africa,12520.0,12640.0,11870.0
264,Zambia,3550.0,3560.0,3360.0


In [203]:
# Keep the back-filled 2020 value per Location and expose it as "GNI".
gni_2020 = gni[['Location', '2020']].rename(columns={'2020': 'GNI'})
gni_2020

Unnamed: 0,Location,GNI
2,Afghanistan,2110.0
4,Angola,6020.0
5,Albania,13580.0
8,United Arab Emirates,70300.0
9,Argentina,20210.0
...,...,...
260,Samoa,6480.0
261,Kosovo,11650.0
263,South Africa,11870.0
264,Zambia,3360.0


In [204]:
# Spot-check the GNI row for a single location.
gni_2020[gni_2020['Location'] == 'Curacao']

Unnamed: 0,Location,GNI
51,Curacao,26620.0


In [205]:
# Attach GNI to the vaccination/financial data; the inner join keeps only the
# locations present in both frames.
vax_gni = pd.merge(
    countries_vaccines_financial_data,
    gni_2020,
    how='inner',
    on='Location',
)

In [206]:
# Display the merged vaccination, financial and GNI data.
vax_gni

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated,Population Density,GDP,Extreme Poverty,Human Development Index,GNI
0,Afghanistan,2021-08-20,1201286.0,770542.0,430744.0,39835428.0,1.081309,54.422,1803.987,,0.511,2110.0
1,Albania,2021-10-13,1816580.0,970703.0,845877.0,2872934.0,29.442967,104.871,11803.431,1.1,0.795,13580.0
2,Algeria,2021-09-25,14082920.0,6017036.0,4032942.0,44616626.0,9.039101,17.348,13913.839,0.5,0.748,11010.0
3,Angola,2021-10-14,4448653.0,3158501.0,1290152.0,33933611.0,3.801989,23.890,5819.495,,0.581,6020.0
4,Antigua and Barbuda,2021-10-13,99846.0,54758.0,45088.0,98728.0,45.668909,231.845,21490.943,,0.778,18610.0
...,...,...,...,...,...,...,...,...,...,...,...,...
158,Uzbekistan,2021-10-07,21978290.0,12025985.0,4941860.0,33935765.0,14.562395,76.134,6253.104,,0.720,7350.0
159,Vanuatu,2021-10-12,87583.0,59215.0,28368.0,314464.0,9.021064,22.662,2921.909,13.2,0.609,2880.0
160,Vietnam,2021-10-14,59003239.0,41811429.0,17191810.0,98168829.0,17.512494,308.127,6171.884,2.0,0.704,8200.0
161,Zambia,2021-09-07,602996.0,311049.0,291947.0,18920657.0,1.543007,22.995,3689.251,57.5,0.584,3360.0


Country Income Classification

The classification is based on the World Bank's income threshold classification for 2020.

| Class              |  Income Level            |
|--------------------|--------------------------|
| Low income         | < 1,036                  |
|Lower-middle income | 1,036 – 4,045            |
|Upper-middle income | 4,046 - 12,535           |
|High income         | > 12,535                 |


In [207]:
def add_income_class(row):
    """Return the income class label for a row based on its 'GNI' value.

    Thresholds follow the table above:
        GNI < 1,036              -> 'Low Income'
        1,036 <= GNI <= 4,045    -> 'Lower-Middle Income'
        4,045 < GNI <= 12,535    -> 'Upper-Middle Income'
        GNI > 12,535             -> 'High Income'

    Parameters
    ----------
    row : mapping
        Anything indexable with 'GNI' (e.g. a DataFrame row passed by
        ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    str or None
        The class label, or None when the GNI value is missing.
    """
    gni = row['GNI']
    # A missing GNI satisfies none of the comparisons below; return None
    # explicitly instead of failing on an unassigned result.
    if pd.isna(gni):
        return None
    if gni < 1036:
        return 'Low Income'
    # The lower bound is inclusive: a GNI of exactly 1,036 is lower-middle income.
    if gni <= 4045:
        return 'Lower-Middle Income'
    if gni <= 12535:
        return 'Upper-Middle Income'
    return 'High Income'

In [208]:
# Classify every row (axis=1 passes one row at a time to add_income_class).
vax_gni['Income Class'] = vax_gni.apply(add_income_class, axis=1)

In [209]:
# Display the data with the new "Income Class" column.
vax_gni

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated,Population Density,GDP,Extreme Poverty,Human Development Index,GNI,Income Class
0,Afghanistan,2021-08-20,1201286.0,770542.0,430744.0,39835428.0,1.081309,54.422,1803.987,,0.511,2110.0,Lower-Middle Income
1,Albania,2021-10-13,1816580.0,970703.0,845877.0,2872934.0,29.442967,104.871,11803.431,1.1,0.795,13580.0,High Income
2,Algeria,2021-09-25,14082920.0,6017036.0,4032942.0,44616626.0,9.039101,17.348,13913.839,0.5,0.748,11010.0,Upper-Middle Income
3,Angola,2021-10-14,4448653.0,3158501.0,1290152.0,33933611.0,3.801989,23.890,5819.495,,0.581,6020.0,Upper-Middle Income
4,Antigua and Barbuda,2021-10-13,99846.0,54758.0,45088.0,98728.0,45.668909,231.845,21490.943,,0.778,18610.0,High Income
...,...,...,...,...,...,...,...,...,...,...,...,...,...
158,Uzbekistan,2021-10-07,21978290.0,12025985.0,4941860.0,33935765.0,14.562395,76.134,6253.104,,0.720,7350.0,Upper-Middle Income
159,Vanuatu,2021-10-12,87583.0,59215.0,28368.0,314464.0,9.021064,22.662,2921.909,13.2,0.609,2880.0,Lower-Middle Income
160,Vietnam,2021-10-14,59003239.0,41811429.0,17191810.0,98168829.0,17.512494,308.127,6171.884,2.0,0.704,8200.0,Upper-Middle Income
161,Zambia,2021-09-07,602996.0,311049.0,291947.0,18920657.0,1.543007,22.995,3689.251,57.5,0.584,3360.0,Lower-Middle Income


In [210]:
# Check column dtypes before charting.
vax_gni.dtypes.to_dict()

{'Location': dtype('O'),
 'Collection Date': dtype('O'),
 'Total Vaccinations': dtype('float64'),
 'People Vaccinated': dtype('float64'),
 'People Fully Vaccinated': dtype('float64'),
 'Population': dtype('float64'),
 'Percentage Fully Vaccinated': dtype('float64'),
 'Population Density': dtype('float64'),
 'GDP': dtype('float64'),
 'Extreme Poverty': dtype('float64'),
 'Human Development Index': dtype('float64'),
 'GNI': dtype('float64'),
 'Income Class': dtype('O')}

In [211]:
# Scatter of the fully-vaccinated percentage against GNI: colour encodes the
# income class and marker size the Human Development Index.
alt.Chart(vax_gni).mark_circle().encode(
    x=alt.X('GNI', scale=alt.Scale(zero=False)),
    y=alt.Y('Percentage Fully Vaccinated', scale=alt.Scale(zero=False, padding=1)),
    color=alt.Color('Income Class', sort=['High Income', 'Upper-Middle Income', 'Lower-Middle Income']),
    size='Human Development Index',
    tooltip=['Location', 'GNI', 'Income Class', 'Total Vaccinations', 'Percentage Fully Vaccinated'],
).interactive().properties(
    height=400,
    width=700,
    title='A graph of Percentage of Fully Vaccinated vs Gross National Income grouped by Income Classes',
)

### Insights on Percentage Vaccinated vs Population size
The graph of Percentage Fully Vaccinated vs Population shows that the percentage vaccinated is almost independent of population size and instead depends more on a country's access to vaccines. This is seen from the outliers China and India, which have very similar population sizes (1,444,216,102 and 1,393,409,033 respectively) but a wide gap in their fully vaccinated percentages: 70.78% for China and 19.93% for India.

In [218]:
# Scatter of the fully-vaccinated percentage against population: colour encodes
# the income class and marker size the Human Development Index.
alt.Chart(vax_gni).mark_circle().encode(
    x=alt.X('Population', scale=alt.Scale(zero=False)),
    y=alt.Y('Percentage Fully Vaccinated', scale=alt.Scale(zero=False, padding=1)),
    color=alt.Color('Income Class', sort=['High Income', 'Upper-Middle Income', 'Lower-Middle Income']),
    size='Human Development Index',
    tooltip=['Location', 'GNI', 'Income Class', 'Percentage Fully Vaccinated', 'Population'],
).interactive().properties(
    height=400,
    width=700,
    title='A graph of Percentage of Fully Vaccinated vs Country Population',
)





In [213]:
# NOTE(review): scratch example on the iris sample dataset, not part of the
# vaccination analysis; the import sits here rather than in the imports cell
# at the top of the notebook.
from vega_datasets import data
source = data.iris()
source

Unnamed: 0,sepalLength,sepalWidth,petalLength,petalWidth,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [127]:
# Check column dtypes of the iris sample data.
source.dtypes.to_dict()

{'sepalLength': dtype('float64'),
 'sepalWidth': dtype('float64'),
 'petalLength': dtype('float64'),
 'petalWidth': dtype('float64'),
 'species': dtype('O')}

In [159]:
# Reference scatter on the iris sample data, using the same encoding pattern
# (non-zero scales, colour by category, size by a numeric column) as the
# vaccination charts.
alt.Chart(source).mark_circle().encode(
    alt.X('sepalLength', scale=alt.Scale(zero=False)),
    alt.Y('sepalWidth', scale=alt.Scale(zero=False, padding=1)),
    color='species',
    size='petalWidth'
)