## Import Dependencies

In [1]:
import sys
sys.path.append("..")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt

import utils, plots

## Load Dataset

In [2]:
# Load the COVID-19 vaccination dataset through the project helper and show
# it as the cell output.
covid_data = utils.get_covid_data()
covid_data

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population
0,Afghanistan,2020-02-24,,,,39835428.0
1,Afghanistan,2020-02-25,,,,39835428.0
2,Afghanistan,2020-02-26,,,,39835428.0
3,Afghanistan,2020-02-27,,,,39835428.0
4,Afghanistan,2020-02-28,,,,39835428.0
...,...,...,...,...,...,...
123979,Zimbabwe,2021-10-11,5594808.0,3176445.0,2418363.0,15092171.0
123980,Zimbabwe,2021-10-12,5612476.0,3183015.0,2429461.0,15092171.0
123981,Zimbabwe,2021-10-13,5632534.0,3190977.0,2441557.0,15092171.0
123982,Zimbabwe,2021-10-14,5654267.0,3200122.0,2454145.0,15092171.0


### Get list of countries

In [3]:
# Fetch the list of location names from the project helper and print it as
# plain text.
countries = utils.get_country_list()
print(countries)

['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Anguilla', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bolivia', 'Bonaire Sint Eustatius and Saba', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'British Virgin Islands', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands', 'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo', 'Cook Islands', 'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Curacao', 'Cyprus', 'Czechia', 'Democratic Republic of Congo', 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'England', 'Equatorial Guinea', 'Estonia', 'Eswatini', 'Ethiopia', 'Faeroe Islands', 'Falkland Islands', 'Fiji', 'Finland', 'France', 'French Polynesia', 'Gabon', 'Gambia', 'Georgia', 'Germany'

## Print Complete DataFrame as HTML

In [4]:
# Pass the first five rows to the project's complete-DataFrame printer.
utils.print_complete_dataframe(covid_data.head(5))

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population
0,Afghanistan,2020-02-24,,,,39835428.0
1,Afghanistan,2020-02-25,,,,39835428.0
2,Afghanistan,2020-02-26,,,,39835428.0
3,Afghanistan,2020-02-27,,,,39835428.0
4,Afghanistan,2020-02-28,,,,39835428.0


### Preprocess data

Drop NaNs (Total Vaccinations, People Vaccinated, People Fully Vaccinated, Population)

Drop Duplicates (Based on Location)

Add Column to represent Percentage Fully Vaccinated 

    Percentage Fully Vaccinated = People Fully Vaccinated / Population * 100

In [5]:
# Run the preprocessing steps described in the notes above. The displayed
# result includes the derived 'Percentage Fully Vaccinated' column.
dataset = utils.preprocess_covid_data(covid_data)
dataset

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
543,Afghanistan,2021-08-20,1.201286e+06,7.705420e+05,4.307440e+05,3.983543e+07,1.081309
1210,Africa,2021-10-15,1.710956e+08,1.048424e+08,6.883170e+07,1.373486e+09,5.011458
1807,Albania,2021-10-13,1.816580e+06,9.707030e+05,8.458770e+05,2.872934e+06,29.442967
2388,Algeria,2021-09-25,1.408292e+07,6.017036e+06,4.032942e+06,4.461663e+07,9.039101
2982,Andorra,2021-09-26,1.020320e+05,5.431200e+04,4.772000e+04,7.735400e+04,61.690410
...,...,...,...,...,...,...,...
121644,Wallis and Futuna,2021-10-12,1.100900e+04,5.749000e+03,5.260000e+03,1.109400e+04,47.413016
122277,World,2021-10-15,6.638294e+09,3.735482e+09,2.823759e+09,7.874966e+09,35.857418
122813,Yemen,2021-09-27,3.561730e+05,3.080250e+05,4.814800e+04,3.049064e+07,0.157911
123370,Zambia,2021-09-07,6.029960e+05,3.110490e+05,2.919470e+05,1.892066e+07,1.543007


## Country-Continental split

Split data for countries and continents into two dataframes

In [6]:
# Split the preprocessed table into a country-level frame and a
# continent-level (aggregate) frame, then preview the country-level one.
countries_data, continents_data = utils.country_continental_split(dataset)
countries_data.head()

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
543,Afghanistan,2021-08-20,1201286.0,770542.0,430744.0,39835428.0,1.081309
1807,Albania,2021-10-13,1816580.0,970703.0,845877.0,2872934.0,29.442967
2388,Algeria,2021-09-25,14082920.0,6017036.0,4032942.0,44616626.0,9.039101
2982,Andorra,2021-09-26,102032.0,54312.0,47720.0,77354.0,61.69041
3575,Angola,2021-10-14,4448653.0,3158501.0,1290152.0,33933611.0,3.801989


In [7]:
# Preview the continent-level frame returned by the split.
continents_data.head()

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
1210,Africa,2021-10-15,171095600.0,104842400.0,68831700.0,1373486000.0,5.011458
6494,Asia,2021-10-15,4480253000.0,2541010000.0,1841308000.0,4678445000.0,39.357259
37831,Europe,2021-10-15,822524800.0,432536300.0,401660900.0,748963000.0,53.628945
38463,European Union,2021-10-15,578727800.0,304701400.0,286558700.0,447189900.0,64.079864
82449,North America,2021-10-15,649153000.0,353953300.0,294724400.0,596581300.0,49.402225


## Total Vaccinations Administered

### Top 10 countries

In [8]:
# Rank countries by total doses administered and keep the ten largest.
# k and sort_by are spelled out instead of being left to the helper's defaults
# so the cell matches its "Top 10" heading and the explicit sort_by used by
# the other ranking cells in this notebook.
# NOTE(review): assumes the helper defaults are k=10 and
# sort_by=['Total Vaccinations'], as the recorded outputs suggest — confirm
# against utils.sort_and_return_top_k.
top_10_vaccinations_by_countries = utils.sort_and_return_top_k(
    countries_data,
    k=10,
    sort_by=['Total Vaccinations'],
)
# Only the first five of the ten rows are previewed here.
top_10_vaccinations_by_countries.head()

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
23923,China,2021-09-18,2174043000.0,1100842000.0,1022207000.0,1444216000.0,70.779366
51824,India,2021-10-15,971163400.0,693416100.0,277747300.0,1393409000.0,19.932935
118136,United States,2021-10-15,406570900.0,218318100.0,188655200.0,332915100.0,56.667664
16383,Brazil,2021-10-15,254484300.0,155512800.0,103754900.0,213993400.0,48.485072
57545,Japan,2021-10-14,178255500.0,94599320.0,83656180.0,126050800.0,66.367041


In [9]:
# Plot total vaccinations per location for the top-ranked countries.
# NOTE(review): plot_type is FULLY_VACCINATED although the plotted field is
# 'Total Vaccinations' — confirm whether plots.PLOT_TYPES has a better match.
plots.plot_bars(
    top_10_vaccinations_by_countries,
    'Total Vaccinations:Q',
    'Location:O',
    plot_type=plots.PLOT_TYPES.FULLY_VACCINATED,
)

### Total Vaccinations administered per continent (top 3)

In [10]:
# Keep the three continent-level rows ranked highest by 'Total Vaccinations'
# and display them.
total_vaccinations_by_continents = utils.sort_and_return_top_k(
    continents_data,
    k=3,
    sort_by=['Total Vaccinations'],
)
total_vaccinations_by_continents

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
6494,Asia,2021-10-15,4480253000.0,2541010000.0,1841308000.0,4678445000.0,39.357259
37831,Europe,2021-10-15,822524800.0,432536300.0,401660900.0,748963000.0,53.628945
82449,North America,2021-10-15,649153000.0,353953300.0,294724400.0,596581300.0,49.402225


In [11]:
# Plot total vaccinations per location for the selected continents.
# NOTE(review): plot_type is FULLY_VACCINATED although the plotted field is
# 'Total Vaccinations' — confirm whether plots.PLOT_TYPES has a better match.
plots.plot_bars(
    total_vaccinations_by_continents,
    'Total Vaccinations:Q',
    'Location:O',
    plot_type=plots.PLOT_TYPES.FULLY_VACCINATED,
)

## Fully Vaccinated Visualization

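### Fully Vaccinated Info for Top 10 Countries

Note: a percentage above 100% can appear in this ranking (e.g. Gibraltar), presumably because the vaccinated count includes people outside the population estimate — verify against the data source.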

In [12]:
# Rank countries by 'Percentage Fully Vaccinated' (helper's default k) and
# display the result.
full_vaccinations_by_countries = utils.sort_and_return_top_k(
    countries_data,
    sort_by=['Percentage Fully Vaccinated'],
)
full_vaccinations_by_countries

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
44173,Gibraltar,2021-10-15,81966.0,39970.0,39751.0,33691.0,117.986999
89488,Pitcairn,2021-09-07,94.0,47.0,47.0,47.0,100.0
90680,Portugal,2021-10-11,16104710.0,8975593.0,8782671.0,10167923.0,86.376254
116879,United Arab Emirates,2021-10-15,20619734.0,9494889.0,8505838.0,9991083.0,85.134294
21560,Cayman Islands,2021-10-14,109919.0,55780.0,55780.0,66498.0,83.882222
70443,Malta,2021-10-14,848445.0,425218.0,424538.0,514564.0,82.504412
51198,Iceland,2021-10-14,562426.0,282338.0,277837.0,343360.0,80.917113
99953,Singapore,2021-10-14,9787176.0,4730374.0,4674723.0,5896684.0,79.27715
105302,Spain,2021-10-14,70982052.0,37868453.0,37029165.0,46745211.0,79.214885
91245,Qatar,2021-09-11,4578600.0,2360308.0,2218292.0,2930524.0,75.696087


In [13]:
# Plot the fully-vaccinated percentage per location for the ranked countries.
plots.plot_bars(
    full_vaccinations_by_countries,
    'Percentage Fully Vaccinated:Q',
    'Location:O',
    plot_type=plots.PLOT_TYPES.FULLY_VACCINATED,
)


### Fully Vaccinated Statistics by Continent

In [15]:
# Rank the continent-level rows by 'Percentage Fully Vaccinated' (helper's
# default k) and display the result.
full_vaccinations_by_continents = utils.sort_and_return_top_k(
    continents_data,
    sort_by=['Percentage Fully Vaccinated'],
)
full_vaccinations_by_continents

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
38463,European Union,2021-10-15,578727800.0,304701400.0,286558700.0,447189900.0,64.079864
37831,Europe,2021-10-15,822524800.0,432536300.0,401660900.0,748963000.0,53.628945
82449,North America,2021-10-15,649153000.0,353953300.0,294724400.0,596581300.0,49.402225
103487,South America,2021-10-15,474395000.0,279974700.0,199527400.0,434260100.0,45.946517
84531,Oceania,2021-10-15,40872580.0,23165350.0,17707220.0,43219950.0,40.970016
6494,Asia,2021-10-15,4480253000.0,2541010000.0,1841308000.0,4678445000.0,39.357259
1210,Africa,2021-10-15,171095600.0,104842400.0,68831700.0,1373486000.0,5.011458


In [16]:
# Plot the fully-vaccinated percentage per location for the ranked continents.
plots.plot_bars(
    full_vaccinations_by_continents,
    'Percentage Fully Vaccinated:Q',
    'Location:O',
    plot_type=plots.PLOT_TYPES.FULLY_VACCINATED,
)

## At least One Dose Visualization

### At least One Dose Info for Top 10 Countries


In [18]:
# Rank countries by the absolute 'People Vaccinated' count (helper's default
# k) and display the result.
people_vaccinated_by_countries = utils.sort_and_return_top_k(
    countries_data,
    sort_by=['People Vaccinated'],
)
people_vaccinated_by_countries

Unnamed: 0,Location,Collection Date,Total Vaccinations,People Vaccinated,People Fully Vaccinated,Population,Percentage Fully Vaccinated
23923,China,2021-09-18,2174043000.0,1100842000.0,1022207000.0,1444216000.0,70.779366
51824,India,2021-10-15,971163400.0,693416100.0,277747300.0,1393409000.0,19.932935
118136,United States,2021-10-15,406570900.0,218318100.0,188655200.0,332915100.0,56.667664
16383,Brazil,2021-10-15,254484300.0,155512800.0,103754900.0,213993400.0,48.485072
52417,Indonesia,2021-10-15,166861700.0,105464700.0,61397060.0,276361800.0,22.216188
57545,Japan,2021-10-14,178255500.0,94599320.0,83656180.0,126050800.0,66.367041
72608,Mexico,2021-10-14,110574500.0,67957150.0,50772620.0,130262200.0,38.977242
85727,Pakistan,2021-10-12,93551190.0,64947700.0,34809850.0,225199900.0,15.457309
43313,Germany,2021-10-15,109892000.0,57231090.0,54658270.0,83900470.0,65.146564
114495,Turkey,2021-10-15,113709500.0,54799550.0,47119100.0,85042740.0,55.406377


In [19]:
# Plot the 'People Vaccinated' count per location for the ranked countries.
plots.plot_bars(
    people_vaccinated_by_countries,
    'People Vaccinated:Q',
    'Location:O',
    plot_type=plots.PLOT_TYPES.PEOPLE_VACCINATED,
)
