# DSC106 FINAL PROJECT - GROUP JY
## COVID-19 vaccination visualization and analysis

In [18]:
import altair as alt
import pandas as pd
import numpy as np
from vega_datasets import data
import vaccination_data as eda
import vaccination_plots as plot

### Read Data

In [19]:
# Load the three input tables in one pass: the per-country vaccination
# records, the aggregated case counts, and the country-code lookup table.
vaccination, cases, country = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/country_vaccinations.csv",
        "data/countries-aggregated.csv",
        "data/countries.csv",
    )
)

In [38]:
# Preview the vaccination table (pandas truncates the middle rows).
# NOTE(review): this cell's execution count (38) is higher than that of the
# cleaning cell (22), so the saved output presumably shows the table after
# clean_table_fillna was applied rather than the raw CSV -- restart the kernel
# and run all cells to confirm.
vaccination

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Albania,ALB,2021-01-10,0.0,0.0,,,,0.00,0.00,,,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...
1,Albania,ALB,2021-01-11,0.0,0.0,,,64.0,0.00,0.00,,22.0,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...
2,Albania,ALB,2021-01-12,128.0,128.0,,,64.0,0.00,0.00,,22.0,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...
3,Albania,ALB,2021-01-13,188.0,188.0,,60.0,63.0,0.01,0.01,,22.0,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...
4,Albania,ALB,2021-01-14,266.0,266.0,,78.0,66.0,0.01,0.01,,23.0,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4038,Zimbabwe,ZWE,2021-02-19,0.0,0.0,49729.0,,328.0,0.00,0.00,1.58,22.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1364298653...
4039,Zimbabwe,ZWE,2021-02-20,0.0,0.0,49729.0,,328.0,0.00,0.00,1.58,22.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1364298653...
4040,Zimbabwe,ZWE,2021-02-21,0.0,0.0,49729.0,,328.0,0.00,0.00,1.58,22.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1364298653...
4041,Zimbabwe,ZWE,2021-02-22,1314.0,1314.0,49729.0,,328.0,0.01,0.01,1.58,22.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1364298653...


`cases` dataset has 5 attributes: 
* Date
* Country
* Confirmed
* Recovered
* Deaths <br>
`vaccination` dataset has 15 attributes:
* Country 
* Country ISO Code 
* Date
* Total number of vaccinations
* Total number of people vaccinated 
* Total number of people fully vaccinated 
* Daily vaccinations
* Total vaccinations per hundred 
* Total number of people vaccinated per hundred 
* Total number of people fully vaccinated per hundred 
* Number of vaccinations per day 
* Daily vaccinations per million 
* Vaccines used in the country 
* Source name 
* Source website <br>
The `country` dataset is a supplementary dataset used for plotting the geographic graphs.

In [20]:
# Preview the country lookup table (numeric id, name, alpha-2 and alpha-3 codes).
country

Unnamed: 0,id,name,alpha2,alpha3
0,4,Afghanistan,af,afg
1,8,Albania,al,alb
2,12,Algeria,dz,dza
3,20,Andorra,ad,and
4,24,Angola,ao,ago
...,...,...,...,...
188,862,Venezuela (Bolivarian Republic of),ve,ven
189,704,Viet Nam,vn,vnm
190,887,Yemen,ye,yem
191,894,Zambia,zm,zmb


## Plot 1. Vaccination process overall. Global and Countrywise.
First, we want to visualize the global vaccination process. We noticed that data is missing for some dates, so we preprocess the data by filling each NaN field with the previous value. If a country does not have up-to-date information, we do not fill in the missing values, to make sure we don't misinterpret the data. <br>
**The plot shows the overall trend of COVID-19 vaccination. Users can select a specific country from the drop-down menu.**

In [22]:
# Helper functions come from the vaccination_data (eda) and vaccination_plots
# (plot) modules imported at the top of the notebook.
# NOTE: this rebinds `vaccination` to the cleaned table, so the raw frame is no
# longer available afterwards and re-running this cell cleans already-cleaned data.
# NOTE(review): later outputs show the same people_fully_vaccinated value
# (611.0) for Albania, Algeria and Andorra, which suggests the fill may carry
# values across country boundaries -- confirm in clean_table_fillna.
vaccination = eda.clean_table_fillna(vaccination)
plot.plot1_line_graph(vaccination) 

**Since certain countries may have a small total number of vaccinations, selecting them in the global plot may not give a clear picture. Alternatively, a user may only care about the data for one country. Users can specify the country name to plot the information for that country only.**

In [24]:
# Same line graph, restricted to the United States, with an explicit
# width of 400 and height of 200.
plot.plot1_line_graph(
    vaccination,
    country="United States",
    height=200,
    width=400,
)

In [26]:
# Same line graph, restricted to China, with an explicit width of 400 and
# height of 200.
plot.plot1_line_graph(
    vaccination,
    country="China",
    height=200,
    width=400,
)

## Plot 2. Top 10 countries by vaccinations.
First, we want to understand which 10 countries have the highest number of vaccinations. <br>
We plot them with a bar chart.

In [27]:
# Compute the top-10 table (country, total_vaccinations) and display it.
# NOTE(review): the displayed totals (e.g. United States ~1.57e9) are far larger
# than the latest cumulative total_vaccinations shown for the US further down
# (~6.4e7 on 2021-02-22), so top10 appears to add up the cumulative series
# across dates -- confirm in vaccination_data.top10.
# NOTE(review): England is listed alongside United Kingdom; check whether
# sub-national entries should be excluded from the ranking.
vaccination_top10 = eda.top10(vaccination)
vaccination_top10

Unnamed: 0,country,total_vaccinations
102,United States,1571491000.0
21,China,666288000.0
101,United Kingdom,473648400.0
32,England,404933900.0
51,Israel,236389000.0
46,India,206403400.0
100,United Arab Emirates,161119700.0
38,Germany,130132600.0
15,Brazil,120113200.0
98,Turkey,114202300.0


In [28]:
# Bar chart of the top-10 table computed in the previous cell.
plot.plot2_barchart_top10(vaccination_top10)

**Trend Line Chart: the total number of vaccinations of the top 10 countries**

In [29]:
# Build the detail table for the top-10 countries; the preview in the next
# cell shows it keeps the full set of vaccination columns, with dated rows
# per country.
vaccination_top10_detail = eda.top10_detail(vaccination)

In [30]:
# Preview the top-10 detail table (pandas truncates the middle rows).
vaccination_top10_detail

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
494,Brazil,BRA,2021-01-16,0.0,0.0,1600.0,,,0.00,0.00,2.57,,"Oxford/AstraZeneca, Sinovac",Regional governments via Coronavirus Brasil,https://coronavirusbra1.github.io/
495,Brazil,BRA,2021-01-17,112.0,112.0,1600.0,112.0,112.0,0.00,0.00,2.57,1.0,"Oxford/AstraZeneca, Sinovac",Regional governments via Coronavirus Brasil,https://coronavirusbra1.github.io/
496,Brazil,BRA,2021-01-18,1109.0,1109.0,1600.0,997.0,554.0,0.00,0.00,2.57,3.0,"Oxford/AstraZeneca, Sinovac",Regional governments via Coronavirus Brasil,https://coronavirusbra1.github.io/
497,Brazil,BRA,2021-01-19,11470.0,11470.0,1600.0,10361.0,3823.0,0.01,0.01,2.57,18.0,"Oxford/AstraZeneca, Sinovac",Regional governments via Coronavirus Brasil,https://coronavirusbra1.github.io/
498,Brazil,BRA,2021-01-20,28543.0,28543.0,1600.0,17073.0,7136.0,0.01,0.01,2.57,34.0,"Oxford/AstraZeneca, Sinovac",Regional governments via Coronavirus Brasil,https://coronavirusbra1.github.io/
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3960,United States,USA,2021-02-19,59585043.0,41977401.0,17039118.0,1847276.0,1596355.0,17.82,12.55,5.09,4773.0,"Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/#vacc...
3961,United States,USA,2021-02-20,61289500.0,42809595.0,17895667.0,1704457.0,1521088.0,18.33,12.80,5.35,4548.0,"Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/#vacc...
3962,United States,USA,2021-02-21,63090634.0,43628092.0,18865319.0,1801134.0,1458040.0,18.86,13.05,5.64,4360.0,"Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/#vacc...
3963,United States,USA,2021-02-22,64177474.0,44138118.0,19438495.0,1086840.0,1446445.0,19.19,13.20,5.81,4325.0,"Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/#vacc...


In [31]:
# Trend line chart of total vaccinations for the top-10 countries.
plot.plot2_b_trend_line(vaccination_top10_detail)

**Stacked Bar Chart: The total number of vaccinations of the top 10 countries**

In [32]:
# Stacked bar chart of total vaccinations for the top-10 countries.
plot.plot2_c_stacked_barh(vaccination_top10_detail)

## Plot3. Understand country vaccination state. (Total vaccination per hundred)

In [33]:
# Combine the country lookup table with the vaccination data for the map
# plots, then preview the first rows. In the preview, countries without
# vaccination rows (e.g. Afghanistan, Angola) carry 0 placeholders.
country_vac = eda.geo_vac(country, vaccination)
country_vac.head()

Unnamed: 0,id,name,alpha2,alpha3,country,total_vaccinations,people_fully_vaccinated,total_vaccinations_per_hundred,people_fully_vaccinated_per_hundred,vaccines
0,4,Afghanistan,af,AFG,0,0.0,0.0,0.0,0.0,0
1,8,Albania,al,ALB,Albania,4177.0,611.0,0.15,0.02,Pfizer/BioNTech
2,12,Algeria,dz,DZA,Algeria,75000.0,611.0,0.17,0.02,Sputnik V
3,20,Andorra,ad,AND,Andorra,2141.0,611.0,2.77,0.02,Pfizer/BioNTech
4,24,Angola,ao,AGO,0,0.0,0.0,0.0,0.0,0


In [34]:
# World map coloured by total vaccinations per hundred people.
plot.plot3_geo_per100(
    country_vac,
    "total_vaccinations_per_hundred",
)

In [35]:
# World map coloured by people fully vaccinated per hundred people.
plot.plot3_geo_per100(
    country_vac,
    "people_fully_vaccinated_per_hundred",
)


## Plot4. Relationship between COVID-19 confirmed cases and total vaccinations.

In [36]:
# Combine the case dataset with the per-country vaccination table.
# Select the two count columns *before* aggregating so that max() is computed
# only on Confirmed/Deaths instead of on every column (including Date) and
# then discarded.
total_cases = (
    cases.groupby("Country")[["Confirmed", "Deaths"]]
    .max()
    .reset_index()
)
# Inner join on the country name (pd.merge's default, made explicit here).
# NOTE(review): countries whose name is spelled differently in the two tables
# (the lookup table uses e.g. "Viet Nam" and
# "Venezuela (Bolivarian Republic of)") would be silently dropped -- check the
# coverage of the merged frame.
case_vac = pd.merge(
    total_cases,
    country_vac,
    how="inner",
    left_on="Country",
    right_on="name",
)

In [37]:
# Scatter plot relating confirmed cases to total vaccinations per country,
# drawn from the merged case/vaccination table.
plot.plot4_compare_scatter(case_vac)