In [52]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the per-country vaccination records and preview the first few rows.
csv_path = "../input/covid-world-vaccination-progress/country_vaccinations.csv"
data = pd.read_csv(csv_path)
data.head()

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,0.0,0.0,,,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",World Health Organization,https://covid19.who.int/
1,Afghanistan,AFG,2021-02-23,,,,,1367.0,,,,35.0,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",World Health Organization,https://covid19.who.int/
2,Afghanistan,AFG,2021-02-24,,,,,1367.0,,,,35.0,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",World Health Organization,https://covid19.who.int/
3,Afghanistan,AFG,2021-02-25,,,,,1367.0,,,,35.0,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",World Health Organization,https://covid19.who.int/
4,Afghanistan,AFG,2021-02-26,,,,,1367.0,,,,35.0,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",World Health Organization,https://covid19.who.int/


In [53]:
# Summarise the frame: column names, non-null counts and dtypes.
# Useful here to spot which vaccination columns are sparsely populated.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20923 entries, 0 to 20922
Data columns (total 15 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   country                              20923 non-null  object 
 1   iso_code                             20923 non-null  object 
 2   date                                 20923 non-null  object 
 3   total_vaccinations                   11946 non-null  float64
 4   people_vaccinated                    11170 non-null  float64
 5   people_fully_vaccinated              8675 non-null   float64
 6   daily_vaccinations_raw               9934 non-null   float64
 7   daily_vaccinations                   20705 non-null  float64
 8   total_vaccinations_per_hundred       11946 non-null  float64
 9   people_vaccinated_per_hundred        11170 non-null  float64
 10  people_fully_vaccinated_per_hundred  8675 non-null   float64
 11  daily_vaccinations_per_milli

In [54]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()

Unnamed: 0,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million
count,11946.0,11170.0,8675.0,9934.0,20705.0,11946.0,11170.0,8675.0,20705.0
mean,6884554.0,3938186.0,2143163.0,166314.6,87076.07,19.918126,13.965227,7.632891,3222.074813
std,30651350.0,14661770.0,9164351.0,829556.1,526554.6,27.13989,17.29156,11.708367,8313.20495
min,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,77974.5,64540.75,29010.0,3448.0,822.0,1.83,1.68,0.81,359.0
50%,555735.5,430014.5,228486.0,18149.0,5948.0,8.7,6.665,3.15,1615.0
75%,2579226.0,1785076.0,919942.0,71751.0,31475.0,27.1825,20.0,9.255,4459.0
max,602991000.0,166388100.0,133532500.0,20009000.0,17092570.0,229.23,115.94,113.3,1000000.0


In [55]:
# Parse the date strings into datetime64 values and store them back on the
# frame. pd.to_datetime returns a new Series, so without the assignment the
# column would remain plain `object` strings.
data['date'] = pd.to_datetime(data['date'])
data['date']

0       2021-02-22
1       2021-02-23
2       2021-02-24
3       2021-02-25
4       2021-02-26
           ...    
20918   2021-05-24
20919   2021-05-25
20920   2021-05-26
20921   2021-05-27
20922   2021-05-28
Name: date, Length: 20923, dtype: datetime64[ns]

In [56]:
# Number of records per country (most frequently reported countries first).
# The previous stray pd.to_datetime(...) call was removed: its result was
# discarded, so it had no effect on `data`.
data.country.value_counts()

Norway                      177
Canada                      166
China                       165
Russia                      165
Denmark                     162
                           ... 
Turkmenistan                  1
Central African Republic      1
Cook Islands                  1
Tuvalu                        1
British Virgin Islands        1
Name: country, Length: 214, dtype: int64

The United Kingdom is made up of England, Scotland, Wales, and Northern Ireland. In the data above, however, these four nations are also listed as separate entries alongside the United Kingdom, so the same vaccinations are effectively recorded twice. This is probably a quirk of how the data was recorded. Let's fix it by dropping the separate entries:

In [57]:
# Drop the UK constituent nations, keeping only the aggregate
# "United Kingdom" rows. Series.isin is the vectorized equivalent of the
# row-by-row `apply(lambda x: x not in [...])` membership test.
uk_constituents = ["England", "Scotland", "Wales", "Northern Ireland"]
data = data[~data["country"].isin(uk_constituents)]
data.country.value_counts()


Norway                             177
Canada                             166
Russia                             165
China                              165
Denmark                            162
                                  ... 
Cook Islands                         1
Bonaire Sint Eustatius and Saba      1
Guinea-Bissau                        1
Central African Republic             1
Turkmenistan                         1
Name: country, Length: 210, dtype: int64

In [58]:
# Count how many records report each combination of vaccines.
data["vaccines"].value_counts()

Oxford/AstraZeneca                                                                    3670
Johnson&Johnson, Moderna, Oxford/AstraZeneca, Pfizer/BioNTech                         2985
Oxford/AstraZeneca, Pfizer/BioNTech                                                   1602
Moderna, Pfizer/BioNTech                                                              1253
Pfizer/BioNTech                                                                       1214
Moderna, Oxford/AstraZeneca, Pfizer/BioNTech                                          1040
Oxford/AstraZeneca, Pfizer/BioNTech, Sinovac                                          1028
Oxford/AstraZeneca, Sinopharm/Beijing                                                  905
Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sputnik V                      904
Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing                                 403
Oxford/AstraZeneca, Sputnik V                                                          357

So this dataset covers almost all of the available Covid-19 vaccines. Next, I will create a new DataFrame containing only the vaccines and country columns to explore which vaccines are used by which country:

In [59]:
# Keep only the vaccine combination and the country for the per-vaccine breakdown.
df = data.loc[:, ["vaccines", "country"]]
df.head()

Unnamed: 0,vaccines,country
0,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",Afghanistan
1,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",Afghanistan
2,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",Afghanistan
3,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",Afghanistan
4,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",Afghanistan


In [60]:
# Map every vaccine combination to the countries that report it.
# A single groupby replaces one full boolean scan of the frame per unique
# combination plus a per-row label lookup. sort=False keeps the combinations
# in order of first appearance, matching df.vaccines.unique().
# Note: rows with a missing `vaccines` value are skipped by groupby.
dict_ = df.groupby("vaccines", sort=False)["country"].agg(list).to_dict()

# Collapse the repeated daily rows into the distinct set of countries.
vaccines = {combo: set(countries) for combo, countries in dict_.items()}
for combo, countries in vaccines.items():
  print(f"{combo}:>>{countries}")

Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing:>>{'Afghanistan', 'Peru', 'Maldives', 'Iraq'}
Oxford/AstraZeneca, Pfizer/BioNTech, Sinovac, Sputnik V:>>{'Albania', 'Bosnia and Herzegovina', 'Philippines'}
Sputnik V:>>{'Syria', 'Venezuela', 'Algeria', 'Guinea'}
Oxford/AstraZeneca, Pfizer/BioNTech:>>{'Isle of Man', 'Andorra', 'Slovenia', 'Cape Verde', 'Sweden', 'Panama', 'Saudi Arabia', 'South Korea', 'Costa Rica', 'Oman', 'Jersey', 'Australia', 'Cayman Islands'}
Oxford/AstraZeneca:>>{'British Virgin Islands', 'Ethiopia', 'Georgia', 'Samoa', 'Saint Kitts and Nevis', 'Eswatini', 'Malawi', 'Kosovo', 'Grenada', 'Madagascar', 'Vietnam', 'Saint Helena', 'Wallis and Futuna', 'Yemen', 'Saint Lucia', 'South Sudan', 'Gambia', 'Taiwan', 'Suriname', 'Mali', 'Trinidad and Tobago', 'Bahamas', 'Timor', 'Dominica', 'Pitcairn', 'Nauru', 'Nigeria', 'Sao Tome and Principe', 'Falkland Islands', 'Myanmar', 'New Caledonia', 'Tajikistan', 'Angola', 'Sierra Leone', 'Cook Islands', 'Montserrat', 'Saint V

In [79]:
import plotly.express as px
import plotly.offline as py

# The map colours each country by its vaccine combination, so one row per
# country is enough. Passing every daily record would make plotly draw the
# same country thousands of times and bloat the figure.
vaccine_by_country = data[["country", "iso_code", "vaccines"]].drop_duplicates()

vaccine_map = px.choropleth(
    vaccine_by_country,
    locations='iso_code',   # ISO-3 country codes (plotly's default locationmode)
    color='vaccines',
    hover_name='country',   # show the readable country name on hover
)
vaccine_map.update_layout(height=300, margin={"r":0,"t":0,"l":0,"b":0})
vaccine_map.show()