**Cleaning data COVID-19 world vaccination progress**

**Exploring vaccination info**

In [1]:
# Dependencies and Setup
import pandas as pd
import csv
import os
from sqlalchemy import create_engine
from config import access


In [2]:
# File to Load (Remember to Change These)
file_to_load = "Resources/country_vaccinations.csv"

# Read Purchasing File and store into Pandas data frame
vaccination_data = pd.read_csv(file_to_load)
vaccination_data.head()

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,0.0,0.0,,,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",World Health Organization,https://covid19.who.int/
1,Afghanistan,AFG,2021-02-23,,,,,1367.0,,,,35.0,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",World Health Organization,https://covid19.who.int/
2,Afghanistan,AFG,2021-02-24,,,,,1367.0,,,,35.0,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",World Health Organization,https://covid19.who.int/
3,Afghanistan,AFG,2021-02-25,,,,,1367.0,,,,35.0,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",World Health Organization,https://covid19.who.int/
4,Afghanistan,AFG,2021-02-26,,,,,1367.0,,,,35.0,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",World Health Organization,https://covid19.who.int/


In [3]:
# Inspect all columns
vaccination_data.columns

Index(['country', 'iso_code', 'date', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated',
       'daily_vaccinations_raw', 'daily_vaccinations',
       'total_vaccinations_per_hundred', 'people_vaccinated_per_hundred',
       'people_fully_vaccinated_per_hundred', 'daily_vaccinations_per_million',
       'vaccines', 'source_name', 'source_website'],
      dtype='object')

In [4]:
vaccination_data.dtypes

country                                 object
iso_code                                object
date                                    object
total_vaccinations                     float64
people_vaccinated                      float64
people_fully_vaccinated                float64
daily_vaccinations_raw                 float64
daily_vaccinations                     float64
total_vaccinations_per_hundred         float64
people_vaccinated_per_hundred          float64
people_fully_vaccinated_per_hundred    float64
daily_vaccinations_per_million         float64
vaccines                                object
source_name                             object
source_website                          object
dtype: object

In [5]:
vaccination_date = vaccination_data.loc[(vaccination_data["date"] >= "2020-12-2") & (vaccination_data["date"] <= "2021-5-28")]
vaccination_date

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,0.00,0.00,,,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",World Health Organization,https://covid19.who.int/
1,Afghanistan,AFG,2021-02-23,,,,,1367.0,,,,35.0,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",World Health Organization,https://covid19.who.int/
2,Afghanistan,AFG,2021-02-24,,,,,1367.0,,,,35.0,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",World Health Organization,https://covid19.who.int/
3,Afghanistan,AFG,2021-02-25,,,,,1367.0,,,,35.0,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",World Health Organization,https://covid19.who.int/
4,Afghanistan,AFG,2021-02-26,,,,,1367.0,,,,35.0,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...",World Health Organization,https://covid19.who.int/
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21623,Zimbabwe,ZWE,2021-05-27,953389.0,648121.0,305268.0,16349.0,12285.0,6.41,4.36,2.05,827.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1399441311...
21624,Zimbabwe,ZWE,2021-05-28,976796.0,656630.0,320166.0,23407.0,12695.0,6.57,4.42,2.15,854.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1399441311...
21625,Zimbabwe,ZWE,2021-05-29,1002465.0,666786.0,335679.0,25669.0,14056.0,6.74,4.49,2.26,946.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1399441311...
21626,Zimbabwe,ZWE,2021-05-30,1011973.0,670755.0,341218.0,9508.0,14420.0,6.81,4.51,2.30,970.0,Sinopharm/Beijing,Ministry of Health,https://twitter.com/MoHCCZim/status/1399441311...


In [6]:
vaccination_date_country = vaccination_date.groupby(["country"]).sum()
vaccination_date_country=vaccination_date_country.reset_index()

In [7]:
vaccination = vaccination_date_country.loc[:, ['country','total_vaccinations','people_fully_vaccinated']]

In [8]:
vaccination

Unnamed: 0,country,total_vaccinations,people_fully_vaccinated
0,Afghanistan,3831799.0,574841.0
1,Albania,24787760.0,3550624.0
2,Algeria,75030.0,0.0
3,Andorra,249370.0,31652.0
4,Angola,5386560.0,669072.0
...,...,...,...
209,Wales,223488730.0,53934287.0
210,Wallis and Futuna,49977.0,12249.0
211,Yemen,122625.0,0.0
212,Zambia,2484910.0,41089.0


In [9]:
missing_values = vaccination.isnull().sum()
missing_values

country                    0
total_vaccinations         0
people_fully_vaccinated    0
dtype: int64

In [10]:
vaccination.dtypes

country                     object
total_vaccinations         float64
people_fully_vaccinated    float64
dtype: object

### Connect to local database

In [11]:
# Create Engine for SQL Data
engine = create_engine(f'postgresql://postgres:{access}@localhost/etl_db')
conn= engine.connect()

In [12]:
engine.table_names()

['confirmed_cases', 'tests', 'deaths', 'vaccination', 'countries']

In [13]:
vaccination.to_sql(name='vaccination', con=engine, if_exists='append', index=False)