In [None]:
# import dependencies

from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect

#Config should contain database username as username and database password as password
import config

In [None]:
#Make sure the following are present in your config file:
#config.username
#config.password
#config.database_name

In [None]:
# Load the raw vaccination records from the Resources folder.
csv_file = "Resources/vaccinations.csv"
vaccinations_df = pd.read_csv(filepath_or_buffer=csv_file)

# Preview the first rows to confirm the file parsed.
vaccinations_df.head()

In [None]:
# First table: the identifying columns plus the absolute vaccination counts.
# .copy() detaches the selection from vaccinations_df so later edits do not
# touch the raw frame.
vaccination_rate_columns = [
    'location',
    'iso_code',
    'date',
    'people_vaccinated',
    'people_fully_vaccinated',
    'total_boosters',
    'daily_vaccinations_raw',
]
vaccination_rate_df = vaccinations_df[vaccination_rate_columns].copy()
vaccination_rate_df.head()

In [None]:
# Replace every missing value in the first table with 0.
# Note: this is applied to all columns of the frame, not only the count columns.
vaccination_rate_df = vaccination_rate_df.fillna(0)
vaccination_rate_df.head()

In [None]:
# Drop the rows whose people_vaccinated value is not positive; after the
# zero-fill above this also removes rows where that value was missing.
has_people_vaccinated = vaccination_rate_df["people_vaccinated"] > 0
vaccination_rate_df = vaccination_rate_df[has_people_vaccinated]

vaccination_rate_df.head()

In [None]:
# Second table: the identifying columns plus the population-normalised figures.
# .copy() detaches the selection from vaccinations_df.
vaccination_perhundred_columns = [
    'location',
    'iso_code',
    'date',
    'people_vaccinated_per_hundred',
    'people_fully_vaccinated_per_hundred',
    'total_boosters_per_hundred',
    'daily_vaccinations_per_million',
]
vaccination_perhundred_df = vaccinations_df[vaccination_perhundred_columns].copy()
vaccination_perhundred_df.head()

In [None]:
# Replace every missing value in the second table with 0 (applies to all columns).
vaccination_perhundred_df = vaccination_perhundred_df.fillna(0)
vaccination_perhundred_df.head()

In [None]:
# Keep only the rows with a positive people_vaccinated_per_hundred value.
has_rate_per_hundred = vaccination_perhundred_df["people_vaccinated_per_hundred"] > 0
vaccination_perhundred_df = vaccination_perhundred_df[has_rate_per_hundred]

vaccination_perhundred_df.head()

In [None]:
# Setting up the country reference (lookup) table: one row per distinct
# (location, iso_code) pair found in the vaccination data.
#
# The two columns are de-duplicated together so every location stays on the
# same row as its own ISO code. Calling .unique() on each column separately
# only works if both columns happen to have the same number of distinct values
# in the same order; otherwise it raises a ValueError or mismatches the pairs.
countries_df = (
    vaccinations_df[["location", "iso_code"]]
    .drop_duplicates()
    .reset_index(drop=True)  # clean 0..n-1 index for the lookup table
)
countries_df.head()

## Region Date Metadata

In [None]:
# Load the per-region, per-date metadata file from the Resources folder.
path = "Resources/region_date_metadata.csv"
region_date_df = pd.read_csv(filepath_or_buffer=path)

# Preview the first rows to confirm the file parsed.
region_date_df.head()

In [None]:
# Fourth table: keep only the region identifiers, the date and the recoveries.
# .copy() detaches the selection from region_date_df.
region_date_columns = ['Country_Region', 'Province_State', 'Date', 'Recoveries']
region_date_metadata = region_date_df[region_date_columns].copy()
region_date_metadata.head()

In [None]:
# Total recoveries per (Country_Region, Date); rows that differ only by
# Province_State are added together.
#
# 'Recoveries' is selected BEFORE summing so Province_State is never
# aggregated. Summing the whole frame first does needless work and, if that
# column holds text (presumably it does - confirm), can fail or concatenate
# strings depending on the pandas version.
region_date_metadata_groupby = (
    region_date_metadata
    .groupby(['Country_Region', 'Date'])['Recoveries']
    .sum()
    .to_frame()  # one-column DataFrame indexed by (Country_Region, Date)
)
region_date_metadata_groupby

In [None]:
# Move both group keys (Country_Region, Date) out of the index and back into
# ordinary columns, leaving a default integer index.
region_date_metadata_df = region_date_metadata_groupby.reset_index()
region_date_metadata_df

## Region Metadata

In [None]:
# Load the static region metadata file from the Resources folder.
path2 = "Resources/region_metadata.csv"
region_metadata_df = pd.read_csv(filepath_or_buffer=path2)

# Preview the first rows to confirm the file parsed.
region_metadata_df.head()

In [None]:
# Work on an independent copy so the frame loaded from the CSV stays untouched.
region_metadata = region_metadata_df.copy(deep=True)

In [None]:
# Total population per Country_Region.
#
# 'population' is selected BEFORE summing so the other columns of
# region_metadata are never aggregated; summing the whole frame first does
# needless work and can misbehave on non-numeric columns.
region_metadata_groupby = region_metadata.groupby('Country_Region')['population'].sum()

# One-column DataFrame indexed by Country_Region.
region_metadata_ = region_metadata_groupby.to_frame()

In [None]:
# Turn the Country_Region index back into an ordinary column so the frame has
# a flat, table-like layout.
region_metadata_ = region_metadata_.reset_index()
region_metadata_

In [None]:
# Connect to the SQL database.
#
# Username, password and database name are read from the local config module;
# protocol, host and port are fixed here.
protocol = 'postgresql'
username = config.username
password = config.password
host = 'localhost'
port = 5432
database_name = config.database_name

# Percent-encode the credentials before placing them in the URL. Characters
# such as '@', ':' or '/' inside a username or password would otherwise be
# read as URL delimiters and the connection string would be mis-parsed.
# str() mirrors the f-string formatting applied to the raw values.
encoded_username = quote(str(username), safe="")
encoded_password = quote(str(password), safe="")

rds_connection_string = (
    f'{protocol}://{encoded_username}:{encoded_password}'
    f'@{host}:{port}/{database_name}'
)
engine = create_engine(rds_connection_string)

# Inspector used below to list the tables in the database.
insp = inspect(engine)

In [None]:
# List the tables the inspector reports for the connected database.
existing_tables = insp.get_table_names()
existing_tables

In [None]:
# Append the countries lookup rows to the "countries" table.
# index=False keeps the DataFrame index out of the table.
countries_df.to_sql(
    name="countries",
    con=engine,
    if_exists="append",
    index=False,
)

In [None]:
# Append the first table (absolute vaccination counts) to "vaccinations".
# index=False keeps the DataFrame index out of the table.
vaccination_rate_df.to_sql(
    name='vaccinations',
    con=engine,
    if_exists='append',
    index=False,
)

In [None]:
# Append the second table (normalised figures) to "vaccinations_per_hundred".
# index=False keeps the DataFrame index out of the table.
vaccination_perhundred_df.to_sql(
    name='vaccinations_per_hundred',
    con=engine,
    if_exists='append',
    index=False,
)

In [None]:
# push third table to database  

# vaccination_country_df.to_sql(name='', con=engine, if_exists='append', index=False)

In [None]:
# Check the upload of the first table.
# The query now names the table written by the first push ('vaccinations');
# the previous statement ended at "from" and was not valid SQL.
# LIMIT keeps the check from pulling the whole table just to preview it.
pd.read_sql_query('SELECT * FROM vaccinations LIMIT 5', con=engine)

In [None]:
# Check the upload of the second table.
# The query now names the table written by the second push
# ('vaccinations_per_hundred'); the previous statement ended at "from" and
# was not valid SQL. LIMIT keeps the preview to a handful of rows.
pd.read_sql_query('SELECT * FROM vaccinations_per_hundred LIMIT 5', con=engine)

In [None]:
# Check the upload of the countries lookup table.
# The previous statement ended at "from" and was not valid SQL. The only other
# table this notebook writes is 'countries', so that is the one verified here.
# LIMIT keeps the preview to a handful of rows.
pd.read_sql_query('SELECT * FROM countries LIMIT 5', con=engine)