In [None]:
import os
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine, inspect

### Extract CSVs into DataFrames

In [None]:
# Country Population
# Read the CSV, skipping the first 4 lines of the file before the header row.
pop_file = "Resources/Population.csv"
pop_df = pd.read_csv(filepath_or_buffer=pop_file, skiprows=4)

In [None]:
# Country GDP
# Read the CSV, skipping the first 4 lines of the file before the header row.
gdp_file = "Resources/GDP.csv"
gdp_df = pd.read_csv(filepath_or_buffer=gdp_file, skiprows=4)

In [None]:
# Olympic Data

### Transform DataFrames

In [None]:
# Country Population

In [None]:
# Reshape the population table from one column per year to one row per
# (country, year). The steps are chained without inplace=True, so pop_df keeps
# the contents read from the CSV and this cell gives the same result every
# time it is re-run (an in-place drop raises KeyError on a second run, and a
# repeated list.pop(0) would silently discard a year column).
pop_m_df = (
    pop_df
    # eliminate unused columns
    .drop(columns=['Country Code', 'Indicator Name', 'Indicator Code'])
    # rename columns to what will be used in the database
    .rename(columns={'Country Name': 'country'})
    # convert all the year columns into rows; melt uses every column other
    # than id_vars as a value column, so no explicit column list is needed
    .melt(id_vars='country', var_name='year')
)

In [None]:
# preview the first 5 rows of the melted population frame
pop_m_df.head(5)

In [None]:
# name the melted value column after the measure it holds
pop_m_df = pop_m_df.rename(columns={'value': 'population'})

In [None]:
# keep only the rows in which no column is missing a value
cleaned_pop_df = pop_m_df.dropna(axis=0, how='any')

In [None]:
# number of population rows left after dropping empty values
cleaned_pop_df.shape[0]

In [None]:
# Country GDP

In [None]:
# Reshape the GDP table from one column per year to one row per
# (country, year). The steps are chained without inplace=True, so gdp_df keeps
# the contents read from the CSV and this cell gives the same result every
# time it is re-run (an in-place drop raises KeyError on a second run, and a
# repeated list.pop(0) would silently discard a year column).
gdp_m_df = (
    gdp_df
    # eliminate unused columns
    .drop(columns=['Country Code', 'Indicator Name', 'Indicator Code'])
    # rename columns to what will be used in the database
    .rename(columns={'Country Name': 'country'})
    # convert all the year columns into rows; melt uses every column other
    # than id_vars as a value column, so no explicit column list is needed
    .melt(id_vars='country', var_name='year')
)

In [None]:
# name the melted value column after the measure it holds
gdp_m_df = gdp_m_df.rename(columns={'value': 'gdp'})

In [None]:
# preview the first 5 rows of the melted GDP frame
gdp_m_df.head(5)

In [None]:
# keep only the rows in which no column is missing a value
cleaned_gdp_df = gdp_m_df.dropna(axis=0, how='any')

In [None]:
# number of GDP rows left after dropping empty values
cleaned_gdp_df.shape[0]

#### Merge the country dataframes

In [None]:
# Inner join on (country, year): only pairs present in both frames survive,
# and each surviving row carries the remaining columns of both frames.
merged_country_df = cleaned_gdp_df.merge(
    cleaned_pop_df,
    how="inner",
    on=['country', 'year'],
)

In [None]:
# Olympic Data

### Create database connection

In [None]:
def build_connection_string(protocol, username, password, host, port, database_name):
    """Assemble a database URL of the form protocol://user:password@host:port/database.

    The username and password are percent-encoded so that characters such as
    '@', ':' or '/' inside a credential cannot be mistaken for URL delimiters.

    Args:
        protocol: URL scheme / dialect name, e.g. 'postgresql'.
        username: database user name (encoded before insertion).
        password: database password (encoded before insertion).
        host: server host name or address.
        port: server port.
        database_name: name of the database to connect to.

    Returns:
        str: the assembled connection URL.
    """
    user_part = quote(str(username), safe='')
    password_part = quote(str(password), safe='')
    return f'{protocol}://{user_part}:{password_part}@{host}:{port}/{database_name}'


# Connection settings. Each value can be overridden through an environment
# variable so credentials do not have to be edited into the notebook; the
# fallbacks are the notebook's original local-development values.
# NOTE(review): set OLYMPIC_DB_PASSWORD rather than relying on the fallback
# for anything other than a local sandbox database.
protocol = 'postgresql'
username = os.environ.get('OLYMPIC_DB_USER', 'postgres')
password = os.environ.get('OLYMPIC_DB_PASSWORD', 'bootcamp')
host = os.environ.get('OLYMPIC_DB_HOST', 'localhost')
port = int(os.environ.get('OLYMPIC_DB_PORT', '5432'))
database_name = os.environ.get('OLYMPIC_DB_NAME', 'olympic_db')
rds_connection_string = build_connection_string(
    protocol, username, password, host, port, database_name
)
# Create the SQLAlchemy engine for the connection string assembled above
engine = create_engine(rds_connection_string)

### Load DataFrames into database

In [None]:
# Inspect the database behind the engine and display the names of its tables
inspector = inspect(engine)
inspector.get_table_names()

In [None]:
# Write the merged frame to the country_data table without the DataFrame index.
# if_exists='append' adds rows to an existing table, so re-running this cell
# inserts the same rows again.
merged_country_df.to_sql(
    'country_data',
    engine,
    if_exists='append',
    index=False,
)