In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine, inspect

### Extract CSVs into DataFrames

In [2]:
# Load the happiness CSV into a DataFrame and preview the first five rows.
happiness_file = "Resources/happiness_2019.csv"
happiness_df = pd.read_csv(filepath_or_buffer=happiness_file)
happiness_df.head(n=5)

Unnamed: 0,country,id,score,gdp,social_support,health,freedom,generosity,corruption
0,Afghanistan,154,3.203,0.35,0.517,0.361,0.0,0.158,0.025
1,Albania,107,4.719,0.947,0.848,0.874,0.383,0.178,0.027
2,Algeria,88,5.211,1.002,1.16,0.785,0.086,0.073,0.114
3,Argentina,47,6.086,1.092,1.432,0.881,0.471,0.066,0.05
4,Armenia,116,4.559,0.85,1.055,0.815,0.283,0.095,0.064


In [3]:
# Load the education statistics CSV into a DataFrame and preview the first five rows.
education_file = "Resources/education_stats.csv"
education_df = pd.read_csv(filepath_or_buffer=education_file)
education_df.head(n=5)

Unnamed: 0,country,Nah1,Nah2,Nah3,nah4,year,percent_educated,country_code
0,Australia,EDUTRY,25_34,PC_AGE,A,2019,52.478458,11
1,Austria,EDUTRY,25_34,PC_AGE,A,2019,41.610401,10
2,Belgium,EDUTRY,25_34,PC_AGE,A,2019,47.282982,18
3,Canada,EDUTRY,25_34,PC_AGE,A,2019,62.965115,9
4,Czech Republic,EDUTRY,25_34,PC_AGE,A,2019,32.580391,20


### Transform happiness DataFrame

In [4]:
# Keep only the columns that are loaded into the `happiness` table.
# .copy() yields an independent frame, so later edits cannot touch happiness_df.
new_happiness_df = happiness_df.loc[:, ['id', 'country', 'score', 'gdp']].copy()
new_happiness_df.head(n=5)

Unnamed: 0,id,country,score,gdp
0,154,Afghanistan,3.203,0.35
1,107,Albania,4.719,0.947
2,88,Algeria,5.211,1.002
3,47,Argentina,6.086,1.092
4,116,Armenia,4.559,0.85


In [5]:
# Count the non-null values in each column of the trimmed happiness frame
# (equal counts across columns mean no column has missing entries).
new_happiness_df.count()

id         156
country    156
score      156
gdp        156
dtype: int64

In [6]:
# new_happiness_df.drop_duplicates(subset = 'id', inplace = True)
# new_happiness_df.dropna(inplace = True)


In [7]:
# new_happiness_df.head()

In [8]:
# new_happiness_df.count()

### Transform education DataFrame

In [21]:
# Keep only the columns that are loaded into the `education` table.
# .copy() yields an independent frame, so later edits cannot touch education_df.
new_education_df = education_df.loc[:, ['country', 'percent_educated', 'country_code']].copy()
new_education_df.head(n=5)

Unnamed: 0,country,percent_educated,country_code
0,Australia,52.478458,11
1,Austria,41.610401,10
2,Belgium,47.282982,18
3,Canada,62.965115,9
4,Czech Republic,32.580391,20


In [10]:
# Count the non-null values in each column of the trimmed education frame
# (equal counts across columns mean no column has missing entries).
new_education_df.count()

country             33
percent_educated    33
country_code        33
dtype: int64

In [11]:
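# Optional cleanup (disabled). This frame has no 'id' column, so de-duplicate on 'country_code'.
# new_education_df.drop_duplicates(subset = 'country_code', inplace = True)
# new_education_df.dropna(inplace = True)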


In [12]:
# new_education_df.head()

In [13]:
# new_education_df.count()

### Create database connection

In [14]:
# Build the PostgreSQL connection from environment variables so real credentials
# are never committed with the notebook. Each value falls back to the local
# development default, so with nothing set the resulting URL is unchanged:
# postgresql://postgres:postgres@localhost:5432/education
db_user = os.environ.get("PGUSER", "postgres")
db_password = os.environ.get("PGPASSWORD", "postgres")
db_host = os.environ.get("PGHOST", "localhost")
db_port = os.environ.get("PGPORT", "5432")
db_name = os.environ.get("PGDATABASE", "education")

# URL-escape user and password: characters such as '@', ':' or '/' in a password
# would otherwise be parsed as part of the URL structure.
rds_connection_string = (
    f"{quote_plus(db_user)}:{quote_plus(db_password)}@{db_host}:{db_port}/{db_name}"
)
engine = create_engine(f'postgresql://{rds_connection_string}')

In [15]:
# Confirm the target tables exist before loading.
# Engine.table_names() is deprecated as of SQLAlchemy 1.4 and removed in 2.0;
# the Inspector returned by inspect(engine) is the supported way to list tables.
inspect(engine).get_table_names()

['happiness', 'education']

### Load DataFrames into database

In [16]:
# Insert the happiness rows into the existing `happiness` table, without the DataFrame index.
# NOTE: if_exists='append' adds rows on every run, so re-running this cell
# attempts to insert the same rows again.
new_happiness_df.to_sql(
    name='happiness',
    con=engine,
    index=False,
    if_exists='append',
)

In [17]:
# Insert the education rows into the existing `education` table, without the DataFrame index.
# NOTE: if_exists='append' adds rows on every run, so re-running this cell
# attempts to insert the same rows again.
new_education_df.to_sql(
    name='education',
    con=engine,
    index=False,
    if_exists='append',
)

In [18]:
# Read the `happiness` table back from the database and show the first five
# rows as a sanity check on the load.
(
    pd.read_sql_query(sql='select * from happiness', con=engine)
    .head(n=5)
)

Unnamed: 0,id,country,score,gdp
0,154,Afghanistan,3.203,0.35
1,107,Albania,4.719,0.947
2,88,Algeria,5.211,1.002
3,47,Argentina,6.086,1.092
4,116,Armenia,4.559,0.85


In [19]:
# Read the `education` table back from the database and show the first five
# rows as a sanity check on the load.
(
    pd.read_sql_query(sql='select * from education', con=engine)
    .head(n=5)
)

Unnamed: 0,country,percent_educated,country_code
0,Australia,52.478458,11
1,Austria,41.610401,10
2,Belgium,47.282982,18
3,Canada,62.965115,9
4,Czech Republic,32.580391,20
