In [19]:
# import dependencies
import pandas as pd
import psycopg2 as ps
import getpass
from config import host_name,username, password, dbname

## Clean data before loading into database

In [22]:
# read the housing price CSV; `house_price` keeps the frame exactly as read, while
# `house_price_df` is a separate DataFrame object built from it that the later
# cells rename and upload
house_price = pd.read_csv('Resources/house_price_df.csv')
house_price_df = pd.DataFrame(data=house_price)
house_price.head(5)

Unnamed: 0,Region Name,State name,State and County,Region Code,2000,2001,2002,2003,2004,2005,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,Price change % 2015-19
0,Abbeville County,South Carolina,"Abbeville County, South Carolina",45001,100,107.98,111.81,114.34,120.85,124.86,...,123.05,123.16,130.71,131.96,133.75,142.15,152.61,152.99,165.44,0.170454
1,Acadia Parish,Louisiana,"Acadia Parish, Louisiana",22001,100,104.43,108.49,110.81,115.76,121.31,...,144.42,149.83,153.58,154.92,157.71,160.82,162.84,168.98,170.25,0.100273
2,Accomack County,Virginia,"Accomack County, Virginia",51001,100,110.4,117.26,128.84,150.11,182.78,...,163.35,162.22,160.17,163.32,163.24,168.2,165.83,171.98,182.96,0.073734
3,Ada County,Idaho,"Ada County, Idaho",16001,100,105.04,108.78,111.46,117.7,136.45,...,115.45,129.05,142.79,152.52,165.94,184.58,209.58,233.07,254.81,0.632257
4,Adair County,Missouri,"Adair County, Missouri",29001,100,102.37,102.96,105.43,107.36,113.61,...,116.24,116.75,118.19,117.48,119.61,121.36,127.29,128.14,128.2,0.084186


In [23]:
# rename columns to the snake_case names used by the INSERT statement further down.
# The result is reassigned rather than applied with inplace=True, so the cell does
# not mutate the existing object behind the name and stays safe to re-run.
house_price_df = house_price_df.rename(
    columns={
        'Region Name': 'region_name',
        'State name': 'state_name',
        'State and County': 'state_and_county',
        'Region Code': 'region_code',
        'Price change % 2015-19': 'percent_price_change_2015_19',
    }
)

house_price_df.head()

Unnamed: 0,region_name,state_name,state_and_county,region_code,2000,2001,2002,2003,2004,2005,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,percent_price_change_2015_19
0,Abbeville County,South Carolina,"Abbeville County, South Carolina",45001,100,107.98,111.81,114.34,120.85,124.86,...,123.05,123.16,130.71,131.96,133.75,142.15,152.61,152.99,165.44,0.170454
1,Acadia Parish,Louisiana,"Acadia Parish, Louisiana",22001,100,104.43,108.49,110.81,115.76,121.31,...,144.42,149.83,153.58,154.92,157.71,160.82,162.84,168.98,170.25,0.100273
2,Accomack County,Virginia,"Accomack County, Virginia",51001,100,110.4,117.26,128.84,150.11,182.78,...,163.35,162.22,160.17,163.32,163.24,168.2,165.83,171.98,182.96,0.073734
3,Ada County,Idaho,"Ada County, Idaho",16001,100,105.04,108.78,111.46,117.7,136.45,...,115.45,129.05,142.79,152.52,165.94,184.58,209.58,233.07,254.81,0.632257
4,Adair County,Missouri,"Adair County, Missouri",29001,100,102.37,102.96,105.43,107.36,113.61,...,116.24,116.75,118.19,117.48,119.61,121.36,127.29,128.14,128.2,0.084186


## Create connection to RDS

In [20]:
# Open a psycopg2 connection to the RDS-hosted database
def connect_to_db(hostname, db, port, user, pw):
    """Connect to the database and return the open connection.

    Args:
        hostname: Passed to ``ps.connect`` as ``host``.
        db: Passed to ``ps.connect`` as ``database``.
        port: Passed to ``ps.connect`` as ``port``.
        user: Passed to ``ps.connect`` as ``user``.
        pw: Passed to ``ps.connect`` as ``password``.

    Returns:
        The connection object returned by ``ps.connect``.

    Raises:
        ps.OperationalError: Propagated unchanged when the connection
            attempt fails.
    """
    connection = ps.connect(
        host=hostname,
        database=db,
        user=user,
        password=pw,
        port=port,
    )
    # Only reached when connect() did not raise.
    print('Connected!')
    return connection

In [21]:
# initialize connection
# `conn` is cleared first, so if the attempt below raises, the name is left as
# None instead of pointing at a connection object from an earlier run.
conn = None

conn = connect_to_db(
    hostname=host_name,
    db=dbname,
    port=5432,
    user=username,
    pw=password,
)

Connected!


In [77]:
# Open a cursor on the connection; the upload cell below hands it to the insert helpers.
curr = conn.cursor()

## Upload data into database

In [78]:
# INSERT helper: writes one house_price row through a cursor

def insert_into_table(curr, region_name, state_name, state_and_county, region_code,
                      y2000, y2001, y2002, y2003, y2004, y2005, y2006, y2007, y2008, y2009,
                      y2010, y2011, y2012, y2013, y2014, y2015, y2016, y2017, y2018, y2019,
                      y2020, percent_price_change_2015_19):
    """Insert a single row into the ``house_price`` table.

    Args:
        curr: Cursor-like object; its ``execute(sql, params)`` method is
            called exactly once.
        region_name, state_name, state_and_county, region_code: Values for
            the columns of the same names.
        y2000 ... y2020: Values for the quoted year columns ``"2000"``
            through ``"2020"``, in that order.
        percent_price_change_2015_19: Value for the column of the same name.

    Returns:
        None. Nothing is committed here; that is left to the caller.
    """
    # The order of this tuple has to line up with the column list in the
    # statement below, one value per %s placeholder.
    values = (
        region_name, state_name, state_and_county, region_code,
        y2000, y2001, y2002, y2003, y2004, y2005, y2006, y2007, y2008, y2009,
        y2010, y2011, y2012, y2013, y2014, y2015, y2016, y2017, y2018, y2019,
        y2020, percent_price_change_2015_19,
    )

    # Year columns are double-quoted in the SQL because their names are digits only.
    statement = ("""INSERT INTO house_price (region_name, state_name, state_and_county, region_code, "2000",
                    "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", 
                    "2015", "2016", "2017", "2018", "2019", "2020", percent_price_change_2015_19)
                    VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s, %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""")

    curr.execute(statement, values)

In [79]:
# helper that pushes every dataframe row into the database table

def append_from_df_to_db(curr, df):
    """Insert each row of ``df`` by calling ``insert_into_table`` once per row.

    Args:
        curr: Cursor forwarded unchanged to ``insert_into_table``.
        df: DataFrame that must contain the columns ``region_name``,
            ``state_name``, ``state_and_county``, ``region_code``, the year
            columns ``'2000'`` through ``'2020'`` and
            ``percent_price_change_2015_19``. Any other columns are ignored.

    Returns:
        None.
    """
    # Column order mirrors the positional parameters of insert_into_table.
    year_columns = [str(year) for year in range(2000, 2021)]
    ordered_columns = (
        ['region_name', 'state_name', 'state_and_county', 'region_code']
        + year_columns
        + ['percent_price_change_2015_19']
    )

    for _, record in df.iterrows():
        insert_into_table(curr, *[record[name] for name in ordered_columns])

In [80]:
# Run the row-by-row insert for the whole renamed dataframe; the commit is issued separately in the next cell.
append_from_df_to_db(curr, house_price_df)

In [81]:
# Commit the open transaction so the rows inserted through `curr` are persisted.
conn.commit()