# Realtor.com + Zillow.com

In [5]:
import pandas as pd
from sqlalchemy import create_engine
import os

### Store CSV into DataFrame

In [6]:
# Build the path to realtor.csv inside the local Resources folder.
csv_file = os.path.join("Resources", "realtor.csv")

# Parse the CSV into a DataFrame and preview its first five rows.
price_data_df = pd.read_csv(filepath_or_buffer=csv_file)
price_data_df.head(n=5)

Unnamed: 0,month_date_yyyymm,postal_code,zip_name,flag,median_listing_price,median_listing_price_mm,median_listing_price_yy,active_listing_count,active_listing_count_mm,active_listing_count_yy,...,median_square_feet_yy,average_listing_price,average_listing_price_mm,average_listing_price_yy,total_listing_count,total_listing_count_mm,total_listing_count_yy,pending_ratio,pending_ratio_mm,pending_ratio_yy
0,202009,97703,"bend, or",*,914549.5,0.0218,,107.0,0.07,,...,0.0388,1198963.0,0.0281,0.2502,288.0,-0.0799,-0.3394,1.6916,-0.4384,1.3075
1,202009,4572,"waldoboro, me",,235050.0,0.0981,0.038,17.0,0.2143,-0.4848,...,0.3504,287365.0,0.0986,0.1052,23.0,0.2105,-0.4103,0.3529,-0.0042,0.1711
2,202009,85379,"surprise, az",*,340050.0,0.0625,,54.0,0.4211,,...,-0.1608,355044.0,0.0307,0.0626,218.0,-0.0046,-0.3272,3.037,-1.7261,2.012
3,202009,14845,"horseheads, ny",*,278500.0,-0.0656,0.1091,38.0,-0.0256,,...,-0.0954,274070.0,-0.0076,0.0179,86.0,-0.0227,-0.0549,1.2632,0.0067,0.578
4,202009,62454,"robinson, il",,109950.0,0.0,-0.0443,88.0,-0.0435,-0.1927,...,-0.1889,135324.0,0.0107,-0.0552,91.0,-0.0619,-0.1727,0.0341,-0.0203,0.0249


### Create a new DataFrame with selected columns

In [7]:
# Columns carried forward from the full listing data: the postal code plus
# the median/average listing prices and the total listing count.
selected_columns = [
    'postal_code',
    'median_listing_price',
    'average_listing_price',
    'total_listing_count',
]

# .copy() makes price_df an independent DataFrame rather than a view of
# price_data_df, so later edits do not touch the source frame.
price_df = price_data_df.loc[:, selected_columns].copy()
price_df.head(n=5)

Unnamed: 0,postal_code,median_listing_price,average_listing_price,total_listing_count
0,97703,914549.5,1198963.0,288.0
1,4572,235050.0,287365.0,23.0
2,85379,340050.0,355044.0,218.0
3,14845,278500.0,274070.0,86.0
4,62454,109950.0,135324.0,91.0


### Store JSON data into a DataFrame

In [4]:
# Build the path to customer_location.json in the Resources folder one
# directory above the working directory.
json_file = os.path.join("..", "Resources", "customer_location.json")

# Parse the JSON file into a DataFrame and preview its first five rows.
customer_location_df = pd.read_json(path_or_buf=json_file)
customer_location_df.head(n=5)

Unnamed: 0,id,address,longitude,latitude,us_state
0,1,043 Mockingbird Place,-86.5186,39.1682,Indiana
1,2,4 Prentice Point,-85.0707,41.0938,Indiana
2,3,46 Derek Junction,-96.7776,32.7673,Texas
3,4,11966 Old Shore Place,-94.3567,39.035,Missouri
4,5,5 Evergreen Circle,-73.9772,40.7808,New York


### Clean DataFrame

In [5]:
# Keep only the identifier, street address and state columns.
location_columns = ["id", "address", "us_state"]

# .copy() makes the subset an independent DataFrame rather than a view of
# customer_location_df.
new_customer_location_df = customer_location_df.loc[:, location_columns].copy()
new_customer_location_df.head(n=5)

Unnamed: 0,id,address,us_state
0,1,043 Mockingbird Place,Indiana
1,2,4 Prentice Point,Indiana
2,3,46 Derek Junction,Texas
3,4,11966 Old Shore Place,Missouri
4,5,5 Evergreen Circle,New York


### Connect to local database

In [None]:
from urllib.parse import quote_plus

# Connection settings for the local PostgreSQL server. Each value can be
# overridden through an environment variable so real credentials do not have
# to be stored in the notebook; the fallbacks are the original local defaults.
pg_user = os.environ.get('PGUSER', 'postgres')
pg_password = os.environ.get('PGPASSWORD', 'postgres')
db_name = os.environ.get('PGDATABASE', 'customer_db')

# URL-encode the user and password so characters such as '@', ':' or '/'
# cannot corrupt the connection URL.
connection_string = (
    f"{quote_plus(pg_user)}:{quote_plus(pg_password)}@localhost:5432/{db_name}"
)
engine = create_engine(f'postgresql://{connection_string}')

# Before running this cell, create the database in PostgreSQL that this
# notebook will write to.

### Check for tables

In [None]:
from sqlalchemy import inspect

# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of table names across versions.
inspect(engine).get_table_names()

# Confirm that there are no tables in the SQL database yet.

### Use pandas to load the CSV-derived DataFrame into the database

In [None]:
# The original cell wrote `new_customer_data_df`, which is not defined in any
# visible cell and raises NameError. `price_df` is the DataFrame built from
# the CSV above, so that is what gets loaded here.
# NOTE(review): the table name 'customer_name' is kept because the later
# query cell reads from it -- confirm it is the intended target for the
# listing-price data.
price_df.to_sql(name='customer_name', con=engine, if_exists='append', index=False)

# to_sql takes the DataFrame built in pandas and appends its rows to the SQL
# table as-is.

### Use pandas to load the JSON-derived DataFrame into the database

In [None]:
# Before running this cell, create the table in PostgreSQL: describe its
# column formatting and specify a primary key, then use pandas to write to it.
# The primary key keeps the table free of duplicates, because the database
# rejects rows that repeat an existing key.
new_customer_location_df.to_sql(
    con=engine,
    name='customer_location',
    index=False,
    if_exists='append',
)

### Confirm data has been added by querying the customer_name table
* NOTE: can also check using pgAdmin

In [None]:
# Read the customer_name table back and preview the first five rows to
# confirm the load succeeded.
customer_name_rows = pd.read_sql_query(sql='select * from customer_name', con=engine)
customer_name_rows.head(n=5)

### Confirm data has been added by querying the customer_location table

In [None]:
# Read the customer_location table back and preview the first five rows to
# confirm the load succeeded.
customer_location_rows = pd.read_sql_query(sql='select * from customer_location', con=engine)
customer_location_rows.head(n=5)