In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os, sys, inspect

# Resolve the notebook's directory and make its parent (the project root)
# importable.
#
# The working directory is used instead of
# inspect.getfile(inspect.currentframe()): a notebook cell has no real source
# file, so the frame's filename is a kernel-generated placeholder.
# NOTE(review): recent ipykernel releases appear to put that placeholder under
# a temp directory, which would make rootdir point outside the project --
# confirm against the kernel version in use.
# Assumes the kernel's working directory is the notebook's own folder.
currdir = os.getcwd()
rootdir = os.path.dirname(currdir)

# Guard the insert so re-running this cell does not stack duplicate entries.
if rootdir not in sys.path:
    sys.path.insert(0, rootdir)

In [2]:
# Build the full paths of the five intermediate CSV inputs, all of which live
# under <rootdir>/data/int.
(
    ffname_construction,
    ffname_income,
    ffname_homeprice,
    ffname_mort15,
    ffname_mort30,
) = (
    os.path.join(rootdir, "data", "int", csv_name)
    for csv_name in (
        "Construction_Demographics.csv",
        "median_income_by_zip.csv",
        "1_Output_quandl.csv",
        "15_year_rates.csv",
        "30_year_rates.csv",
    )
)

# Load each file into its own DataFrame (pandas defaults, no dtype overrides).
construction_df = pd.read_csv(filepath_or_buffer=ffname_construction)
income_df = pd.read_csv(filepath_or_buffer=ffname_income)
homeprice_df = pd.read_csv(filepath_or_buffer=ffname_homeprice)
mortgage_15_df = pd.read_csv(filepath_or_buffer=ffname_mort15)
mortgage_30_df = pd.read_csv(filepath_or_buffer=ffname_mort30)

In [3]:
# Rename the construction table's key columns to the "Zip Code" / "Year" /
# "Month" names that the merges in this notebook join on.
construction_df = construction_df.rename(
    columns={
        "OriginalZip": "Zip Code",
        "CalendarYearIssued": "Year",
        "Issuedmonth": "Month",
    }
)

In [4]:
# Attach income columns to the home-price rows. A left join keeps every
# home-price row; rows without an income match get NaN in the income columns.
homeprice_income_df = homeprice_df.merge(
    income_df,
    how="left",
    on=["Zip Code", "Year", "Month"],
    suffixes=("_hp", "_inc"),
)

In [5]:
# Add construction columns to the home-price/income table. A right join keeps
# every row of homeprice_income_df; construction rows with no matching
# (Zip Code, Year, Month) key are dropped.
construction_homeprice_income_df = construction_df.merge(
    homeprice_income_df,
    how="right",
    on=["Zip Code", "Year", "Month"],
    suffixes=("_cons", "_hpinc"),
)

In [6]:
# Give both mortgage-rate tables the capitalised "Year" / "Month" key names
# used by the other tables, so they can be joined on those columns.
month_key_names = {"year": "Year", "month": "Month"}
mortgage_15_df = mortgage_15_df.rename(columns=month_key_names)
mortgage_30_df = mortgage_30_df.rename(columns=month_key_names)

In [7]:
# Combine the 15-year and 30-year rate tables on (Year, Month). No `how` is
# given, so pandas' default inner join applies. Non-key columns that exist in
# both tables are disambiguated with the "_15" / "_30" suffixes.
mortgage_15_30 = mortgage_15_df.merge(
    mortgage_30_df,
    on=["Year", "Month"],
    suffixes=("_15", "_30"),
)

In [8]:
# Attach the mortgage rates to every zip-level row by (Year, Month). A left
# join keeps all rows of the zip-level table even when no rate is available.
# NOTE(review): the right-hand suffix "mort" has no leading underscore, unlike
# the other suffixes in this notebook -- confirm whether that is intentional.
real_estate_df = construction_homeprice_income_df.merge(
    mortgage_15_30,
    how="left",
    on=["Year", "Month"],
    suffixes=("_conshpinc", "mort"),
)

In [9]:
# Keep only the columns wanted in the final dataset, in this order.
# A KeyError is raised if any of them is missing from the merged table.
output_columns = [
    "Zip Code",
    "Year",
    "Month",
    "Home Value Index",
    "Median Listing Price",
    "Median Listing Price Per sqf",
    "Sales",
    "Median Income (dollars)",
    "Number of Households",
    "Mortgage Rate_15",
    "Mortgage Rate_30",
    "Total_NewConstructions",
    "Total_RemodelsRepairs",
    "TotalCost_NewConstructions",
    "TotalCost_RemodelRepairs",
    "Inventory Measure",
    "Median Price Cut",
    "Median Price of Reduction",
    "Median Rental Price",
]
real_estate_df = real_estate_df.loc[:, output_columns]

In [10]:
# Order rows by zip code, then chronologically, and replace the index with a
# fresh 0..n-1 range (the old index is discarded, not kept as a column).
real_estate_df = (
    real_estate_df
    .sort_values(by=["Zip Code", "Year", "Month"])
    .reset_index(drop=True)
)

In [11]:
# Write the combined dataset next to the other intermediate files, without
# the index column.
output_csv_path = os.path.join(rootdir, "data", "int", "realestate_demographics.csv")
real_estate_df.to_csv(output_csv_path, index=False)