# Combine Results of BHI and Zonda Models

In [1]:
import pandas as pd

bhi_df = pd.read_csv(f'../outputs/home_prices_bhi.csv')
bhi_df['Zip'] = bhi_df['Zip'].astype('str').str.rstrip('.0')
bhi_home_count = bhi_df.shape[0]
print(bhi_df.shape)
bhi_df.head()

(34693, 15)


Unnamed: 0,Brand,Plan Name,City,State,Zip,Latitude,Longitude,Base Sq Ft,Bedrooms,Baths,Garage,Stories,Base Price,Predicted Price,% Difference
0,ABD Development,Courtyard 50,Davenport,FL,33837,28.20431,-81.55363,2530.0,4.0,3.0,2.0,2.0,594000.0,594506.0,0.085201
1,ABD Development,Marbella 3BR,Davenport,FL,33837,28.20431,-81.55363,1904.0,3.0,2.0,2.0,1.0,475000.0,544993.0,13.724257
2,ABD Development,Marbella 4BR,Davenport,FL,33837,28.20431,-81.55363,2192.0,4.0,3.0,2.0,1.0,536000.0,582154.0,8.255414
3,ABD Development,Sienna,Davenport,FL,33837,28.20431,-81.55363,2293.0,4.0,3.0,2.0,1.0,650000.0,593977.0,-9.007028
4,ABD Development,The Bristol,Palm Coast,FL,32137,29.50987,-81.18764,5021.0,4.0,4.0,3.0,2.0,1110000.0,1116287.0,0.564831


In [2]:
zonda_df = pd.read_csv(f'../outputs/home_prices_zonda.csv')
zonda_df['Zip'] = zonda_df['Zip'].astype('str').str.rstrip('.0')
zonda_home_count = zonda_df.shape[0]
print(zonda_df.shape)
zonda_df.head()

(243069, 17)


Unnamed: 0,Brand,Plan Name,City,State,Zip,Latitude,Longitude,Base Sq Ft,Bedrooms,Baths,Garage,Stories,Lot Size,Sales Rate,Base Price,Predicted Price,% Difference
0,1034 NE 72nd Street LLC,Plan 1225,Seattle,WA,98115,47.681056,-122.315907,1225.0,2.0,2.0,0.0,3.0,1016.0,1.7,749900.0,782103.0,4.204023
1,1034 NE 72nd Street LLC,Plan 1643,Seattle,WA,98115,47.681056,-122.315907,1643.0,3.0,2.5,0.0,3.0,1016.0,1.7,989900.0,983704.0,-0.627912
2,1034 NE 72nd Street LLC,Plan 1737,Seattle,WA,98115,47.681056,-122.315907,1737.0,3.0,2.5,0.0,3.0,1016.0,1.7,979900.0,1028449.0,4.834754
3,13th Floor Homes,Amelia,Tamarac,FL,33319,26.192302,-80.211595,1580.0,3.0,2.5,1.0,2.0,3000.0,4.3,332000.0,350371.0,5.384523
4,13th Floor Homes,Amelia,West Palm Beach,FL,33404,26.781443,-80.082896,1558.0,3.0,2.5,1.0,2.0,2900.0,3.7,312990.0,307142.0,-1.885908


In [4]:
def merge_columns(row, column_name):
    if row[f'{column_name}_x'] and not pd.isna(row[f'{column_name}_x']):
        return row[f'{column_name}_x']
    elif row[f'{column_name}_y'] and not pd.isna(row[f'{column_name}_y']):
        return row[f'{column_name}_y']
    else:
        return None
def get_price(row):
    if row['Base Price BHI'] and not pd.isna(row['Base Price BHI']) and row['Base Price Zonda'] and not pd.isna(row['Base Price Zonda']):
        base_price = (row['Base Price BHI'] + row['Base Price Zonda']) / 2
        predicted_price = (row['Predicted Price BHI'] + row['Predicted Price Zonda']) / 2
    elif row['Base Price BHI'] and not pd.isna(row['Base Price BHI']):
        base_price = row['Base Price BHI']
        predicted_price = row['Predicted Price BHI']
    elif row['Base Price Zonda'] and not pd.isna(row['Base Price Zonda']):
        base_price = row['Base Price Zonda']
        predicted_price = row['Predicted Price Zonda']
    percent_difference = (predicted_price - base_price) / ((predicted_price + base_price) / 2) * 100
    return pd.Series([predicted_price, percent_difference], index=['Predicted Price', '% Difference'])
df = zonda_df.merge(bhi_df, on=['Brand', 'Plan Name', 'Zip'], how='outer')
df['City'] = df.apply(lambda row: merge_columns(row, 'City'), axis=1)
df['State'] = df.apply(lambda row: merge_columns(row, 'State'), axis=1)
df['Latitude'] = df.apply(lambda row: merge_columns(row, 'Latitude'), axis=1)
df['Longitude'] = df.apply(lambda row: merge_columns(row, 'Longitude'), axis=1)
df['Base Sq Ft'] = df.apply(lambda row: merge_columns(row, 'Base Sq Ft'), axis=1)
df['Bedrooms'] = df.apply(lambda row: merge_columns(row, 'Bedrooms'), axis=1)
df['Baths'] = df.apply(lambda row: merge_columns(row, 'Baths'), axis=1)
df['Garage'] = df.apply(lambda row: merge_columns(row, 'Garage'), axis=1)
df['Stories'] = df.apply(lambda row: merge_columns(row, 'Stories'), axis=1)
df = df.rename(columns={
    'Base Price_x': 'Base Price Zonda',
    'Base Price_y': 'Base Price BHI',
    'Predicted Price_x': 'Predicted Price Zonda',
    'Predicted Price_y': 'Predicted Price BHI'
})
df[['Predicted Price', '% Difference']] = df.apply(lambda row: get_price(row), axis=1)
df = df.filter(regex='^(?!.*_y$)') # filter out columns ending in '_y'
df = df.drop(['Predicted Price BHI', 'Predicted Price Zonda'], axis=1)
df = df.reindex(columns=['Brand', 'Plan Name', 'City', 'State', 'Zip', 'Latitude', 'Longitude', 'Base Sq Ft', 'Bedrooms', 'Baths', 'Garage', 'Stories', 'Base Price BHI', 'Base Price Zonda', 'Predicted Price', '% Difference'])
df = df.drop_duplicates()
df.to_csv('../outputs/home_prices.csv', index=False)
matched_df = df[df['Base Price BHI'].notnull()]
matched_df.sample(10)

Unnamed: 0,Brand,Plan Name,City,State,Zip,Latitude,Longitude,Base Sq Ft,Bedrooms,Baths,Garage,Stories,Base Price BHI,Base Price Zonda,Predicted Price,% Difference
177669,Perry Homes,2412O,Pflugerville,TX,7866,30.435605,-97.617944,2412.0,4.0,3.5,2.0,2.0,558900.0,558900.0,565503.5,1.174578
16579,Bloomfield Homes,Primrose VI,Midlothian,TX,76065,32.450233,-96.967089,4035.0,5.0,4.5,3.0,2.0,687656.666667,688656.666667,626893.0,-9.317316
58052,David Weekley Homes,Baileywood,Bastrop,TX,78602,30.125467,-97.362711,2015.0,3.0,2.0,2.0,1.0,425490.0,425490.0,414752.0,-2.55593
178356,Perry Homes,2574W,Manvel,TX,77578,29.514457,-95.398847,2574.0,4.0,3.0,2.0,1.0,483900.0,467400.0,479452.0,0.796145
13716,Bela Flor Communities,Plan 5526,Apache Junction,AZ,85119,33.421776,-111.493123,2650.0,4.0,3.0,3.0,1.0,854900.0,854900.0,884465.0,3.399516
248093,John Wieland Homes,Bainbridge,Charlotte,NC,28204,35.20571,-80.81394,2951.0,4.0,4.0,2.0,3.0,839990.0,,830008.0,-1.195451
179706,Perry Homes,2980W,Fulshear,TX,77441,29.703274,-95.876387,2980.0,4.0,3.0,3.0,1.0,585400.0,585400.0,579451.0,-1.021418
118863,KB Home,Plan 2177,Durham,NC,27704,36.02109,-78.843726,2177.0,3.0,2.5,2.0,2.0,417990.0,417990.0,413686.5,-1.034898
119296,KB Home,Plan 2381,San Antonio,TX,78254,29.517804,-98.791413,2381.0,3.0,2.0,2.0,1.0,321995.0,321995.0,334214.5,3.724268
16074,Bloomfield Homes,Dogwood III,Mansfield,TX,76063,32.552543,-97.100844,2333.0,4.0,3.0,2.0,2.0,532990.0,371847.142857,436916.0,-3.486331
