In [1]:
import pickle
import numpy as np
import folium

### Housing Data Analysis
Using models trained earlier (notebooks can be found in Archives folder) to analyse additional housing data from Ames, Iowa

In [2]:
#Load cleaned housing data
data = pickle.load(open('./pickles/realtor_clean.p','rb'))
#Load residuals model
residual_model = pickle.load(open('./pickles/final_model.p','rb'))
#Load location model
location_model = pickle.load(open('./pickles/location_model.p','rb'))

In [3]:
#Randomly choose a house from the dataframe
np.random.seed(42)
test_old = data.drop(['Residuals'], axis = 1).iloc[np.random.randint(data.shape[0]),:].to_frame().transpose()

In [6]:
#Predicted price of the original house
location_model.predict(test_old[['latitude','e_long']]) + \
residual_model.predict(test_old.drop(['e_long','Location_Price',
                                      'latitude','longitude'],axis = 1))

array([252781.6862173])

In [10]:
#Create a copy of the house to simulate renovation
test_new = test_old.copy()

In [23]:
location_model.predict(test_old[['latitude','e_long']]) + residual_model.predict(test_new.drop(['e_long','Location_Price','latitude','longitude'],axis = 1))

array([362608.07855769])

In [17]:
#Simulate renovation by changing values
test_new['Quarter_Q2'] = 0; test_new['Quarter_Q3'] = 1
test_new['BsmtQual_Gd'] = 0; test_new['BsmtQual_Ex'] = 1
test_new['Age'] = 0
test_new['RoofMatl_CompShg'] = 0; test_new['RoofMatl_WdShngl'] = 1
test_new['ExterQual_TA'] = 0; test_new['ExterQual_Ex'] = 1
test_new['CentralAir_N'] = 0; test_new['CentralAir_Y'] = 1
#Difference in residuals after renovation
residual_model.predict(test_new.drop(['e_long','Location_Price',
                                      'latitude','longitude'],axis = 1)) - residual_model.predict(test_old.drop(['e_long','Location_Price',
                                      'latitude','longitude'],axis = 1))

array([109826.39234039])

In [16]:
def potential(row):
    '''
    Calculate potential value after renovating property
    '''
    new_row = row.copy()
    new_row.Age = 0
    new_row.Quarter_Q2 = 0; new_row.Quarter_Q3 = 1
    #ExterQual
    new_row.loc[[col for col in new_row.index if 'ExterQual' in col]] =  0
    new_row.ExterQual_Ex = 1
    #Kitchenn
    new_row.loc[[col for col in new_row.index if 'Kitchen' in col]] =  0
    new_row.KitchenQual_Ex = 1
    #Bsmt
    new_row.loc[[col for col in new_row.index if 'BsmtQual' in col]] =  0
    new_row.BsmtQual_Ex = 1
    #Central_Air
    new_row.CentralAir_N = 0
    new_row.CentralAir_Y = 1
    #Roof
    new_row.loc[[col for col in new_row.index if 'Roof' in col]] =  0
    new_row.RoofMatl_WdShngl = 1
    #model
    potential_ = residual_model.predict(new_row.drop(['Location_Price','longitude','latitude','e_long','Residuals']).to_frame().transpose()) - residual_model.predict(row.drop(['Location_Price','longitude','latitude','e_long','Residuals']).to_frame().transpose())
    return(potential_)

In [18]:
#Copy data to new dataframe
data_w_potential = data.copy()
#Run potential function and unpack array (default output from model)
data_w_potential['potential'] = data.apply(potential,axis = 1)
data_w_potential.loc[:,'potential'] = data_w_potential.loc[:,'potential'].apply(lambda x: x[0])

In [19]:
#Choose the top 20 investment properties
data_most_potential = data_w_potential.sort_values(by = 'potential')[-20:]

In [22]:
#Total potential from 20 properites
sum(data_most_potential.potential)

2751033.4469959675

In [20]:
#Plot Potential investment properties
m = folium.Map(location=[test_old.latitude,test_old.longitude],tiles='Stamen Toner')
for i in range(0,data_most_potential.shape[0]):
    folium.Marker(location = [data_most_potential.latitude.iloc[i], data_most_potential.longitude.iloc[i]]).add_to(m)
m

In [21]:
#Plot example house from first test
m = folium.Map(location=[test_old.latitude,test_old.longitude],tiles='Stamen Toner')
folium.Marker([test_old.latitude,test_old.longitude],popup = 'Example House').add_to(m)
m