# Final Model Analysis

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the saved model terms without a header row into two named columns,
# then key the table by term name.
df = pd.read_csv(
    'data/final_model.csv',
    header=None,
    names=['names', 'coefficients'],
)
df = df.set_index('names')
# Plain {term name: coefficient} lookup used by the prediction code.
coef = df.coefficients.to_dict()

## Prediction Function

In [3]:
def predict(data, coefficients=None):
    """Predict a sale price from one comma-separated listing record.

    The record is turned into the model's feature row (sale year, age
    features, dummy-encoded categoricals, log-transformed size features), the
    linear predictor is accumulated from the coefficient table, and the
    result is exponentiated.

    Parameters
    ----------
    data : str
        One CSV line with the fields, in order: id, date, price, bedrooms,
        bathrooms, sqft_living, sqft_lot, floors, waterfront, view,
        condition, grade, sqft_above, sqft_basement, yr_built, yr_renovated,
        zipcode, lat, long, sqft_living15, sqft_lot15. ``date`` must end in
        ``/<year>``. Empty fields are treated as missing and contribute 0.
        A trailing newline is ignored.
    coefficients : dict, optional
        Mapping of term name to coefficient; must contain ``'Intercept'``.
        Defaults to the module-level ``coef``.

    Returns
    -------
    float
        ``exp(intercept + sum(coefficient * feature))``. Terms that have no
        matching feature in the row contribute 0.

    Raises
    ------
    ValueError
        If a field cannot be parsed as a number.
    KeyError
        If the coefficient mapping has no ``'Intercept'`` entry.
    """
    model = coef if coefficients is None else coefficients

    column_names = 'id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15'.split(',')
    # strip() removes the newline left on lines read straight from a file.
    fields = data.strip().split(',')
    features = pd.DataFrame(dict(zip(column_names, fields)), index=[0])

    # Only the sale year is kept from the date.
    features['yr_sold'] = features['date'].map(lambda d: int(d.split('/')[-1]))
    features = features.drop(columns='date')
    for col in features.columns:
        features[col] = pd.to_numeric(features[col])

    # A yr_renovated of 0 means "never renovated": fall back to the build year.
    features['yr_since_renovation'] = np.where(
        features['yr_renovated'] == 0.0,
        features['yr_sold'] - features['yr_built'],
        features['yr_sold'] - features['yr_renovated'],
    )
    features['yr_since_built'] = features['yr_sold'] - features['yr_built']

    # The coefficient table names floors levels without the decimal point
    # ('floors_15', 'floors_30'), so 1.5 must become '15', not '1.5'.
    # Otherwise the dummy column never matches a coefficient key.
    features['floors'] = features['floors'].map(
        lambda v: str(float(v)).replace('.', '')
    )
    categoricals = ['floors', 'condition', 'grade', 'zipcode']
    features = features.astype({col: 'str' for col in categoricals})
    features = pd.get_dummies(features)

    features['renovated'] = features['yr_renovated'].map(lambda yr: 1 if yr > 0 else 0)

    # These columns enter the model on a log scale.
    for col in ['price', 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot']:
        features[col] = features[col].map(np.log)

    # Missing inputs contribute nothing to the linear predictor.
    features = features.replace(np.nan, 0)
    row = features.iloc[0].to_dict()

    # 'Intercept' has no matching feature, so the loop adds 0 for it and the
    # intercept is counted exactly once.
    log_price = model['Intercept']
    for term, weight in model.items():
        log_price += weight * row.get(term, 0)
    return np.exp(log_price)

In [5]:
# One raw listing in the column order predict() expects; the empty field after
# floors is a missing waterfront value.
data = (
    '7129300520,10/13/2014,221900.0,'
    '3,1.0,1180,5650,1.0,,0.0,3,7,'
    '1180,0.0,1955,0.0,98178,'
    '47.5112,-122.257,1340,5650'
)
predict(data)

239026.23346531886

data will come in the following format:

id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
7129300520,10/13/2014,221900.0,3,1.0,1180,5650,1.0,,0.0,3,7,1180,0.0,1955,0.0,98178,47.5112,-122.257,1340,5650

In [7]:
# Smoke-test predict() on the first five listings in the raw data file.
with open('data/kc_house_data.csv') as f:
    next(f, None)  # skip the header row
    # zip() stops as soon as either side is exhausted, so a file with fewer
    # than five data rows ends the loop instead of feeding '' to predict().
    for _, line in zip(range(5), f):
        print(predict(line))

239026.23346531886
575905.0612859579
239975.87787140472
556278.8552897956
471694.00041375885


# Coefficient Analysis

In [8]:
# Show the coefficient mapping loaded from data/final_model.csv.
coef

{'Intercept': -86.96192670503272,
 'bedrooms': -0.057155141850407175,
 'bathrooms': 0.062049951410309764,
 'sqft_living': 0.4834287268804242,
 'sqft_lot': 0.06665523673039399,
 'waterfront': 0.6029579655205117,
 'sqft_basement': -5.318890782739171e-05,
 'lat': 0.647575198704021,
 'long': -0.5281545373156602,
 'yr_since_built': 0.0004074831112883181,
 'renovated': 0.062242978234706765,
 'floors_15': 0.015162167725278057,
 'floors_30': -0.06364755868433236,
 'condition_2': 0.18932432113189712,
 'condition_3': 0.3134916654304245,
 'condition_4': 0.3483735022840655,
 'condition_5': 0.3988017450421917,
 'grade_11': 0.1344705027303335,
 'grade_12': 0.17766664807078092,
 'grade_4': -0.4157584764311476,
 'grade_5': -0.4607961421238744,
 'grade_6': -0.4110665063315864,
 'grade_7': -0.3327349771172694,
 'grade_8': -0.22093200342053887,
 'grade_9': -0.08214638517531614,
 'zipcode_98004': 0.9510595844225008,
 'zipcode_98005': 0.5853394662050718,
 'zipcode_98006': 0.5712839348356973,
 'zipcode_9800

bedrooms: -0.057155141850407175
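- bedrooms is log-transformed in the model, so this coefficient is an elasticity: a 1% increase in the number of bedrooms decreases the sale price of a house by about .06%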

bathrooms: 0.062049951410309764
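- bathrooms is log-transformed in the model, so this coefficient is an elasticity: a 1% increase in the number of bathrooms increases the sale price of a house by about .06%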

sqft_living: 0.4834287268804242
- a 1% change in square footage living area increases the sale price of a house by .48%

sqft_lot: 0.06665523673039399
- a 1% change in square footage lot area increases the sale price of a house by .07%

waterfront: 0.6029579655205117
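- if the house is on the waterfront, the sale price of a house increases by about 83% (exp(0.603) - 1; the coefficient itself is a change in log price, and for a value this large it is not a good approximation of the percent change)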

sqft_basement: -5.318890782739171e-05
- sqft_basement is not log-transformed, so the effect is per square foot: each additional square foot of basement area decreases the sale price of a house by about .005%

lat: 0.647575198704021
- if you move north, a 1 degree increase in latitude increases the sale price of a house by about 91% (exp(0.648) - 1)

long: -0.5281545373156602
- if you move east, a 1 degree increase in longitude decreases the sale price of a house by about 41% (1 - exp(-0.528))

yr_since_built: 0.0004074831112883181
- a 1-year increase in the age of a house increases its sale price by .04%

renovated: 0.062242978234706765
- a house that has been renovated has its sale price increased by 6%

# Recommendations and Future Work

Interaction features, such as bathrooms*bedrooms, could increase the accuracy of our model.

Trying polynomial features could also make our model more accurate.

Trial-and-error would be needed to determine which features could be changed in these ways to help our model.

# Conclusions

Our model will be useful in predicting sale prices of houses in King County. We can use these predictions to help our clients set the prices for their houses, and to find houses that are currently underpriced.