# House Price Predictor 

In [2]:
import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## Format the data and drop any NA 

In [5]:
data_path = "./kc_house_data.csv"
data = pd.read_csv(data_path)
data = data.dropna(axis=0)
print(data.columns)

Index(['id', 'date', 'price', 'bedrooms', 'bathrooms', 'sqft_living',
       'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',
       'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',
       'lat', 'long', 'sqft_living15', 'sqft_lot15'],
      dtype='object')


## Set y and X values

In [6]:
y = data["price"].astype(np.float32)
data_prediction_features = ["bedrooms", "bathrooms", "sqft_living", "sqft_lot", "floors", "waterfront", "view", "condition", "grade", "sqft_above", "sqft_basement", "yr_built"]
X = data[data_prediction_features].astype(np.float32)

Create a test and train split 1:5

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Normalize the data

In [8]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Create Model

In [9]:
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.4), # add various dropout layers to prevent overfitting
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1)  # Regression task, so no activation function (linear activation)
])

2023-08-15 10:27:10.238568: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [10]:
loss_fn = tf.keras.losses.MeanSquaredError() # mean squared error for regression task
# ues adam optimizer function for dynamically updating gradient descent.
optimizerFunction = tf.keras.optimizers.Adam(
    learning_rate=0.0005,
    beta_1=0.9,
    beta_2=0.995,
    epsilon=5e-06,
    amsgrad=True,
    name='Adam',
)
model.compile(optimizer=optimizerFunction,
              loss=loss_fn)

## Start Training

In [11]:
callback = tf.keras.callbacks.EarlyStopping(monitor="loss", patience=30) # add a callback to prevent overfitting (but be very patient because of low processing time)
model.fit(X_train_scaled, y_train, epochs=175, batch_size=32, validation_split=0.1, callbacks=[callback])


Epoch 1/175
Epoch 2/175
Epoch 3/175
Epoch 4/175
Epoch 5/175
Epoch 6/175
Epoch 7/175
Epoch 8/175
Epoch 9/175
Epoch 10/175
Epoch 11/175
Epoch 12/175
Epoch 13/175
Epoch 14/175
Epoch 15/175
Epoch 16/175
Epoch 17/175
Epoch 18/175
Epoch 19/175
Epoch 20/175
Epoch 21/175
Epoch 22/175
Epoch 23/175
Epoch 24/175
Epoch 25/175
Epoch 26/175
Epoch 27/175
Epoch 28/175
Epoch 29/175
Epoch 30/175
Epoch 31/175
Epoch 32/175
Epoch 33/175
Epoch 34/175
Epoch 35/175
Epoch 36/175
Epoch 37/175
Epoch 38/175
Epoch 39/175
Epoch 40/175
Epoch 41/175
Epoch 42/175
Epoch 43/175
Epoch 44/175
Epoch 45/175
Epoch 46/175
Epoch 47/175
Epoch 48/175
Epoch 49/175
Epoch 50/175
Epoch 51/175
Epoch 52/175
Epoch 53/175
Epoch 54/175
Epoch 55/175
Epoch 56/175
Epoch 57/175
Epoch 58/175
Epoch 59/175
Epoch 60/175
Epoch 61/175
Epoch 62/175
Epoch 63/175
Epoch 64/175
Epoch 65/175
Epoch 66/175
Epoch 67/175
Epoch 68/175
Epoch 69/175
Epoch 70/175
Epoch 71/175
Epoch 72/175
Epoch 73/175
Epoch 74/175
Epoch 75/175
Epoch 76/175
Epoch 77/175
Epoch 78

<keras.callbacks.History at 0x1342f6a10>

## Evaluate Model

In [12]:
loss = model.evaluate(X_test_scaled, y_test)
print("Test loss:", loss)

Test loss: 41965613056.0


## Predict the price of user input.
Enter in the various attributes into the user_input array. The order is the order of the X data input.

In [14]:
user_input = [2, 0.75, 1020, 1076, 2, 0, 0, 3, 7, 1020, 0, 2008]

# Normalize the user input using the same scaler
user_input_scaled = scaler.transform(np.array(user_input).reshape(1, -1))

# Predict the price using the trained model
predicted_price = model.predict(user_input_scaled)[0][0]

print(f"Estimated price: ${predicted_price:.2f}")

Estimated price: $313654.38


