In [1]:
# Imports
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Loading the dataset
data = pd.read_excel('../1_Data/Real estate valuation data set.xlsx')

In [3]:
# Transforming data
X = data.iloc[:, 2:-1].values  # Selecting feature columns
y = data.iloc[:, -1].values.reshape(-1, 1)  # Target column (house price)

scaler_X = StandardScaler()
X = scaler_X.fit_transform(X)

scaler_y = StandardScaler()
y = scaler_y.fit_transform(y)

In [4]:
# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# Neural Network Architecture
input_size = X.shape[1]
hidden_sizes = [64, 32, 16]
output_size = 1

In [6]:
# Defining functions for network architecture

# This function initializes parameters by producing randomized weights for each layer and setting bias to zero
# Takes the array of how many neurons would be in each layer and outputs a dictionary for the weights and biases
def initialize_parameters():
    np.random.seed(42)
    parameters = {
        'W1': np.random.randn(input_size, hidden_sizes[0]) * np.sqrt(2 / input_size),
        'b1': np.zeros((1, hidden_sizes[0])),
        'W2': np.random.randn(hidden_sizes[0], hidden_sizes[1]) * np.sqrt(2 / hidden_sizes[0]),
        'b2': np.zeros((1, hidden_sizes[1])),
        'W3': np.random.randn(hidden_sizes[1], hidden_sizes[2]) * np.sqrt(2 / hidden_sizes[1]),
        'b3': np.zeros((1, hidden_sizes[2])),
        'W4': np.random.randn(hidden_sizes[2], output_size) * np.sqrt(2 / hidden_sizes[2]),
        'b4': np.zeros((1, output_size))
    }
    return parameters

# Activation function
def relu(Z, alpha=0.01):
    return np.where(Z > 0, Z, alpha * Z)

# Used for adjusting weights during back-propagation
def relu_derivative(Z, alpha=0.01):
    return np.where(Z > 0, 1, alpha)

# Forward propagation
# Passes a sample through the network, does not change any weights in this function
# Requires the sample (which is an array of one value for each feature) as well as the dictionary originally set in the initialization (with the weights/biases)
def forward_propagation(X, parameters):
    cache = {}
    cache['Z1'] = np.dot(X, parameters['W1']) + parameters['b1']
    cache['A1'] = relu(cache['Z1'])
    
    cache['Z2'] = np.dot(cache['A1'], parameters['W2']) + parameters['b2']
    cache['A2'] = relu(cache['Z2'])
    
    cache['Z3'] = np.dot(cache['A2'], parameters['W3']) + parameters['b3']
    cache['A3'] = relu(cache['Z3'])
    
    cache['Z4'] = np.dot(cache['A3'], parameters['W4']) + parameters['b4']
    cache['A4'] = cache['Z4']  # Linear activation for regression output
    
    return cache

# Compute loss (MSE)
def compute_loss(y_true, y_pred):
    m = y_true.shape[0]
    return (1 / (2 * m)) * np.sum((y_pred - y_true) ** 2)

# Backward propagation
# For finding the adjusted weights based on loss for that particular sample
def backward_propagation(X, y, parameters, cache):
    m = y.shape[0]
    grads = {}
    
    dZ4 = cache['A4'] - y
    grads['dW4'] = (1 / m) * np.dot(cache['A3'].T, dZ4)
    grads['db4'] = (1 / m) * np.sum(dZ4, axis=0, keepdims=True)
    
    dA3 = np.dot(dZ4, parameters['W4'].T)
    dZ3 = dA3 * relu_derivative(cache['Z3'])
    grads['dW3'] = (1 / m) * np.dot(cache['A2'].T, dZ3)
    grads['db3'] = (1 / m) * np.sum(dZ3, axis=0, keepdims=True)
    
    dA2 = np.dot(dZ3, parameters['W3'].T)
    dZ2 = dA2 * relu_derivative(cache['Z2'])
    grads['dW2'] = (1 / m) * np.dot(cache['A1'].T, dZ2)
    grads['db2'] = (1 / m) * np.sum(dZ2, axis=0, keepdims=True)
    
    dA1 = np.dot(dZ2, parameters['W2'].T)
    dZ1 = dA1 * relu_derivative(cache['Z1'])
    grads['dW1'] = (1 / m) * np.dot(X.T, dZ1)
    grads['db1'] = (1 / m) * np.sum(dZ1, axis=0, keepdims=True)
    
    return grads

# Update parameters with SGDM
def update_parameters(parameters, grads, velocities, learning_rate, momentum=0.9):
    for key in parameters.keys():
        if key.startswith('W') or key.startswith('b'):
            velocities[key] = momentum * velocities[key] - learning_rate * grads['d' + key]
            parameters[key] += velocities[key]
    return parameters, velocities

# Training the network
def train_network(X_train, y_train, epochs=1000, learning_rate=0.05):
    parameters = initialize_parameters()
    velocities = {key: np.zeros_like(value) for key, value in parameters.items()}
    
    for epoch in range(epochs):
        cache = forward_propagation(X_train, parameters)
        loss = compute_loss(y_train, cache['A4'])
        grads = backward_propagation(X_train, y_train, parameters, cache)
        parameters, velocities = update_parameters(parameters, grads, velocities, learning_rate)
        
        if (epoch + 1) % 100 == 0:
            print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")
            
    return parameters

# Evaluate the network
def evaluate_network(X_test, y_test, parameters):
    cache = forward_propagation(X_test, parameters)
    y_pred = cache['A4']
    
    ss_residual = np.sum((y_test - y_pred) ** 2)
    ss_total = np.sum((y_test - np.mean(y_test)) ** 2)
    r2_score = 1 - (ss_residual / ss_total)
    
    mse = compute_loss(y_test, y_pred)
    return mse, r2_score

In [7]:
# Train model
parameters = train_network(X_train, y_train, epochs=1000, learning_rate=0.05)

# Evaluate the model
mse, r2_score = evaluate_network(X_test, y_test, parameters)
print(f"Test Mean Squared Error: {mse:.4f}")
print(f"R² Score: {r2_score:.4f}")

# Get actual prices to compare with
# This required reversing the scaling
y_pred_scaled = forward_propagation(X_test, parameters)['A4']
y_pred_actual = scaler_y.inverse_transform(y_pred_scaled)
y_test_actual = scaler_y.inverse_transform(y_test)

# Display samples and their predicted and actual prices
results = pd.DataFrame({'Actual Price': y_test_actual.flatten(), 'Predicted Price': y_pred_actual.flatten()})
print(results.head(10))

Epoch 100/1000, Loss: 0.4580
Epoch 200/1000, Loss: 0.1956
Epoch 300/1000, Loss: 0.1814
Epoch 400/1000, Loss: 0.1735
Epoch 500/1000, Loss: 0.1687
Epoch 600/1000, Loss: 0.1655
Epoch 700/1000, Loss: 0.1629
Epoch 800/1000, Loss: 0.1606
Epoch 900/1000, Loss: 0.1584
Epoch 1000/1000, Loss: 0.1562
Test Mean Squared Error: 0.1044
R² Score: 0.7701
   Actual Price  Predicted Price
0          45.1        52.163421
1          42.3        36.901442
2          52.2        53.178658
3          37.3        46.712917
4          22.8        23.220962
5          36.3        41.909712
6          53.0        48.373440
7          51.4        48.160268
8          16.1        19.694691
9          59.0        54.242535
