In [21]:
import csv
import random
import math
from sklearn.model_selection import train_test_split

# Load and preprocess the data
def load_data(file_path):
    with open(file_path, "r") as file:
        reader = csv.reader(file)
        headers = next(reader)

        # Indices of numerical features (excluding 'ocean_proximity' which is the last column)
        feature_indices = list(range(8))  
        target_index = 8  

        X, Y = [], []

        for row in reader:
            try:
                features = [float(row[i]) for i in feature_indices]
                target = float(row[target_index])
                X.append(features)
                Y.append(target)
            except ValueError:
                # Skip rows with missing or non-numeric data
                continue

    return X, Y

X, Y = load_data(r'C:\Users\VICTUS\Downloads\housing\housing.csv')

X[:3], Y[:3]

([[-122.23, 37.88, 41.0, 880.0, 129.0, 322.0, 126.0, 8.3252],
  [-122.22, 37.86, 21.0, 7099.0, 1106.0, 2401.0, 1138.0, 8.3014],
  [-122.24, 37.85, 52.0, 1467.0, 190.0, 496.0, 177.0, 7.2574]],
 [452600.0, 358500.0, 352100.0])

In [22]:
# Normalize the features using min-max scaling
def normalize_features(X):
    num_features = len(X[0])
    min_vals = [min(feature[i] for feature in X) for i in range(num_features)] #for each feature it generates minimum value
    max_vals = [max(feature[i] for feature in X) for i in range(num_features)] # - - generates max value

    normalized_X = []
    for row in X:
        normalized_row = [
            (row[i] - min_vals[i]) / (max_vals[i] - min_vals[i]) if max_vals[i] != min_vals[i] else 0
            for i in range(num_features)
        ]
        normalized_X.append(normalized_row)

    return normalized_X

# Normalize X
X_normalized = normalize_features(X)

# Display normalized features
X_normalized[:3]


[[0.21115537848605498,
  0.5674814027630182,
  0.7843137254901961,
  0.02233073910168371,
  0.019863438857852266,
  0.008940833543541018,
  0.020555829633284,
  0.5396684183666433],
 [0.21215139442231049,
  0.5653560042507968,
  0.39215686274509803,
  0.180502568798006,
  0.17147734326505276,
  0.06721040387903249,
  0.18697582634435125,
  0.5380270616957007],
 [0.21015936254980092,
  0.5642933049946866,
  1.0,
  0.03726028790884582,
  0.029329608938547486,
  0.013817651840017937,
  0.028942608123663872,
  0.46602805478545123]]

In [23]:
# Add bias term (intercept) to feature vectors
def add_bias_term(X):
    return [[1.0] + row for row in X]

# Initialize weights
def initialize_weights(n):
    return [random.uniform(-0.5, 0.5) for _ in range(n)]

# Predict function
def predict(X, weights):
    return [sum(w * x for w, x in zip(weights, row)) for row in X]
def compute_mae(Y_true, Y_pred):
    n = len(Y_true)
    return sum(abs((yt - yp)) for yt, yp in zip(Y_true, Y_pred)) / n
# Compute Mean Squared Error
def compute_mse(Y_true, Y_pred):
    n = len(Y_true)
    return sum((yt - yp) ** 2 for yt, yp in zip(Y_true, Y_pred)) / n

# Gradient Descent function
def gradient_descent(X, Y, weights, lr=0.01, epochs=100): # lr = learning rate
    n = len(Y)
    for epoch in range(epochs):
        predictions = predict(X, weights)
        gradients = [0.0] * len(weights)

        for i in range(len(weights)):
            for j in range(n):
                gradients[i] += (predictions[j] - Y[j]) * X[j][i]
            gradients[i] /= n

        # Update weights
        for i in range(len(weights)):
            weights[i] -= lr * gradients[i]

        # Print progress every 10 epochs
        if epoch % 10 == 0:
            mse = compute_mse(Y, predictions)
            print(f"Epoch {epoch}: RMSE = {math.sqrt(mse):.2f}")

    return weights

# Prepare data
X_bias = add_bias_term(X_normalized)
weights = initialize_weights(len(X_bias[0]))

X_train, X_test, Y_train, Y_test = train_test_split(X_bias, Y, test_size=0.2, random_state=1)

# Train model
trained_weights = gradient_descent(X_train, Y_train, weights, lr=0.1, epochs=1000)

trained_weights


Epoch 0: RMSE = 237028.05
Epoch 10: RMSE = 117187.08
Epoch 20: RMSE = 111889.96
Epoch 30: RMSE = 110819.92
Epoch 40: RMSE = 109877.68
Epoch 50: RMSE = 108963.30
Epoch 60: RMSE = 108073.53
Epoch 70: RMSE = 107207.51
Epoch 80: RMSE = 106364.47
Epoch 90: RMSE = 105543.69
Epoch 100: RMSE = 104744.53
Epoch 110: RMSE = 103966.34
Epoch 120: RMSE = 103208.55
Epoch 130: RMSE = 102470.60
Epoch 140: RMSE = 101751.94
Epoch 150: RMSE = 101052.08
Epoch 160: RMSE = 100370.53
Epoch 170: RMSE = 99706.82
Epoch 180: RMSE = 99060.51
Epoch 190: RMSE = 98431.14
Epoch 200: RMSE = 97818.31
Epoch 210: RMSE = 97221.61
Epoch 220: RMSE = 96640.64
Epoch 230: RMSE = 96075.02
Epoch 240: RMSE = 95524.36
Epoch 250: RMSE = 94988.31
Epoch 260: RMSE = 94466.51
Epoch 270: RMSE = 93958.61
Epoch 280: RMSE = 93464.26
Epoch 290: RMSE = 92983.14
Epoch 300: RMSE = 92514.93
Epoch 310: RMSE = 92059.29
Epoch 320: RMSE = 91615.93
Epoch 330: RMSE = 91184.54
Epoch 340: RMSE = 90764.81
Epoch 350: RMSE = 90356.46
Epoch 360: RMSE = 8995

[117854.7609373931,
 -102686.47090500961,
 -113791.22640462495,
 86730.55122233105,
 50248.82736590588,
 48882.11743852196,
 -7015.432180285033,
 49908.32179708757,
 503663.93219058565]

In [25]:
y_pred_train = predict(X_train, trained_weights)
y_pred_test = predict(X_test, trained_weights)
mse_train = compute_mse(Y_train, y_pred_train)
mse_test = compute_mse(Y_test, y_pred_test)
print(f"RMSE in trained data = {math.sqrt(mse_train)}\t\tRMSE in test data = {math.sqrt(mse_test)}")
mae_train = compute_mae(Y_train, y_pred_train)
mae_test = compute_mae(Y_test, y_pred_test)
print(f"MAE in trained data = {(mae_train)}\t\t\tMAE in test data = {(mae_test)}")
mae1 = compute_mae(Y_test, y_pred_train)
mae2 = compute_mae(Y_train, y_pred_test)
mse1 = compute_mse(Y_test, y_pred_train)
mse2 = compute_mse(Y_train, y_pred_test)
print(f"RMSE in Y_test, y_pred_train data = {math.sqrt(mse1)}\t\t\tRMSE in Y_train, y_pre_test data = {math.sqrt(mse2)}")
print(f"MAE in Y_test, y_pred_train data = {(mae1)}\t\t\tMAE in Y_train, y_pre_test data = {(mae2)}")

RMSE in trained data = 78278.83549430076		RMSE in test data = 76758.50987693858
MAE in trained data = 58959.901828312955			MAE in test data = 57709.98643723191
RMSE in Y_test, y_pred_train data = 135232.75938375504			RMSE in Y_train, y_pre_test data = 68896.1475800677
MAE in Y_test, y_pred_train data = 104870.13175988315			MAE in Y_train, y_pre_test data = 26663.884249816067
