In [146]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Pre-processing the Data

house_df = pd.read_csv("house_price_dataset_handled.csv")
X = house_df.drop('price', axis=1)  # Features
y = house_df['price']  # Target variable

# Splitting the data for testing and training

X_train_unscaled , X_test_unscaled , y_train , y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# Scaling the Features

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train_unscaled)
X_test = scaler.transform(X_test_unscaled)

# Initializing theta

theta = np.random.randn(X.shape[1])*0.01 



In [147]:
# Funtions

def cost_function(y_pred, y_test_or_train):
    m = len(y_pred)
    sum = 0
    y = y_test_or_train.to_numpy()
    for i in range(m):
        sum += ( y_pred[i] - y[i] )**2
    return ( 1 / (2 * m) ) * sum

def batch_gradient_descent(theta, X_train, y_train, lr):
    m = len(y_train)
    y_train = y_train.to_numpy()
    y_pred = np.dot(X_train, theta)  # Predictions

    print(f"y_pred (first 5): {y_pred[:5]}")  # Debug print

    for j in range(len(theta)):  # Iterate over all parameters (features)
        sum = 0
        for i in range(m):  # Iterate over all data points
            sum += (y_train[i] - y_pred[i]) * X_train[i, j]

        print(f"Gradient sum for theta[{j}]: {sum}")  # Debug print for gradient

        # Update theta[j]
        theta[j] = theta[j] - (lr * sum) / m

    return theta



In [None]:
theta = [ 0.00699621,  0.01041627, -0.00349794, -0.00394576, -0.00231157,
       -0.00797739]

In [None]:
# Training the model

tolerance = 1e-6 # Convergence thershold
# 1st previous_cost = float('inf')
previous_cost = 229229645440.9421
# 1st alpha = 0.0000000000001
alpha = 0.001
iteration = 0
cost_history = []
theta_history = []

while True:

    if iteration >= 10000:
        break
    
    y_pred = np.dot(X_train , theta)  
    cost = cost_function(y_pred, y_train)
    print(cost)
    
    if abs(previous_cost - cost) < tolerance:  
        break  

    theta = batch_gradient_descent(theta, X_train, y_train, alpha)  
    previous_cost = cost  
    iteration += 1

    if iteration % 100 == 0:  
       cost_history.append(cost)
       theta_history.append(theta.copy())



In [None]:
cost_history

In [None]:
theta_history

In [151]:
theta

array([ 0.00699621,  0.01041627, -0.00349794, -0.00394576, -0.00231157,
       -0.00797739])

In [152]:
y_predictions = np.dot(X_test , theta)

In [153]:
# Evaluation Functions

def mean_absolute_error(y_pred , y_test_or_train):
    m = len(y_pred)
    sum = 0
    y = y_test_or_train.values
    for i in range(m):
        sum += np.abs( y_pred[i] - y[i] )
    return (1/m) * sum

def mean_squared_error(y_pred, y_test_or_train):
    m = len(y_pred)
    sum = 0
    y = y_test_or_train.values
    for i in range(m):
        sum += ( y_pred[i] - y[i] )**2
    return ( 1 / m ) * sum

def r2_score(y_pred , y_test_or_train):
    numerator = 0
    denominator = 0
    sum = 0
    m = len(y_pred)
    y = y_test_or_train.values

    for i in range(m):
        sum += y[i]
        numerator += (y_pred[i] - y[i])**2
    mean = sum/m

    for i in range(m):
        denominator += (y[i] - mean)**2
    
    return 1 - (numerator/denominator)

def root_mean_squared_error(mse):
    return mse**0.5

In [154]:
# Testing the model

print(f"Cost : ", cost_function(y_predictions,y_test))
print(f"Mean squared error : ", mean_squared_error(y_predictions,y_test))
print(f"Root mean squared error : ", root_mean_squared_error(mean_squared_error(y_predictions,y_test)))
print(f"Mean absolute error : ", mean_absolute_error(y_predictions,y_test))
print(f"R2 score : ", r2_score(y_predictions,y_train))
print(f"These are the model predictions : ", y_predictions)

Cost :  229229645440.9421
Mean squared error :  458459290881.8842
Root mean squared error :  677096.219810659
Mean absolute error :  556496.9892387113
R2 score :  -0.3850864942957213
These are the model predictions :  [-1.49016993e-02 -2.20208579e-02  8.58972118e-03  2.88433401e-03
  1.01450577e-02  4.00753889e-03  3.04582469e-02  7.77833332e-03
  6.21402811e-03 -1.45468991e-02  1.41589339e-03  1.49120966e-02
  5.68647575e-03  5.21443511e-04  1.02484716e-02  1.51224634e-02
 -1.25344852e-03 -1.03127616e-02  1.35345904e-02 -3.00708831e-03
 -5.01318749e-03  1.26818777e-02  1.00197510e-02 -1.02077129e-02
  1.71526391e-04 -2.16273507e-02 -9.34648297e-03 -2.55553922e-04
  1.62298486e-02  2.46469737e-02  8.33530531e-03 -3.32320664e-03
 -1.16222756e-03  2.72676854e-03 -1.21593583e-02  1.45691900e-02
 -2.48630113e-03 -5.64730431e-03 -8.03903555e-03  1.81088620e-02
  1.02201918e-03  3.24965055e-03 -7.31490076e-03  1.12935211e-02
  6.54949278e-03  7.28345537e-03  1.38133073e-03  8.41182458e-03
 -