In [37]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Pre-processing the Data

house_df = pd.read_csv("house_price_dataset_handled.csv")
X = house_df.drop('price', axis=1)  # Features
y = house_df['price']  # Target variable

# Splitting the data for testing and training

X_train_unscaled , X_test_unscaled , y_train , y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# Scaling the Features

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train_unscaled)
X_test = scaler.transform(X_test_unscaled)

# Initializing theta

theta = np.random.randn(X.shape[1])*0.01 



In [41]:
# Funtions

def cost_function(y_pred, y_test_or_train):
    m = len(y_pred)
    sum = 0
    y = y_test_or_train.to_numpy()
    for i in range(m):
        sum += ( y_pred[i] - y[i] )**2
    return ( 1 / (2 * m) ) * sum

def batch_gradient_descent(theta , X_train , y_pred , y_train , lr):
    y_train = y_train.to_numpy()
    y_pred = np.dot(X_train , theta)  

    for j in range(len(theta)):
        sum = 0

        for i in range(len(X_train)):
            sum += (y_train[i] - y_pred[i])*X_train[i, j]
            
        theta[j] = theta[j] - (lr*(sum))/len(y_test)

    return theta


In [63]:
# Training the model

tolerance = 1e-6 # Convergence thershold
previous_cost = float('inf')
alpha = 0.2
iteration = 0
cost_history = []
theta_history = []

while True:

    if iteration == 10000:
        break

    cost = cost_function(y_pred, y_train)  
    
    if abs(previous_cost - cost) < tolerance:  
        break  

    theta = batch_gradient_descent(theta, X_train, y_pred, y_train, alpha)  
    previous_cost = cost  
    iteration += 1

    if iteration % 10 == 0:  
       cost_history.append(cost)
       theta_history.append(theta.copy())



In [64]:
cost_history

[]

In [65]:
theta_history

[]

In [66]:
theta

array([-2.75817800e-03, -3.98130980e+05, -5.35451837e+05, -6.22814854e+05,
       -9.91140395e+04,  1.52822065e+05])

In [68]:
y_predictions = np.dot(X_test , theta)

In [70]:
# Evaluation Functions

def mean_absolute_error(y_pred , y_test_or_train):
    m = len(y_pred)
    sum = 0
    y = y_test_or_train.values
    for i in range(m):
        sum += np.abs( y_pred[i] - y[i] )
    return (1/m) * sum

def mean_squared_error(y_pred, y_test_or_train):
    m = len(y_pred)
    sum = 0
    y = y_test_or_train.values
    for i in range(m):
        sum += ( y_pred[i] - y[i] )**2
    return ( 1 / m ) * sum

def r2_score(y_pred , y_test_or_train):
    numerator = 0
    denominator = 0
    sum = 0
    m = len(y_pred)
    y = y_test_or_train.values

    for i in range(m):
        sum += y[i]
        numerator += (y_pred[i] - y[i])**2
    mean = sum/m

    for i in range(m):
        denominator += (y[i] - mean)**2
    
    return 1 - (numerator/denominator)

def root_mean_squared_error(mse):
    return mse**0.5

In [71]:
# Testing the model

print(f"Cost : ", cost_function(y_predictions,y_test))
print(f"Mean squared error : ", mean_squared_error(y_predictions,y_test))
print(f"Root mean squared error : ", root_mean_squared_error(mean_squared_error(y_predictions,y_test)))
print(f"Mean absolute error : ", mean_absolute_error(y_predictions,y_test))
print(f"R2 score : ", r2_score(y_predictions,y_train))
print(f"These are the model predictions : ", y_predictions)

Cost :  1869482641983.888
Mean squared error :  3738965283967.776
Root mean squared error :  1933640.4226142399
Mean absolute error :  1363195.9120679088
R2 score :  -3.47523260127943
These are the model predictions :  [-4.50704334e+06  1.77332281e+06 -1.02128364e+06  1.78067189e+06
 -1.37142510e+05 -1.13759110e+05 -8.48979930e+05  1.42748906e+06
  2.59623616e+05 -1.45723643e+06  2.69296249e+05  5.66000747e+05
 -1.62126722e+06 -8.84442561e+05 -1.33563502e+06 -8.97295291e+05
  1.23434044e+06 -2.32980292e+05 -9.98452394e+05  1.30630168e+05
  4.68016604e+05  9.75458092e+05 -3.18114052e+06  2.46964377e+06
 -3.21987063e+05 -3.36223752e+06 -1.07606962e+06  7.09092788e+05
 -4.75904091e+05 -1.10428016e+06  4.18594435e+04  6.17217892e+04
 -1.99659923e+05  1.79527121e+06  1.17145299e+05 -1.18807451e+06
  4.13637634e+05  7.54449220e+05 -5.56734674e+05 -6.63288041e+05
 -8.45884444e+05 -3.66224488e+05  5.37643835e+05  3.02914008e+05
 -8.32666007e+05  7.89284110e+05  3.75783633e+05 -1.21925831e+06
 