In [41]:
import copy
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.animation import FuncAnimation

# Load the housing table; the path is relative to the working directory
dataset = pd.read_csv("Housing.csv")

# Pull the two columns used below out of the frame as arrays
prices = dataset["price"].values
areas = dataset["area"].values

# Min-max scale each column: subtract its minimum and divide by its range,
# so the smallest value maps to 0 and the largest to 1
prices_normalized = (prices - prices.min()) / (prices.max() - prices.min())
areas_normalized = (areas - areas.min()) / (areas.max() - areas.min())

def cost_function(x, y, w, b):
    """
    Calculate the halved mean squared error of the line w * x + b.

    The cost is sum((w * x + b - y) ** 2) / (2 * m), where m is the number
    of samples. The computation is vectorized with NumPy instead of looping
    over samples in Python.

    Args:
    x: Feature values (any 1-D array-like of numbers)
    y: Target values (array-like with the same shape as x)
    w: Weight parameter
    b: Bias parameter

    Returns:
    float: The cost of the current parameters

    Raises:
    ValueError: If x and y differ in shape, or if they contain no samples
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # A longer y used to be silently truncated to len(x); reject it instead.
    if x.shape != y.shape:
        raise ValueError(
            f"x and y must have the same shape, got {x.shape} and {y.shape}"
        )
    # Fail with a clear message rather than a ZeroDivisionError further down.
    if x.size == 0:
        raise ValueError("x and y must contain at least one sample")

    size = x.shape[0]
    residuals = w * x + b - y
    return np.sum(residuals ** 2) / (2 * size)

def compute_gradient(x, y, w, b):
    """
    Compute the gradient of the cost function with respect to parameters w and b.

    For the cost sum((w * x + b - y) ** 2) / (2 * m) the partial derivatives are
    dJ/dw = mean((w * x + b - y) * x) and dJ/db = mean(w * x + b - y).
    The computation is vectorized with NumPy instead of looping over samples
    in Python, and any array-like input is accepted (not only ndarrays).

    Args:
    x: Feature values (any 1-D array-like of numbers)
    y: Target values (array-like with the same shape as x)
    w: Weight parameter
    b: Bias parameter

    Returns:
    tuple: The gradients with respect to w and b, as (dj_dw, dj_db)

    Raises:
    ValueError: If x and y differ in shape, or if they contain no samples
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # A longer y used to be silently truncated to the length of x; reject it.
    if x.shape != y.shape:
        raise ValueError(
            f"x and y must have the same shape, got {x.shape} and {y.shape}"
        )
    # Fail with a clear message rather than a ZeroDivisionError further down.
    if x.size == 0:
        raise ValueError("x and y must contain at least one sample")

    m = x.shape[0]
    errors = w * x + b - y  # prediction minus target, one entry per sample
    dj_dw = np.sum(errors * x) / m
    dj_db = np.sum(errors) / m

    return dj_dw, dj_db

def gradient_descent(x, y, w_init, b_init, alpha, num_iters, cost_function, gradient_function):
    """
    Fit w and b by repeatedly stepping against the gradient of the cost.

    Args:
    x: Feature values
    y: Target values
    w_init: Starting value for parameter w
    b_init: Starting value for parameter b
    alpha: Learning rate (step size applied to each gradient)
    num_iters: Number of update steps to run
    cost_function: Callable (x, y, w, b) -> cost, used only for the snapshots
    gradient_function: Callable (x, y, w, b) -> (dj_dw, dj_db)

    Returns:
    tuple: (w, b, J_history, p_history). The two history lists hold one entry per
    snapshot, taken every ceil(num_iters / 10) steps starting at step 0:
    J_history holds the cost and p_history the [w, b] pair after that step's update.
    """
    # Copy the starting values so the caller's objects are never aliased.
    weight = copy.deepcopy(w_init)
    bias = copy.deepcopy(b_init)
    J_history, p_history = [], []

    # Snapshot cadence; only used inside the loop, so 0 is harmless when
    # num_iters is 0 and the loop body never runs.
    log_every = math.ceil(num_iters / 10)

    for step in range(num_iters):
        grad_w, grad_b = gradient_function(x, y, weight, bias)

        # Both updates use gradients evaluated at the same (w, b) point.
        bias = bias - alpha * grad_b
        weight = weight - alpha * grad_w

        if step % log_every != 0:
            continue

        J_history.append(cost_function(x, y, weight, bias))
        p_history.append([weight, bias])
        print(f"Iteration {step:4}: Cost {J_history[-1]:0.2e}")

    return weight, bias, J_history, p_history

# Starting point and hyperparameters for the optimizer
initial_w = 0
initial_b = 0
iterations = 10000
learning_rate = 0.5

# Fit the line: normalized price is the input (x), normalized area the target (y)
final_w, final_b, cost_history, param_history = gradient_descent(
    x=prices_normalized,
    y=areas_normalized,
    w_init=initial_w,
    b_init=initial_b,
    alpha=learning_rate,
    num_iters=iterations,
    cost_function=cost_function,
    gradient_function=compute_gradient,
)

# Report the fitted slope and intercept
print(f"Optimized parameters (w, b): ({final_w:8.4f}, {final_b:8.4f})")

# Define a class for linear model
class LinearModel:
    """Straight-line model ``w * x + b`` with fixed parameters."""

    def __init__(self, w, b):
        """Store the slope ``w`` and the intercept ``b`` on the instance."""
        self.w, self.b = w, b

    def predict(self, x):
        """Return ``w * x + b`` for the given input ``x``."""
        slope, intercept = self.w, self.b
        return slope * x + intercept

# Wrap the fitted slope and intercept in a model object
model = LinearModel(final_w, final_b)

# Evaluate the fitted line at every normalized price
predictions = model.predict(prices_normalized)

# Ten evenly spaced steps from 0 up to each fitted parameter (endpoints included);
# the animation walks through these (w, b) pairs frame by frame
w_values = np.linspace(start=0, stop=model.w, num=10)
b_values = np.linspace(start=0, stop=model.b, num=10)

# Create a function to update the plot for each frame of the animation
def update(frame):
    """
    Redraw the current axes for one animation frame.

    Reads the module-level w_values, b_values, prices_normalized,
    areas_normalized, model and ani. Draws the line for the frame's (w, b)
    pair over the scatter of actual values, and stops the animation timer
    once that pair matches the fitted model parameters.

    Args:
    frame: Index into w_values / b_values selecting the pair to draw
    """
    w_now = w_values[frame]
    b_now = b_values[frame]

    plt.cla()  # Start from an empty axes on every frame

    # Line first, then scatter, so the legend order stays the same
    line_y = w_now * prices_normalized + b_now
    plt.plot(prices_normalized, line_y, c='b', label='Regression Line')
    plt.scatter(prices_normalized, areas_normalized, marker='x', c='r', label='Actual Values')

    plt.title('Effect of Different w and b on Prediction Line')
    plt.xlabel('Price (Normalized)')
    plt.ylabel('Area (Normalized)')

    # Show the frame's parameters in the top-left corner (axes coordinates)
    caption = f'w: {w_now:.4f}\nb: {b_now:.4f}'
    plt.text(0.05, 0.95, caption,
             horizontalalignment='left', verticalalignment='top',
             transform=plt.gca().transAxes)

    plt.legend()
    plt.grid(True)

    # Halt the timer once both parameters have reached their fitted values
    reached_final = np.allclose([w_now, b_now], [model.w, model.b])
    if reached_final:
        ani.event_source.stop()

# Create the animation.
# Requires `from matplotlib.animation import FuncAnimation` in the notebook's
# import cell; without it this cell raises NameError on a fresh kernel.
fig = plt.figure(figsize=(10, 6))

# Keep the result in `ani`: update() uses it to stop the timer on the last
# frame, so the name must stay bound at module level.
ani = FuncAnimation(
    fig,
    update,
    frames=len(w_values),  # one frame per (w, b) step
    interval=50,           # delay between frames, in milliseconds
    blit=False,
    repeat=False,          # single pass; do not loop after the final frame
)

# Display the animation
plt.show()


Iteration    0: Cost 1.67e-02
Iteration 1000: Cost 7.91e-03
Iteration 2000: Cost 7.91e-03
Iteration 3000: Cost 7.91e-03
Iteration 4000: Cost 7.91e-03
Iteration 5000: Cost 7.91e-03
Iteration 6000: Cost 7.91e-03
Iteration 7000: Cost 7.91e-03
Iteration 8000: Cost 7.91e-03
Iteration 9000: Cost 7.91e-03
Optimized parameters (w, b): (  0.4937,   0.1116)


In [42]:
from sklearn.metrics import r2_score

# Coefficient of determination of the fitted line against the normalized areas
r2 = r2_score(y_true=areas_normalized, y_pred=predictions)

print(f"R^2 Score: {r2:.4f}")


R^2 Score: 0.2873


In [43]:
from sklearn.metrics import mean_absolute_error

# Average absolute gap between the fitted line and the normalized areas
mae = mean_absolute_error(y_true=areas_normalized, y_pred=predictions)

print(f"Mean Absolute Error: {mae:.4f}")


Mean Absolute Error: 0.0907


In [44]:
# Largest absolute error (in normalized units) still counted as a hit
threshold = 0.1

# Number of predictions whose absolute error is at most the threshold
num_correct = (np.abs(areas_normalized - predictions) <= threshold).sum()

# Share of such predictions, expressed as a percentage
accuracy_percent = num_correct / len(predictions) * 100

print(f"Accuracy Percentage: {accuracy_percent:.2f}%")


Accuracy Percentage: 69.54%
