In [285]:
#The following demonstration aims to give an idea of how locally weighted regression is performed.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
#Importing all the basic libraries and classes for performing operations.
from sklearn.datasets import fetch_california_housing
#We will be using the california_housing_dataset for our demonstration.

In [286]:
data = fetch_california_housing()
X_raw = data.data
Y = data.target.reshape(-1, 1)
#Fetched all the datapoints in matrix form.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_raw)
#Performed standardised scaling on them to prevent one feature from dominating the other.
m = X_scaled.shape[0]
ones = np.ones((m, 1))
X = np.hstack([ones, X_scaled])
#Added the bias term to our standardised input matrix X_scaled, to finally form our main matrix.

In [287]:
#LWR requires external queries for prediction of values. However we do not have any such query space with their actual values available for our dataset.
#Hence, for comparison purpose of accuracy, we will be using query_space = X_input_space. Allowing us to measure the average accuracy.
#For seeing the average accuracy, we will use the following formula:
# 100 - [|Y_predicted - Y_real| * 100 / Y_real] = percentage of accuracy (assuming this to be equivalent to 1 - percentage of error)
X_query_raw = X_raw[:, [1, 2]]
#Copying the columns of raw X data to create our query space.
X_query = scaler.fit_transform(X_query_raw)
#Scaled the Query matrix to be used for computation.

In [288]:
X = X[:, [1, 2]]
#Since we only need 2 features of the X matrix, we will get rid of all other features.
def weight_compute(X_query_i, tau):
    diff = X - X_query_i
    eucl_dist_sq = np.sum(diff**2, axis = 1)
    #Calculating the euclidean distance between the query vector and the dataset input vector.
    weights = np.exp(-eucl_dist_sq / (2 * tau**2))
    #Using the basic gaussian weight formula, we make a 1-D array of weights to be attached to our datapoints per query.
    return weights
#Computed the Weight array to be used in our calculation.

In [277]:
i = 0
#Iterator through our query space. Gives the ith query.
tau = 0.1
#Hyperparameter set at 0.1 for now, aka the standard variance for gaussian weight.
predicted_values = []
#Array to store our predicted values per query.
while(i < 200):
    X_query_i = X_query[i]
    #Fetching ith row as our query vector.
    weights_i = weight_compute(X_query_i, tau)
    #Computing array of weights.
    Y_weighted_i = Y * weights_i[:, np.newaxis] 
    Y_Weighted_i = Y_weighted_i.reshape(-1)
    X0 = X[:, 0]
    X1 = X[:, 1]
    M = np.column_stack([X0*X0, X0*X1, X1*X1])
    A, B, C = M.T @ weights_i
    term0 = np.sum(Y_weighted_i*X0)
    term1 = np.sum(Y_weighted_i*X1)
    D = A*C - B*B
    Q1 = (C*term0 - B*term1)/D
    Q2 = (A*term1 - B*term0)/D
    Q_i = np.array([Q1,Q2]).reshape(2,1)
    #This is a simplified version of the standard locally weighted regression formula.
    #The formula is normally given by (X.T @ W @ X)^-1 @ X.T @ W @ Y. The time complexity is approximately O(m^2).
    #Here, W is the diagonal matrix of shape (m x m) of weights given per query for each datapoint input of the form [[w1, 0, 0, 0....], [0, w2, 0, 0...]..[0, 0.. 0, wm]]
    #My simplified version of formula and the standard formula can be verified to be equal. The simplified version has a complexity of O(m).
    H = X_query_i @ Q_i
    #Calculating the predicted value for one query.
    predicted_values.append(H)
    #Storing the value for future plot graphing.
    i += 1

KeyboardInterrupt: 

In [290]:
#This Cell was written by chatGPT.

import time
# Prepare a single query
x_q = X_query[0]
start = time.time()
# 1) weights
weights_i = weight_compute(x_q, tau)              # O(m)
Yw = Y.ravel() * weights_i                 # O(m)
X0, X1 = X[:,0], X[:,1]
# 2) A, B, C in one pass
M = np.column_stack([X0*X0, X0*X1, X1*X1])        # (m,3)
A,B,C = M.T @ weights_i                          # single O(m) matrix‑vector
# 3) Q1, Q2
term0 = np.sum(Yw*X0)                             # O(m)
term1 = np.sum(Yw*X1)                             # O(m)
Q1 = C*term0 - B*term1
Q2 = A*term1 - B*term0
Q     = np.array([Q1,Q2]).reshape(2,1)           # O(1)
# 4) single prediction
y_pred = x_q @ Q                                  # O(n)=O(2)
print("Elapsed:", time.time() - start)

Elapsed: 0.0068950653076171875
