<a href="https://colab.research.google.com/github/ShravaniSindagi00/ML_LAB/blob/main/LWR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Activity to complete:

1. Consider two more features and implement the algorithm.
2. Implement the same for the Diabetes dataset available in `sklearn.datasets`.
3. Compare KNN regression and Locally Weighted Regression (LWR), considering multiple features, for both the California housing dataset and the Diabetes dataset.

In [3]:
from sklearn.datasets import fetch_california_housing
import pandas as pd
import numpy as np

# Fetch the California housing data and wrap the feature matrix in a
# DataFrame so that columns can be picked by name.
housing_data = fetch_california_housing()
X_df = pd.DataFrame(data=housing_data.data, columns=housing_data.feature_names)
Y = housing_data.target  # Median house value in 100,000s

# Keep three predictors and hand them on as a plain NumPy array.
features = ['MedInc', 'HouseAge', 'AveRooms']
X = X_df.loc[:, features].to_numpy()


In [4]:
def locally_weighted_regression(X, Y, tau, x_query):
    """Predict the target at ``x_query`` with locally weighted linear regression.

    Each training row receives a Gaussian kernel weight
    ``w_i = exp(-||x_i - x_query||^2 / (2 * tau**2))``. A weighted
    least-squares linear model with an intercept is then solved via the
    pseudo-inverse of the weighted normal equations and evaluated at the
    query point.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training features.
    Y : array-like of shape (n_samples,)
        Training targets.
    tau : float
        Kernel bandwidth; smaller values make the fit more local.
    x_query : array-like of shape (n_features,)
        Point at which to predict.

    Returns
    -------
    numpy.float64
        Predicted target value at ``x_query``.

    Raises
    ------
    ValueError
        If ``tau`` is zero (the kernel weights would be undefined).
    """
    if tau == 0:
        raise ValueError("tau must be non-zero")

    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    x_query = np.asarray(x_query, dtype=float)

    weights = np.exp(-np.sum((X - x_query) ** 2, axis=1) / (2 * tau ** 2))
    X_augmented = np.c_[np.ones(X.shape[0]), X]  # Add intercept
    x_query_augmented = np.insert(x_query, 0, 1)

    # Scale column i of X_augmented.T by weights[i] through broadcasting.
    # This equals X_augmented.T @ np.diag(weights) but never allocates the
    # dense n_samples x n_samples weight matrix.
    X_transpose_W = X_augmented.T * weights

    # Reduce the right-hand side to a small vector before applying the
    # pseudo-inverse.
    theta = np.linalg.pinv(X_transpose_W @ X_augmented) @ (X_transpose_W @ Y)
    return x_query_augmented @ theta


In [5]:
# Query values are given in the same column order as `features`
# (MedInc, HouseAge, AveRooms) and in raw, unscaled units.
x_query = np.array([3.0, 30.0, 6.0])
tau = 10  # kernel bandwidth for the unscaled housing features
y_pred = locally_weighted_regression(X=X, Y=Y, tau=tau, x_query=x_query)
print(f"Predicted (scaled) house value: {y_pred}")


Predicted (scaled) house value: 1.4933710720800608


 Activity 2: <br>
 Implement Locally Weighted Regression (LWR) on the Diabetes Dataset

In [6]:
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler

# Pull the feature matrix, the target and the column names from the
# diabetes dataset.
diabetes = load_diabetes()
X_full, Y = diabetes.data, diabetes.target
feature_names = diabetes.feature_names

# Restrict the model to the first three columns to keep the example small.
X = X_full[:, 0:3]

# Standardize the selected columns so the distance-based kernel treats
# them on a comparable scale.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)


In [7]:
# The first standardized training row serves as the query patient.
x_query = X_scaled[0, :]
tau = 0.5  # Bandwidth for Diabetes dataset

y_pred = locally_weighted_regression(X=X_scaled, Y=Y, tau=tau, x_query=x_query)
print(f"Predicted disease progression for patient: {y_pred:.2f}")


Predicted disease progression for patient: 231.53


 Activity 3: <br>
 Compare KNN Regression vs Locally Weighted Regression (LWR)

In [8]:
from sklearn.datasets import fetch_california_housing, load_diabetes
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd


In [9]:
# Reload the housing data and keep the same three predictors that the
# earlier LWR cell used.
housing = fetch_california_housing()
Y_h = housing.target
X_h = (
    pd.DataFrame(housing.data, columns=housing.feature_names)
    .loc[:, ['MedInc', 'HouseAge', 'AveRooms']]
    .to_numpy()
)

# Standardize the features; the fitted scaler is reused later to
# transform the query point.
scaler_h = StandardScaler()
X_h_scaled = scaler_h.fit(X_h).transform(X_h)


In [10]:
# Fit a 5-nearest-neighbour regressor on the standardized housing features.
knn_h = KNeighborsRegressor(n_neighbors=5)
knn_h.fit(X=X_h_scaled, y=Y_h)


In [11]:
# Express the raw query (MedInc=3.0, HouseAge=30, AveRooms=6.0) in the
# standardized space the models were fitted in.
x_query = scaler_h.transform(np.array([[3.0, 30.0, 6.0]])).ravel()

# --- KNN Prediction ---
knn_pred_h = knn_h.predict(x_query.reshape(1, -1))[0]

# --- LWR Prediction ---
def locally_weighted_regression(X, Y, tau, x_query):
    """Predict the target at ``x_query`` with locally weighted linear regression.

    NOTE: this repeats the definition from the earlier LWR cell of this
    notebook and shadows it; it is kept so this cell runs on its own.

    Each training row receives a Gaussian kernel weight
    ``w_i = exp(-||x_i - x_query||^2 / (2 * tau**2))``. A weighted
    least-squares linear model with an intercept is solved via the
    pseudo-inverse of the weighted normal equations and evaluated at the
    query point.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training features.
    Y : array-like of shape (n_samples,)
        Training targets.
    tau : float
        Kernel bandwidth; smaller values make the fit more local.
    x_query : array-like of shape (n_features,)
        Point at which to predict.

    Returns
    -------
    numpy.float64
        Predicted target value at ``x_query``.

    Raises
    ------
    ValueError
        If ``tau`` is zero (the kernel weights would be undefined).
    """
    if tau == 0:
        raise ValueError("tau must be non-zero")

    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    x_query = np.asarray(x_query, dtype=float)

    weights = np.exp(-np.sum((X - x_query) ** 2, axis=1) / (2 * tau ** 2))
    X_aug = np.c_[np.ones(X.shape[0]), X]
    x_query_aug = np.insert(x_query, 0, 1)

    # Broadcasting scales column i of X_aug.T by weights[i]; this equals
    # X_aug.T @ np.diag(weights) without allocating the dense
    # n_samples x n_samples matrix.
    Xt_W = X_aug.T * weights

    # Collapse the right-hand side to a small vector first, so the
    # pseudo-inverse is only multiplied by a (n_features + 1) vector.
    theta = np.linalg.pinv(Xt_W @ X_aug) @ (Xt_W @ Y)
    return x_query_aug @ theta

# LWR prediction at the same standardized query, with bandwidth 1
# (features are in unit-variance space here).
lwr_pred_h = locally_weighted_regression(X_h_scaled, Y_h, 1, x_query)

# Report both estimates next to each other.
print(f"Housing Dataset:\nKNN Prediction: {knn_pred_h:.2f}\nLWR Prediction: {lwr_pred_h:.2f}")


Housing Dataset:
KNN Prediction: 0.96
LWR Prediction: 1.35
