# **Widrow-Hoff (µ-LMS): An Approximation to Gradient Descent**

In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Read the Auto-MPG table from disk
auto_mpg = pd.read_csv('auto-mpg.csv')

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
train_set = auto_mpg.sample(frac=0.8, random_state=42)
test_set = auto_mpg.drop(index=train_set.index)

# Standardize every column (the 'mpg' target included) with statistics taken
# from the training rows only, so nothing about the test rows leaks into the scaling
train_mean, train_std = train_set.mean(), train_set.std()
train_set_norm = train_set.sub(train_mean).div(train_std)
test_set_norm = test_set.sub(train_mean).div(train_std)


In [3]:

# Batch gradient descent for linear least squares
def gradient_descent(X, y, alpha, num_iters):
    """Fit linear weights by batch gradient descent on the half mean squared error.

    Starts from an all-zero weight vector and performs ``num_iters`` updates,
    each one using the gradient averaged over every row of ``X``.

    Parameters
    ----------
    X : ndarray, shape (m, n)
        Design matrix, one sample per row.
    y : ndarray, shape (m,)
        Target values.
    alpha : float
        Learning rate.
    num_iters : int
        Number of gradient steps to take.

    Returns
    -------
    tuple
        ``(theta, J_history)``: the final weights and the cost recorded at
        each step, evaluated with the weights held *before* that step's update.
    """
    sample_count = X.shape[0]
    weights = np.zeros(X.shape[1])
    cost_trace = np.zeros(num_iters)

    for step in range(num_iters):
        # Prediction error of the current weights on every sample
        residual = np.dot(X, weights) - y
        cost_trace[step] = np.sum(residual ** 2) / (2 * sample_count)
        # Move against the averaged gradient
        weights -= alpha * (1 / sample_count) * np.dot(X.T, residual)

    return weights, cost_trace

# Fit the gradient-descent model on the standardized training rows
y_train = train_set_norm['mpg'].values
X_train = np.column_stack([
    np.ones(len(train_set_norm)),                  # bias term
    train_set_norm.drop(columns='mpg').values,     # every column except the target
])
alpha, num_iters = 0.1, 1000
theta_gd, J_history_gd = gradient_descent(X_train, y_train, alpha, num_iters)


In [4]:
# Score the gradient-descent weights on the held-out rows
y_test = test_set_norm['mpg'].values
X_test = np.column_stack([
    np.ones(len(test_set_norm)),                   # bias term
    test_set_norm.drop(columns='mpg').values,      # every column except the target
])
y_pred_gd = X_test @ theta_gd
print('Gradient Descent')
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred_gd))
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred_gd))

Gradient Descent
Mean squared error: 0.21
Coefficient of determination: 0.78


In [12]:
# Widrow-Hoff (µ-LMS) learning rule

def Widrow_Hoff(X, y, alpha, num_iters):
    """Fit linear weights with the Widrow-Hoff (µ-LMS) rule.

    Batch gradient descent moves the weights once per pass, using the gradient
    averaged over all rows. µ-LMS instead corrects the weights after every
    single sample, using only that sample's prediction error as a cheap
    estimate of the true gradient.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Design matrix, one sample per row.
    y : array-like, m values
        Target values; flattened to 1-D so a column vector is accepted.
    alpha : float
        Learning rate on the same scale as ``gradient_descent``: the
        per-sample step is ``mu = alpha / m``, so one full pass moves the
        weights about as far as one batch step with the same ``alpha``.
    num_iters : int
        Number of full passes (epochs) over the rows, visited in stored order.

    Returns
    -------
    tuple
        ``(theta, J_history)``: the final weights and, for each pass, the half
        mean squared error over all rows measured at the start of that pass.
    """
    X = np.asarray(X, dtype=float)
    # Flatten so an (m, 1) target cannot broadcast into an (m, m) error matrix
    y = np.asarray(y, dtype=float).ravel()
    m = X.shape[0]
    theta = np.zeros(X.shape[1])
    J_history = np.zeros(num_iters)
    for epoch in range(num_iters):
        # Full-data cost before this pass, comparable with gradient_descent's history
        batch_error = np.dot(X, theta) - y
        J_history[epoch] = np.sum(batch_error ** 2) / (2 * m)
        mu = alpha * (1 / m)
        for x_i, y_i in zip(X, y):
            # LMS correction: step along x_i in proportion to this sample's error
            theta -= mu * (np.dot(x_i, theta) - y_i) * x_i
    return theta, J_history

# Fit the Widrow-Hoff model on the standardized training rows
y_train = train_set_norm['mpg'].values
X_train = np.column_stack([
    np.ones(len(train_set_norm)),                  # bias term
    train_set_norm.drop(columns='mpg').values,     # every column except the target
])
alpha, num_iters = 0.1, 100
theta_wh, J_history_wh = Widrow_Hoff(X_train, y_train, alpha, num_iters)

# Score the Widrow-Hoff weights on the held-out rows
y_test = test_set_norm['mpg'].values
X_test = np.column_stack([
    np.ones(len(test_set_norm)),                   # bias term
    test_set_norm.drop(columns='mpg').values,      # every column except the target
])
y_pred_wh = X_test @ theta_wh
print('Widrow-Hoff')
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred_wh))
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred_wh))


Widrow-Hoff
Mean squared error: 0.21
Coefficient of determination: 0.78
