# Lab 2 : Linear Regression

---



## Importing the NumPy library

In [18]:
import numpy as np

## Importing the dataset

In [19]:
# Load the California housing dataset through scikit-learn.
from sklearn import datasets

california_housing = datasets.fetch_california_housing()
# Unpack the feature matrix and the target values (median house values).
X, y = california_housing.data, california_housing.target

In [20]:
# Make sure both the features and the targets are NumPy arrays.
X, y = np.array(X), np.array(y)

In [24]:
# Preview the raw feature matrix.
print(X)

[[   8.3252       41.            6.98412698 ...    2.55555556
    37.88       -122.23      ]
 [   8.3014       21.            6.23813708 ...    2.10984183
    37.86       -122.22      ]
 [   7.2574       52.            8.28813559 ...    2.80225989
    37.85       -122.24      ]
 ...
 [   1.7          17.            5.20554273 ...    2.3256351
    39.43       -121.22      ]
 [   1.8672       18.            5.32951289 ...    2.12320917
    39.43       -121.32      ]
 [   2.3886       16.            5.25471698 ...    2.61698113
    39.37       -121.24      ]]


In [21]:
# Dimensions of the feature matrix as (rows, columns).
X.shape

(20640, 8)

In [25]:
# Preview the target vector.
print(y)

[4.526 3.585 3.521 ... 0.923 0.847 0.894]


## Applying normalization

In [22]:
# Column-wise statistics: one mean and one standard deviation per feature.
mean, std = X.mean(axis=0), X.std(axis=0)

# Shift every column by its mean and scale it by its standard deviation.
X_normalized = (X - mean) / std

In [26]:
# Preview the feature matrix after normalization.
print(X_normalized)

[[ 2.34476576  0.98214266  0.62855945 ... -0.04959654  1.05254828
  -1.32783522]
 [ 2.33223796 -0.60701891  0.32704136 ... -0.09251223  1.04318455
  -1.32284391]
 [ 1.7826994   1.85618152  1.15562047 ... -0.02584253  1.03850269
  -1.33282653]
 ...
 [-1.14259331 -0.92485123 -0.09031802 ... -0.0717345   1.77823747
  -0.8237132 ]
 [-1.05458292 -0.84539315 -0.04021111 ... -0.09122515  1.77823747
  -0.87362627]
 [-0.78012947 -1.00430931 -0.07044252 ... -0.04368215  1.75014627
  -0.83369581]]


In [27]:
# Dimensions of the normalized feature matrix as (rows, columns).
X_normalized.shape

(20640, 8)

## 1. Linear regression single prediction

In [28]:
def single_prediction(weight, feature_vector):
  """Return the linear-model output for a single sample.

  Both arguments are converted to NumPy arrays and combined with ``np.dot``,
  so for two 1-D inputs of equal length the result is their inner product.
  """
  w, x = np.array(weight), np.array(feature_vector)
  return np.dot(w, x)

## 2. Linear regression vector prediction

In [29]:
def linear_regression_prediction(weight_vector, feature_matrix):
  """Return ``np.dot(feature_matrix, weight_vector)``.

  With a 2-D feature matrix (one sample per row) and a 1-D weight vector this
  yields one prediction per row; with a 1-D feature vector it yields a single
  prediction.
  """
  predictions = np.dot(feature_matrix, weight_vector)
  return predictions

## 3. Mean Squared Error (MSE)

In [30]:
def mean_squared_error(predictions, actual_values):
  """Return the mean of the squared differences between predictions and targets.

  Args:
    predictions: Predicted value(s); any array-like (scalar, list, ndarray).
    actual_values: True value(s); any array-like that broadcasts against
      ``predictions``.

  Returns:
    The mean of ``(predictions - actual_values) ** 2`` over all elements.
  """
  # Coerce first so plain Python sequences work too (list - list is a TypeError).
  residuals = np.asarray(predictions) - np.asarray(actual_values)
  return np.mean(np.square(residuals))

## 4. MSE Gradient

In [31]:
def mse_gradient(predictions, actual_values, feature_vector):
    """Gradient of the mean squared error with respect to the weights.

    Computes ``2 * np.dot(predictions - actual_values, feature_vector) / n``,
    where ``n`` is the number of samples.

    Args:
        predictions: Predicted value(s); a scalar for one sample or a 1-D
            array-like for a batch.
        actual_values: Target value(s) matching ``predictions``.
        feature_vector: Features used to produce ``predictions``; a 1-D vector
            for one sample or a 2-D matrix with one row per sample.

    Returns:
        The gradient as computed by the formula above.
    """
    predictions = np.asarray(predictions)
    actual_values = np.asarray(actual_values)
    feature_vector = np.asarray(feature_vector)

    residuals = predictions - actual_values

    # A 0-d residual is the single-sample case: there is nothing to average over.
    # Deciding this explicitly (instead of catching whatever len() raises) lets
    # genuine errors such as shape mismatches surface unchanged.
    n_samples = residuals.shape[0] if residuals.ndim else 1

    return 2 * np.dot(residuals, feature_vector) / n_samples

## 5. Gradient Descent Algorithm

### Vanilla Gradient Descent

In [36]:
def vanilla_gradient_descent(weight_vector, learning_rate, X, y, linear_regression_predictions, epochs):
    """Fit linear-regression weights with full-batch gradient descent.

    Args:
        weight_vector: Initial weights (array-like). It is copied, so the
            caller's object is never modified.
        learning_rate: Step size applied to the gradient.
        X: Feature matrix passed to the prediction function and to
            ``mse_gradient``.
        y: Target values.
        linear_regression_predictions: Callable invoked as ``f(weights, X)``
            that returns the predictions.
        epochs: Number of update steps to perform.

    Returns:
        A new float array holding the weights after ``epochs`` updates.
    """
    # Work on a float copy: an in-place ``-=`` on the caller's array would
    # overwrite their initial weights and fails for integer arrays.
    weights = np.array(weight_vector, dtype=float)
    iteration = 0
    # Defined up front so the summary below also works when epochs <= 0.
    current_loss = float('inf')

    while iteration < epochs:
        # Update the entire weight vector using every sample.
        predictions = linear_regression_predictions(weights, X)
        gradients = mse_gradient(predictions, y, X)
        weights -= learning_rate * gradients

        # Loss of the predictions made at the start of this step (before the update).
        current_loss = mean_squared_error(predictions, y)

        iteration += 1
        print(f"Iteration: {iteration}\nLoss MSE: {current_loss}\n")

    print(f"No. of iterations taken: {iteration}\nFinal Loss MSE: {current_loss}")
    return weights

### Stochastic Gradient Descent

In [70]:
def stochastic_gradient_descent(weight_vector, learning_rate, target_loss, X, y, linear_regression_predictions, epochs, stopping_criterion):
    """Fit linear-regression weights with single-sample stochastic gradient descent.

    Each step draws one sample index with ``np.random.randint`` and updates
    every weight using the gradient computed from that sample alone.

    Args:
        weight_vector: Initial weights (array-like). It is copied, so the
            caller's object is never modified.
        learning_rate: Step size applied to the gradient.
        target_loss: Loss threshold used when ``stopping_criterion`` is not
            ``"epochs"``.
        X: Feature matrix, indexable by sample.
        y: Target values, indexable by sample.
        linear_regression_predictions: Callable invoked as ``f(weights, features)``
            that returns the prediction(s).
        epochs: Number of update steps when ``stopping_criterion == "epochs"``.
        stopping_criterion: ``"epochs"`` runs exactly ``epochs`` steps; any other
            value keeps training while the loss is above ``target_loss``.
            NOTE: that second mode has no iteration cap, so it never returns
            if ``target_loss`` cannot be reached.

    Returns:
        A new float array holding the trained weights.
    """
    # Work on a float copy so the caller's initial weights stay untouched.
    weights = np.array(weight_vector, dtype=float)
    run_fixed_epochs = stopping_criterion == "epochs"
    iteration = 0
    current_loss = float('inf')

    while (iteration < epochs) if run_fixed_epochs else (current_loss > target_loss):
        # Pick one random sample and step all weights along its gradient.
        random_index = np.random.randint(len(X))
        sample_features = X[random_index]
        sample_target = y[random_index]
        prediction = linear_regression_predictions(weights, sample_features)
        gradients = mse_gradient(prediction, sample_target, sample_features)
        weights -= learning_rate * gradients

        # Report the loss of the updated weights over the whole dataset. Comparing
        # the single-sample prediction against every target is not a model loss.
        current_loss = mean_squared_error(linear_regression_predictions(weights, X), y)

        iteration += 1
        print(f"Iteration = {iteration}\nLoss_MSE = {current_loss}\n")

    print(f"Training took {iteration} iterations. Loss_MSE = {current_loss}")
    return weights


## 6. Application of Linear Regression

In [65]:
# Hyperparameters and shared inputs for the training runs below.
np.random.seed(42)  # fixed seed so randomly sampled updates are repeatable
feature_matrix = np.array(X_normalized)
initial_weights = np.zeros(feature_matrix.shape[1])  # one weight per feature column
learning_rate = 0.005
epochs = 1000
# stochastic_gradient_descent compares this value with the string "epochs";
# any other value selects its target-loss stopping loop instead.
stopping_criterion = "epochs"
target_loss = 0.1
batch_size = 32

### Applying Vanilla Gradient Descent

In [44]:
# Pass a copy so that training can never overwrite initial_weights, which is
# reused as the starting point of the next training run.
optimized_weights = vanilla_gradient_descent(initial_weights.copy(), learning_rate, feature_matrix, y, linear_regression_prediction, epochs)

Iteration: 1
Loss MSE: 5.610483198987253

Iteration: 2
Loss MSE: 5.59634048236379

Iteration: 3
Loss MSE: 5.582500699855777

Iteration: 4
Loss MSE: 5.568956858852584

Iteration: 5
Loss MSE: 5.555702141568447

Iteration: 6
Loss MSE: 5.5427299002549875

Iteration: 7
Loss MSE: 5.530033652557259

Iteration: 8
Loss MSE: 5.517607077008663

Iteration: 9
Loss MSE: 5.505444008660266

Iteration: 10
Loss MSE: 5.4935384348401675

Iteration: 11
Loss MSE: 5.4818844910387385

Iteration: 12
Loss MSE: 5.4704764569157005

Iteration: 13
Loss MSE: 5.45930875242511

Iteration: 14
Loss MSE: 5.448375934054519

Iteration: 15
Loss MSE: 5.437672691174618

Iteration: 16
Loss MSE: 5.427193842495898

Iteration: 17
Loss MSE: 5.416934332628865

Iteration: 18
Loss MSE: 5.406889228744585

Iteration: 19
Loss MSE: 5.397053717332339

Iteration: 20
Loss MSE: 5.38742310105134

Iteration: 21
Loss MSE: 5.377992795673543

Iteration: 22
Loss MSE: 5.368758327114688

Iteration: 23
Loss MSE: 5.359715328550787

Iteration: 24
Loss 

In [45]:
# Show the weights returned by the vanilla gradient descent run.
print("Optimized weights after Vanilla Gradient Descent: ", optimized_weights, sep="\n")

Optimized weights after Vanilla Gradient Descent: 
[ 0.81659877  0.17689017 -0.12729893  0.14127008  0.0166395  -0.04392099
 -0.48604502 -0.44967077]


In [56]:
# MSE of the vanilla-gradient-descent weights over the full normalized dataset.
mean_squared_error(linear_regression_prediction(optimized_weights, X_normalized), y)

4.8319778785051986

In [46]:
# Predict the target of the first sample with the vanilla-gradient-descent weights.
prediction = single_prediction(optimized_weights, X_normalized[0])

print("Prediction: ", prediction)

Prediction:  2.0581950846994683


In [41]:
# Actual target of the first sample, for comparison with the prediction.
y[0]

4.526

### Applying Stochastic Gradient Descent

In [58]:
# NOTE(review): stochastic_gradient_descent_batch_new is not defined anywhere in the
# visible notebook (only stochastic_gradient_descent is) — confirm where it comes
# from; if it is missing, this cell raises NameError on a fresh kernel.
optimized_weights_ = stochastic_gradient_descent_batch_new(initial_weights, learning_rate, feature_matrix, y, linear_regression_prediction, batch_size, epochs)

Iteration: 1
Loss MSE: 4.132682280945563

Iteration: 2
Loss MSE: 4.585270514128593

Iteration: 3
Loss MSE: 5.764234489008666

Iteration: 4
Loss MSE: 5.050293406514788

Iteration: 5
Loss MSE: 4.93963075565723

Iteration: 6
Loss MSE: 6.503146969743378

Iteration: 7
Loss MSE: 4.547476737020446

Iteration: 8
Loss MSE: 4.241147260892997

Iteration: 9
Loss MSE: 5.013148883534873

Iteration: 10
Loss MSE: 5.472767914585946

Iteration: 11
Loss MSE: 5.4817692028025125

Iteration: 12
Loss MSE: 5.49147883067221

Iteration: 13
Loss MSE: 4.922778952605201

Iteration: 14
Loss MSE: 4.047855156730028

Iteration: 15
Loss MSE: 4.085621983934569

Iteration: 16
Loss MSE: 4.920160321400273

Iteration: 17
Loss MSE: 5.211620578267869

Iteration: 18
Loss MSE: 4.653977128982516

Iteration: 19
Loss MSE: 4.724033031980423

Iteration: 20
Loss MSE: 4.81656136543673

Iteration: 21
Loss MSE: 5.1334119562031155

Iteration: 22
Loss MSE: 4.239466136420639

Iteration: 23
Loss MSE: 5.390317062279051

Iteration: 24
Loss MS

In [59]:
# Show the weights returned by the stochastic gradient descent run.
print("Optimized weights after Stochastic Gradient Descent: ", optimized_weights_, sep="\n")

Optimized weights after Stochastic Gradient Descent: 
[ 0.80937813  0.13518408 -0.24279816  0.30366408 -0.00886763 -0.12278882
 -0.72897939 -0.66008096]


In [60]:
# MSE of the stochastic-gradient-descent weights over the full normalized dataset.
mean_squared_error(linear_regression_prediction(optimized_weights_, X_normalized), y)

4.8184835867933025

## Explanation

As we can see, the MSE obtained with Vanilla Gradient Descent is about 4.832, whereas the MSE obtained with Stochastic Gradient Descent is about 4.818.

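This suggests that, in this run, Stochastic Gradient Descent reached a slightly lower MSE than Vanilla Gradient Descent (4.818 vs. 4.832). The gap is only about 0.01 and comes from a single run of a randomized method, so on its own it is not enough to conclude that Stochastic Gradient Descent is more powerful or robust in general.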