# Double Variable Polynomial Regression

## Install and Import Dependencies

In [7]:
%pip install numpy pandas matplotlib

Note: you may need to restart the kernel to use updated packages.


In [8]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Load Data

In [9]:
df = pd.read_csv('../DoubleVariablePolynomialRegression.ipynb/data/Fish.csv')

In [10]:
df.head()

Unnamed: 0,Height,Width,Weight
0,11.52,4.02,242.0
1,12.48,4.3056,290.0
2,12.3778,4.6961,340.0
3,12.73,4.4555,363.0
4,12.444,5.134,430.0


## Note: No NAN Values

In [11]:
df.isna().sum()

Height    0
Width     0
Weight    0
dtype: int64

## Normalize the Features

In [12]:
df['Height'] = (df['Height']-np.mean(df['Height']))/np.std(df['Height'])
df['Width'] = (df['Width']-np.mean(df['Width']))/np.std(df['Width'])
df.head()

Unnamed: 0,Height,Width,Weight
0,0.596579,-0.236529,242.0
1,0.821261,-0.066579,290.0
2,0.797341,0.165793,340.0
3,0.879771,0.022621,363.0
4,0.812835,0.426371,430.0


## Generate Random Test and Train Splits

In [13]:
seed = 420
train_fraction = 0.8
train = df.sample(frac=train_fraction, random_state=seed)
test = df.drop(train.index)

## Polynomial Regression Model

In [19]:
class PolynomialRegressionModel:
    def __init__(self, degree, q, lmbda):
        """
        Polynomial Regression Model for some particular degree.
        """
        self.train_errors = {}
        self.test_errors = {}
        self.q = q
        self.lmbda = lmbda
        self.degree = degree
        # Initialize Weights
        self.weights = np.random.rand(degree+1, degree+1)

    def calculate_loss(self, X_i, t_i):
        # print('inside self.calculate_loss()')
        assert type(X_i[0]) == np.float64 and type(t_i) == np.float64 and type(X_i[1]) == np.float64, "Types are not matching. Check!"

        a = X_i[0]
        b = X_i[1]
        t = t_i
        prediction = self.predict([(a, b)])
        # print('predicted:')
        # print(prediction)
        # print('expected')
        # print(t)

        grad = np.zeros_like(self.weights)
        grad.fill(0.0)
        for i in range(self.degree+1):
            for j in range(self.degree+1):
                if i + j <= self.degree:
                    grad[i][j] = (a**i)*(b**j)*(t - prediction)
        
        grad += (self.lmbda*self.q//2)*(np.abs(self.weights)**(self.q-1))
        # print('loss: ')
        # print(grad)
        return -1*grad

    def fit(self, X_train, y_train, X_test, y_test, lr=0.01, epochs=500, batch_size=20):
        """
        Fit the polynomial regression model using Batch Gradient Descent.

        Parameters:
        X_train: Input Feature variables.
        y_train: Target Variable
        X_test: Input Feature variables for test data
        y_test: Target Variables for test data
        lr: Learning Rate for Gradient Descent
        epochs: No of Epochs to train

        Returns:
        NA
        """
        print('Starting Training.....')
        X_train = np.array(X_train)
        y_train = np.array(y_train)
        # print(X_train.head())
        for epoch in range(epochs):
            count = 0
            loss = np.zeros_like(self.weights)
            # print(X_train.shape[0])
            
            for i in range(X_train.shape[0]):
                # print('sample')
                # print(X_train[i][0])
                # print(X_train[i][1])
                # print(y_train[i])
                if epoch == 0 or (epoch*epochs+i)%(X_train.shape[0]/2):
                    self.train_errors[epoch*epochs + i] = self.calculate_error(X_train, y_train)
                    self.test_errors[epoch*epochs + i] = self.calculate_error(X_test, y_test)

                X_i = (X_train[i][0],X_train[i][1])
                t_i =  y_train[i]

                if count%batch_size == 0:
                    loss /= batch_size
                    # print('loss: ')
                    # print(loss)
                    # print(self.weights)
                    # self.weights += (self.lmbda*self.q//2)*(np.abs(self.weights)**(self.q-1))
                    self.weights -= lr*loss
                    # print(self.weights)
                    loss = np.zeros_like(self.weights)
                else:
                    loss += self.calculate_loss(X_i, t_i)
            
                count+=1
            if epoch%(epochs/10) == 0:
                print(f"epoch: {epoch}")
                print(f"Error: {self.calculate_error(X_train, y_train)}")

        return

    def calculate_error(self, X_test, y_test):
        """
        Find the error of the model on some data.

        Parameters:
        X_test: The sample Input Feature.
        y_test: The sample Target Feature.

        Returns:
        A float value that is the MSE b/w the predicted outputs and the target outputs.
        """
        X_test = np.array(X_test)
        y_test = np.array(y_test)
        predictions = self.predict(X_test)
        mse = np.mean(
            (predictions-y_test)**2
        )
        return mse

    def predict(self, X_test):
        """
        Make Predictions using the trained model.

        Parameters:
        X_test: The sample Input Features.

        Returns:
        A numpy Array with the predicted target variable value for each of the samples having
        same dimensions as X_test.
        """
        result = [] 
        for sample in X_test:
            assert type(sample[0]) == np.float64 and type(sample[1]) == np.float64, "Variable doesn't have the required type!"
            degree = 2
            a = sample[0]
            b = sample[1]
            y = 0
            for i in range(degree+1):
                for j in range(degree+1):
                    if i + j <= degree:
                        y += self.weights[i][j]*(a**i)*(b**j)
            result.append(y)
        return np.array(result)

## Grid Search

## Without Regularization

In [15]:
import json

In [None]:
model = PolynomialRegressionModel(degree=5, q=2, lmbda=0.1)
model.fit(train.drop(['Weight'], axis=1), train['Weight'], test.drop(['Weight'], axis=1), test['Weight'], lr=0.001, epochs=200, batch_size=2)

In [25]:
errors = []
for degree in [1, 2, 3, 4, 5, 6, 7, 8, 9]:
    for lr in [0.1, 0.001, 0.0001]:
        print(f"doing: {degree}, {lr} for 200 epochs")
        model = PolynomialRegressionModel(degree=5, q=0, lmbda=0)
        model.fit(train.drop(['Weight'], axis=1), train['Weight'], test.drop(['Weight'], axis=1), test['Weight'], lr=lr, epochs=200, batch_size=20)
        errors.append({
                    "degree": degree,
                    "lr": lr,
                    "test_errors: ": model.test_errors,
                    "train_errors: ": model.train_errors
                })
json_obj = json.dumps(errors)
with open('double_noreg.json', 'w') as fp:
    fp.write(json_obj)

doing: 1, 0.001 for 200 epochs
Starting Training.....
epoch: 0
Error: 254987.36622977004


  grad[i][j] = (a**i)*(b**j)*(t - prediction)


epoch: 20
Error: 147597.30626165238
epoch: 40
Error: 104737.4547311336
epoch: 60
Error: 82393.90741900583
epoch: 80
Error: 68334.14127751158
epoch: 100
Error: 58532.328299594476
epoch: 120
Error: 51311.81528935179
epoch: 140
Error: 45803.480695999446
epoch: 160
Error: 41488.404926595074
epoch: 180
Error: 38032.689458350964
doing: 2, 0.001 for 200 epochs
Starting Training.....
epoch: 0
Error: 253867.72355085713


  grad[i][j] = (a**i)*(b**j)*(t - prediction)


epoch: 20
Error: 147100.47530235464
epoch: 40
Error: 104467.02838873432
epoch: 60
Error: 82227.95993280229
epoch: 80
Error: 68226.03107912627
epoch: 100
Error: 58459.60722924353
epoch: 120
Error: 51261.785881895026
epoch: 140
Error: 45768.32393028431
epoch: 160
Error: 41463.11101369707
epoch: 180
Error: 38013.98514703995
doing: 3, 0.001 for 200 epochs
Starting Training.....
epoch: 0
Error: 253948.426464409


  grad[i][j] = (a**i)*(b**j)*(t - prediction)


epoch: 20
Error: 147067.12082876515
epoch: 40
Error: 104404.58999939008
epoch: 60
Error: 82159.20134957075
epoch: 80
Error: 68157.92594164744
epoch: 100
Error: 58394.71358138548
epoch: 120
Error: 51201.17722276352
epoch: 140
Error: 45712.438913687365
epoch: 160
Error: 41412.057964645064
epoch: 180
Error: 37967.682739025295
doing: 4, 0.001 for 200 epochs
Starting Training.....
epoch: 0
Error: 255052.89799243366


  grad[i][j] = (a**i)*(b**j)*(t - prediction)


epoch: 20
Error: 147364.06666066215
epoch: 40
Error: 104478.88156834603
epoch: 60
Error: 82164.564275544
epoch: 80
Error: 68139.31509969271
epoch: 100
Error: 58367.584345250245
epoch: 120
Error: 51171.82856133196
epoch: 140
Error: 45683.73063924747
epoch: 160
Error: 41385.296532452005
epoch: 180
Error: 37943.386050533154
doing: 5, 0.001 for 200 epochs
Starting Training.....
epoch: 0
Error: 254630.25761440117


  grad[i][j] = (a**i)*(b**j)*(t - prediction)


epoch: 20
Error: 147247.52074871372
epoch: 40
Error: 104478.98987297734
epoch: 60
Error: 82216.59552630868
epoch: 80
Error: 68215.74165100402
epoch: 100
Error: 58454.47197380075
epoch: 120
Error: 51261.46071762852
epoch: 140
Error: 45771.740289210014
epoch: 160
Error: 41469.277998911166
epoch: 180
Error: 38022.11769339522
doing: 6, 0.001 for 200 epochs
Starting Training.....
epoch: 0
Error: 254049.24364569611


  grad[i][j] = (a**i)*(b**j)*(t - prediction)


epoch: 20
Error: 147271.4833298508
epoch: 40
Error: 104600.22201326446
epoch: 60
Error: 82328.50106302697
epoch: 80
Error: 68302.75885174556
epoch: 100
Error: 58519.62479289286
epoch: 120
Error: 51310.09394210343
epoch: 140
Error: 45808.32532146553
epoch: 160
Error: 41497.11151467394
epoch: 180
Error: 38043.558308100124
doing: 7, 0.001 for 200 epochs
Starting Training.....
epoch: 0
Error: 255008.33345103698


  grad[i][j] = (a**i)*(b**j)*(t - prediction)


epoch: 20
Error: 147398.02462525017
epoch: 40
Error: 104545.5447219729
epoch: 60
Error: 82245.09558809182
epoch: 80
Error: 68224.39836332388
epoch: 100
Error: 58452.150116114164
epoch: 120
Error: 51253.07276813971
epoch: 140
Error: 45760.174140634066
epoch: 160
Error: 41456.26023771446
epoch: 180
Error: 38008.67382260426
doing: 8, 0.001 for 200 epochs
Starting Training.....
epoch: 0
Error: 254920.17854160425


  grad[i][j] = (a**i)*(b**j)*(t - prediction)


epoch: 20
Error: 147282.34569801047
epoch: 40
Error: 104451.713980096
epoch: 60
Error: 82174.76956598107
epoch: 80
Error: 68171.61252862206
epoch: 100
Error: 58411.47356373712
epoch: 120
Error: 51220.676343878564
epoch: 140
Error: 45733.53570851859
epoch: 160
Error: 41433.74924533378
epoch: 180
Error: 37989.23541453939
doing: 9, 0.001 for 200 epochs
Starting Training.....
epoch: 0
Error: 255059.60362045496


  grad[i][j] = (a**i)*(b**j)*(t - prediction)


epoch: 20
Error: 147306.23559916392
epoch: 40
Error: 104434.13312749239
epoch: 60
Error: 82140.79332361354
epoch: 80
Error: 68131.54248883497
epoch: 100
Error: 58370.20182199576
epoch: 120
Error: 51180.663364019696
epoch: 140
Error: 45695.980642125985
epoch: 160
Error: 41399.152584318144
epoch: 180
Error: 37957.70685379684


In [35]:
with open('double_noreg.json', 'r') as fp:
    data = json.load(fp)

best_degree = None
best_lr = None
best_epoch = None
best_test_loss = float('inf')  # Set to positive infinity initially


# Perform grid search
for entry in data:
    degree = entry['degree']
    lr = entry['lr']
    test_errors = entry['test_errors: ']
    for epoch in test_errors:
        error = test_errors[epoch]
        if error < best_test_loss:
            best_test_loss = error
            best_lr = lr
            best_degree = degree
            best_epoch = epoch

# Display the best configuration and test loss
print(f"Best Degree: {best_degree}")
print(f"Best Learning Rate: {best_lr}")
print(f"Best Test Loss: {best_test_loss}")
print(f"Epoch: {best_epoch}")

Best Degree: 4
Best Learning Rate: 0.001
Best Test Loss: 68681.13519550429


## With Regularization

In [None]:
errors = []
# Note this should be chosen from analysis of previous grid search without regularization
best_fit_degree = 2
for degree in [best_fit_degree]:  
    for lr in [0.01, 0.001, 0.0001]:
        for q in [0.5, 1, 2, 4]:
            for batch_size in [20, 1]:
                for lmbda in [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
                    print(f"doing: {degree}, {lr}, {q}, {batch_size} for 500 epochs")
                    model = PolynomialRegressionModel(degree=degree, q=q, lmbda=lmbda)
                    model.fit(train.drop(['Weight'], axis=1), train['Weight'], test.drop(['Weight'], axis=1), test['Weight'], lr=lr, epochs=500, batch_size=batch_size)
                    errors.append({
                        "degree": degree,
                        "lr": lr,
                        "q": q, 
                        "lmbda": lmbda,
                        "batch_size": batch_size,
                        "test_errors: ": model.test_errors,
                        "train_errors: ": model.train_errors
                    })
                    print(errors)
json_obj = json.dumps(errors)
with open('double_withreg.json', 'w') as fp:
    json.dump(json_obj, fp)

## Plots

In [36]:
model = PolynomialRegressionModel(degree=4, q=0, lmbda=0)
model.fit(train.drop(['Weight'], axis=1), train['Weight'], test.drop(['Weight'], axis=1), test['Weight'], lr=0.001, epochs=200, batch_size=2)

Starting Training.....
epoch: 0
Error: 219966.28384434822


  grad[i][j] = (a**i)*(b**j)*(t - prediction)


epoch: 20
Error: 65008.30184788361
epoch: 40
Error: 42915.53887939515
epoch: 60
Error: 32891.54530683681
epoch: 80
Error: 27465.28735733693
Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/home/mpradyumna/Desktop/RegressionFromScratch/.venv/lib64/python3.11/site-packages/IPython/core/interactiveshell.py", line 3526, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_16297/1728085082.py", line 2, in <module>
    model.fit(train.drop(['Weight'], axis=1), train['Weight'], test.drop(['Weight'], axis=1), test['Weight'], lr=0.001, epochs=200, batch_size=2)
  File "/tmp/ipykernel_16297/3202280365.py", line 70, in fit
    self.test_errors[epoch*epochs + i] = self.calculate_error(X_test, y_test)
                                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_16297/3202280365.py", line 107, in calculate_error
    predictions = self.predict(X_test)
                  ^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_16297/3202280365.py", line -1, in predict
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call 

In [None]:
model.test_errors
epochs = list(model.train_errors.keys())
errors = list(model.train_errors.values())

plt.plot(epochs, errors, marker='o')
epochs = list(model.test_errors.keys())
errors = list(model.test_errors.values())
plt.plot(epochs, errors, marker='x')
plt.title('Error vs Samples')
plt.xlabel('Samples')
plt.ylabel('Error')
plt.grid(True)
plt.show()

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [None]:
a_values = np.linspace(min(train.to_numpy()[:, 0]), max(train.to_numpy()[:, 0]), 100)
b_values = np.linspace(min(test.to_numpy()[:, 1]), max(train.to_numpy()[:, 1]), 100)
a_mesh, b_mesh = np.meshgrid(a_values, b_values)

prediction_points = np.c_[a_mesh.ravel(), b_mesh.ravel()]
predictions = model.predict(prediction_points)
predictions_surface = predictions.reshape(a_mesh.shape)

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')

ax.plot_surface(a_mesh, b_mesh, predictions_surface, cmap='viridis')

# ax.scatter((train+test .to_numpy()[:, 0], train.to_numpy()[:, 1], train['Weight'].to_numpy(), color='blue', marker='o')

ax.set_xlabel('Height')
ax.set_ylabel('Width')
ax.set_zlabel('Predicted Weight')

plt.title('3D Surface Plot of Polynomial Regression Model Predictions')
plt.show()

## R2 value

In [None]:
from __future__ import division 
import numpy as np

def compute_r2(y_true, y_predicted):
    sse = sum((y_true - y_predicted)**2)
    tse = (len(y_true) - 1) * np.var(y_true, ddof=1)
    r2_score = 1 - (sse / tse)
    return r2_score, sse, tse

y_pred = model.predict(np.array(test.drop(['Weight'], axis=1)))
y_actual = test['Weight']
compute_r2(y_actual, y_pred)[0]

In [None]:
# Plotting the 45-degree line
plt.plot([min(y_actual), max(y_actual)], [min(y_actual), max(y_actual)], linestyle='--', color='gray', label='45-degree line')

# Scatter plot for y_pred and y_actual
plt.scatter(y_actual, y_pred, color='blue', label='Scatter plot')

# Adding labels and title
plt.xlabel('y_actual')
plt.ylabel('y_pred')
plt.title('Scatter plot of y_pred vs y_actual')

# Adding a legend
plt.legend()

# Display the plot
plt.show()