In [6]:
from typing import Tuple
import numpy as np


class PolynomialRegression:
    """Univariate polynomial regression with L2 regularization.

    The model expands a single input column into the powers
    ``x, x**2, ..., x**degree``, standardizes each power using statistics
    computed on the training data, prepends a bias column and solves the
    regularized least-squares problem in closed form. The bias term is not
    regularized.
    """

    def __init__(self, degree: int = 1, reg_lambda: float = 1e-8):
        """
        Constructor

        Args:
            degree (int): Maximum polynomial power to include.
            reg_lambda (float): L2 regularization strength.
        """
        self.degree: int = degree
        self.reg_lambda: float = reg_lambda
        # Learned state; populated by fit().
        self.weight = None
        self.theta = None  # alias of ``weight`` kept for older call sites
        self.mean = None
        self.std = None

    @staticmethod
    def polyfeatures(X: np.ndarray, degree: int) -> np.ndarray:
        """
        Expands the given X into an (n, degree) array of polynomial features of degree degree.

        Args:
            X (np.ndarray): Array of shape (n, 1).
            degree (int): Positive integer defining maximum power to include.

        Returns:
            np.ndarray: A (n, degree) numpy array, with each row comprising of
                X, X * X, X ** 3, ... up to the degree^th power of X.
                Note that the returned matrix will not include the zero-th power.

        """
        column = np.asarray(X, dtype=float).reshape(-1, 1)
        # Broadcasting (n, 1) ** (degree,) yields one column per power 1..degree.
        return column ** np.arange(1, degree + 1)

    def _design_matrix(self, X: np.ndarray) -> np.ndarray:
        """Expand, standardize with the stored statistics and prepend a bias column."""
        features = self.polyfeatures(X, self.degree)
        scaled = (features - self.mean) / self.std
        return np.c_[np.ones((scaled.shape[0], 1)), scaled]

    def fit(self, X: np.ndarray, y: np.ndarray):
        """
        Trains the model, and saves learned weight in self.weight

        Args:
            X (np.ndarray): Array of shape (n, 1) with observations.
            y (np.ndarray): Array of shape (n, 1) with targets.

        Note:
            Polynomial expansion and standardization are applied first; the
            same mean/std are reused later by predict().
        """
        features = self.polyfeatures(X, self.degree)
        n = features.shape[0]

        if n > 1:
            self.mean = features.mean(axis=0)
            std = features.std(axis=0)
            # A constant column has zero spread; dividing by 1 leaves it at
            # zero after centering instead of producing NaN/inf.
            std[std == 0] = 1.0
            self.std = std
        else:
            # Statistics of a single sample are degenerate: skip scaling.
            self.mean = 0.0
            self.std = 1.0

        design = self._design_matrix(X)

        # Regularize every coefficient except the bias (first column).
        reg_matrix = self.reg_lambda * np.eye(design.shape[1])
        reg_matrix[0, 0] = 0

        # Closed form: (X'X + regMatrix)^-1 X' y. pinv tolerates the
        # ill-conditioned Gram matrices that high degrees produce.
        gram = design.T.dot(design) + reg_matrix
        self.weight = np.linalg.pinv(gram).dot(design.T).dot(y)
        self.theta = self.weight

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Use the trained model to predict values for each instance in X.

        Args:
            X (np.ndarray): Array of shape (n, 1) with observations.

        Returns:
            np.ndarray: Array of shape (n, 1) with predictions.

        Raises:
            RuntimeError: If called before fit().
        """
        if self.weight is None:
            raise RuntimeError("PolynomialRegression.predict called before fit")
        return self._design_matrix(X).dot(self.weight)


def mean_squared_error(a: np.ndarray, b: np.ndarray) -> float:
    """Compute the mean squared error between two equally shaped arrays.

    Args:
        a (np.ndarray): Array of shape (n, 1)
        b (np.ndarray): Array of shape (n, 1)

    Returns:
        float: average of the element-wise squared differences between a and b.
    """
    residual = b - a
    squared = np.square(residual)
    return np.mean(squared)


def learningCurve(
    Xtrain: np.ndarray,
    Ytrain: np.ndarray,
    Xtest: np.ndarray,
    Ytest: np.ndarray,
    reg_lambda: float,
    degree: int,
) -> Tuple[np.ndarray, np.ndarray]:
    """Compute learning curves.

    Args:
        Xtrain (np.ndarray): Training observations, shape: (n, 1)
        Ytrain (np.ndarray): Training targets, shape: (n, 1)
        Xtest (np.ndarray): Testing observations, shape: (n, 1)
        Ytest (np.ndarray): Testing targets, shape: (n, 1)
        reg_lambda (float): Regularization factor
        degree (int): Polynomial degree

    Returns:
        Tuple[np.ndarray, np.ndarray]: Tuple containing:
            1. errorTrain -- errorTrain[i] is the training mean squared error using model trained by Xtrain[0:(i+1)]
            2. errorTest -- errorTest[i] is the testing mean squared error using model trained by Xtrain[0:(i+1)]

    Note:
        - For errorTrain[i] only calculate error on Xtrain[0:(i+1)], since this is the data used for training.
            THIS DOES NOT APPLY TO errorTest.
        - Index 0 (a model trained on a single sample) is left at 0 in both
            arrays, since the learning curve is displayed from n = 2 onwards.
    """
    n = len(Xtrain)

    errorTrain = np.zeros(n)
    errorTest = np.zeros(n)

    for i in range(1, n):
        # Grow the training prefix by one sample per iteration.
        X_prefix = Xtrain[: i + 1]
        y_prefix = Ytrain[: i + 1]

        # A fresh model per prefix so no state leaks between sizes.
        model = PolynomialRegression(degree=degree, reg_lambda=reg_lambda)
        model.fit(X_prefix, y_prefix)

        # Training error is measured only on the samples actually used.
        errorTrain[i] = mean_squared_error(model.predict(X_prefix), y_prefix)
        # Test error always uses the full held-out set.
        errorTest[i] = mean_squared_error(model.predict(Xtest), Ytest)

    return errorTrain, errorTest


In [8]:
from utils import load_dataset

filePath = "data/polydata.dat"
# Open through a context manager so the handle is closed even when parsing
# fails; the path is relative to the current working directory.
with open(filePath, 'r') as file:
    allData = np.loadtxt(file, delimiter=',')


# List indices keep both slices two-dimensional (one column each).
X = allData[:, [0]]
y = allData[:, [1]]

# regression with degree = d
d = 8
model = PolynomialRegression(degree=d, reg_lambda=0)
model.fit(X, y)

# output predictions on 100 evenly spaced points spanning the observed inputs
xpoints = np.linspace(np.max(X), np.min(X), 100).reshape(-1, 1)
ypoints = model.predict(xpoints)

# plot curve
# NOTE(review): `plt` must already be bound (matplotlib.pyplot) when this runs.
plt.figure()
plt.plot(X, y, "rx")
plt.title(f"PolyRegression with d = {d}")
plt.plot(xpoints, ypoints, "b-")
plt.xlabel("X")
plt.ylabel("Y")
plt.show()

FileNotFoundError: [Errno 2] No such file or directory: 'data/polydata.dat'

In [48]:
def polyfeatures_1(X, degree):
    """Expand a single input column into its powers 1..degree.

    Args:
        X: Sequence or array holding n scalar observations, either flat
            (n,) or as a column (n, 1).
        degree (int): Highest power to include.

    Returns:
        np.ndarray: Float array of shape (n, degree) whose column j-1 holds
            X ** j. The zero-th power is not included.
    """
    n = len(X)
    # Force a float column; reshape rejects inputs with more than one value
    # per observation rather than silently mixing them.
    column = np.asarray(X, dtype=float).reshape(n, 1)
    # Broadcasting (n, 1) ** (degree,) fills every power in one step and
    # avoids assigning size-1 arrays into scalar slots element by element.
    return column ** np.arange(1, degree + 1)  # n-by-d
 

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from utils import load_dataset

if __name__ == "__main__":
    from polyreg import PolynomialRegression  # type: ignore
else:
    from .polyreg import PolynomialRegression

if __name__ == "__main__":
    # Script entry point: fit a polynomial regression on the "polyreg"
    # dataset and plot the fitted curve over the raw observations.

    # Column 0 is used as the input and column 1 as the target; indexing
    # with a list is presumably meant to keep each slice two-dimensional.
    allData = load_dataset("polyreg")
    X, y = allData[:, [0]], allData[:, [1]]

    # Polynomial degree of the model; regularization is switched off.
    d = 8
    model = PolynomialRegression(degree=d, reg_lambda=0)
    model.fit(X, y)

    # Evaluate the model on 100 evenly spaced inputs, running from the
    # largest observed value down to the smallest.
    x_hi, x_lo = np.max(X), np.min(X)
    xpoints = np.linspace(x_hi, x_lo, 100).reshape(-1, 1)
    ypoints = model.predict(xpoints)

    # Red crosses for the data, blue line for the fitted polynomial.
    plt.figure()
    plt.plot(X, y, "rx")
    plt.title(f"PolyRegression with d = {d}")
    plt.plot(xpoints, ypoints, "b-")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.show()
