## Download Vehicle MPG Dataset

In [2]:
from ucimlrepo import fetch_ucirepo
import pandas as pd

# Fetch the dataset registered under id 9 in the UCI ML repository.
vehicle_mpg = fetch_ucirepo(id=9)

# Feature and target tables as exposed by the fetched dataset object.
X = vehicle_mpg.data.features
y = vehicle_mpg.data.targets

# Join features and targets column-wise so that a missing value on either
# side removes the whole sample, keeping X and y row-aligned.
data = pd.concat([X, y], axis=1)

# Drop every row that contains at least one missing value.
cleaned_data = data.dropna()

# Split back into features (all columns but the last) and target (last column).
# NOTE(review): assumes the targets contribute exactly one column — confirm.
X = cleaned_data.iloc[:, :-1]
y = cleaned_data.iloc[:, -1]

# Report how many rows dropna() discarded.
rows_removed = data.shape[0] - cleaned_data.shape[0]
print(f"Rows removed: {rows_removed}")

Rows removed: 6


## Data Splitting and Standardization

In [3]:
from sklearn.model_selection import train_test_split
# First split: 70% training, 30% held out. The fixed seed makes the split reproducible.
X_train, X_leftover, y_train, y_leftover = train_test_split(X, y, test_size=0.3, random_state=42, shuffle = True)
# Second split: the held-out 30% is halved into validation and test sets.
X_val, X_test, y_val, y_test = train_test_split(X_leftover, y_leftover, test_size=0.5, random_state=42, shuffle = True)

# Standardization statistics are computed from the training features only ...
X_mean = X_train.mean(axis=0)
X_std = X_train.std(axis=0)

# ... and the same training statistics are applied to all three splits.
X_train = (X_train - X_mean) / X_std
X_val = (X_val - X_mean) / X_std
X_test = (X_test - X_mean) / X_std

# The target is standardized the same way, again using training statistics only.
y_mean = y_train.mean()
y_std = y_train.std()

y_train = (y_train - y_mean) / y_std
y_val = (y_val - y_mean) / y_std
y_test = (y_test - y_mean) / y_std

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from abc import ABC, abstractmethod
from typing import Tuple


def batch_generator(train_x, train_y, batch_size):
    """
    Split train_x and train_y into aligned, consecutive mini-batches.

    Despite its name this is not a lazy generator: it returns two lists that
    the caller iterates in lockstep (e.g. with ``zip``).

    :param train_x: Input features of shape (n, f); any container supporting
        ``len()`` and slice indexing.
    :param train_y: Target values of shape (n, q); same requirements.
    :param batch_size (int): The size of each batch, must be >= 1.

    :return tuple: (batches_x, batches_y), two lists of equal length whose i-th
        entries hold the same rows. Every batch has ``batch_size`` rows except
        possibly the last, which holds the remainder. Both lists are empty when
        there are no samples.
    :raises ValueError: if the inputs differ in length or batch_size < 1.
    """
    n_samples = len(train_x)
    if n_samples != len(train_y):
        raise ValueError("Number of samples in X and y do not match.")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer.")

    # Slice at multiples of batch_size so only the final batch can be short
    # (np.array_split would instead even out the sizes across all batches).
    starts = range(0, n_samples, batch_size)
    batch_x = [train_x[start:start + batch_size] for start in starts]
    batch_y = [train_y[start:start + batch_size] for start in starts]
    return batch_x, batch_y


class ActivationFunction(ABC):
    """Interface that every activation function in this file implements."""

    @abstractmethod
    def forward(self, x: np.ndarray) -> np.ndarray:
        """
        Evaluate the activation function at x.

        The input arguments may differ in the case of softmax.

        :param x (np.ndarray): input
        :return: value of the activation function at x
        """

    @abstractmethod
    def derivative(self, x: np.ndarray) -> np.ndarray:
        """
        Evaluate the derivative of the activation function at x.

        :param x (np.ndarray): input
        :return: derivative of the activation function at x
        """


class Sigmoid(ActivationFunction):
    """Logistic sigmoid activation, sigma(x) = 1 / (1 + exp(-x))."""

    def forward(self, x):
        """
        Evaluate the sigmoid element-wise.

        :param x (np.ndarray): input
        :return: sigma(x), same shape as x
        """
        # Identity sigma(x) = (1 + tanh(x / 2)) / 2 gives the same value
        # without the overflow exp(-x) hits for large negative x.
        return 0.5 * (1.0 + np.tanh(0.5 * np.asarray(x, dtype=float)))

    def derivative(self, x):
        """
        Evaluate the sigmoid derivative element-wise.

        :param x (np.ndarray): input
        :return: sigma(x) * (1 - sigma(x)), same shape as x
        """
        sigma_x = self.forward(x)
        return sigma_x * (1.0 - sigma_x)

class Tanh(ActivationFunction):
    """Hyperbolic tangent activation."""

    def forward(self, x):
        """
        Evaluate tanh element-wise.

        :param x (np.ndarray): input
        :return: tanh(x), same shape as x
        """
        # np.tanh saturates cleanly at +/-1; the explicit exp ratio evaluates
        # to inf/inf = nan once |x| is large.
        return np.tanh(x)

    def derivative(self, x):
        """
        Evaluate the tanh derivative element-wise.

        :param x (np.ndarray): input
        :return: 1 - tanh(x)^2, same shape as x
        """
        tanh_x = self.forward(x)
        return 1.0 - tanh_x ** 2

class Relu(ActivationFunction):
    """Rectified linear unit activation, relu(x) = max(0, x)."""

    def forward(self, x):
        """
        Evaluate ReLU element-wise.

        :param x (np.ndarray): input
        :return: x where x > 0, otherwise 0; same shape as x
        """
        # np.maximum is the element-wise maximum; np.max(0, x) would treat x
        # as the axis argument of a reduction.
        return np.maximum(0, x)

    def derivative(self, x):
        """
        Evaluate the ReLU derivative element-wise.

        :param x (np.ndarray): input
        :return: 1.0 where x > 0 and 0.0 elsewhere (the subgradient 0 is used
            at x == 0); same shape as x
        """
        return (np.asarray(x) > 0).astype(float)
    

class Softmax(ActivationFunction):
    """Softmax activation, normalizing along the last axis."""

    def forward(self, x):
        """
        Evaluate softmax over the last axis of x.

        :param x (np.ndarray): input with at least one dimension, e.g. (n, k)
        :return: array of the same shape whose entries along the last axis are
            non-negative and sum to 1
        """
        x = np.asarray(x, dtype=float)
        # Subtracting the per-row maximum leaves the result unchanged but
        # keeps exp() from overflowing.
        exp_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=-1, keepdims=True)

    def derivative(self, x):
        """
        Evaluate the softmax Jacobian for every row of x.

        Unlike element-wise activations, each softmax output depends on every
        input in its row, so the derivative is a matrix per sample.

        :param x (np.ndarray): input of shape (..., k)
        :return: array of shape (..., k, k) with
            J[..., i, j] = s_i * (delta_ij - s_j), where s = softmax(x)
        """
        s = self.forward(x)
        identity = np.eye(s.shape[-1])
        return s[..., :, None] * (identity - s[..., None, :])

class Linear(ActivationFunction):
    """Identity activation: the output equals the input."""

    def forward(self, x):
        """
        Return the input unchanged.

        :param x (np.ndarray): input
        :return: x itself
        """
        return x

    def derivative(self, x):
        """
        Return the derivative of the identity.

        :param x (np.ndarray): input
        :return: array of ones shaped like x
        """
        unit_slope = np.ones_like(x)
        return unit_slope

    

class LossFunction(ABC):
    """Interface for loss functions: three abstract methods taking (y_true, y_pred)."""

    @abstractmethod
    def loss(self, y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
        """Abstract: evaluate the loss for targets y_true and predictions y_pred."""

    @abstractmethod
    def derivative(self, y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
        """Abstract: evaluate the derivative of the loss for y_true and y_pred."""

    @abstractmethod
    def delta(self, y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
        """Abstract: evaluate the delta term for y_true and y_pred."""


class SquaredError(LossFunction):
    """
    Half mean squared error: L = 1 / (2N) * sum((y_pred - y_true)^2),
    where N is the total number of elements in y_pred.
    """

    @staticmethod
    def _aligned(y_true, y_pred):
        """Return (y_true, y_pred) as float arrays sharing y_pred's shape."""
        y_pred = np.asarray(y_pred, dtype=float)
        # Reshaping (rather than flattening one side only) stops an (n,) target
        # from broadcasting against an (n, 1) prediction into an (n, n) matrix.
        # np.asarray also accepts array-likes that have no .flatten().
        y_true = np.asarray(y_true, dtype=float).reshape(y_pred.shape)
        return y_true, y_pred

    def loss(self, y_true, y_pred):
        """
        Evaluate the half mean squared error.

        :param y_true: target values, same number of elements as y_pred
        :param y_pred: predicted values
        :return: scalar loss
        :raises ValueError: if y_true cannot be reshaped to y_pred's shape
        """
        y_true, y_pred = self._aligned(y_true, y_pred)
        return 0.5 * np.mean(np.square(y_pred - y_true))

    def derivative(self, y_true, y_pred):
        """
        Evaluate the gradient of the loss with respect to the predictions.

        :param y_true: target values, same number of elements as y_pred
        :param y_pred: predicted values
        :return: (y_pred - y_true) / N, with the shape of y_pred
        :raises ValueError: if y_true cannot be reshaped to y_pred's shape
        """
        y_true, y_pred = self._aligned(y_true, y_pred)
        return (y_pred - y_true) / y_pred.size

    def delta(self, y_true, y_pred):
        """
        Evaluate the error signal at the network output.

        For squared error this is the loss gradient with respect to the
        predictions, i.e. the same value as derivative().
        NOTE(review): the base class does not say how delta should differ
        from derivative — confirm the intended contract.

        :param y_true: target values, same number of elements as y_pred
        :param y_pred: predicted values
        :return: (y_pred - y_true) / N, with the shape of y_pred
        """
        return self.derivative(y_true, y_pred)

class CrossEntropy(LossFunction):
    """Placeholder for a cross-entropy loss; it defines no methods of its own yet."""


class Layer:
    def __init__(self, fan_in: int, fan_out: int, activation_function: "ActivationFunction", loss_function: "LossFunction"):
        """
        Initializes a layer of neurons

        :param fan_in: number of neurons in previous (presynaptic) layer
        :param fan_out: number of neurons in this layer
        :param activation_function: instance of an ActivationFunction
        :param loss_function: instance of a LossFunction; stored on the layer
            but not used by the layer's own computations
        :raises ValueError: if fan_in or fan_out is smaller than 1
        """
        if fan_in < 1 or fan_out < 1:
            raise ValueError("fan_in and fan_out must both be at least 1.")

        self.fan_in = fan_in
        self.fan_out = fan_out
        self.activation_function = activation_function
        self.loss_function = loss_function

        # pre-activation values passed to forward(); backward() needs them to
        # evaluate the activation derivative
        self.pre_activations = None
        # this will store the activations (forward prop)
        self.activations = None
        # this will store the delta term handed to the layer below (backward prop)
        self.delta = None

        # Glorot/Xavier uniform initialization keeps the signal variance
        # roughly constant across layers; biases start at zero.
        limit = np.sqrt(6.0 / (fan_in + fan_out))
        self.W = np.random.uniform(-limit, limit, size=(fan_out, fan_in))  # weights
        self.b = np.zeros(fan_out)  # biases

    def predict(self, x: np.ndarray):
        """
        Computes the pre-activation (affine part) of this layer

        :param x: layer input of shape (n, fan_in), or (fan_in,) for one sample
        :return: x @ W.T + b, of shape (n, fan_out) or (fan_out,)
        """
        # Samples stay on the first axis so the result can be fed straight into
        # the next layer's predict().
        return np.asarray(x, dtype=float) @ self.W.T + self.b

    def forward(self, h: np.ndarray):
        """
        Computes the activations for this layer

        :param h: pre-activation values, i.e. the output of predict()
        :return: layer activations
        """
        self.pre_activations = h
        self.activations = self.activation_function.forward(h)

        return self.activations

    def backward(self, h: np.ndarray, delta: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Apply backpropagation to this layer and return the weight and bias gradients

        Also stores in self.delta the gradient of the loss with respect to this
        layer's input, which is the delta term for the layer below.

        :param h: input to this layer, shape (n, fan_in)
        :param delta: delta term from layer above, i.e. the gradient of the loss
            with respect to this layer's activations, shape (n, fan_out)
        :return: (weight gradients of shape (fan_out, fan_in),
                  bias gradients of shape (fan_out,))
        :raises RuntimeError: if forward() has not been called yet
        """
        if self.pre_activations is None:
            raise RuntimeError("forward() must be called before backward().")

        h = np.atleast_2d(np.asarray(h, dtype=float))
        delta = np.atleast_2d(np.asarray(delta, dtype=float))
        z = np.atleast_2d(np.asarray(self.pre_activations, dtype=float))

        local_grad = np.asarray(self.activation_function.derivative(z), dtype=float)
        if local_grad.ndim == delta.ndim + 1:
            # The activation returned a full Jacobian per sample (one extra
            # trailing axis, as a softmax-style activation would): contract it.
            dL_dz = np.einsum("ni,nij->nj", delta, local_grad)
        else:
            # Element-wise activation: chain rule is a plain product.
            dL_dz = delta * local_grad

        dL_dW = dL_dz.T @ h
        dL_db = dL_dz.sum(axis=0)
        self.delta = dL_dz @ self.W
        return dL_dW, dL_db


class MultilayerPerceptron:
    def __init__(self, layers: Tuple["Layer", ...]):
        """
        Create a multilayer perceptron (densely connected multilayer neural network)

        Each layer is expected to provide predict(x), forward(h), backward(h, delta)
        and the attributes activations, delta, W and b.

        :param layers: list or Tuple of layers, ordered from input to output
        """
        self.layers = layers

    def forward(self, x: np.ndarray) -> np.ndarray:
        """
        This takes the network input and computes the network output (forward propagation)

        :param x: network input
        :return: network output (the activations of the last layer; x itself
            if the network has no layers)
        """
        for layer in self.layers:
            # Affine step first, then the non-linearity; the result feeds the next layer.
            x = layer.forward(layer.predict(x))

        return x

    def backward(self, loss_grad: np.ndarray, input_data: np.ndarray) -> Tuple[list, list]:
        """
        Applies backpropagation to compute the gradients of the weights and biases for all layers in the network

        Must be called after forward() on the same input_data, because it reads
        the activations every layer cached during that pass.

        :param loss_grad: gradient of the loss function with respect to the network output
        :param input_data: network's input data
        :return: (List of weight gradients for all layers, List of bias gradients for all layers),
            both ordered like self.layers
        """
        n_layers = len(self.layers)
        dl_dw_all = [None] * n_layers
        dl_db_all = [None] * n_layers

        delta = loss_grad
        # Walk from the output layer back to the first layer.
        for idx in range(n_layers - 1, -1, -1):
            layer = self.layers[idx]
            # A layer's input is the previous layer's output; the first layer
            # sees the raw network input.
            layer_input = input_data if idx == 0 else self.layers[idx - 1].activations
            dl_dw_all[idx], dl_db_all[idx] = layer.backward(layer_input, delta)
            # backward() leaves the delta term for the layer below on the layer.
            delta = layer.delta

        return dl_dw_all, dl_db_all

    def train(self, train_x: np.ndarray, train_y: np.ndarray, val_x: np.ndarray, val_y: np.ndarray, loss_func: "LossFunction", learning_rate: float=1E-3, batch_size: int=16, epochs: int=32) -> Tuple[np.ndarray, np.ndarray]:
        """
        Train the multilayer perceptron with mini-batch gradient descent

        Batches are formed once, in the given sample order, and reused every epoch.

        :param train_x: full training set input of shape (n x d) n = number of samples, d = number of features
        :param train_y: full training set output of shape (n x q) n = number of samples, q = number of outputs per sample;
            a 1-D array of length n is treated as (n x 1)
        :param val_x: full validation set input
        :param val_y: full validation set output
        :param loss_func: instance of a LossFunction; its loss(y_true, y_pred) and
            derivative(y_true, y_pred) methods are used
        :param learning_rate: learning rate for parameter updates
        :param batch_size: size of each batch
        :param epochs: number of epochs
        :return: (training losses, validation losses), two arrays of length epochs.
            The training loss of an epoch is the sample-weighted mean of its batch
            losses; the validation loss is evaluated after the epoch's last update.
        :raises ValueError: if the training set is empty
        """
        train_x = np.asarray(train_x, dtype=float)
        train_y = self._as_columns(train_y)
        val_x = np.asarray(val_x, dtype=float)
        val_y = self._as_columns(val_y)

        n_samples = len(train_x)
        if n_samples == 0:
            raise ValueError("Cannot train on an empty training set.")

        x_batches, y_batches = batch_generator(train_x, train_y, batch_size)

        training_losses = np.zeros(epochs)
        validation_losses = np.zeros(epochs)

        for epoch in range(epochs):
            total_loss = 0.0
            for bx, by in zip(x_batches, y_batches):
                y_pred = self.forward(bx)
                # Weight by batch length so a short final batch does not skew the epoch mean.
                total_loss += float(loss_func.loss(by, y_pred)) * len(bx)

                loss_grad = loss_func.derivative(by, y_pred)
                dl_dw_all, dl_db_all = self.backward(loss_grad, bx)

                # Plain gradient-descent step on every layer.
                for layer, dl_dw, dl_db in zip(self.layers, dl_dw_all, dl_db_all):
                    layer.W = layer.W - learning_rate * dl_dw
                    layer.b = layer.b - learning_rate * dl_db

            training_losses[epoch] = total_loss / n_samples
            validation_losses[epoch] = float(loss_func.loss(val_y, self.forward(val_x)))

        return training_losses, validation_losses

    @staticmethod
    def _as_columns(y) -> np.ndarray:
        """Return y as a float array with samples on axis 0; 1-D input becomes (n, 1)."""
        y = np.asarray(y, dtype=float)
        return y[:, None] if y.ndim == 1 else y
    