# Linear Neural Network

In [None]:
import numpy as np
import pandas as pd

from nnfs.losses import LossFunction
from nnfs.optimizers import Optimizer
from nnfs.utils import Preprocessing

# Seed NumPy's global RNG so the random draws made later in the notebook
# (parameter initialisation, synthetic data) are repeatable across runs.
np.random.seed(42)
# Print arrays with 4 digits of precision to keep cell output compact.
np.set_printoptions(precision=4)


# configuration
# Ratio handed to Preprocessing.train_test_split; presumably the fraction of
# rows held out for testing -- confirm against nnfs.utils.
TEST_RATIO = 0.2

## Mathematical Representation

Let $\mathbf{x}^{[i]} = [ x^{[i]}_1, x^{[i]}_2, \ldots, x^{[i]}_{11} ]^T$ represent the $i^{th}$ feature vector, where every $\mathbf{x}^{[i]} \in \mathbb{R}^{11}$


And let $y^{[i]}$ represent the true value (target) associated with the $i^{th}$ feature vector, where $y \in \{0, 1, \ldots, 10\}$


Given a training dataset $\mathcal{D}_{train}$ with size $n$ in the form

$$\mathcal{D}_{train} = \left\{ [\mathbf{x}^T, y]^{[1]}, \ldots, [\mathbf{x}^T, y]^{[n]} \right\}$$

#### A loss function, 

$$c : \mathbb{R}^{11} \times \mathbb{R} \times \mathbb{R}^{12} \rightarrow \mathbb{R}$$

#### An empirical risk function 

$$\hat{\mathscr{l}}(\boldsymbol{\theta}) = \frac{1}{n} \sum_{i=1}^{n}c(\mathbf{x}^{[i]}, y^{[i]}, \boldsymbol{\theta})$$

Where $\boldsymbol{\theta}$ is the parameter vector of the form $\boldsymbol{\theta} = [\mathbf{w}^T, b]$

#### Goal: to find a $\boldsymbol{\theta}^*$ which minimizes the empirical risk

$$\boldsymbol{\theta}^* = \underset{\boldsymbol{\theta}}{\operatorname{argmin}} \;\; \hat{\mathscr{l}}(\boldsymbol{\theta})$$


#### Linear Neural Network


$$\hat{y} = w_1x_1 + \ldots + w_{11}x_{11} + b$$


$$\hat{y}^{[i]} = \mathbf{w}^T \mathbf{x}^{[i]} + b$$

Let our loss function be the *squared error* function, then

$$c(\mathbf{x}^{[i]}, y^{[i]}, \boldsymbol{\theta}) = \frac{1}{2} \left( \hat{y}^{[i]} - y^{[i]} \right)^2$$


Thus the empirical risk function for this linear network is of the form

$$\hat{\mathscr{l}}(\boldsymbol{\theta}) = \frac{1}{n} \sum_{i=1}^{n} \frac{1}{2} \left( \mathbf{w}^T \mathbf{x}^{[i]} + b - y^{[i]} \right)^2$$

## Python Code


- $\mathcal{D}_{train}$ is `train`
- $\mathcal{D}_{test}$ is `test`
- $\boldsymbol{\theta}$ is `theta` (stored as the pair `weights`, `bias`)
- $\mathbf{w}$ is `weights`
- $b$ is `bias`
- $\hat{\mathscr{l}}$ is `risk`
- $\mathbf{x}$ is `xs` (one row per sample)
- $\hat{y}$ is `y_preds`
- $y$ is `y_trues`
- $c$ is `_loss`

## Data Preparation

In [None]:
# Locations of the raw wine-quality CSV files, relative to the notebook's
# working directory.
white_wine_csv = "../data/raw/winequality-white.csv"
# NOTE(review): the red-wine path is defined but not read in this cell.
red_wine_csv = "../data/raw/winequality-red.csv"

# The file is parsed with ';' as the field separator.
white_wine = pd.read_csv(white_wine_csv, delimiter=";")
# Plain ndarray copy of the table for use with the NumPy-based code below.
white_wine_raw = white_wine.to_numpy()

# Trailing expression: the notebook displays (n_rows, n_columns).
white_wine_raw.shape

## Linear Neural Network Class

In [None]:
class LinearNN:
    """Single linear unit ``y_hat = w^T x + b`` fitted by iterative updates.

    Attributes:
        weights: Array of shape ``(n_features, 1)``.
        bias: Array of shape ``(1, 1)``.
        eta: The learning rate passed to the constructor.
    """

    def __init__(self, n_features: int, learning_rate: float) -> None:
        """Create randomly initialised parameters, the loss and the optimizer.

        Args:
            n_features: Number of input features (rows of ``weights``).
            learning_rate: Step size forwarded to the Adam optimizer.
        """
        self.weights, self.bias = self.init_theta((n_features, 1), (1, 1))
        self.eta = learning_rate

        self._loss = LossFunction.squared_error()
        # presumably d(loss)/d(y_pred), evaluated per sample -- confirm in nnfs.losses
        self._loss_gradient = self._loss.backward

        # NOTE(review): this single optimizer instance updates both `weights`
        # and `bias` (see update_theta). If the nnfs Adam implementation keeps
        # moment estimates as instance state, the two parameters would share
        # them -- confirm, and use one optimizer per parameter if so.
        self._optimizer = Optimizer.adam(learning_rate=learning_rate)

    def forward(self, xs: np.ndarray) -> np.ndarray:
        """Return predictions for ``xs``.

        Args:
            xs: Samples of shape ``(n_samples, n_features)``.

        Returns:
            Column of predictions with shape ``(n_samples, 1)``.
        """
        # (1, d) @ (d, n) -> (1, n); the (1, 1) bias broadcasts; transpose -> (n, 1).
        return ((self.weights.T @ xs.T) + self.bias).T

    def risk(self, y_trues: np.ndarray, y_preds: np.ndarray) -> float:
        """Return the empirical risk: the mean of the loss over all samples."""
        losses = self._loss(y_trues, y_preds)
        # Reduce over every element so the result is a genuine scalar.
        # Reducing only along axis 0 leaves a size-1 array for column input,
        # and float() on such an array is deprecated since NumPy 1.25.
        return float(np.mean(losses))

    def weight_gradient(
        self, xs: np.ndarray, y_trues: np.ndarray, y_preds: np.ndarray
    ) -> np.ndarray:
        """Return the risk gradient w.r.t. the weights, averaged over samples.

        Each row of ``xs`` is scaled by that sample's loss gradient (assumed to
        broadcast against ``xs``, e.g. shape ``(n_samples, 1)``) and the rows
        are then averaged, giving one value per feature.
        """
        return np.mean(xs * self._loss_gradient(y_trues, y_preds), axis=0)

    def bias_gradient(self, y_trues: np.ndarray, y_preds: np.ndarray) -> np.ndarray:
        """Return the risk gradient w.r.t. the bias, averaged over samples."""
        return np.mean(self._loss_gradient(y_trues, y_preds), axis=0)

    def update_theta(self, xs: np.ndarray, y_trues: np.ndarray, y_preds: np.ndarray):
        """Apply one optimizer step to ``weights`` and then to ``bias``.

        Both gradients are evaluated from the same ``y_preds``, i.e. from the
        parameters as they were before this call.
        """
        self.weights = self._optimizer.update(
            self.weights,
            # weight_gradient is 1-D; restore the (n_features, 1) column shape.
            self.weight_gradient(xs, y_trues, y_preds).reshape(self.weights.shape),
        )

        self.bias = self._optimizer.update(
            self.bias, self.bias_gradient(y_trues, y_preds)
        )

    def train(self, xs: np.ndarray, ys: np.ndarray, rs: int = 10):
        """Run ``rs`` full-batch update rounds, printing the risk periodically.

        Args:
            xs: Training samples, shape ``(n_samples, n_features)``.
            ys: Training targets, one row per sample.
            rs: Number of update rounds (epochs).
        """
        # Report roughly ten times per run. An integer interval works for any
        # `rs`, unlike a float modulus against 10 ** (log10(rs) - 1), which is
        # only exact when `rs` is a power of ten.
        log_every = max(1, rs // 10)
        for epoch in range(1, rs + 1):
            y_preds = self.forward(xs)
            if epoch % log_every == 0:
                # The reported risk is measured before this epoch's update.
                print(f"Epoch {epoch} :: risk={round(self.risk(ys, y_preds), 4)}")
            self.update_theta(xs, ys, y_preds)

    def evaluate(self, x_test: np.ndarray, y_test: np.ndarray):
        """Print the loss object's accuracy score and the empirical risk.

        Args:
            x_test: Held-out samples, shape ``(n_samples, n_features)``.
            y_test: Held-out targets, one row per sample.
        """
        y_pred = self.forward(x_test)
        # `accuracy` receives row vectors (hence the transposes); its result is
        # printed under the "R^2 Score" label.
        print(f"R^2 Score: {self._loss.accuracy(y_test.T, y_pred.T)}")
        print(f"Empirical Risk {round(self.risk(y_test, y_pred), 4)}")

    @staticmethod
    def init_theta(
        weights_shape: tuple[int, int], bias_shape: tuple[int, int]
    ) -> tuple[np.ndarray, np.ndarray]:
        """Draw initial weights and bias uniformly from ``[0, 10)``.

        Args:
            weights_shape: Shape of the weight array.
            bias_shape: Shape of the bias array.

        Returns:
            ``(weights, bias)``. The weights are drawn first, then the bias,
            so results are reproducible under a fixed NumPy seed.
        """
        weights = np.random.random(weights_shape) * 10
        bias = np.random.random(bias_shape) * 10
        return weights, bias

In [None]:
# Synthetic sanity check: generate data from a known linear rule so the
# parameters the model learns can be compared against the ground truth.
n = 2
true_ws = np.random.random(n) * 100
true_b = 0
xs = np.random.random((10000, n))

# Targets follow y = w . x + b. The declared bias is included so that changing
# `true_b` above actually changes the generated data.
true_ys = (true_ws.T @ xs.T + true_b).reshape((xs.shape[0], 1))

# NOTE(review): the result is unpacked as (test, train). Confirm this matches
# the return order of Preprocessing.train_test_split; if it returns
# (train, test), the two names are swapped here.
test, train = Preprocessing.train_test_split(
    np.concatenate((xs, true_ys), axis=1), TEST_RATIO, shuffle=True
)
x_train, y_train = Preprocessing.xy_split(train)
x_test, y_test = Preprocessing.xy_split(test)

# Preview the first 10 training rows (slicing columns with [:, :10] would
# return every row, since there are only `n` columns).
x_train[:10]

In [None]:
# Build a model with one weight per training feature column. The loss and
# optimizer are fixed inside LinearNN, so only the learning rate is set here.
model = LinearNN(x_train.shape[1], learning_rate=1)


# Fit for 1000 full-batch epochs; the risk is printed periodically.
model.train(x_train, y_train, rs=1000)

In [None]:
# Print the score and empirical risk on the held-out split.
model.evaluate(x_test, y_test)

In [None]:
# Learned parameters, flattened to 1-D arrays for printing.
print(model.weights.T.flatten(), model.bias.flatten())

In [None]:
# Ground-truth parameters used to generate the synthetic data.
print(true_ws, true_b)