# Notebook created to train the logistic regression model without having to reload the dataset every time

# Because, for some reason, ucimlrepo takes a weirdly long time to load

In [1]:
from pandas import DataFrame
from ucimlrepo import fetch_ucirepo

# Iris dataset: identifier passed to `fetch_ucirepo` below.
DATASET_ID = 53

# Fetch the dataset once so later cells can reuse the module-level constants.
iris = fetch_ucirepo(id=DATASET_ID)  # fetch dataset
# Fail early if the fetched object carries no data (also narrows the type for checkers).
assert iris.data is not None

# Full table as provided by the repository object.
# NOTE(review): presumably features and label columns together -- confirm against ucimlrepo.
DATA: DataFrame = iris.data.original

# Feature columns only.
FEAT: DataFrame = iris.data.features
# The last header is used as the name of the label column.
LAB_NAME: str = iris.data["headers"][-1]
# Label column selected from the full table.
# NOTE(review): selecting a single column by name normally yields a pandas Series rather
# than a DataFrame, which is presumably why the type checker is silenced here -- confirm.
LABELS: DataFrame = DATA[LAB_NAME]  # type: ignore
# Feature column names, in the order they appear in FEAT.
COL_NAMES = list(FEAT.columns)


## Gradient Descent
`gradient_descent.py`


In [2]:
import matplotlib.pyplot as plt
import numpy as np
import plot_util
from numpy import cos, pi, sin
from numpy import floating as fl
from numpy.typing import NDArray


def gradient_descent(df, params: NDArray, alpha: float, num_iters: int) -> NDArray:
    """Minimize a function with fixed-step gradient descent.

    Starting from `params`, the update ``params <- params - alpha * df(params)``
    is applied `num_iters` times.

    Parameters
    ----------
    `df`: function
        derivative function (i.e. gradient); called with the current parameter
        vector and must return an array broadcastable to it.
    `params`: NDArray
        Initial vector of parameters to optimize. It is NOT modified: the
        descent runs on a private copy.
    `alpha`: float
        Step size (learning rate). Values too big will give bad results and
        values too small won't converge or will converge too slowly.
    `num_iters`: int
        Number of iterations. Zero or a negative value performs no update.

    Return value
    ------------
    A new array holding the parameters after `num_iters` updates."""
    # Copy first: the caller's initial guess must survive the call so it can be
    # reused (e.g. to compare several learning rates from the same start).
    current = np.array(params)
    for _ in range(num_iters):
        # Out-of-place update: unlike `-=`, this promotes integer start vectors
        # to float instead of raising a casting error.
        current = current - alpha * df(current)
    return current


def grad_desc_ml(
        features: NDArray, labels: NDArray, df, w: NDArray, b: fl, alpha: float, num_iters: int
) -> tuple[NDArray, fl]:
    """Gradient descent for a model with a weight vector and a bias.

    Same idea as `gradient_descent`, but the gradient function also receives
    the training data, `features` (X) and `labels` (y), which are needed for
    any kind of learning.

    Parameters
    ----------
    `features` : NDArray
        Samples / features.
    `labels` : NDArray
        labels / class associated to each sample.
    `df`: function
        derivative function (i.e. gradient); called as
        ``df(features, labels, w, b)`` and must return ``(grad_w, grad_b)``.
    `w` : NDArray
        initial weights vector. It is NOT modified: a copy is updated.
    `b` : fl (float or NDArray[float])
        initial bias. It is NOT modified.
    `alpha`: float
        Step size (learning rate). Values too big will give bad results and
        values too small won't converge or will converge too slowly.
    `num_iters`: int
        Number of iterations. Zero or a negative value performs no update.

    Return value
    ------------
    ``(w, b)`` after `num_iters` updates, as new objects."""
    # Copy the weights so the caller's initial vector is left untouched.
    weights = np.array(w)
    bias = b
    for _ in range(num_iters):
        grad_w, grad_b = df(features, labels, weights, bias)
        # Out-of-place updates: they never write into the caller's arrays and
        # they promote integer-typed initial values to float instead of raising.
        weights = weights - alpha * grad_w
        bias = bias - alpha * grad_b
    return weights, bias


## Naive Bayes
`naive_bayes.py`

In [3]:
from pprint import pprint
from typing import Any

# NB: `fl` (numpy.floating) is used in annotations for any NumPy floating-point value, whether a scalar or an NDArray
from numpy import float32 as f32, floating as fl

from numpy.typing import NDArray
from pandas import DataFrame

def normal_pdf(mean: fl, std: fl):
    """Build the probability density function of a normal distribution.

    Parameters
    ----------
    `mean` : float or NDArray of float
        The mean (μ) of the normal distribution.
    `std` : float or NDArray of float
        The standard deviation (σ) of the normal distribution.

    Returns
    -------
    A one-argument function ``x -> pdf(x)`` evaluating
    (1 / (σ * sqrt(2π))) * exp(-((x - μ)² / (2σ²)))."""

    def pdf(x):
        # Normalising constant and exponent are evaluated on every call, so no
        # arithmetic happens until the returned function is actually used.
        scale = 1 / (std * np.sqrt(2 * np.pi))
        exponent = -((x - mean) ** 2) / (2 * std**2)
        return scale * np.exp(exponent)

    return pdf


def get_distrib_parameters(data: DataFrame, feature_names: list[str], labels: DataFrame) -> dict[Any, list[tuple[fl, fl]]]:
    """Estimate per-class normal parameters for each requested feature.

    Parameters
    ----------
    `data` : The dataset.
    `feature_names` : The names of the features to extract the normal parameters from.
    `labels` : Labels whose distinct values become the keys of the returned dict.

    Returns
    -------
    A dictionary {class: [(mean_i, std_i), ...]} with one (mean, std) pair per
    feature i, in the order of `feature_names`. Both values are stored as float32."""
    params_by_class: dict[Any, list[tuple[fl, fl]]] = {}
    for label_value in labels.unique():
        # Rows of the dataset that belong to the current class.
        class_rows = data[labels == label_value]
        params_by_class[label_value] = [
            (f32(class_rows[name].mean()), f32(class_rows[name].std()))
            for name in feature_names
        ]
    return params_by_class


def predict_bayes(x: NDArray, params_by_class: dict[Any, list[tuple[fl, fl]]]) -> Any:
    """Predict the class of one sample with Gaussian naive Bayes likelihoods.

    For every class the per-feature normal log-densities log P(X_i | y) are
    summed, and the class with the largest total is returned. Working with
    log-densities instead of multiplying the densities avoids floating-point
    underflow: with many features (or a sample far from a class mean) the
    product becomes exactly 0 for every class and the comparison degenerates.

    Parameters
    ----------
    `x` : The sample to predict (any 1-D sequence of numbers). Entries beyond
        the number of (mean, std) pairs of a class are ignored.
    `params_by_class` : The parameters of the normal distribution of each
        feature for each class, i.e. {class: [(mean_i, std_i), ...]}.
        Standard deviations are expected to be strictly positive.

    Returns
    -------
    The predicted class for the sample x. On ties, the class that appears
    first in `params_by_class` wins.

    Raises
    ------
    IndexError
        If `x` has fewer entries than a class has (mean, std) pairs.
    ValueError
        If `params_by_class` is empty."""
    sample = np.asarray(x, dtype=float)
    half_log_2pi = 0.5 * np.log(2 * np.pi)

    log_likelihoods = {}
    for class_value, params in params_by_class.items():
        # One row per feature: column 0 holds the means, column 1 the stds.
        stats = np.asarray(params, dtype=float).reshape(-1, 2)
        means, stds = stats[:, 0], stats[:, 1]
        n_features = len(means)
        if n_features == 0:
            # Empty product of densities is 1, i.e. a log-likelihood of 0.
            log_likelihoods[class_value] = 0.0
            continue
        if sample.ndim == 0 or sample.shape[0] < n_features:
            raise IndexError(
                f"sample has too few features for class {class_value!r}: "
                f"expected at least {n_features}"
            )
        observed = sample[:n_features]
        # log N(x | mean, std) = -log(std) - 0.5*log(2*pi) - (x - mean)^2 / (2*std^2)
        log_pdf = -np.log(stds) - half_log_2pi - (observed - means) ** 2 / (2 * stds**2)
        log_likelihoods[class_value] = float(np.sum(log_pdf))

    # get the class that maximizes the conditional (log-)likelihood
    return max(log_likelihoods, key=log_likelihoods.__getitem__)


# ================================================================
# ======================= TEST:==================================
# ================================================================


def test_get_normal_parameters():
    """Print the per-class (mean, std) parameters computed from the loaded dataset."""
    fitted = get_distrib_parameters(FEAT, COL_NAMES, LABELS)
    header = "Format: (mean_i, std_i), ...,  for each class"
    print(header)
    pprint(fitted)


def test_predict_bayes():
    """Predict the class of one randomly chosen dataset row and print it next to the true label."""
    class_params = get_distrib_parameters(FEAT, COL_NAMES, LABELS)

    # Draw a random row position and use that row as the sample to classify.
    row = np.random.randint(0, len(FEAT))
    sample = FEAT.iloc[row]
    print("Sample to predict:\n", sample, "\n ")

    predicted = predict_bayes(sample, class_params)
    print("Predicted class: ", predicted)

    actual = LABELS.iloc[row]
    print("Actual class: ", actual)


def main():
    """Print a spacer line, then run the naive Bayes prediction demo."""
    # test_get_normal_parameters() is intentionally left disabled here.
    spacer = " "
    print(spacer)
    test_predict_bayes()


## Logistic Regression
`log_reg.py`

In [4]:
def z(X: NDArray, w: NDArray, b: fl) -> fl:
    """Compute the affine score of a linear model.

    Returns
    -------
    ``np.dot(X, w) + b``: `float` or `NDArray[float]` (i.e. `floating`)

    Notes
    -----
    The first two arguments are passed to ``np.dot`` in the given order, so
    swapping them (e.g. ``z(w, X, b)``) generally gives a different result and
    only works when the dimensions still allow the product."""
    weighted_sum = np.dot(X, w)
    return weighted_sum + b


def sigmoid(z: fl) -> fl:
    """Logistic function, evaluated in a numerically stable way.

    Parameters
    ----------
    `z` : fl (float or NDArray[float])
        Input value(s).

    Returns
    -------
    1 / (1 + exp(-z)), element-wise.

    Notes
    -----
    The direct formula evaluates ``exp(-z)``, which overflows for large
    negative `z` and emits a RuntimeWarning. Here the identity
    ``1 / (1 + exp(-z)) == exp(-log(1 + exp(-z)))`` is used, with
    ``np.logaddexp(0, -z)`` computing ``log(1 + exp(-z))`` without overflow."""
    return np.exp(-np.logaddexp(0, -z))


def norm(X: NDArray, axis=None):
    """Standardize `X` to zero mean and unit (population) standard deviation.

    Parameters
    ----------
    `X` : NDArray
        Values to standardize.
    `axis` : int, tuple of int or None, optional
        Axis (or axes) along which mean and standard deviation are computed.
        The default ``None`` uses a single mean/std over all entries;
        ``axis=0`` standardizes each column (feature) of a samples-by-features
        matrix independently.

    Returns
    -------
    Array with the same shape as `X` holding ``(X - mean) / std``. Where the
    standard deviation is zero (constant input), the result is 0 rather than
    NaN."""
    values = np.asarray(X)
    # keepdims keeps the reduced axes so the statistics broadcast back onto X.
    center = np.mean(values, axis=axis, keepdims=True)
    spread = np.std(values, axis=axis, keepdims=True)
    # A constant slice has std == 0; divide by 1 there to avoid 0/0 -> NaN.
    spread = np.where(spread == 0, 1.0, spread)
    return (values - center) / spread


def grad(X: NDArray, y: NDArray, w: NDArray, b: fl):
    """Computes (vectorized) the gradient of the log loss function w.r.t "w" and "b" for the current iteration.
    It is used in the gradient descent algorithm.

    With predictions ``p = sigmoid(X·w + b)`` the gradient of the summed log
    loss is ``dw = Xᵀ·(p - y)`` and ``db = Σ(p - y)``.

    Parameters
    ----------
    `X` : NDArray
        Samples / features, one sample per row.
    `y` : NDArray
        labels / class associated to each sample.
    `w` : NDArray
        weights vector (one weight per feature).
    `b` : fl (float or NDArray[float])
        bias

    Returns
    -------
    (dw, db) :
        The gradient of the log loss function w.r.t "w" and "b". `dw` has one
        component per weight; `db` is a scalar. Both are sums over the samples
        (not averaged)."""
    # Samples go first in the product: z(X, w, b) yields one score per row of X.
    predictions = sigmoid(z(X, w, b))
    residuals = predictions - y  # Predicted minus actual values

    # Each weight's component is the residual-weighted sum of its own feature
    # column; summing across features first would collapse dw to one scalar.
    dw = np.dot(np.transpose(X), residuals)
    db = np.sum(residuals)

    return dw, db


def train_log_reg(X: NDArray, y: NDArray, w: NDArray, b: fl, n_it: int, lr: float) -> tuple[NDArray, fl]:
    """Train a logistic regression model by gradient descent on the log loss.

    Parameters
    ----------
    `X` : NDArray
        Samples / features.
    `y` : NDArray
        labels / class associated to each sample.
    `w` : NDArray
        initial weight vector.
    `b` : fl (float or NDArray[float])
        initial bias
    `n_it` : int
        number of iterations
    `lr` : float
        learning rate

    Returns
    -------
        Trained (weight vector, bias) obtained by running `grad_desc_ml` with `grad`."""
    trained = grad_desc_ml(
        features=X,
        labels=y,
        df=grad,
        w=w,
        b=b,
        alpha=lr,
        num_iters=n_it,
    )
    return trained



In [17]:
# Run the naive Bayes demo on one randomly drawn sample; the captured cell output follows.
test_predict_bayes()

Sample to predict:
 sepal length    4.6
sepal width     3.4
petal length    1.4
petal width     0.3
Name: 6, dtype: float64 
 
Predicted class:  Iris-setosa
Actual class:  Iris-setosa
