# Notebook created to train the logistic regression model without having to reload the dataset every time

# Because, for some reason, ucimlrepo takes a weirdly long time to load

In [8]:
from pandas import DataFrame
from ucimlrepo import fetch_ucirepo
import numpy as np
from numpy import floating as fl, float32 as f32, float64 as f64, int32 as i32
from numpy.typing import NDArray
from pprint import pprint

# Iris dataset
DATASET_ID = 53

iris = fetch_ucirepo(id=DATASET_ID)  # fetch dataset
assert iris.data is not None

DATA: DataFrame = iris.data.original
# The label column is the last header of the dataset.
LAB_NAME: str = iris.data["headers"][-1]

from sklearn.model_selection import train_test_split
# 70 % / 30 % train/test split; the fixed random_state keeps the split identical between runs.
FEAT, FEAT_test, y_train, y_test = train_test_split(iris.data.features, DATA[LAB_NAME], test_size=0.3, random_state=42)

# Training / test frames = features + label column.
# The label column is written under LAB_NAME because it is read back under LAB_NAME below.
DATA_train = FEAT.copy(deep=True)
DATA_train[LAB_NAME] = y_train

DATA_test = FEAT_test.copy(deep=True)
DATA_test[LAB_NAME] = y_test

LABELS_STR: DataFrame = DATA_train[LAB_NAME]  # type: ignore
LABELS_STR_test: DataFrame = DATA_test[LAB_NAME]  # type: ignore

# Distinct label strings, in order of first appearance in each split.
lab_values = LABELS_STR.unique()
lab_values_test = LABELS_STR_test.unique()

# index <-> label-string lookup tables built from the TRAINING split.
LAB_IDX_VAL: dict[int, str] = dict(enumerate(lab_values))
LAB_VAL_IDX: dict[str, int] = {value: idx for idx, value in enumerate(lab_values)}

# Same tables built from the test split. Their index order may differ from the
# training tables, so they must not be used to encode labels fed to a model
# trained with LAB_VAL_IDX.
LAB_IDX_VAL_test: dict[int, str] = dict(enumerate(lab_values_test))
LAB_VAL_IDX_test: dict[str, int] = {value: idx for idx, value in enumerate(lab_values_test)}


# Integer-encoded labels. Both splits use the training mapping so that a given
# integer means the same class everywhere.
LABELS: NDArray[np.int_] = np.array([LAB_VAL_IDX[class_value] for class_value in LABELS_STR])
# Fixed: this previously iterated over LABELS_STR (the training labels).
LABELS_test: NDArray[np.int_] = np.array([LAB_VAL_IDX[class_value] for class_value in LABELS_STR_test])
COL_NAMES = list(FEAT.columns)
# Bare expression: displays the test frame as the notebook cell output.
DATA_test

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
73,6.1,2.8,4.7,1.2,Iris-versicolor
18,5.7,3.8,1.7,0.3,Iris-setosa
118,7.7,2.6,6.9,2.3,Iris-virginica
78,6.0,2.9,4.5,1.5,Iris-versicolor
76,6.8,2.8,4.8,1.4,Iris-versicolor
31,5.4,3.4,1.5,0.4,Iris-setosa
64,5.6,2.9,3.6,1.3,Iris-versicolor
141,6.9,3.1,5.1,2.3,Iris-virginica
68,6.2,2.2,4.5,1.5,Iris-versicolor
82,5.8,2.7,3.9,1.2,Iris-versicolor


## Gradient Descent
`gradient_descent.py`


In [None]:
def grad_desc_ml(
        features: NDArray, labels: NDArray, df, w: NDArray, b: fl, alpha: float, num_iters: int
) -> tuple[NDArray, fl]:
    """Run `num_iters` steps of gradient descent on the parameters (`w`, `b`).

    Same idea as a plain gradient descent, but `features` (X) and `labels` (y)
    are passed along to the gradient function, since they are needed for any
    kind of learning.

    Parameters
    ----------
    `features` : NDArray
        Samples / features.
    `labels` : NDArray
        labels / class associated to each sample.
    `df`: function
        derivative function (i.e. gradient), called as ``df(features, labels, w, b)``
        and returning the pair ``(grad_w, grad_b)``.
    `w` : NDArray
        initial weights vector. It is NOT modified; a new array is returned.
    `b` : fl (float or NDArray[float])
        initial bias
    `alpha`: float
        step size (learning rate). Values too big will give bad results and
        values too small won't converge or will converge too slowly.
    `num_iters`: int
        Number of iterations

    Return value
    ------------
    (w, b) :
        The weights and bias after `num_iters` update steps. When
        `num_iters` is 0 the inputs are returned unchanged."""

    for _ in range(num_iters):
        grad_w, grad_b = df(features, labels, w, b)
        # Rebind instead of updating in place (`w -= ...`): the caller's initial
        # weights stay untouched, and integer-dtype inputs are promoted to
        # float instead of raising a casting error.
        w = w - alpha * grad_w
        b = b - alpha * grad_b
    return w, b


## Logistic Regression
`log_reg.py`

In [None]:
def z(X: NDArray, w: NDArray, b: fl) -> fl:
    """Affine score of the model: dot product of `X` and `w`, shifted by `b`.

    Returns
    -------
    ``np.dot(X, w) + b``: `float` or `NDArray[float]` (i.e. `floating`)

    Notes
    -----
    The two array arguments may be passed in the other order, e.g.
    `z(w, X, b)`. The result is generally different, but the call works as
    long as the dimensions are compatible for `np.dot`."""
    weighted_sum = np.dot(X, w)
    return weighted_sum + b


def sigmoid(z: fl) -> fl:
    """Logistic function, evaluated without floating-point overflow.

    Parameters
    ----------
    `z` : fl (float or NDArray[float])
        Input value(s).

    Returns
    -------
    1 / (1 + exp(-z)), element-wise when `z` is an array."""
    # 1 / (1 + exp(-z)) == exp(-log(1 + exp(-z))) == exp(-logaddexp(0, -z)).
    # The direct formula overflows in exp(-z) for large negative z (NumPy
    # emits a RuntimeWarning); logaddexp never forms that huge intermediate.
    return np.exp(-np.logaddexp(0, -z))


def norm(X: NDArray):
    """Standardize `X` using the mean and standard deviation of ALL its elements.

    `np.mean` and `np.std` are called without an axis, so a single global mean
    and a single global standard deviation are used (not one per column).

    Returns
    -------
    ``(X - mean(X)) / std(X)``"""
    centered = X - np.mean(X)
    return centered / np.std(X)


def grad(X: NDArray, y: NDArray, w: NDArray, b: fl):
    """Computes (vectorized) the gradient of the log loss function w.r.t "w" and "b" for the current iteration.
    It is used in the gradient descent algorithm.

    The gradient is the plain sum over the samples (it is not divided by the
    number of samples).

    Parameters
    ----------
    `X` : NDArray
        Samples / features, one row per sample.
    `y` : NDArray
        labels / class associated to each sample.
    `w` : NDArray
        weights vector (one weight per feature).
    `b` : fl (float or NDArray[float])
        bias
    Returns
    -------
    (dw, db) :
        The gradient of the log loss function w.r.t "w" and "b".
        `dw` has one component per weight; `db` is a scalar."""

    # Fixed: `z` in this cell is declared as z(X, w, b); calling it as
    # z(w, X, b) computes np.dot(w, X), which fails unless X is square.
    predictions = sigmoid(z(X, w, b))  # Sigmoid function applied to z
    errors = y - predictions  # Difference between actual and predicted values
    db = -np.sum(errors)  # Vectorized computation of db component

    # Fixed: dw_j = -sum_i X[i, j] * errors[i], i.e. one partial derivative per
    # weight. The previous code summed X over its features first, which
    # collapsed the gradient to a single scalar applied to every weight.
    dw = -np.dot(X.T, errors)

    return dw, db


def train_log_reg(X: NDArray, y: NDArray, w: NDArray, b: fl, n_it: int, lr: float) -> tuple[NDArray, fl]:
    """Train the logistic regression parameters by delegating to `grad_desc_ml`.

    Parameters
    ----------
    `X` : NDArray
        Samples / features.
    `y` : NDArray
        labels / class associated to each sample.
    `w` : NDArray
        initial weight vector.
    `b` : fl (float or NDArray[float])
        initial bias
    `n_it` : int
        number of iterations
    `lr` : float
        learning rate
    Returns
    -------
        The (weight vector, bias) pair returned by `grad_desc_ml` when it is
        driven by the `grad` gradient function."""
    trained_params = grad_desc_ml(
        features=X, labels=y, df=grad, w=w, b=b, alpha=lr, num_iters=n_it
    )
    return trained_params



## Logistic Regression but with CuPy (Nvidia / Cuda)

In [None]:
def compute_metrics(data, predicted_values, average="binary"):
    """ Compute precision, recall, accuracy and F1 score of predicted labels.

        :param data: the actual (ground-truth) labels.
        :param predicted_values: the predicted labels, in the same order as `data`.
        :param average: averaging strategy forwarded to scikit-learn's
            precision/recall/F1 functions. The default ``"binary"`` is the value
            scikit-learn itself uses when none is given; pass e.g. ``"macro"``,
            ``"micro"`` or ``"weighted"`` for labels with more than two classes.
            It must select a single aggregated score, because every metric is
            converted with ``float``.
        :return: dict: a dictionary with the keys ``'precision'``, ``'recall'``,
            ``'accuracy'`` and ``'f1_score'``, each mapped to a float."""
    # The docstring has to be the first statement of the function; the import
    # used to come first, which turned the docstring into a dead expression.
    from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score

    y_true, y_pred = data, predicted_values

    precision = precision_score(y_true, y_pred, average=average)
    recall = recall_score(y_true, y_pred, average=average)
    accuracy = accuracy_score(y_true, y_pred)  # accuracy has no averaging mode
    f1 = f1_score(y_true, y_pred, average=average)

    return {
        'precision': float(precision),
        'recall': float(recall),
        'accuracy': float(accuracy),
        'f1_score': float(f1)
    }

In [None]:
import cupy as cp
from cupy import ndarray as CPArray
from numpy.random import rand, randint


def z(w: CPArray, X: CPArray, b: float) -> CPArray:
    """Affine score computed with CuPy.

    :return: ``cp.dot(X, w) + b``,
    i.e. float or CPArray[float] (i.e. cupy.ndarray)
    NOTE: `w` and `X` can be passed in the other order, e.g. `z(X, w, b)`.
    The result is generally different, but the call works as long as the
    dimensions are compatible for `cp.dot`."""
    weighted_sum = cp.dot(X, w)
    return weighted_sum + b


def sigmoid(z):
    """:return: ``1 / (1 + cp.exp(-z))``, the logistic function evaluated with CuPy."""
    denominator = 1 + cp.exp(-z)
    return 1 / denominator


def norm(X: CPArray):
    """:return: ``(X - cp.mean(X)) / cp.std(X)``.
    Mean and standard deviation are taken over all elements of `X`
    (no axis is given), not per column."""
    centered = X - cp.mean(X)
    return centered / cp.std(X)


def grad(X: CPArray, y: CPArray, w: CPArray, b: float):
    """:return: (dw, db), the gradient of the log loss w.r.t "w" and "b".
    `dw` has one component per weight; `db` is a scalar. Both are plain sums
    over the samples (not divided by the number of samples).
    (on gpu. X, y, w, b are `cupy.ndarray` shortened to `CPArray`)

    :param X: Feature matrix, one row per sample
    :param y: Label vector
    :param w: weight vector (one weight per feature)
    :param b: bias
    """

    predictions = sigmoid(z(w, X, b))  # Sigmoid function applied to z
    errors = y - predictions  # Difference between actual and predicted values
    db = -cp.sum(errors)  # Vectorized computation of db component

    # Fixed: dw_j = -sum_i X[i, j] * errors[i], i.e. one partial derivative per
    # weight. The previous code summed X over its features first, which
    # collapsed the gradient to a single scalar applied to every weight.
    dw = -cp.dot(X.T, errors)

    return dw, db


# Function and variable names are kept short; the function name itself is the one required by the exercise.
def train_log_reg(X: NDArray, y: NDArray, w: NDArray, b: float, n_it: int, lr: float) -> tuple[NDArray, float]:
    """Train logistic regression with gradient descent, running the math through CuPy.

    :param X: Feature matrix (covariables)
    :param y: Label vector
    :param w: initial weight vector
    :param b: initial bias
    :param n_it: number of iterations
    :param lr: learning rate
    :return: (w, b) after `n_it` gradient-descent updates. `w` is the CuPy
        array created inside this function, not the array that was passed in.
    """
    # cp.array builds new CuPy arrays, so the in-place updates below act on
    # these local arrays only.
    X = cp.array(X)
    y = cp.array(y)
    w = cp.array(w)
    for _ in range(n_it):
        dw, db = grad(X, y, w, b)
        w -= lr * dw
        b -= lr * db
    return w, b


def predict_log_reg(X: NDArray, w: NDArray, b):
    """ Predict class labels for the examples in X from the logistic regression parameters w and b.
    :param X: The input features. 2D Matrix NDArray
    :param w: The weights of the logistic regression model. Vector NDArray
    :param b: The bias of the logistic regression model. float
    :return: Vector of predicted class labels (1 where the sigmoid output is >= 0.5,
        else 0), converted with `i32`.
    """
    X = cp.array(X)
    w = cp.array(w)
    # .get() copies the CuPy result back to a NumPy array on the host.
    probabilities = sigmoid(z(w, X, b)).get()
    is_positive = probabilities >= 0.5
    return i32(is_positive)


def test_train_gpu(m, n):
    """Run `train_log_reg` once on random data of shape (m, n).

    :param m: number of random samples
    :param n: number of random features
    :return: None. The trained parameters are not returned.
    """
    # Draw order (X, y, w, b) matches the original tuple assignment.
    X = rand(m, n)
    y = rand(m)
    w = rand(n)
    b = rand()
    n_it = 100
    lr = 0.03
    w, b = train_log_reg(X, y, w, b, n_it, lr)

In [None]:
# m = number of training samples, n = number of feature columns.
m, n = FEAT.shape
# Random starting point: one weight per feature, then the bias.
init_w = np.random.rand(n)
init_b = np.random.rand()
n_it = 10000
lr = 1e-10

# Bare expression: the returned (weights, bias) pair is shown as the cell output.
train_log_reg(X=FEAT.to_numpy(), y=LABELS, w=init_w, b=init_b, n_it=n_it, lr=lr)

In [None]:
def pred_compute(ta_tes, w, b):
    """Predict the labels of a test set with (`w`, `b`) and score the predictions.

    :param ta_tes: test DataFrame containing the feature columns listed in
        `COL_NAMES` and the label column `LAB_NAME` (e.g. `DATA_test`).
    :param w: weight vector of the logistic regression model.
    :param b: bias of the logistic regression model.
    :return: the metrics dictionary returned by `compute_metrics`.
    """
    # Fixed: the body used the undefined name `predicted` (NameError) and
    # ignored its `ta_tes` argument. Features and true labels now both come
    # from the DataFrame that was passed in.
    X_test = ta_tes[COL_NAMES].to_numpy()
    # Encode the label strings with the training mapping so the integers match
    # the ones the model was trained on.
    y_true = np.array([LAB_VAL_IDX[class_value] for class_value in ta_tes[LAB_NAME]])

    predicted_val_logreg = predict_log_reg(X_test, w, b)
    # NOTE(review): predict_log_reg only outputs 0/1 and compute_metrics calls the
    # scikit-learn scorers with their default (binary) averaging, while y_true can
    # hold more than two distinct classes. Confirm the intended label setup.
    metrics = compute_metrics(y_true, predicted_val_logreg)
    return metrics

In [None]:
def maximize_train_param(tries: int, init_w=None, n_it: int = 3900):
    """Scan `tries` learning rates and return the run with the best F1 score.

    Learning rates are evenly spaced between ``1e-9 * 0.01`` and ``1e-9 * 2``.
    Every run starts from the same initial weights and a bias of 0, trains on
    (`FEAT`, `LABELS`) and is scored on `DATA_test`.

    :param tries: number of learning rates to try (must be >= 1).
    :param init_w: initial weight vector, one weight per feature column of
        `FEAT`. When omitted, a random vector of the right length is drawn once
        and reused for every run.
    :param n_it: number of gradient-descent iterations per run.
    :return: the tuple ``(w, n_it, lr, f1_score)`` of the run with the highest F1 score.
    :raises ValueError: if `tries` is smaller than 1.
    """
    if tries < 1:
        raise ValueError("tries must be at least 1")

    # Fixed: the body referred to the undefined names `data_test`, `X` and
    # `labels`; the notebook's actual globals are used instead.
    X_train = FEAT.to_numpy()
    if init_w is None:
        # The previous hard-coded start vector had 3 entries and could not be
        # multiplied with the feature matrix, which has one column per entry
        # of COL_NAMES.
        init_w = np.random.rand(X_train.shape[1])
    init_w = np.asarray(init_w, dtype=f64)

    # Baseline: score of the untrained starting point.
    print(pred_compute(DATA_test, init_w, 0)["f1_score"])

    params = []
    for lr in np.linspace(1e-9 * 0.01, 1e-9 * 2, tries, dtype=f64):
        # A fresh copy per run guarantees every learning rate starts from the
        # same weights, whatever the training routine does with its input.
        w, b = train_log_reg(X_train, LABELS, init_w.copy(), 0., n_it, lr)
        f1_score_ = pred_compute(DATA_test, w, b)["f1_score"]
        params.append((w, n_it, lr, f1_score_))
        print(w, lr, "\nf1_score:", f1_score_)
        print("______")

    return max(params, key=lambda x: x[-1])