# Numerics

In [None]:
!pip install numpy scikit-learn matplotlib agosto

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from algosto.solvers import SGDSolver
from algosto.utils.functions import binary_log_likelihood
from algosto.utils import plot

## Data generation

In [None]:
# Draw a reproducible two-feature, two-class toy dataset.
X, y = make_classification(
    n_samples=500,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=651,
)

# Split the samples by label so each class gets its own colour.
class_0, class_1 = X[y == 0], X[y == 1]

for points, colour, name in (
    (class_0, 'blue', 'Classe 0'),
    (class_1, 'red', 'Classe 1'),
):
    plt.scatter(points[:, 0], points[:, 1], color=colour, label=name, alpha=0.7)

# Fixed viewport.
plt.xlim(-3.5, 3.5)
plt.ylim(-2.5, 2.5)

plt.title('Visualisation des classes')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()

## Logistic regression

In [None]:
def logistic(X: np.ndarray, weights: np.ndarray) -> np.ndarray:
    """Return the logistic (sigmoid) response of a linear model.

    Computes ``sigmoid(X @ weights.T)`` element-wise.

    Parameters
    ----------
    X : np.ndarray
        Input array, matrix-multiplied by ``weights.T``.
    weights : np.ndarray
        Model weights.

    Returns
    -------
    np.ndarray
        Sigmoid of the linear scores, each value in ``[0, 1]``.
    """
    def sigmoid(S):
        # 1 / (1 + exp(-S)) == exp(-log(1 + exp(-S))).  ``logaddexp`` evaluates
        # log(exp(0) + exp(-S)) without forming exp(-S) directly, so very
        # negative scores no longer trigger an overflow warning.
        return np.exp(-np.logaddexp(0.0, -S))
    return sigmoid(X @ weights.T)

## Gradient Descent

In [None]:
# Full-batch setting: one batch holding every sample.  The size is taken from
# the data rather than repeated as a literal, so it stays full-batch if the
# dataset size changes.
batch_size = X.shape[0]
n_batch = int(np.ceil(X.shape[0] / batch_size))

# Objective and gradient callables for the logistic model on (X, y).
BLL_f, BLL_grad = binary_log_likelihood(X, y, batch_size, logistic)

solver = SGDSolver(
    d=2,
    N=n_batch,
    objective=BLL_f,
    grad=BLL_grad,
    gamma=0.1,
    random_state=42,
)

# Start the iterations from the origin of the weight space.
solver.fit(x_start=np.array([0, 0]))

plot(solver)

In [None]:
plt.figure()

class_0 = X[y == 0]
class_1 = X[y == 1]

plt.scatter(class_0[:, 0], class_0[:, 1], color='blue', label='Classe 0', alpha=0.7)
plt.scatter(class_1[:, 0], class_1[:, 1], color='red', label='Classe 1', alpha=0.7)

# Last point of the solver trajectory, used as the fitted weights.
w1, w2 = solver.get_trajectory()[-1]
x_values = np.linspace(-3.5, 3.5, 100)  # sample points along the x axis

# The model has no intercept, so the boundary w1*x + w2*y = 0 is the line
# y = -(w1 / w2) * x through the origin.
# NOTE(review): assumes w2 != 0; a vertical boundary is not handled here.
decision_boundary = -(w1 / w2) * x_values
plt.plot(x_values, decision_boundary, color='black', linestyle='--', label='Frontière de décision')

# Shade the prediction zones.  Which side of the line belongs to which class
# depends on the sign of w2: above the line w1*x + w2*y has the sign of w2.
# Assuming a positive score means class 1 (logistic output > 0.5), class 0 lies
# above the line when w2 < 0 and below it otherwise.
y_top, y_bottom = 2.5, -2.5
zone_0_edge, zone_1_edge = (y_top, y_bottom) if w2 < 0 else (y_bottom, y_top)
plt.fill_between(
    x_values, decision_boundary, zone_0_edge, color='blue', alpha=0.2, label='Classe 0 (zone)'
)
plt.fill_between(
    x_values, decision_boundary, zone_1_edge, color='red', alpha=0.2, label='Classe 1 (zone)'
)

plt.xlim((-3.5, 3.5))
plt.ylim((-2.5, 2.5))

plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Visualisation des classes')
plt.legend()

plt.show()

## Stochastic Gradient Descent

In [None]:
# Mini-batch setting: 10 samples per batch.
batch_size = 10
n_samples = X.shape[0]
n_batch = int(np.ceil(n_samples / batch_size))

# Objective and gradient callables for the logistic model on (X, y).
BLL_f, BLL_grad = binary_log_likelihood(X, y, batch_size, logistic)

solver = SGDSolver(
    d=2,
    N=n_batch,
    objective=BLL_f,
    grad=BLL_grad,
    gamma=0.1,
    random_state=42,
)

# Start the iterations from the origin of the weight space.
initial_weights = np.array([0, 0])
solver.fit(x_start=initial_weights)

plot(solver)

In [None]:
plt.figure()

class_0 = X[y == 0]
class_1 = X[y == 1]

plt.scatter(class_0[:, 0], class_0[:, 1], color='blue', label='Classe 0', alpha=0.7)
plt.scatter(class_1[:, 0], class_1[:, 1], color='red', label='Classe 1', alpha=0.7)

# Last point of the solver trajectory, used as the fitted weights.
w1, w2 = solver.get_trajectory()[-1]
x_values = np.linspace(-3.5, 3.5, 100)  # sample points along the x axis

# The model has no intercept, so the boundary w1*x + w2*y = 0 is the line
# y = -(w1 / w2) * x through the origin.
# NOTE(review): assumes w2 != 0; a vertical boundary is not handled here.
decision_boundary = -(w1 / w2) * x_values
plt.plot(x_values, decision_boundary, color='black', linestyle='--', label='Frontière de décision')

# Shade the prediction zones.  Which side of the line belongs to which class
# depends on the sign of w2: above the line w1*x + w2*y has the sign of w2.
# Assuming a positive score means class 1 (logistic output > 0.5), class 0 lies
# above the line when w2 < 0 and below it otherwise.
y_top, y_bottom = 2.5, -2.5
zone_0_edge, zone_1_edge = (y_top, y_bottom) if w2 < 0 else (y_bottom, y_top)
plt.fill_between(
    x_values, decision_boundary, zone_0_edge, color='blue', alpha=0.2, label='Classe 0 (zone)'
)
plt.fill_between(
    x_values, decision_boundary, zone_1_edge, color='red', alpha=0.2, label='Classe 1 (zone)'
)

plt.xlim((-3.5, 3.5))
plt.ylim((-2.5, 2.5))

plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Visualisation des classes')
plt.legend()

plt.show()