In [4]:
import numpy as np
import pandas as pd

def elastic_net_regression(X, y, alpha=1.0, l1_ratio=0.5, max_iter=1000, tol=1e-4, learning_rate=0.01):
    """
    Effectue une régression ElasticNet sur les données d'entraînement.

    Parameters:
    - X: ndarray, shape (n_samples, n_features)
        Les données d'entraînement.
    - y: ndarray, shape (n_samples,)
        Les étiquettes d'entraînement.
    - alpha: float, optional, default: 1.0
        Paramètre de régularisation total (alpha = alpha_1 + alpha_2).
    - l1_ratio: float, optional, default: 0.5
        Ratio de mélange entre les termes de régularisation L1 et L2.
    - max_iter: int, optional, default: 1000
        Nombre maximal d'itérations pour la descente de gradient.
    - tol: float, optional, default: 1e-4
        Tolérance pour la convergence.
    - learning_rate: float, optional, default: 0.01
        Taux d'apprentissage pour la descente de gradient.

    Returns:
    - coef_: ndarray, shape (n_features,)
        Les coefficients de régression.
    - intercept_: float
        L'intercept de régression.
    """

    m, n = X.shape
    coef_ = np.zeros(n)
    intercept_ = 0.0

    for _ in range(max_iter):
        # Compute predictions
        y_pred = np.dot(X, coef_) + intercept_

        # Compute gradients
        residuals = y_pred - y
        gradient_coef = np.dot(X.T, residuals) / (m + 1e-8)
        gradient_intercept = np.sum(residuals) / (m + 1e-8)

        # Apply L1 and L2 penalties
        l1_penalty = alpha * l1_ratio * np.sign(coef_)
        l2_penalty = alpha * (1 - l1_ratio) * coef_

        # Update coefficients and intercept using gradient descent
        coef_ -= learning_rate * (gradient_coef + l1_penalty + l2_penalty)
        intercept_ -= learning_rate * gradient_intercept

        # Check for convergence
        if np.linalg.norm(gradient_coef) < tol:
            break

    return coef_, intercept_

# Exemple d'utilisation
df = pd.read_csv('../artifacts/data_transformation/train.csv')

# Check for NaN or infinite values in the dataset
print("NaN values in X:", np.isnan(df).any().any())
print("Infinite values in X:", np.isinf(df).any().any())
print("NaN values in y:", np.isnan(df[df.columns[-1]]).any())
print("Infinite values in y:", np.isinf(df[df.columns[-1]]).any())

# Drop any rows with NaN or infinite values
df = df.dropna()
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(axis=1)]

train_x = df[df.columns[:-1]].values
train_y = df[df.columns[-1]].values

# Appeler la fonction pour obtenir les coefficients et l'intercept
coefficients, intercept = elastic_net_regression(train_x, train_y, alpha=0.1, l1_ratio=0.5)




# Afficher les résultats
print("Coefficients:", coefficients)
print("Intercept:", intercept)


NaN values in X: False
Infinite values in X: False
NaN values in y: False
Infinite values in y: False


  coef_ -= learning_rate * (gradient_coef + l1_penalty + l2_penalty)


Coefficients: [nan nan nan nan nan nan nan nan nan nan nan]
Intercept: nan
