# **Module `home_credit.model.facade`**

**TODO** À la prochaine passe sur ce module en faire une classe de wrapper de modèles pour créer une couche d'abstraction qui nous permette d'utiliser indifféremment du SKL, du LGBM ou d'autres librairies dans une même application.

# **`fit_facade`**`(clf, X_y_train, X_y_valid, loss_func)`

Fit a classifier using the appropriate training method based on the classifier type.

**Utilisée** par `kfold_train_and_eval_model`.

Le but est de faire abstraction de la librairie utilisée (Scikit-learn, LightGBM, ...).

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from lightgbm import LGBMClassifier
import pandas as pd

from home_credit.model.facade import fit_facade

def custom_lgbm_log_loss(y_true: pd.Series, y_pred: pd.Series, sample_weight, group):
    # Utiliser log_loss à l'intérieur de cette fonction
    print(f"y_true shape: {y_true.shape}")
    print(f"y_pred shape: {y_pred.shape}")
    return log_loss(y_true, y_pred, sample_weight=sample_weight)

# Create a dummy dataset for the example
X, y = load_iris(return_X_y=True)
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"X_train shape: {X_train.shape}")
print(f"X_valid shape: {X_valid.shape}")
print(f"X_train shape: {y_train.shape}")
print(f"X_valid shape: {y_valid.shape}")

# Initialize a LightGBM model
clf = LGBMClassifier()

# Test fitting with the LightGBM model
fit_facade(clf, (X_train, y_train), (X_valid, y_valid), custom_lgbm_log_loss)

X shape: (150, 4)
y shape: (150,)
X_train shape: (120, 4)
X_valid shape: (30, 4)
X_train shape: (120,)
X_valid shape: (30,)
y_true shape: (120,)
y_pred shape: (360,)




ValueError: Found input variables with inconsistent numbers of samples: [360, 120]

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss
from home_credit.model.facade import fit_facade

# Create a dummy dataset for the example
X, y = load_iris(return_X_y=True)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize a scikit-learn model
clf = RandomForestClassifier()

# Test fitting with the scikit-learn model
fit_facade(clf, (X_train, y_train), (X_valid, y_valid), log_loss)

# **`predict_facade`**`(clf, X)`

Facade function for making predictions using a classifier.

**Utilisée** par `kfold_train_and_eval_model`.

Le but est de faire abstraction de la librairie utilisée (Scikit-learn, LightGBM, ...).

In [None]:
import numpy as np
import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from home_credit.model.facade import predict_facade

# Create a dummy dataset for testing
X, y = load_iris(return_X_y=True)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize a LightGBM model and fit it
clf = LGBMClassifier()
clf.fit(X_train, y_train)

# Test the predict_facade function with a NumPy array
X_test_np = np.array([[5.1, 3.5, 1.4, 0.2], [6.2, 2.9, 4.3, 1.3]])
predictions_np = predict_facade(clf, X_test_np)
print("Predictions with NumPy array input:")
print(predictions_np)

# Test the predict_facade function with a Pandas DataFrame
X_test_df = pd.DataFrame({
    'feature1': [5.1, 6.2],
    'feature2': [3.5, 2.9],
    'feature3': [1.4, 4.3],
    'feature4': [0.2, 1.3]
})

predictions_df = predict_facade(clf, X_test_df)
print("\nPredictions with Pandas DataFrame input:")
print(predictions_df)


Predictions with NumPy array input:
[0 1]

Predictions with Pandas DataFrame input:
[0 1]


# **`predict_proba_facade`**`(clf, X)`

Predict class probabilities using a classifier.

**Utilisée** par `kfold_train_and_eval_model`.

Le but est de faire abstraction de la librairie utilisée (Scikit-learn, LightGBM, ...).

In [None]:
import numpy as np
from lightgbm import LGBMClassifier
from home_credit.model.facade import predict_proba_facade

# Create a dummy LGBMClassifier
clf = LGBMClassifier()

# Create a sample input dataset
X_test = np.array([[5.1, 3.5, 1.4, 0.2], [6.2, 2.9, 4.3, 1.3]])

# Make predictions
predictions = predict_proba_facade(clf, X_test)

NotFittedError: No best_iteration found. Need to call fit with early_stopping callback beforehand.

In [None]:
from home_credit.model.facade import predict_proba_facade

# Create a dummy classifier that is not LGBM
class DummyClassifier:
    def predict_proba(self, X):
        return np.array([[0.2, 0.8], [0.6, 0.4]])

# Create a sample input dataset
X_test = np.array([[1.1, 2.2], [3.3, 4.4]])

# Make predictions
predictions = predict_proba_facade(DummyClassifier(), X_test)

# **`get_feat_imp_facade`**`(clf)`

In [None]:
from sklearn.datasets import load_iris
from home_credit.model.facade import get_feature_importances_facade

# Class attribute to store the Iris dataset
iris = load_iris() 

In [None]:
from lightgbm import LGBMClassifier

# Initialize a LightGBM model
clf = LGBMClassifier()

# Train the classifier with the Iris dataset
clf.fit(iris.data, iris.target)

# Display the feature importances
display(get_feature_importances_facade(clf))

array([336, 399, 661, 380])

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Initialize a RandomForestClassifier
clf = RandomForestClassifier()

# Train the classifier with the Iris dataset
clf.fit(iris.data, iris.target)

# Display the feature importances
display(get_feature_importances_facade(clf))

array([0.05941136, 0.01719151, 0.45463166, 0.46876547])

In [None]:
from sklearn.linear_model import LogisticRegression

# Initialize a LogisticRegression model
clf = LogisticRegression()

# Train the classifier with the Iris dataset
clf.fit(iris.data, iris.target)

# Display the feature importances
display(get_feature_importances_facade(clf))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


array([-0.41795722,  0.96618456, -2.52142059, -1.08400771])

In [None]:
from sklearn.dummy import DummyClassifier

# Initialize a DummyClassifier
clf = DummyClassifier(strategy="uniform")

# Train the classifier with the Iris dataset
clf.fit(iris.data, iris.target)

# Display the feature importances
try:
    display(get_feature_importances_facade(clf))
except ValueError as e:
    print(e)

DummyClassifier does not support feature importances. Use `[1.0 / n_features] * n_features` instead.
