# Full example of the pipeline

This notebook combines all the previous notebooks into a single pipeline. It is a good starting point for understanding how to use the pipeline from end to end. For more details, work through the numbered notebooks in order.

### Load data and train the model

In [None]:
import sys

# Make the parent directory importable so the `beexai` imports in the next
# cell resolve when the notebook is run from its own folder.
# The membership guard keeps the cell idempotent: re-running it does not
# pile up duplicate "../" entries in sys.path.
if "../" not in sys.path:
    sys.path.append("../")

In [None]:
import torch

from beexai.dataset.dataset import Dataset
from beexai.dataset.load_data import load_data
from beexai.evaluate.metrics.get_results import get_all_metrics
from beexai.explanation.explaining import CaptumExplainer
from beexai.training.train import Trainer
from beexai.utils.path import create_dir
from beexai.utils.sampling import stratified_sampling
from beexai.utils.time_seed import set_seed

In [None]:
# --- Reproducibility and compute device ---
seed = 42
set_seed(seed)  # seeding helper from beexai.utils.time_seed, called before any data/model work
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # GPU when available, else CPU

# --- Experiment configuration ---
DATA_NAME = "kickstarter"
MODEL_NAME = "NeuralNetwork"

# create_dir comes from beexai.utils.path; presumably creates the directory if missing — TODO confirm
create_dir("../output/data")
CONFIG_PATH = f"config/{DATA_NAME}.yml"

# --- Load the dataset described by the YAML config ---
# NOTE(review): despite its name, `data_test` is the frame that gets split into
# train/test below, so it looks like the full dataset — confirm against load_data.
# `dataCleaner` is returned by load_data but not used in this cell.
data_test, target_col, task, dataCleaner = load_data(
    from_cleaned=True, config_path=CONFIG_PATH, keep_corr_features=True
)

# Preprocessing options forwarded to Dataset.get_train_test via `scaler_params`.
scale_params = {
    "x_num_scaler_name": "quantile_normal",
    "x_cat_encoder_name": "ordinalencoder",
    "y_scaler_name": "labelencoder",
    "cat_not_to_onehot": ["name"],
}

# --- Splits: hold out a test set (test_size=0.2), then carve a validation
# set out of the remaining training data (val_size=0.2) ---
data = Dataset(data_test, target_col)
X_train, X_test, y_train, y_test = data.get_train_test(
    test_size=0.2, scaler_params=scale_params
)
X_train, X_val, y_train, y_val = data.get_train_val(X_train, y_train, val_size=0.2)

# Passed as the network's "output_dim" in the training cell.
num_labels = data.get_classes_num(task)

In [None]:
### Train a NN model
# Network dimensions: one input per feature column, `num_labels` outputs.
NN_PARAMS = {"input_dim": X_train.shape[1], "output_dim": num_labels}

trainer = Trainer(MODEL_NAME, task, NN_PARAMS, device)
# Alternative (non-NN) model:
# trainer = Trainer("XGBRegressor", task, device=device)

# NOTE(review): the training data is passed as NumPy arrays (`.values`) while the
# validation data is passed as-is — confirm Trainer.train accepts both forms.
# The loss plot is written under ../output, so that directory must already exist.
trainer.train(
    X_train.values,
    y_train.values,
    loss_file="../output/loss.png",
    x_val=X_val,
    y_val=y_val,
)
trainer.model.eval()  # comment if not NN

# Report every test-set metric returned by the trainer, one per line.
metrics = trainer.get_metrics(X_test, y_test)
for metric_name, metric_value in metrics.items():
    print(f"{metric_name}: {metric_value}")

In [None]:
# Evaluate explanations on a sample of the test set rather than the whole set.
TEST_SIZE = 100
X_test_sampled, y_test_sampled = stratified_sampling(X_test, y_test, TEST_SIZE, task)

METHOD = "IntegratedGradients"  # Change to non-gradient-based methods for sklearn models

exp = CaptumExplainer(
    trainer.model, task=task, method=METHOD, sklearn=False, device=device
)  # change sklearn to True if not NN
exp.init_explainer()

# Model predictions on the sampled rows, passed to the metric computation below.
all_preds = trainer.model.predict(X_test_sampled.values)

# Left as the cell's last expression so the notebook displays whatever
# get_all_metrics returns.
get_all_metrics(
    X_test_sampled,
    all_preds,
    trainer.model,
    exp,
    baseline="zero",
    auc_metric="accuracy",  # change to mse for regression
    print_plot=False,
    save_path=None,
    device=device,
)