## Imports

In [None]:
import sys
from pathlib import Path
# Make the parent of the current working directory importable.
# NOTE(review): presumably this is the project root that holds the `ppm`
# package imported further down — confirm.
PROJECT_DIR = Path.cwd().parent
sys.path.append(str(PROJECT_DIR))

# basics
import numpy as np
import pandas as pd
import joblib
import shap

# viz
import matplotlib.pyplot as plt
import seaborn as sns

# utils
import os
from tqdm import tqdm
from ppm.nodes.metrics_calculator import (
    metrics_calculate,
    show_results
)
from ppm.nodes.process_outputs import (
    ranking_output
)
from ppm.nodes.feature_explainer import (
    tree_explainer,
    shap_explainer
)
from ppm.nodes.plots import (
    plot_true_vs_pred_multiple,
    plot_true_vs_pred
)

## Parameters

In [None]:
# Number of leading rows kept from the input CSV (passed to `.head()` when
# the input data is read).
n_samples_oos = 5

In [None]:
# --- Root folders (relative to the notebook's working directory) -----------
# Out-of-sample inputs; the original note lists "data" or "oos" as the
# options for this root.
path_root_oos = os.path.join('..', 'oos_n2')
# Model artifacts are always read from the regular data tree.
path_root_data = os.path.join("..", "data")

# --- Layer folders ----------------------------------------------------------
path_primary = os.path.join(path_root_oos, "03_primary")
path_model = os.path.join(path_root_data, "04_model")
path_encoders = os.path.join(path_model, "encoders")
path_model_final = os.path.join(path_model, "model")

# --- Individual files -------------------------------------------------------
file_path_input_data = os.path.join(path_primary, "data_input.csv")
file_path_encoder_order = os.path.join(path_encoders, "encoders_orders.csv")

## Read

In [None]:
# Table listing the encoders to apply; each row is later unpacked as
# (encoder name, path handed to joblib.load).
encoders_order = pd.read_csv(file_path_encoder_order)

In [None]:
# Read the PyCaret model artifacts: every entry of the model directory is
# loaded with joblib and stored under its file name up to the first dot.
model = {}
for model_name in os.listdir(path_model_final):
    model[model_name.partition(".")[0]] = joblib.load(
        os.path.join(path_model_final, model_name)
    )

# The estimator used for prediction is stored under "actual_estimator".
estimator = model["actual_estimator"]

In [None]:
# Load the encoders in the row order of `encoders_order`. Each row must hold
# exactly two fields: the encoder name and the location of its joblib file.
# Dictionary keys are the names truncated at the first dot.
encoders = {}
for encoder_name, encoder_url in encoders_order.values:
    encoders[encoder_name.partition(".")[0]] = joblib.load(encoder_url)

In [None]:
# Read the primary input table, using the first CSV column as the index.
# (The original note lists `file_path_not_outliers` as an alternative input.)
data_input = pd.read_csv(file_path_input_data, index_col=0)

# Keep only the first `n_samples_oos` rows.
data_input = data_input.head(n_samples_oos)

## Filter columns

In [None]:
# Column groups used when splitting the input table.
target = ["price"]             # column compared against the predictions
cd_setor_drop = ["cd_setor"]   # removed from the feature matrix
id_drop = ["ID"]               # row identifier, set aside before encoding/predicting

# Everything that must not be part of the feature matrix.
cols_drop = [*cd_setor_drop, *target]

In [None]:
# Feature matrix: the input table without the dropped columns (the ID column
# is still present at this point).
X = data_input.drop(columns=cols_drop)

# Identifier and target, kept side by side.
y = data_input[[*id_drop, *target]]

## Encoder process

In [None]:
# Apply every encoder, in dictionary order, to each data set held in
# `encoded_data`. The ID column is taken out of the frame before each
# transform and re-inserted as the first column of the transformed result,
# so the encoders never receive it.
encoded_data = {"oos": X.copy()}
for encoder_name, encoder_content in encoders.items():
    print("--- encoder: [{}]".format(encoder_name))
    for type_name, data_to_encoder in encoded_data.items():
        # pop() removes the column from the frame in place and returns it.
        id_row = data_to_encoder.pop(id_drop[0]).values
        encoded_data[type_name] = encoder_content.transform(data_to_encoder)
        encoded_data[type_name].insert(0, id_drop[0], id_row)

## Predictions

In [None]:
# Work on a copy of the encoded out-of-sample frame stored under "oos".
X_encoded = encoded_data["oos"].copy()

In [None]:
# Predict on the encoded features; the ID column is removed first so it is
# not passed to the estimator.
y_pred = estimator.predict(X_encoded.drop(columns=id_drop))

## Metrics

In [None]:
# Pair of [observed target values, predicted values] shared by the metric
# and ranking helpers.
args_preds = [y[target[0]].values, y_pred]

# Compute the metrics, then pass them to show_results under the "test" label.
metrics_pred = metrics_calculate(args_preds)
metrics_test = show_results(metrics_pred, "test")

In [None]:
# Output of ranking_output for the out-of-sample predictions, stored under
# the "oos" key.
data_values = {"oos": ranking_output(args_preds)}

## Plots

In [None]:
# Call the project's SHAP helper on the estimator (wrapped in a list) and the
# encoded features without the ID column; it returns a figure and an
# explainer object.
fig_shap, explainer = shap_explainer([estimator], X_encoded.drop(columns=id_drop))

In [None]:
# Ask the explainer for the SHAP values of the encoded features (ID column
# excluded). The result is not assigned; as the last expression of the cell
# it is shown as the cell output.
explainer.shap_values(X_encoded.drop(id_drop, axis = 1))

In [None]:
# True-vs-predicted figure for every entry of `data_values`, with the
# metrics and target name passed alongside.
# (A dashed `linestyle` was tried here previously and is left disabled.)
fig_oos_true_pred = plot_true_vs_pred_multiple(
    data_values, metrics_test, target, linewidth=2
)

In [None]:
# True-vs-predicted figure drawn with dashed lines on an 18x8 canvas; the
# second and third positional arguments are deliberately passed as None.
fig_true_vs_pred = plot_true_vs_pred(
    data_values, None, None, figsize=(18, 8), linestyle="dashed"
)