In [None]:
import os
import sys

sys.path.append('src/')

import logging
import sys

import hydra
import mlflow
import numpy as np
import pandas as pd
import torch
from joblib import Memory
from omegaconf import DictConfig, OmegaConf

from evaluators import torchFastTextEvaluator
from framework_classes import (
    DATA_GETTER,
    DATASETS,
    LOSSES,
    MODELS,
    MODULES,
    OPTIMIZERS,
    PREPROCESSORS,
    SCHEDULERS,
    TOKENIZERS,
    TRAINERS,
)
from utils.data import get_df_naf, get_file_system, get_processed_data, get_test_data, get_Y
from utils.mappings import mappings
from utils.mlflow import create_or_restore_experiment

%load_ext autoreload
%autoreload 2

In [None]:
# --- Notebook parameters ------------------------------------------------------
# Nomenclature revision and model family used throughout this notebook.
revision, model_class = "NAF2008", "torchFastText"

# Start month / year stored under cfg_dict["data"] and forwarded to the data getters.
start_month, start_year = 1, 2018

# Name of the main free-text column.
text_feature = "libelle"

# Additional textual columns.
textual_features_1, textual_features_2 = "NAT_LIB", "AGRI"

# Categorical columns.
(
    categorical_features_1,
    categorical_features_2,
    categorical_features_3,
    categorical_features_4,
    categorical_features_5,
) = ("TYP", "NAT", "SRF", "CJ", "CRT")

In [None]:
# Configuration dictionary shared by the data getters, the preprocessor and the
# evaluator calls further down in the notebook.
cfg_dict = {
    "data": {
        "sirene": "sirene_4",
        "start_month": start_month,
        "start_year": start_year,
        "revision": revision,
        "text_feature": text_feature,
        "textual_features": [textual_features_1, textual_features_2],
        "categorical_features": [
            categorical_features_1,
            categorical_features_2,
            categorical_features_3,
            categorical_features_4,
            categorical_features_5,
        ],
    },
    "model": {
        # Reuse the notebook-level parameter rather than repeating the literal,
        # so the two can never silently diverge.
        "name": model_class,
        "preprocessor": "PyTorch",
    },
}

# Shortcut to the data section (same object as cfg_dict["data"], not a copy).
cfg_dict_data = cfg_dict["data"]

# Both lookups read the revision from the config so they always agree.
df_naf = get_df_naf(revision=cfg_dict_data["revision"])
Y = get_Y(revision=cfg_dict_data["revision"])

In [None]:
# Pick the preprocessor registered under the configured name and build it from
# the whole configuration dictionary.
preprocessor_name = cfg_dict["model"]["preprocessor"]
preprocessor = PREPROCESSORS[preprocessor_name](cfg_dict)
preprocessor

In [None]:

# Fetch the test data, forwarding every entry of the data configuration as a
# keyword argument together with the target column name.
df_test_ls = get_test_data(y=Y, **cfg_dict_data)


In [None]:

# Keyword arguments handed to the preprocessor alongside the test frame.
preprocess_kwargs = {
    "df_naf": df_naf,
    "y": Y,
    "text_feature": cfg_dict_data["text_feature"],
    "textual_features": cfg_dict_data["textual_features"],
    "categorical_features": cfg_dict_data["categorical_features"],
    "test_size": 0.1,
}

# The preprocessor output is concatenated row-wise into a single frame
# (presumably it returns several splits because of `test_size` — TODO confirm).
# NOTE(review): this rebinds `df_test_ls`, so re-running this cell alone would
# preprocess already-preprocessed data; re-run the loading cell first.
df_test_ls = pd.concat(preprocessor.preprocess(df_test_ls, **preprocess_kwargs), axis=0)

In [None]:
# get_processed_data() is unpacked into exactly three values; only the third is
# kept as `df_test`, the first two are discarded.
# NOTE(review): presumably the discarded values are the train/validation splits — confirm.
_, _, df_test = get_processed_data()


In [None]:
# Show `df_test` as the cell output for a quick visual check.
df_test

In [None]:
import mlflow

# Tracking server and experiment that hold the run under evaluation.
tracking_uri = "https://projet-ape-mlflow.user.lab.sspcloud.fr/"
experiment_name = 'model_comparison_s4'
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)

# URI of the model artifact logged by that run.
logged_model = 'runs:/45afc22a961a4cdcb282aad93693326d/model'

# Load the artifact through MLflow's PyTorch flavor (not the generic pyfunc
# loader); the resulting object is passed to the evaluator below.
module = mlflow.pytorch.load_model(logged_model)

In [None]:
# Wrap the loaded model in the evaluator used by all remaining cells.
evaluator = torchFastTextEvaluator(module)

# Arguments of the test run, gathered in one place for readability.
launch_kwargs = {
    "text_feature": cfg_dict_data["text_feature"],
    "categorical_features": cfg_dict_data["categorical_features"],
    "Y": Y,
    "batch_size": 256,
    "num_workers": 72,
}
test_res = evaluator.launch_test(df_test_ls, **launch_kwargs)

test_res

In [None]:
# Aggregated predictions on the preprocessed test frame; the data configuration
# is forwarded entry by entry as keyword arguments.
df_res = evaluator.get_aggregated_preds(
    df=df_test_ls,
    Y=Y,
    batch_size=256,
    num_workers=72,
    **cfg_dict_data,
)
df_res

In [None]:
from sklearn.metrics import RocCurveDisplay, roc_auc_score

# Compute the predictions in this cell: `predictions` is otherwise only created
# by a later cell, so on a fresh kernel (Restart & Run All) this cell would fail
# with a NameError.
predictions = evaluator.get_preds(
    df_test_ls,
    **cfg_dict["data"],
    Y=Y,
    batch_size=256,
    num_workers=72,
)

# One-vs-rest ROC AUC, one value per class (average=None).
# .detach().cpu() makes the NumPy conversion safe whatever device the tensor is
# on and whether or not it tracks gradients.
auc = roc_auc_score(
    df_test_ls[Y].values,
    predictions.detach().cpu().numpy(),
    multi_class='ovr',
    average=None,
)

# Index of the class with the lowest AUC, and that AUC.
worst_class = auc.argmin()
worst_class, auc[worst_class]

In [None]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

predictions = evaluator.get_preds(
    df_test_ls,
    **cfg_dict["data"],
    Y=Y,
    batch_size=256,
    num_workers=72,
)

# Per-row scores sorted in decreasing order; column 0 is the top-1 score.
sorted_confidence = predictions.sort(descending=True).values
confidence_score = sorted_confidence[:, 0]

# Convert the labels to a tensor explicitly (on the predictions' device) rather
# than relying on the implicit tensor/ndarray comparison, which round-trips
# through NumPy and breaks for GPU or gradient-tracking tensors.
true_labels_ls = torch.as_tensor(df_test_ls[Y].values, device=predictions.device)
well_predicted = (predictions.argmax(dim=1) == true_labels_ls).float()
print(well_predicted.mean())

# Build the plotting frame from NumPy arrays; .detach().cpu() keeps the
# conversion valid even if `predictions` tracks gradients or lives on a GPU.
df = pd.DataFrame({
    "confidence_score": confidence_score.detach().cpu().numpy(),
    "well_predicted": well_predicted.cpu().numpy(),  # 1.0 = correct, 0.0 = wrong
})

# Histogram of the top-1 score, split by whether the prediction was correct.
plt.figure(figsize=(10, 6))
sns.histplot(data=df, x="confidence_score", bins=100, hue="well_predicted", stat="percent")


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchmetrics

predictions = evaluator.get_preds(
    df_test,
    **cfg_dict["data"],
    Y=Y,
    batch_size=256,
    num_workers=72,
)

# Number of equal-width confidence bins on [0, 1]
n_bins = 100

# NOTE(review): not used anywhere in this cell; kept in case later cells read it.
num_classes = 732  # Adjust based on your setup

# Highest score per sample and the class it belongs to. No conversion is applied
# here, so `predictions` is assumed to already hold probabilities — TODO confirm.
confidences, predicted_classes = torch.max(predictions, dim=1)
true_labels = torch.tensor(df_test[Y].values)  # True labels

# Bin edges from 0 to 1
bin_boundaries = torch.linspace(0, 1, n_bins + 1)
bin_accs = []
bin_confidences = []

# Accuracy and mean confidence per non-empty bin
for i in range(n_bins):
    lower, upper = bin_boundaries[i], bin_boundaries[i + 1]
    if i == n_bins - 1:
        # Close the last bin on the right: with half-open bins everywhere, the
        # samples whose confidence is exactly 1.0 were silently dropped.
        bin_mask = (confidences >= lower) & (confidences <= upper)
    else:
        bin_mask = (confidences >= lower) & (confidences < upper)
    if bin_mask.any():
        bin_acc = (predicted_classes[bin_mask] == true_labels[bin_mask]).float().mean()
        bin_conf = confidences[bin_mask].mean()
        bin_accs.append(bin_acc.item())
        bin_confidences.append(bin_conf.item())

# Convert to numpy for plotting
bin_confidences = np.array(bin_confidences)
bin_accs = np.array(bin_accs)

# Reliability diagram: per-bin accuracy against mean confidence, with the
# diagonal as the perfectly calibrated reference.
plt.figure(figsize=(6, 6))
plt.plot([0, 1], [0, 1], linestyle="--", color="black", label="Perfect Calibration")
plt.plot(bin_confidences, bin_accs, marker="o", color="blue", label="Model Calibration")
plt.xlabel("Confidence")
plt.ylabel("Accuracy")
plt.title("Reliability Diagram")
plt.legend()
plt.grid()
plt.show()
