In [1]:
import os
import sys

sys.path.append(os.path.abspath("src"))

import logging
import sys

import hydra
import mlflow
import numpy as np
import pandas as pd
import torch
from joblib import Memory
from omegaconf import DictConfig, OmegaConf

from evaluators import Evaluator, torchFastTextEvaluator
from framework_classes import (
        DATA_GETTER,
        DATASETS,
        LOSSES,
        MODELS,
        MODULES,
        OPTIMIZERS,
        PREPROCESSORS,
        SCHEDULERS,
        TOKENIZERS,
        TRAINERS,
)
from models import FastTextWrapper
from utils.data import PATHS, get_df_naf, get_file_system, get_processed_data, get_test_data, get_Y
from utils.mappings import mappings
from utils.mlflow import create_or_restore_experiment
from utils.validation_viz import (
        calibration_curve,
        confidence_histogram,
        get_automatic_accuracy,
        sort_and_get_pred,
)
from dotenv import load_dotenv
load_dotenv()

%load_ext autoreload
%autoreload 2

  func_info = _get_func_info_if_type_hint_supported(predict_attr)


In [2]:
# Notebook parameters. Apart from `model_class`, they are all assembled into
# cfg_dict["data"] in the next cell.
revision = "NAF2008"  # passed as `revision=` to get_df_naf, get_Y and get_processed_data
model_class = "torchFastText"  # NOTE(review): not referenced by any later cell in this notebook — confirm it is still needed
start_month = 1
start_year = 2018
text_feature = "libelle"  # stored under cfg_dict["data"]["text_feature"]
# Collected into cfg_dict["data"]["textual_features"].
textual_features_1 = "NAT_LIB"
textual_features_2 = "AGRI"
# Collected into cfg_dict["data"]["categorical_features"], in this order.
categorical_features_1 = "TYP"
categorical_features_2 = "NAT"
categorical_features_3 = "SRF"
categorical_features_4 = "CJ"
categorical_features_5 = "CRT"

In [3]:
# Data-side configuration: forwarded as keyword arguments to the data helpers below.
cfg_dict_data = {
    "sirene": "sirene_4",
    "start_month": start_month,
    "start_year": start_year,
    "revision": revision,
    "text_feature": text_feature,
    "textual_features": [textual_features_1, textual_features_2],
    "categorical_features": [
        categorical_features_1,
        categorical_features_2,
        categorical_features_3,
        categorical_features_4,
        categorical_features_5,
    ],
}

# Full configuration; `cfg_dict["data"]` and `cfg_dict_data` are the same dict object.
cfg_dict = {
    "data": cfg_dict_data,
    "model": {
        "name": "PyTorch",
        "preprocessor": "PyTorch",
        "test_params": {
            "test_batch_size": 256,
            "run_id": 'runs:/45afc22a961a4cdcb282aad93693326d/model',
        },
    },
}

df_naf = get_df_naf(revision=cfg_dict_data["revision"])
Y = get_Y(revision=cfg_dict_data["revision"])
df_test_ls = get_test_data(**cfg_dict_data, y=Y)

In [4]:
# Look up the loader registered for the configured "sirene" key, then call it
# with the whole data configuration as keyword arguments.
sirene_data_getter = DATA_GETTER[cfg_dict_data["sirene"]]
df_s3, df_s4 = sirene_data_getter(**cfg_dict_data)

In [5]:
# Train / validation / test frames for the configured revision.
processed_splits = get_processed_data(revision=cfg_dict_data["revision"])
df_train, df_val, df_test = processed_splits

In [6]:
# Load the latest registered version of the PyTorch model from the MLflow registry.
model_name = "FastText-pytorch"
module = mlflow.pytorch.load_model(f"models:/{model_name}/latest")

# Leave one CPU free for the main process. os.cpu_count() is documented to
# return None when the count cannot be determined, so fall back to 1 CPU
# (i.e. 0 workers) instead of raising TypeError on `None - 1`.
available_cpus = os.cpu_count() or 1
num_workers = max(available_cpus - 1, 0)

# Predict on the test split with the loaded module.
torch_preds = torchFastTextEvaluator(module).get_preds(
    df=df_test,
    Y=Y,
    **cfg_dict["data"],
    batch_size=cfg_dict["model"]["test_params"]["test_batch_size"],
    num_workers=num_workers,
)
    

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

/home/onyxia/work/codif-ape-train/.venv/lib/python3.12/site-packages/pytorch_lightning/utilities/parsing.py:209: Attribute 'accuracy_fn' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['accuracy_fn'])`.
/home/onyxia/work/codif-ape-train/.venv/lib/python3.12/site-packages/pytorch_lightning/utilities/parsing.py:209: Attribute 'original_model' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['original_model'])`.
You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
2025-04-08 15:32:19 - torchFastText.datasets.dataset - Creating DataLoader with 71 workers.


Predicting: |          | 0/? [00:00<?, ?it/s]


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

In [None]:
fs = get_file_system()

# Both prediction files share the last path registered for this revision, minus
# its final 8 characters (presumably the ".parquet" extension — TODO confirm).
predictions_prefix = PATHS[cfg_dict["data"]["revision"]][-1][:-8]
torch_predictions_path = predictions_prefix + "_predictions_torch.parquet"
ft_predictions_path = predictions_prefix + "_predictions_ft.parquet"

df_res = pd.read_parquet(torch_predictions_path, filesystem=fs)
df_res_ft = pd.read_parquet(ft_predictions_path, filesystem=fs)

In [None]:
# Both prediction frames store their top-1 level-5 prediction under the same column name.
top1_prediction_column = "APE_NIV5_pred_k1"

ground_truth = df_res["APE_NIV5"]
torchft_preds = df_res[top1_prediction_column]
fasttext_preds_labels = df_res_ft[top1_prediction_column]

In [None]:
# Printed in order: share of rows where the two models agree, share where the
# torch model matches the ground truth, share where FastText matches it.
compared_pairs = (
    (torchft_preds, fasttext_preds_labels),
    (torchft_preds, ground_truth),
    (ground_truth, fasttext_preds_labels),
)
for left, right in compared_pairs:
    print((left == right).mean())

## Confidence score distributions

In [None]:
# Rank the torch predictions, then plot the confidence histogram for the test split.
sorted_predictions = sort_and_get_pred(predictions=torch_preds, df=df_test, Y=Y)
(
    sorted_confidence,
    well_predicted,
    predicted_confidence,
    predicted_class,
    true_values,
) = sorted_predictions

fig1 = confidence_histogram(sorted_confidence, well_predicted, df=df_test)

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt

# `fasttext_preds_scores` was never assigned anywhere in this notebook, so this
# cell failed with NameError on a fresh kernel.
# NOTE(review): it is rebuilt here from the `proba_k1` column of the FastText
# prediction file, assumed to be the top-1 confidence score — confirm this is
# the score the original analysis used.
fasttext_preds_scores = df_res_ft["proba_k1"].to_numpy()

# Use a FastText-specific name so the `well_predicted` returned by
# sort_and_get_pred for the torch model is not overwritten.
ft_well_predicted = ground_truth == fasttext_preds_labels
print(ft_well_predicted.shape)
df = pd.DataFrame(
    {
        "confidence_score": fasttext_preds_scores.reshape((-1,)),
        "well_predicted": ft_well_predicted,
    }
)

# Histogram of FastText confidence scores, split by prediction correctness.
# Draw explicitly on the axes created here rather than the implicit current axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=df, x="confidence_score", bins=100, hue="well_predicted", stat="percent", ax=ax)

## Accuracy on automatically coded samples vs rate of automatic coding

In [None]:
thresholds = np.linspace(0, 1, 100)

# Torch score: top-1 confidence minus the sum of the next four, clamped to [0, 1].
torchft_scores = sorted_confidence[:, 0] - sorted_confidence[:, 1:5].sum(axis=1)
torchft_plot = get_automatic_accuracy(
    thresholds,
    torch.clamp(torchft_scores, 0, 1).numpy(),
    predicted_class.numpy(),
    true_values,
)

# `fasttext_preds_scores` was never assigned anywhere in this notebook (NameError
# on a fresh kernel).
# NOTE(review): rebuilt here from the `proba_k1` column of the FastText
# prediction file, assumed to be the top-1 confidence score — confirm.
fasttext_preds_scores = df_res_ft["proba_k1"].to_numpy()

# `fasttext_preds_labels` is a pandas Series, which has no `.reshape`; convert
# it to a NumPy array before flattening.
ft_plot = get_automatic_accuracy(
    thresholds,
    np.clip(fasttext_preds_scores.reshape(-1), 0, 1),
    np.asarray(fasttext_preds_labels).reshape(-1),
    ground_truth.values,
)


In [None]:
# file.py
import numpy as np
import plotly.graph_objects as go

# Requires `thresholds`, `torchft_plot` and `ft_plot` from the previous cell.
# Element [0] of each result is plotted on the x axis and element [1] on the y axis.

# Drop the points whose x value is 0.
mask_torchft = torchft_plot[0] > 0
mask_ft = ft_plot[0] > 0

# Create the Plotly figure
fig = go.Figure()

# Add traces for torchft
fig.add_trace(go.Scatter(
    x=torchft_plot[0][mask_torchft],
    y=torchft_plot[1][mask_torchft],
    mode='markers',
    hoverinfo='text',
    text=[f'Threshold: {thresh}' for thresh in thresholds[mask_torchft]],
    name='torchft'
))

# Build one hover label per point. The previous version called round() on the
# whole masked arrays inside the comprehension, so it never produced the
# per-point "(x,y), Threshold: t" label it was written to display.
ft_x = ft_plot[0][mask_ft]
ft_y = ft_plot[1][mask_ft]
ft_hover_text = [
    f'({round(float(x_val), 3)},{round(float(y_val), 3)}), Threshold: {thresh}'
    for x_val, y_val, thresh in zip(ft_x, ft_y, thresholds[mask_ft])
]

# Add traces for ft
fig.add_trace(go.Scatter(
    x=ft_x,
    y=ft_y,
    mode='markers',
    hoverinfo='text',
    text=ft_hover_text,
    name='ft'
))

# Update layout
fig.update_layout(
    xaxis_title="Pourcentage de codif automatique",
    yaxis_title="Accuracy",
    legend=dict(
        x=1,
        y=1,
        traceorder="normal",
        font=dict(
            family="sans-serif",
            size=12,
            color="black"
        ),
        bgcolor="LightSteelBlue",
        bordercolor="Black",
        borderwidth=2
    ),
    width=800,  # Set the figure width
    height=600   # Set the figure height
)

# Show the plot
fig.show()

## Disagreements

In [None]:
# Rows where the torch model is wrong while FastText is right.
torch_is_wrong = torchft_preds != ground_truth
fasttext_is_right = fasttext_preds_labels == ground_truth
filter_mask = torch_is_wrong & fasttext_is_right

ground_truth_disagreements = ground_truth[filter_mask]
torchft_disagreements = torchft_preds[filter_mask]
fasttext_disagreements = pd.Series(fasttext_preds_labels)[filter_mask]

disagreements = pd.DataFrame(
    {
        "ground_truth": ground_truth_disagreements,
        "torchft": torchft_disagreements,
        "fasttext": fasttext_disagreements,
    }
)

# Ten most frequent (ground truth, torch prediction, FastText prediction) triples.
top_disagreements = disagreements.value_counts().head(10)
print(top_disagreements)

In [None]:
def generate_text(check_df, filename="output.txt", mode="a"):
    """Write a human-readable report of example rows to a text file.

    The first row of ``check_df`` provides a header (true code and label, both
    models' top-1 prediction and whether each one is correct). Every row then
    contributes its ``libelle`` and its categorical variables. A separator
    block is always written at the end, even when ``check_df`` is empty.

    Args:
        check_df: DataFrame with columns ``APE_NIV5``, ``LIB_NIV5``,
            ``APE_NIV5_pred_k1_x``, ``LIB_NIV5_pred_k1_x``,
            ``APE_NIV5_pred_k1_y``, ``LIB_NIV5_pred_k1_y``, ``libelle``,
            ``TYP``, ``NAT``, ``SRF``, ``CJ`` and ``CRT``. The ``_x`` columns
            are reported as TorchFastText, the ``_y`` columns as FastText.
        filename: Path of the text file to write.
        mode: Mode passed to ``open`` (``'a'`` appends, ``'w'`` overwrites).
    """
    # Explicit UTF-8 so accented text is written identically regardless of the
    # platform's default encoding.
    with open(filename, mode, encoding="utf-8") as file:
        # The header is keyed on the position of the row, not on its index
        # label, so it is still written when the index was not reset to 0..n-1.
        for position, (_, row) in enumerate(check_df.iterrows()):
            if position == 0:
                torchft_flag = "PREDICTION CORRECTE" if row['APE_NIV5'] == row['APE_NIV5_pred_k1_x'] else "PREDICTION INCORRECTE"
                ft_flag = "PREDICTION CORRECTE" if row['APE_NIV5'] == row['APE_NIV5_pred_k1_y'] else "PREDICTION INCORRECTE"

                file.write(f"APE_NIV5 Code: {row['APE_NIV5']}\n")
                file.write(f"LIB_NIV5: {row['LIB_NIV5']}\n")
                file.write(f"TorchFastText Prediction - Code: {row['APE_NIV5_pred_k1_x']}, Label: {row['LIB_NIV5_pred_k1_x']}- {torchft_flag}\n")
                file.write(f"FastText Prediction - Code: {row['APE_NIV5_pred_k1_y']}, Label: {row['LIB_NIV5_pred_k1_y']} - {ft_flag}\n")

                file.write("-" * 50 + "\n")
                file.write("Exemple de libellés :\n")
            file.write(f"{row['libelle']}\n")
            file.write(f"- TYP: {row['TYP']}, NAT: {row['NAT']}, SRF: {row['SRF']}, CJ: {row['CJ']}, CRT: {row['CRT']}\n")
        # Trailing separator between reports.
        file.write("\n")
        file.write("=" * 75 + "\n")
        file.write("\n")

In [None]:
# For every key of `mappings`, the same mapping with keys and values swapped.
INV_MAPPINGS = {
    key: {value: original_key for original_key, value in forward_mapping.items()}
    for key, forward_mapping in mappings.items()
}

In [None]:
columns_filter = ["libelle", "APE_NIV5", "LIB_NIV5", "APE_NIV5_pred_k1", "LIB_NIV5_pred_k1", "proba_k1"]
cat_var = ["TYP", "NAT", "SRF", "CJ", "CRT"]
filename = "torchIncorrect_ftCorrect.txt"
n_examples = 5  # maximum number of example rows written per disagreement pattern

df_torchft = df_res[columns_filter + cat_var]
df_ft = df_res_ft[columns_filter]

# Start from a fresh file: with an empty frame generate_text only writes its separator.
generate_text(pd.DataFrame(), filename=filename, mode='w')

# One report per disagreement pattern, for the ten most frequent patterns.
for (gt, torchft_code, ft_code), _count in disagreements.value_counts().head(10).items():
    torch_rows = df_torchft[(df_torchft["APE_NIV5"] == gt) & (df_torchft["APE_NIV5_pred_k1"] == torchft_code)]
    ft_rows = df_ft[(df_ft["APE_NIV5"] == gt) & (df_ft["APE_NIV5_pred_k1"] == ft_code)]
    # Shared prediction columns get the `_x` (torch) and `_y` (FastText) suffixes from the merge.
    merged = torch_rows.merge(ft_rows, on=["libelle", "APE_NIV5", "LIB_NIV5"], how="inner")

    # Cap the sample size at the number of matching rows: `.sample(5)` raises
    # ValueError when fewer than 5 rows are available. The fixed random_state
    # makes the exported examples reproducible across runs.
    check = merged.sample(n=min(n_examples, len(merged)), random_state=0).reset_index()

    # Map categorical values through the inverse mappings (SRF is written as is).
    for key in cat_var:
        if key != "SRF":
            check[key] = check[key].map(INV_MAPPINGS[key].get)
    generate_text(check, filename=filename)

## Multi-level accuracy

In [None]:
# Preview a few rows instead of rendering the whole prediction frame.
df_res.head()

In [None]:
import pandas as pd


# Calculate accuracy for each level
def calculate_accuracy(ground_truth_df, predictions_df, level):
    """Return the share of rows whose top-1 prediction matches the truth at one level.

    Args:
        ground_truth_df: DataFrame holding the true codes in column ``APE_NIV{level}``.
        predictions_df: DataFrame holding the predicted codes in column
            ``APE_NIV{level}_pred_k1``.
        level: Level number used to build both column names.

    Returns:
        Number of matching rows divided by the number of rows in ``ground_truth_df``.
    """
    true_codes = ground_truth_df[f'APE_NIV{level}']
    predicted_codes = predictions_df[f'APE_NIV{level}_pred_k1']

    is_correct = true_codes == predicted_codes
    return is_correct.sum() / len(ground_truth_df)

# Accuracy at levels 1 to 5 for each model, using the truth columns of df_res.
accuracies_torch = {level: calculate_accuracy(df_res, df_res, level) for level in range(1, 6)}
accuracies_ft = {level: calculate_accuracy(df_res, df_res_ft, level) for level in range(1, 6)}
# The torch dictionary is named `accuracies_torch`; the previous `accuracies`
# was never defined and raised NameError.
accuracies_torch, accuracies_ft


In [None]:
# Side-by-side accuracy table: 'Model 1' is the torch model, 'Model 2' is FastText.
# The torch dictionary is `accuracies_torch`; `accuracies` was never defined.
df = pd.DataFrame({
    'Model 1': accuracies_torch,
    'Model 2': accuracies_ft
})

# Rename the index to reflect levels
df.index.name = 'Level'

df

In [None]:
# "Controlled error": restricted to rows mispredicted at level 5, how often is
# the prediction still right at each level?
# For the torch model the level-5 entry is 0 by construction, since the mask
# keeps exactly the rows of df_res whose level-5 prediction differs from the truth.
levels = range(1, 6)

filter_mask = (torchft_preds != ground_truth)
torch_errors = df_res[filter_mask]
accuracies_torch_errors = {
    level: calculate_accuracy(torch_errors, torch_errors, level) for level in levels
}

filter_mask = (fasttext_preds_labels != ground_truth)
ft_errors = df_res_ft[filter_mask]
accuracies_ft_errors = {
    level: calculate_accuracy(ft_errors, ft_errors, level) for level in levels
}

accuracies_torch_errors, accuracies_ft_errors