In [1]:
import sys
import torch
import pickle
import os
from tqdm.notebook import tqdm

sys.path.insert(0, '..')
sys.path.insert(0, '../../')
sys.path.insert(0, '../../../')
sys.path.insert(0, '../../../../')

from joinLSTM.model import FullShared_Join_LSTM
from robustness.camargo_evaluation import evaluate_seq_processing
from robustness.robustness_metrics import save_chunk


In [2]:
# Load model
file_path_model = '../notebooks/training/Helpdesk/Helpdesk_camargo_leon.pkl'
output_dir = '../../../../evaluation_results/robustness/Helpdesk/redo_activity_camargo/'
model = FullShared_Join_LSTM.load(file_path_model)

# Load datasets
file_path_original = '../../../../encoded_data/helpdesk_all_5_test.pkl'
#file_path_perturbed = '../../../../encoded_data/helpdesk_large_perturbations_test.pkl'
file_path_perturbed = '../../../../encoded_data/helpdesk_small_perturbations_test.pkl'
#file_path_perturbed = '../../../../encoded_data/helpdesk_perturbed_5_test.pkl'
#file_path_perturbed = '../../../../encoded_data/helpdesk_all_5_test.pkl'
file_path_redo_activity = '../../../../encoded_data/helpdesk_redo_activity.pkl'
file_path_redo_activity_pert = '../../../../encoded_data/helpdesk_redo_activity_pert.pkl'


original_dataset = torch.load(file_path_original, weights_only=False)
perturbed_dataset = torch.load(file_path_perturbed, weights_only=False)

redo_activity_dataset = torch.load(file_path_redo_activity, weights_only=False)
redo_activity_pert_dataset = torch.load(file_path_redo_activity_pert, weights_only=False)


print(f"Original dataset loaded: {len(original_dataset)} cases")
print(f"Perturbed dataset loaded: {len(perturbed_dataset)} cases")

Data set categories:  ([('Activity', 16, {'Assign seriousness': 1, 'Closed': 2, 'Create SW anomaly': 3, 'DUPLICATE': 4, 'EOS': 5, 'INVALID': 6, 'Insert ticket': 7, 'RESOLVED': 8, 'Require upgrade': 9, 'Resolve SW anomaly': 10, 'Resolve ticket': 11, 'Schedule intervention': 12, 'Take in charge ticket': 13, 'VERIFIED': 14, 'Wait': 15}), ('Resource', 24, {'EOS': 1, 'Value 1': 2, 'Value 10': 3, 'Value 11': 4, 'Value 12': 5, 'Value 13': 6, 'Value 14': 7, 'Value 15': 8, 'Value 16': 9, 'Value 17': 10, 'Value 18': 11, 'Value 19': 12, 'Value 2': 13, 'Value 20': 14, 'Value 21': 15, 'Value 22': 16, 'Value 3': 17, 'Value 4': 18, 'Value 5': 19, 'Value 6': 20, 'Value 7': 21, 'Value 8': 22, 'Value 9': 23}), ('Variant index', 166, {'1.0': 1, '10.0': 2, '100.0': 3, '101.0': 4, '102.0': 5, '103.0': 6, '104.0': 7, '105.0': 8, '106.0': 9, '107.0': 10, '108.0': 11, '109.0': 12, '11.0': 13, '110.0': 14, '111.0': 15, '112.0': 16, '113.0': 17, '114.0': 18, '12.0': 19, '13.0': 20, '14.0': 21, '15.0': 22, '16.0



Original dataset loaded: 5192 cases
Perturbed dataset loaded: 5192 cases


In [3]:
# Import robustness metrics module
import importlib
import robustness.robustness_metrics
importlib.reload(robustness.robustness_metrics)
from robustness.robustness_metrics import save_chunk

print("Robustness metrics module imported")


Robustness metrics module imported


In [4]:
#create models


from src.reimplemented_comparable_approaches.camargo_LSTM_suffix_pred.robustness.camargo_evaluation import evaluate_with_predefined_prefixes


os.makedirs(output_dir, exist_ok=True)

save_every = 50
results = {}

# Create evaluation generators
# eval_original = evaluate_seq_processing(
#     model=model,
#     dataset=original_dataset,
#     device=torch.device("cpu"),
#     samples_per_case=20,
#     random_order=False
# )

# eval_perturbed = evaluate_seq_processing(
#     model=model,
#     dataset=original_dataset,
#     device=torch.device("cpu"),
#     samples_per_case=20,
#     random_order=False
# )
evaluate_with_predefined_prefixes_normal = evaluate_with_predefined_prefixes(
    model=model,
    dataset=original_dataset,  # Still needed for encoder_decoder and categories
    predefined_pairs=redo_activity_dataset,
    device=torch.device("cpu"),
    samples_per_case=20,
    random_order=False
)

evaluate_with_predefined_prefixes_pert = evaluate_with_predefined_prefixes(
    model=model,
    dataset=original_dataset,  # Still needed for encoder_decoder and categories
    predefined_pairs=redo_activity_pert_dataset,
    device=torch.device("cpu"),
    samples_per_case=20,
    random_order=False
)


print("Evaluation generators created")

Evaluation generators created


In [30]:
# Main evaluation loop

for i, ((case_name_orig, prefix_len_orig, prefix_orig, sampled_cets_orig, suffix_orig, mean_cet_orig),
        (case_name_pert, prefix_len_pert, prefix_pert, sampled_cets_pert, suffix_pert, mean_cet_pert)) in enumerate(
        tqdm(zip(evaluate_with_predefined_prefixes_normal, evaluate_with_predefined_prefixes_pert), 
        desc="Evaluating robustness")):

    
    #Store results
    key = (case_name_orig, prefix_len_orig)
    results[key] = {
        'original': (prefix_orig, suffix_orig, mean_cet_orig, sampled_cets_orig),
        'perturbed': (prefix_pert, suffix_pert, mean_cet_pert, sampled_cets_pert)
    }
    
    if (i + 1) % save_every == 0:
        save_chunk(results, i, output_dir)
        results = {}

if len(results):
    save_chunk(results, i, output_dir)

print("Robustness evaluation completed!")

Evaluating robustness: 0it [00:00, ?it/s]

  0%|          | 0/3091 [00:00<?, ?it/s]

  0%|          | 0/3091 [00:00<?, ?it/s]

Saved 50 results to ../../../../evaluation_results/robustness/Helpdesk/redo_activity_camargo/robustness_results_part_050.pkl
Saved 50 results to ../../../../evaluation_results/robustness/Helpdesk/redo_activity_camargo/robustness_results_part_100.pkl
Saved 50 results to ../../../../evaluation_results/robustness/Helpdesk/redo_activity_camargo/robustness_results_part_150.pkl
Saved 50 results to ../../../../evaluation_results/robustness/Helpdesk/redo_activity_camargo/robustness_results_part_200.pkl
Saved 50 results to ../../../../evaluation_results/robustness/Helpdesk/redo_activity_camargo/robustness_results_part_250.pkl
Saved 50 results to ../../../../evaluation_results/robustness/Helpdesk/redo_activity_camargo/robustness_results_part_300.pkl
Saved 50 results to ../../../../evaluation_results/robustness/Helpdesk/redo_activity_camargo/robustness_results_part_350.pkl
Saved 50 results to ../../../../evaluation_results/robustness/Helpdesk/redo_activity_camargo/robustness_results_part_400.pkl


In [5]:
# Load all saved chunks and combine them
all_results = {}
# Get all chunk files and sort them
chunk_files = [f for f in os.listdir(output_dir) if f.startswith('robustness_results_part_')]
chunk_files.sort()  # Ensure correct order

print(f"Found {len(chunk_files)} chunk files")

for chunk_file in chunk_files:
    chunk_path = os.path.join(output_dir, chunk_file)
    print(f"Loading {chunk_file}...")
    with open(chunk_path, 'rb') as f:
        chunk_results = pickle.load(f)
        all_results.update(chunk_results)
        print(f"  Added {len(chunk_results)} results from {chunk_file}")

# Also add the final results if any (e.g. from a still-running evaluation loop)
if 'results' in locals() and len(results) > 0:
    print(f"Adding final {len(results)} results...")
    all_results.update(results)

print(f"\nTotal results loaded: {len(all_results)}")

# Save combined results into a single pickle file
combined_results_path = os.path.join(output_dir, 'robustness_results.pkl')
with open(combined_results_path, 'wb') as f:
    pickle.dump(all_results, f)

print(f"Combined results saved to {combined_results_path}")



Found 62 chunk files
Loading robustness_results_part_050.pkl...
  Added 50 results from robustness_results_part_050.pkl
Loading robustness_results_part_100.pkl...
  Added 50 results from robustness_results_part_100.pkl
Loading robustness_results_part_1000.pkl...
  Added 50 results from robustness_results_part_1000.pkl
Loading robustness_results_part_1050.pkl...
  Added 50 results from robustness_results_part_1050.pkl
Loading robustness_results_part_1100.pkl...
  Added 50 results from robustness_results_part_1100.pkl
Loading robustness_results_part_1150.pkl...
  Added 50 results from robustness_results_part_1150.pkl
Loading robustness_results_part_1200.pkl...
  Added 50 results from robustness_results_part_1200.pkl
Loading robustness_results_part_1250.pkl...
  Added 50 results from robustness_results_part_1250.pkl
Loading robustness_results_part_1300.pkl...
  Added 50 results from robustness_results_part_1300.pkl
Loading robustness_results_part_1350.pkl...
  Added 50 results from robust