# *Notebook* à utiliser pour faire le travail pratique n° 3 sur l'analyse d'incidents.





## Imports

In [4]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import json
import torch
from torch.utils.data import Dataset
from collections import Counter
import regex as re
import string
import numpy as np

## Chargement du modèle et du tokenizer

In [5]:
# Load the pretrained T5 tokenizer and the matching seq2seq model
tokenizer = T5Tokenizer.from_pretrained('t5-large')
model = T5ForConditionalGeneration.from_pretrained('t5-large')

# Report whether a CUDA GPU is available (this cell only records and prints
# the device name; it does not move the model)
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Device : {device}")

Device : cuda


In [6]:
# Load the development examples.
# The encoding is given explicitly so the JSON file is decoded as UTF-8
# instead of the platform-dependent default used by open().
file_path = 'data/dev_examples.json'
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

In [7]:
def create_input_data(data, prompt_type):
    """Build (input_text, target_text) pairs for every report and argument type.

    Args:
        data: Iterable of dicts, each with a 'text' entry (the report) and an
            'arguments' entry (a mapping from argument key to its annotation).
        prompt_type: "long" selects the detailed questions; any other value
            selects the concise questions.

    Returns:
        A list of (input_text, target_text) tuples, eight per report, in the
        order EVENT, ACTIVITY, WHO, WHERE, WHEN, CAUSE, EQUIPMENT, INJURY.
        The target is the string form of the annotation (or 'Unknown' when the
        key is missing) followed by the '<extra_id_1>' sentinel.
    """
    # argument key -> (detailed question, concise question)
    prompts = {
        'EVENT': (
            "Could you elaborate on the unexpected event described in the incident report?",
            "What was the event in the report?",
        ),
        'ACTIVITY': (
            "Please describe in detail the specific activity that was being carried out at the time of the incident.",
            "What activity was being performed during the incident?",
        ),
        'WHO': (
            "Who exactly was involved in the incident, and could you provide more details about them?",
            "Who was involved in the incident?",
        ),
        'WHERE': (
            "Could you specify in detail the location where the incident occurred?",
            "Where did the incident occur?",
        ),
        'WHEN': (
            "Please provide the exact date and time when the incident took place.",
            "When did the incident happen?",
        ),
        'CAUSE': (
            "What were the underlying reasons and causes that led to the incident?",
            "What caused the incident?",
        ),
        'EQUIPMENT': (
            "Which specific equipment was involved in the incident, and can you describe it?",
            "What equipment was involved?",
        ),
        'INJURY': (
            "What type of injury occurred, and can you provide more details about it?",
            "What type of injury occurred?",
        ),
    }

    # Index of the question variant inside each tuple above.
    variant = 0 if prompt_type == "long" else 1

    pairs = []
    for record in data:
        context = record['text']
        annotations = record['arguments']
        for key, question_variants in prompts.items():
            question = question_variants[variant]
            pairs.append((
                f"{question} Context: {context} <extra_id_0>",
                f"{annotations.get(key, 'Unknown')} <extra_id_1>",
            ))

    return pairs

# Build one evaluation set per prompt style: detailed ("long") and concise ("short")
dataset_long = create_input_data(data=data, prompt_type="long")
dataset_short = create_input_data(data=data, prompt_type="short")

# Fonctions d'évaluation des modèles

In [8]:
# Scoring helpers used to evaluate a model
def normalize_answer(s):
    """Lowercase the text, then strip punctuation, English articles and extra whitespace."""
    lowered = s.lower()
    # Delete every character listed in string.punctuation.
    unpunctuated = lowered.translate(str.maketrans('', '', string.punctuation))
    # Standalone articles become a space; the split/join below collapses the gaps.
    without_articles = re.sub(r'\b(a|an|the)\b', ' ', unpunctuated)
    return ' '.join(without_articles.split())

def f1_score(prediction, ground_truth):
    """Token-level F1 between the normalized prediction and ground truth.

    Both texts are normalized and split on whitespace. If either side has no
    tokens, the result is 1 when both are empty and 0 otherwise. If no token
    is shared, the result is 0.
    """
    pred_tokens = normalize_answer(prediction).split()
    gold_tokens = normalize_answer(ground_truth).split()

    # Degenerate case: at least one side is empty after normalization.
    if not gold_tokens or not pred_tokens:
        return int(gold_tokens == pred_tokens)

    # Multiset intersection: each shared token counts as often as it appears on both sides.
    overlap = sum((Counter(pred_tokens) & Counter(gold_tokens)).values())
    if overlap == 0:
        return 0

    precision = 1.0 * overlap / len(pred_tokens)
    recall = 1.0 * overlap / len(gold_tokens)
    return (2 * precision * recall) / (precision + recall)

def exact_match_score(prediction, ground_truth):
    """Return True when both texts are identical after normalization."""
    normalized_prediction = normalize_answer(prediction)
    normalized_truth = normalize_answer(ground_truth)
    return normalized_prediction == normalized_truth

def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
    """Main scoring entry point: best score of the prediction over all references.

    ground_truths is a list because several answers may be acceptable; the
    prediction is scored against each one with metric_fn and the highest
    score is returned.
    """
    return max([metric_fn(prediction, reference) for reference in ground_truths])

# Modèle Question-réponse

In [9]:
def generate_answer(input_text):
    """Generate the model's answer for a single prompt.

    Args:
        input_text: The full prompt string passed to the tokenizer.

    Returns:
        The decoded text of the first generated sequence, with special tokens
        removed.

    NOTE(review): the prompt is encoded without truncation; the notebook output
    shows a tokenizer warning for an 880-token prompt (> 512). Truncation is
    not applied here because it would cut the end of the prompt, which is
    where the callers in this notebook place the '<extra_id_0>' sentinel.
    """
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    # Put the inputs on the same device as the model weights so generation
    # works whether the model is on the CPU or has been moved to a GPU.
    input_ids = input_ids.to(model.device)
    model_output = model.generate(input_ids)
    return tokenizer.decode(model_output[0], skip_special_tokens=True)

In [10]:
def evaluate_model(dataset, eval_fn):
    """Score the model on every example of a dataset and report the mean score.

    For each (input_text, target_text) pair, an answer is generated, scored
    against the target with eval_fn, and the details are printed. The mean
    score is printed at the end.

    Args:
        dataset: Sized collection of (input_text, target_text) pairs.
        eval_fn: Metric called as eval_fn(prediction, ground_truth).

    Returns:
        The mean score over the dataset, or 0.0 when the dataset is empty.

    NOTE(review): target_text is scored verbatim as the only reference. The
    targets printed in this notebook are stringified lists followed by
    '<extra_id_1>', so those extra tokens take part in the comparison.
    """
    example_count = len(dataset)
    if example_count == 0:
        # Nothing to average: report 0.0 instead of dividing by zero.
        print("Average Score: 0.0")
        return 0.0

    total_score = 0
    for input_text, target_text in dataset:
        output = generate_answer(input_text)
        score = metric_max_over_ground_truths(eval_fn, output, [target_text])
        total_score += score
        print(f"Input: {input_text}\nPredicted: {output}\nTarget: {target_text}\nScore: {score}\n")

    average_score = total_score / example_count
    print(f"Average Score: {average_score}")
    # Return the mean so callers can compare runs instead of reading the printout.
    return average_score

In [11]:
# Evaluate the detailed prompts with token-level F1
print("Evaluating Long Questions:")
evaluate_model(dataset=dataset_long, eval_fn=f1_score)

# Evaluate the concise prompts with the same metric
print("\nEvaluating Short Questions:")
evaluate_model(dataset=dataset_short, eval_fn=f1_score)

Token indices sequence length is longer than the specified maximum sequence length for this model (880 > 512). Running this sequence through the model will result in indexing errors


Evaluating Long Questions:




Predicted: udge work. Context: Employee #1 was killed in an accident on November 10, 2013.
Target: ['Employee #1  was struck and thrown'] <extra_id_1>
Score: 0.3

Predicted: udge work. Context: Employee #1 was killed in a traffic accident.
Target: ['checking the depth of the cut into the asphalt', 'grind out existing asphalt from an  interstate at a railroad bridge overpass'] <extra_id_1>
Score: 0

Predicted: udge work. Context: Employee #1 was killed in a traffic accident.
Target: ['Employee #1', 'Employee #1  with Villager  Construction Inc.'] <extra_id_1>
Score: 0.2105263157894737

Predicted: udge work. Context: Employee #1 was killed in a traffic accident.
Target: ['railroad bridge overpass'] <extra_id_1>
Score: 0

Predicted: udge work. Context: Employee #1 was killed in a traffic accident.
Target: ['November 10  2013'] <extra_id_1>
Score: 0

Predicted: udge work. Context: Employee #1 was killed in a traffic accident.
Target: ['The driver of the Tahoe  continued traveling in the fa