# *Notebook* à utiliser pour faire le travail pratique # 3 sur l'analyse d'incidents.





## Imports

In [23]:
from transformers import BertModel, BertTokenizer
from transformers import BertForQuestionAnswering
from transformers import BertForMaskedLM, pipeline
import json
import torch
from torch.utils.data import Dataset
from collections import Counter
import regex as re
import string
import numpy as np

## Chargements Modèles et Tokenizers

In [24]:
# Charger les modèle / Tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
model_mask = BertForMaskedLM.from_pretrained('bert-base-cased')
fill_mask = pipeline("fill-mask", model=model_mask, tokenizer=tokenizer)
model_answer = BertForQuestionAnswering.from_pretrained('bert-base-uncased')

# Vérification de la disponibilité du GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Device : {device}")

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForQuestionAnswering: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bia

Device : cuda


In [25]:
#load data
file_path = 'data/dev_examples.json'

max_description_length = 2000
with open(file_path, 'r') as file:
    data = json.load(file)

def create_input_data_mask(data):
    formatted_data = []

    for item in data:
        text = item['text']
        arguments = item['arguments']

        input_text_EVENT = f"This is a description of an incident : {text[0:max_description_length]}. The event of the incident is [MASK]."
        input_text_ACTIVITY = f"This is a description of an incident : {text[0:max_description_length]}. The activity of the incident is [MASK]."
        input_text_WHO = f"This is a description of an incident : {text[0:max_description_length]}. The person concern by the incident is [MASK]."
        input_text_WHERE = f"This is a description of an incident : {text[0:max_description_length]}. The location of the incident is [MASK]."
        input_text_WHEN = f"This is a description of an incident : {text[0:max_description_length]}. The incident occur [MASK]."
        input_text_CAUSE = f"This is a description of an incident : {text[0:max_description_length]}. The incident cause is [MASK]."
        input_text_EQUIPMENT = f"This is a description of an incident : {text[0:max_description_length]}. The equipement use is [MASK]."
        input_text_INJURY = f"This is a description of an incident : {text[0:max_description_length]}. The incident injury is [MASK]."
        input_text_INJURED = f"This is a description of an incident : {text[0:max_description_length]}. The person injured is [MASK]."
        input_text_BODYPARTS = f"This is a description of an incident : {text[0:max_description_length]}. The body part injured is [MASK]."
        input_text_DEATH = f"This is a description of an incident : {text[0:max_description_length]}. The person who died is [MASK]."

        formatted_data.append((input_text_EVENT, arguments['EVENT']))
        formatted_data.append((input_text_ACTIVITY, arguments['ACTIVITY']))
        formatted_data.append((input_text_WHO, arguments['WHO']))
        formatted_data.append((input_text_WHERE, arguments['WHERE']))
        formatted_data.append((input_text_WHEN, arguments['WHEN']))
        formatted_data.append((input_text_CAUSE, arguments['CAUSE']))
        formatted_data.append((input_text_EQUIPMENT, arguments['EQUIPMENT']))
        formatted_data.append((input_text_INJURY, arguments['INJURY']))
        formatted_data.append((input_text_INJURED, arguments['INJURED']))
        formatted_data.append((input_text_BODYPARTS, arguments['BODY-PARTS']))
        formatted_data.append((input_text_DEATH, arguments['DEATH']))
    
    return formatted_data

def create_input_data_answer(data):
    formatted_data = []

    for item in data:
        text = item['text'][0:max_description_length]
        arguments = item['arguments']
        
        for key, values in arguments.items():
            question = f"What is the {key} in the incident?"
            target_text = values

            item = {
                'contexte': text,
                'question': question,
                'target': target_text
            }
            formatted_data.append(item)
                
    
    return formatted_data

# Création des question à donner au modèle
dataset_mask = create_input_data_mask(data)
dataset_answer = create_input_data_answer(data)

with open('data/new_examples.json', 'r') as file:
    data = json.load(file)

custom_dataset_mask = create_input_data_mask(data)
custom_dataset_answer = create_input_data_answer(data)

print(dataset_mask[0])
print(dataset_answer[0])

('This is a description of an incident :  At around 10:00 p.m. on November 10  2013  Employee #1  with Villager  Construction Inc.  with a coworker  were using an asphalt milling machine  (Wirtgen; Model Number: W2100) to grind out existing asphalt from an  interstate at a railroad bridge overpass. Employee # 1 was standing on the  ground  checking the depth of the cut into the asphalt  using a handheld  pendant attached to the machine. The pedant could stretch out from ten to 15  ft. This allowed Employee #1 to walk back and forth  checking the cut. The  operator was on the top of the milling machine  controlling the operation of  the machine and ensuring that the milling machine and dump truck (driven by a  second coworker  who worked for an independent trucking service) kept a safe  working distance. A different company  Protective Services Inc. (PSI)  was  responsible for the traffic control of the job site and had shut down the  inside lane of a three lane section of the interstat

# Fonction d'évaluation des modèles

In [26]:
#Calcule du score d'une modèle
def normalize_answer(s):
    """Mettre en minuscule et retirer la ponctuation, des déterminants and les espaces."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))

def f1_score(prediction, ground_truth):
    """Normalise les 2 textes, trouve ce qu'il y a en comment et estime précision, rappel et F1."""
    prediction_tokens = normalize_answer(prediction).split()
    ground_truth_tokens = normalize_answer(ground_truth).split()
    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
    num_same = sum(common.values())
    if len(ground_truth_tokens) == 0 or len(prediction_tokens) == 0:
        return int(ground_truth_tokens == prediction_tokens)
    if num_same == 0:
        return 0
    precision = 1.0 * num_same / len(prediction_tokens)
    recall = 1.0 * num_same / len(ground_truth_tokens)
    f1 = (2 * precision * recall) / (precision + recall)
    return f1

def exact_match_score(prediction, ground_truth): 
    """Vérifie si les 2 textes sont quasi-identiques."""
    return (normalize_answer(prediction) == normalize_answer(ground_truth))

def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
    """La fonction princiaple. Important de noter que ground_truths est une liste 
       parce qu'il peut y avoir plusieurs réponses possibles."""
    scores_for_ground_truths = []
    for ground_truth in ground_truths:
        score = metric_fn(prediction, ground_truth)
        scores_for_ground_truths.append(score)
    return max(scores_for_ground_truths)

# Modèle de maskage

In [27]:
#Evaluate the mask model

def evaluate_fill_model(dataset, metric_fn):
    total_score = 0.0
    for item in dataset:
        input = item[0]
        target = item[1]
        output = fill_mask(input)
        pred = output[0]['token_str']
        pred = '' if pred == "unknown" else pred
        total_score += metric_max_over_ground_truths(metric_fn, pred, target)

    total_score /= len(dataset)

    print(f"The average score of the mask model in the dataset is : {total_score}.")

evaluate_fill_model(dataset_mask, f1_score)
evaluate_fill_model(dataset_mask, exact_match_score)
evaluate_fill_model(custom_dataset_mask, f1_score)
evaluate_fill_model(custom_dataset_mask, exact_match_score)

The average score of the mask model in the dataset is : 0.18045454545454545.
The average score of the mask model in the dataset is : 0.18.
The average score of the mask model in the dataset is : 0.045454545454545456.
The average score of the mask model in the dataset is : 0.045454545454545456.


# Modèle Question-réponse

In [28]:
def answer_question(question, context, topN=20):
    def get_top_answers(possible_starts, possible_ends, input_ids=20):
        answers = []
        for start, end in zip(possible_starts, possible_ends):
            answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[start:end+1]))
            answers.append(answer)
        return answers 

    inputs = tokenizer.encode_plus(question, context, add_special_tokens=True, return_tensors="pt")    
    input_ids = inputs["input_ids"].tolist()[0]

    model_out = model_answer(**inputs)
     
    answer_start_scores = model_out["start_logits"]
    answer_end_scores = model_out["end_logits"]

    possible_starts = np.argsort(answer_start_scores.cpu().detach().numpy()).flatten()[::-1][:topN]
    possible_ends = np.argsort(answer_end_scores.cpu().detach().numpy()).flatten()[::-1][:topN]
    
    #get best answer
    answer_start = torch.argmax(answer_start_scores)  
    # Get the most likely end of answer with the argmax of the score
    answer_end = torch.argmax(answer_end_scores) + 1  

    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))
    answers = get_top_answers(possible_starts, possible_ends, input_ids )

    return { "answer":answer,"answer_start":answer_start,"answer_end":answer_end,"input_ids":input_ids,
            "answer_start_scores":answer_start_scores,"answer_end_scores":answer_end_scores,"inputs":inputs,"answers":answers,
            "possible_starts":possible_starts,"possible_ends":possible_ends}

In [29]:
#Evaluate the answer model

def evaluate_answer_model(dataset, metric_fn):
    total_score = 0.0
    for item in dataset:
        output = answer_question(item['contexte'], item['question'])
        total_score += metric_max_over_ground_truths(metric_fn, output['answer'], item['target'])

    total_score /= len(dataset)
    print(f"The average score of the question-response model in the dev dataset is : {total_score}.")

evaluate_answer_model(dataset_answer, f1_score)
evaluate_answer_model(custom_dataset_answer, exact_match_score)
evaluate_answer_model(dataset_answer, f1_score)
evaluate_answer_model(custom_dataset_answer, exact_match_score)

The average score of the question-response model in the dev dataset is : 0.12085948258850022.
The average score of the question-response model in the dev dataset is : 0.00909090909090909.
The average score of the question-response model in the dev dataset is : 0.12085948258850022.
The average score of the question-response model in the dev dataset is : 0.00909090909090909.
