In [2]:
import json
import os 
from data_utils import get_tokenizer, preprocess_fn
from datasets import load_dataset
from tqdm.notebook import tqdm

import evaluate

predictions_path = f'outputs/test_tuetscheck_e2e_nlg'
references_path = f'outputs/test_tuetscheck_e2e_nlg'

predict_path =  os.path.join(predictions_path, 'predictions.json')
reference_path = os.path.join(references_path, 'references.json')

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
with open(predict_path, "r") as file:
    predictions = json.load(file)
    predictions = list(predictions.values())
with open(reference_path, "r") as file:
    references = json.load(file)
    references = list(references.values())

In [5]:
tokenizer = get_tokenizer()

def load_preprocessed_dataset(tokenizer, split='test'):
    dataset = load_dataset("tuetschek/e2e_nlg")
    dataset = dataset.map(preprocess_fn, fn_kwargs={"tokenizer":tokenizer})
    return dataset[split] # type: ignore

In [9]:
testset = load_preprocessed_dataset(tokenizer, split='test')

In [10]:
testset[0]

{'meaning_representation': 'name[Blue Spice], eatType[coffee shop], area[city centre]',
 'human_reference': 'A coffee shop in the city centre area called Blue Spice.',
 'input_ids': [1,
  1024,
  29961,
  21319,
  1706,
  625,
  1402,
  17545,
  1542,
  29961,
  1111,
  600,
  3905,
  18296,
  1402,
  4038,
  29961,
  12690,
  8442,
  29962,
  13,
  12953,
  29901,
  13],
 'labels': [319,
  26935,
  18296,
  297,
  278,
  4272,
  8442,
  4038,
  2000,
  10924,
  1706,
  625,
  29889,
  2]}

In [6]:
references[0]

[319, 26935, 18296, 297, 278, 4272, 8442, 4038, 2000, 10924, 1706, 625, 29889]

### transform tokenized to text

In [21]:
pred = predictions[0] 
# Décoder la séquence de tokens pour obtenir le texte
text = tokenizer.decode(pred)

print(text)

Blue Spice is a coffee shop located in the city centre.


In [6]:
predictions_text = dict()
for i, pred in enumerate(predictions):
    text = tokenizer.decode(pred)
    predictions_text[i] = [text]

references_text = dict()
for i, ref in enumerate(references):
    text = tokenizer.decode(ref)
    references_text[i] = [text]

print("Shape of predictions_text: ", len(predictions_text))
for i in range(5):
    print(predictions_text[i])

print("Shape of references_text: ", len(references_text))
for i in range(5):
    print(references_text[i])

Shape of predictions_text:  4693
['Blue Spice is a coffee shop located in the city centre.']
['Blue Spice is a coffee shop located in the city centre.']
['Blue Spice is a coffee shop located in the riverside area.']
['Blue Spice is a coffee shop located in the riverside area.']
['Blue Spice is a coffee shop near Crowne Plaza Hotel. It has a customer rating of 5 out of 5.']
Shape of references_text:  4693
['A coffee shop in the city centre area called Blue Spice.']
['Blue Spice is a coffee shop in city centre.']
['There is a coffee shop Blue Spice in the riverside area.']
['At the riverside, there is a coffee shop called The Blue Spice.']
['The coffee shop Blue Spice is based near Crowne Plaza Hotel and has a high customer rating of 5 out of 5.']


In [7]:
with open(os.path.join(predictions_path, "predictions_text.json"), 'w') as file:
    json.dump(predictions_text, file)

with open(os.path.join(references_path, "references_text.json"), 'w') as file:
    json.dump(references_text, file)

## evaluation

### BLEU metric

In [9]:
indice = 0

bleu = evaluate.load('bleu')

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

In [None]:
bleu.add(predictions=predictions[indice], references=references[indice])

In [None]:
bleu.add(predictions=predictions[indice], references=references[indice])

In [58]:
results = bleu.compute(predictions=predictions_text[indice], references=references_text[indice])
print(results)

{'bleu': 0.29502343631964045, 'precisions': [0.75, 0.45454545454545453, 0.2, 0.1111111111111111], 'brevity_penalty': 1.0, 'length_ratio': 1.0, 'translation_length': 12, 'reference_length': 12}


In [62]:
indice = 2000

print(predictions_text[indice])
print(references_text[indice])

results = bleu.compute(predictions=predictions_text[indice], references=references_text[indice])
results

['Cocum is a pub with a high customer rating near Café Sicilia.']
['For a pub with high customer rating near Café Sicilia, try Cocum.']


{'bleu': 0.4716381284555772,
 'precisions': [0.8461538461538461,
  0.5833333333333334,
  0.45454545454545453,
  0.3],
 'brevity_penalty': 0.925961078642316,
 'length_ratio': 0.9285714285714286,
 'translation_length': 13,
 'reference_length': 14}

In [54]:
results_bleu = dict()
for i in range(len(predictions_text)):
    results = bleu.compute(predictions=predictions_text[i], references=references_text[i])
    results_bleu[i] = results
    
with open(os.path.join(predictions_path, "results_bleu_metric.json"), 'w') as file:
    json.dump(results_bleu, file)

In [63]:
# Charger le fichier JSON
with open(os.path.join(predictions_path, "results_bleu_metric.json"), 'r') as file:
    results_bleu_metric = json.load(file)

# Extraire les scores BLEU et calculer leur moyenne
bleu_scores = [entry['bleu'] for entry in results_bleu_metric.values()]
print(len(bleu_scores))
average_bleu = sum(bleu_scores) / len(bleu_scores)

print(f'Moyenne des scores BLEU : {average_bleu}')

4693
Moyenne des scores BLEU : 0.26107133315338654


In [57]:
# construire un dictionnaire avec les predictions et les references et le score BLEU
results_bleu_metric = dict()
for i in range(len(predictions_text)):
    results_bleu_metric[i] = {
        'predictions': predictions_text[i],
        'references': references_text[i],
        'bleu': results_bleu_metric[i]['bleu']
    }

# Sauvegarder les résultats dans un fichier JSON
with open(os.path.join(predictions_path, "results_bleu_metric.json"), 'w') as file:
    json.dump(results_bleu_metric, file)

KeyError: 0

### ROUGE metric

In [15]:
rouge = evaluate.load('rouge')

In [None]:
from tqdm import tqdm

results_rouge = dict()
for i in tqdm(range(len(predictions_text))):
    results = rouge.compute(predictions=predictions_text[i], references=references_text[i])
    results_rouge[i] = results


In [25]:
with open(os.path.join(predictions_path, "results_rouge_metric.json"), 'w') as file:
    json.dump(results_rouge, file)

In [27]:
results = rouge.compute(predictions=predictions_text[i], references=references_text[i])
results

{'rouge1': np.float64(0.8235294117647058),
 'rouge2': np.float64(0.5333333333333333),
 'rougeL': np.float64(0.7058823529411764),
 'rougeLsum': np.float64(0.7058823529411764)}

In [4]:
# Charger le fichier JSON
with open(os.path.join(predictions_path, "results_rouge_metric.json"), 'r') as file:
    results_rouge_metric = json.load(file)

# Extraire les scores ROUGE et calculer leur moyenne
# ROUGE-1
rouge1_scores = [entry['rouge1'] for entry in results_rouge_metric.values()]
average_rouge1 = sum(rouge1_scores) / len(rouge1_scores)
# ROUGE-2
rouge2_scores = [entry['rouge2'] for entry in results_rouge_metric.values()]
average_rouge2 = sum(rouge2_scores) / len(rouge2_scores)
# ROUGE-L
rougeL_scores = [entry['rougeL'] for entry in results_rouge_metric.values()]
average_rougeL = sum(rougeL_scores) / len(rougeL_scores)
# ROUGE-Lsum
rougeLsum_scores = [entry['rougeLsum'] for entry in results_rouge_metric.values()]
average_rougeLsum = sum(rougeLsum_scores) / len(rougeLsum_scores)

print(f'Moyenne des scores ROUGE 1 : {average_rouge1}')
print(f'Moyenne des scores ROUGE 2 : {average_rouge2}')
print(f'Moyenne des scores ROUGE L : {average_rougeL}')
print(f'Moyenne des scores ROUGE Lsum : {average_rougeLsum}')

Moyenne des scores ROUGE 1 : 0.6900641918751375
Moyenne des scores ROUGE 2 : 0.41961928291366424
Moyenne des scores ROUGE L : 0.4915199506339781
Moyenne des scores ROUGE Lsum : 0.4915199506339781


### CIDER metric

In [8]:
cider = evaluate.load('cider')

FileNotFoundError: Couldn't find a module script at /Users/lazlo/Documents/GitHub/M2IASD/NLP/LoRA/cider/cider.py. Module 'cider' doesn't exist on the Hugging Face Hub either.

In [11]:
from tqdm import tqdm

results_cider = dict()
for i in tqdm(range(len(predictions_text))):
    results = cider.compute(predictions=predictions_text[i], references=references_text[i])
    results_cider[i] = results


[0.6363636363636364,
 0.9,
 0.7272727272727273,
 0.43478260869565216,
 0.761904761904762,
 0.8947368421052632,
 0.3414634146341463,
 0.5333333333333333,
 0.5,
 0.3846153846153846,
 0.6,
 0.5625,
 0.5806451612903225,
 0.5517241379310344,
 0.5,
 0.6,
 0.888888888888889,
 0.7,
 0.380952380952381,
 0.7692307692307692,
 0.8571428571428571,
 0.36842105263157887,
 0.4444444444444445,
 0.3636363636363637,
 0.5217391304347826,
 0.4444444444444445,
 0.4827586206896552,
 0.5,
 0.38461538461538464,
 0.5517241379310344,
 0.37499999999999994,
 0.4313725490196078,
 0.5806451612903226,
 0.4615384615384615,
 0.48148148148148145,
 0.2903225806451613,
 0.5882352941176471,
 0.7999999999999999,
 0.4782608695652174,
 0.7999999999999999,
 0.5652173913043478,
 0.6808510638297872,
 0.5957446808510638,
 0.4081632653061225,
 0.6818181818181819,
 0.6274509803921569,
 0.8085106382978724,
 0.5217391304347826,
 0.7555555555555556,
 0.5641025641025641,
 0.375,
 0.6,
 0.7727272727272727,
 0.5405405405405406,
 0.577777

In [1]:
def transform_json_to_txt(input_json_path, output_txt_path):
    """
    Transform a JSON reference file to a formatted text file.

    Parameters:
    input_json_path (str): The path to the input JSON file.
    output_txt_path (str): The path to the output text file.
    """
    with open(input_json_path, 'r') as json_file:
        json_data = json.load(json_file)

    with open(output_txt_path, 'w') as output_file:
        for key, sentences in json_data.items():
            for sentence in sentences:
                output_file.write(sentence + '\n')
            output_file.write('\n')

    print(f"Transformation complete. The output is saved in '{output_txt_path}'")

# Example usage:
if __name__ == "__main__":
    input_path = 'outputs/test_tuetscheck_e2e_nlg/references_text.json'
    output_path = 'outputs/test_tuetscheck_e2e_nlg/references_text.txt'
    transform_json_to_txt(input_path, output_path)

Transformation complete. The output is saved in 'output.txt'


In [2]:
# Save this as transform_predictions_json_to_txt.py

import json

def transform_predictions_json_to_txt(input_json_path, output_txt_path):
    """
    Transform a JSON predictions file to a formatted text file.

    Parameters:
    input_json_path (str): The path to the input JSON file.
    output_txt_path (str): The path to the output text file.
    """
    with open(input_json_path, 'r') as json_file:
        json_data = json.load(json_file)

    with open(output_txt_path, 'w') as output_file:
        for key, sentences in json_data.items():
            for sentence in sentences:
                output_file.write(sentence + '\n')
    
    print(f"Transformation complete. The output is saved in '{output_txt_path}'")

# Example usage:
if __name__ == "__main__":
    input_path = 'outputs/test_tuetscheck_e2e_nlg/predictions_text.json'
    output_path = 'outputs/test_tuetscheck_e2e_nlg/predictions_text.txt'
    transform_predictions_json_to_txt(input_path, output_path)

Transformation complete. The output is saved in 'outputs/test_tuetscheck_e2e_nlg/predictions_text.txt'
