In [1]:
! pip install transformers[torch] datasets==3.6.0 evaluate ctranslate2 --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/491.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m491.5/491.5 kB[0m [31m47.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.8/38.8 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [55]:
import datasets
import transformers
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import Trainer, TrainingArguments
import torch
import evaluate
import ctranslate2
from tqdm.notebook import tqdm
import pandas as pd
import json

In [None]:
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small")

In [3]:
ds = datasets.load_dataset("GEM/e2e_nlg")

README.md: 0.00B [00:00, ?B/s]

e2e_nlg.py: 0.00B [00:00, ?B/s]

dataset_infos.json: 0.00B [00:00, ?B/s]

The repository for GEM/e2e_nlg contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/GEM/e2e_nlg.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Downloading data:   0%|          | 0.00/1.33M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/881k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.08M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/70.6k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/33525 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1484 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1847 [00:00<?, ? examples/s]

Generating challenge_train_sample split:   0%|          | 0/500 [00:00<?, ? examples/s]

Generating challenge_validation_sample split:   0%|          | 0/500 [00:00<?, ? examples/s]

Generating challenge_test_scramble split:   0%|          | 0/500 [00:00<?, ? examples/s]

In [None]:
ds

DatasetDict({
    train: Dataset({
        features: ['gem_id', 'gem_parent_id', 'meaning_representation', 'target', 'references'],
        num_rows: 33525
    })
    validation: Dataset({
        features: ['gem_id', 'gem_parent_id', 'meaning_representation', 'target', 'references'],
        num_rows: 1484
    })
    test: Dataset({
        features: ['gem_id', 'gem_parent_id', 'meaning_representation', 'target', 'references'],
        num_rows: 1847
    })
    challenge_train_sample: Dataset({
        features: ['gem_id', 'gem_parent_id', 'meaning_representation', 'target', 'references'],
        num_rows: 500
    })
    challenge_validation_sample: Dataset({
        features: ['gem_id', 'gem_parent_id', 'meaning_representation', 'target', 'references'],
        num_rows: 500
    })
    challenge_test_scramble: Dataset({
        features: ['gem_id', 'gem_parent_id', 'meaning_representation', 'target', 'references'],
        num_rows: 500
    })
})

In [None]:
ds['train'][0]

{'gem_id': 'e2e_nlg-train-0',
 'gem_parent_id': 'e2e_nlg-train-0',
 'meaning_representation': 'name[The Eagle], eatType[coffee shop], food[Japanese], priceRange[less than £20], customer rating[low], area[riverside], familyFriendly[yes], near[Burger King]',
 'target': 'The Eagle is a low rated coffee shop near Burger King and the riverside that is family friendly and is less than £20 for Japanese food.',
 'references': []}

In [None]:
ds['test'][0]

{'gem_id': 'e2e_nlg-test-0',
 'gem_parent_id': 'e2e_nlg-test-0',
 'meaning_representation': 'eatType[pub], food[Fast food], customer rating[high], area[riverside], familyFriendly[no], near[Café Rouge]',
 'target': 'The Mills is not kid friendly as it is a riverside pub near Café Rouge.  Its mid priced fast food is highly rated.',
 'references': ['The Mills is not kid friendly as it is a riverside pub near Café Rouge.  Its mid priced fast food is highly rated.']}

In [4]:
# Keep the columns need fro fine-tuning
columns_to_keep_train_val = ['meaning_representation', 'target']

# Prepare train set
train_dataset = ds['train'].remove_columns(
    [col for col in ds['train'].column_names if col not in columns_to_keep_train_val]
)

# Prepare validation set
validation_dataset = ds['validation'].remove_columns(
    [col for col in ds['validation'].column_names if col not in columns_to_keep_train_val]
)

# Keep the columns need for Evaluation
columns_to_keep_test = ['meaning_representation', 'target', 'references']
# Prepare test set
test_dataset = ds['test'].remove_columns(
    [col for col in ds['test'].column_names if col not in columns_to_keep_test]
)


In [5]:
processed_dataset = datasets.DatasetDict({
    'train': train_dataset,
    'validation': validation_dataset,
    'test': test_dataset
})

In [6]:
# Rename columns
processed_dataset = processed_dataset.rename_column("meaning_representation", "input_text")
processed_dataset = processed_dataset.rename_column("target", "labels")

In [7]:
processed_dataset

DatasetDict({
    train: Dataset({
        features: ['input_text', 'labels'],
        num_rows: 33525
    })
    validation: Dataset({
        features: ['input_text', 'labels'],
        num_rows: 1484
    })
    test: Dataset({
        features: ['input_text', 'labels', 'references'],
        num_rows: 1847
    })
})

In [None]:
# Tokenize the dataset
def tokenize_function(batch):
  model_inputs = tokenizer(batch["input_text"], padding="max_length", truncation=True, max_length=512)
  with tokenizer.as_target_tokenizer():
    labels = tokenizer(batch["labels"], padding="max_length", truncation=True, max_length=512)
  model_inputs["labels"] = labels["input_ids"]
  return model_inputs

tokenized_dataset = processed_dataset.map(tokenize_function, batched=True)


Map:   0%|          | 0/1484 [00:00<?, ? examples/s]



In [None]:
# Define the training arguments
training_arguments = transformers.Seq2SeqTrainingArguments(
    output_dir = 't5-small-e2e_nlg',
    num_train_epochs = 3,
    eval_strategy = 'epoch',
    save_strategy='epoch',
    per_device_train_batch_size = 16,
    per_device_eval_batch_size = 16,
    save_total_limit = 3,
    load_best_model_at_end = True,
    fp16=True,
    optim='adafactor',
    report_to = []
)

In [None]:
trainer = Trainer(
    model=model,
    args=training_arguments,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
)

trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.074,0.07837
2,0.0684,0.074207
3,0.0661,0.073335


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].


TrainOutput(global_step=6288, training_loss=0.10142879844015185, metrics={'train_runtime': 4377.3727, 'train_samples_per_second': 22.976, 'train_steps_per_second': 1.436, 'total_flos': 1.36120016830464e+16, 'train_loss': 0.10142879844015185, 'epoch': 3.0})

In [None]:
# Save the fine-tuned model
model.save_pretrained('t5-small-e2e_nlg')

# Save the tokenizer
tokenizer.save_pretrained('t5-small-e2e_nlg')

('t5-small-e2e_nlg/tokenizer_config.json',
 't5-small-e2e_nlg/special_tokens_map.json',
 't5-small-e2e_nlg/spiece.model',
 't5-small-e2e_nlg/added_tokens.json',
 't5-small-e2e_nlg/tokenizer.json')

In [None]:
!zip -r t5-small-e2e_nlg.zip t5-small-e2e_nlg


In [9]:
!unzip -q t5-small-e2e_nlg.zip -d t5-small-e2e_nlg


In [12]:

model = AutoModelForSeq2SeqLM.from_pretrained('/content/t5-small-e2e_nlg/t5-small-e2e_nlg')
tokenizer = AutoTokenizer.from_pretrained('/content/t5-small-e2e_nlg/t5-small-e2e_nlg')

In [32]:
# Test the fine-tuned model with some examples
test_dataset = processed_dataset["test"]
inputs = test_dataset["input_text"][:10]
labels = test_dataset["labels"][:10]

for i in range(len(inputs)):
  input_text = inputs[i]
  target_text = labels[i]

  tokenized_input = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
  output = model.generate(**tokenized_input, max_length=512)
  generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

  print("Input:", input_text)
  print("Target:", target_text)
  print("Generated:", generated_text)
  print("-" * 50)


Input: eatType[pub], food[Fast food], customer rating[high], area[riverside], familyFriendly[no], near[Café Rouge]
Target: The Mills is not kid friendly as it is a riverside pub near Café Rouge.  Its mid priced fast food is highly rated.
Generated: Near Café Rouge in riverside is a fast food pub that is not kid friendly. It has a high customer rating.
--------------------------------------------------
Input: eatType[pub], food[Japanese], priceRange[moderate], customer rating[5 out of 5], area[city centre], near[Raja Indian Cuisine]
Target: The Wrestlerss is rated 5 out of 5, serving Japanese food in a pub. It is higher than average priced, and located near the city centre near Raja Indian Cuisine.
Generated: In the city centre near Raja Indian Cuisine is a pub that serves Japanese food. It has a moderate price range and a customer rating of 5 out of 5.
--------------------------------------------------
Input: eatType[restaurant], food[Chinese], priceRange[high], customer rating[1 out o

## Generation with ctranslate2

In [17]:
# Convert model to ctranslate format
! ct2-transformers-converter --model /content/t5-small-e2e_nlg/t5-small-e2e_nlg --output_dir t5-small-e2e_nlg-ct2

2025-09-25 11:40:34.873297: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758800434.894921    5578 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758800434.901051    5578 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1758800434.915848    5578 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1758800434.915892    5578 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1758800434.915896    5578 computation_placer.cc:177] computation placer alr

In [38]:
# Instantiate model as ctranslate Translator and instantiate Tokenizer
translator = ctranslate2.Translator(
    't5-small-e2e_nlg-ct2',
    device='cuda',
)
tokenizer = transformers.AutoTokenizer.from_pretrained('/content/t5-small-e2e_nlg/t5-small-e2e_nlg')

In [45]:
# Small test data
test_dataset = processed_dataset["test"]
inputs = test_dataset["input_text"][:10]
labels = test_dataset["labels"][:10]


In [40]:
# Define function to pre process inputs
def pre_process(text):
    input_ids = tokenizer.encode(text)
    input_tokens = tokenizer.convert_ids_to_tokens(input_ids)
    return input_tokens

In [41]:
# Define function to post process inputs
def post_process(output):
    output_tokens = output.hypotheses[0]
    output_ids = tokenizer.convert_tokens_to_ids(output_tokens)
    output_text = tokenizer.decode(output_ids, skip_special_tokens=True)
    return output_text

In [48]:
#
def generation_ct2(inputs, batch_size=32, max_input_length=512, max_decoding_length=512, beam_size=1, length_penalty=1.0, repetition_penalty=1.0):

  batch = [pre_process(text) for text in tqdm(inputs)]

  all_generated = []

  for i in tqdm(range(0, len(batch), batch_size)):
      subbatch = batch[i:i+batch_size]

      # translate_batch expects List[List[str]]
      results = translator.translate_batch(
          subbatch,
          max_batch_size=batch_size,
          beam_size=beam_size,
          max_input_length=max_input_length,
          max_decoding_length=max_decoding_length,
          num_hypotheses=1,
          length_penalty=length_penalty,
          repetition_penalty=repetition_penalty,
      )

      all_generated += [post_process(o) for o in results]

  return all_generated

In [49]:
# Example test generation
all_generated = generation_ct2(inputs)

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

In [50]:

for i in range(len(inputs)):
  input_text = inputs[i]
  target_text = labels[i]

  print("Input:", input_text)
  print("Target:", target_text)
  print("Generated:", all_generated[i])
  print("-" * 50)

Input: eatType[pub], food[Fast food], customer rating[high], area[riverside], familyFriendly[no], near[Café Rouge]
Target: The Mills is not kid friendly as it is a riverside pub near Café Rouge.  Its mid priced fast food is highly rated.
Generated: Near Café Rouge in riverside is a fast food pub that is not kid friendly. It has a high customer rating.
--------------------------------------------------
Input: eatType[pub], food[Japanese], priceRange[moderate], customer rating[5 out of 5], area[city centre], near[Raja Indian Cuisine]
Target: The Wrestlerss is rated 5 out of 5, serving Japanese food in a pub. It is higher than average priced, and located near the city centre near Raja Indian Cuisine.
Generated: In the city centre near Raja Indian Cuisine is a pub that serves Japanese food. It has a moderate price range and a customer rating of 5 out of 5.
--------------------------------------------------
Input: eatType[restaurant], food[Chinese], priceRange[high], customer rating[1 out o

# Full generation on the test set

In [51]:
test_dataset = processed_dataset["test"]
inputs = test_dataset["input_text"]
labels = test_dataset["labels"]

In [52]:
full_test_generation = generation_ct2(inputs)

  0%|          | 0/1847 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

In [53]:
len(full_test_generation)

1847

In [56]:

gen_json = []
for i in range(len(inputs)):
  gen_json.append({
      "input_text": inputs[i],
      "target": labels[i],
      "generated_text": full_test_generation[i]
  })

In [57]:
# Save the data
with open("t5-small-e2e_nlg-test-results-3epoch.json", "w") as f:
  json.dump(gen_json, f)

In [58]:
with open("t5-small-e2e_nlg-test-results-3epoch.json", "r") as f:
  result_json = json.load(f)

In [60]:
result_json[0]

{'input_text': 'eatType[pub], food[Fast food], customer rating[high], area[riverside], familyFriendly[no], near[Café Rouge]',
 'target': 'The Mills is not kid friendly as it is a riverside pub near Café Rouge.  Its mid priced fast food is highly rated.',
 'generated_text': 'Near Café Rouge in riverside is a fast food pub that is not kid friendly. It has a high customer rating.'}