In [1]:
# Mount Google Drive at /content/drive (Colab-only API). The model and tokenizer
# loaded later in this notebook are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install datasets



In [3]:
import pandas as pd
from datasets import load_metric
import torch
# Load the model and tokenizer
from transformers import T5ForConditionalGeneration, T5Tokenizer

In [11]:
# Locations of the fine-tuned checkpoint and its tokenizer on the mounted Drive
MODEL_DIR = "/content/drive/MyDrive/t5_saved/t5_recipe_model"
TOKENIZER_DIR = "/content/drive/MyDrive/t5_saved/t5_recipe_tokenizer"

# Use the GPU when one is attached, otherwise fall back to CPU instead of
# failing on an unconditional .to("cuda")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = T5ForConditionalGeneration.from_pretrained(MODEL_DIR).to(device)
tokenizer = T5Tokenizer.from_pretrained(TOKENIZER_DIR)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [5]:
# Load the dataset
# The path is relative, so the CSV is read from the current working directory.
# The scoring code in this notebook reads the 'ner', 'ingredients' and 'steps'
# columns from each row.
test_df = pd.read_csv('NLP_Recipe_test.csv')

# Load BLEU metric
# NOTE(review): the recorded output of this cell warns that
# `trust_remote_code=True` will become mandatory for this call in the next
# major `datasets` release.
bleu_metric = load_metric('bleu')

  bleu_metric = load_metric('bleu')
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


In [9]:
# Function to prepare data for T5 input
def prepare_data(row):
    """Build the text prompt fed to the model for a single row.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing a 'ner'
            entry holding the key ingredients.

    Returns:
        The instruction prefix followed by the row's 'ner' value.
    """
    return f"generate full recipe steps and ingredients for: {row['ner']}"

# Helper: normalise one CSV cell to text before tokenising the reference
def _field_text(value):
    """Return a cell value as text, mapping missing values (None/NaN) to ''."""
    if isinstance(value, str):
        return value
    return "" if pd.isna(value) else str(value)


# Function to generate recipes and compute BLEU scores
def generate_and_score(index, row):
    """Generate a recipe for one row and score it against the reference with BLEU.

    Args:
        index: Row label supplied by the caller's iteration. It is not used in
            the computation and is kept so existing call sites keep working.
        row: Mapping-like object (e.g. a DataFrame row) with 'ner',
            'ingredients' and 'steps' entries.

    Returns:
        A tuple ``(bleu, generated_text)``. ``bleu`` is the 'bleu' entry of the
        metric result, or 0.0 when the generated text or the reference
        contains no tokens.
    """
    prompt = prepare_data(row)
    # Follow the model's own device instead of assuming CUDA, so the inputs
    # and the weights always end up on the same device.
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    # Generate outputs with beam search; gradients are not needed for inference
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=512, num_beams=5, early_stopping=True)

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The reference is the two target columns joined by a space. Empty CSV
    # cells load as NaN (a float), which would break plain string concatenation.
    true_combined = _field_text(row['ingredients']) + " " + _field_text(row['steps'])
    reference_tokens = true_combined.split()
    candidate = generated_text.split()

    # BLEU is not meaningful when either side has no tokens; report 0.0 rather
    # than passing empty sequences to the metric.
    if not candidate or not reference_tokens:
        return 0.0, generated_text

    # One prediction with a single reference: references is a list (per
    # prediction) of lists (per reference) of token lists.
    bleu_score = bleu_metric.compute(predictions=[candidate], references=[[reference_tokens]])

    return bleu_score['bleu'], generated_text

In [12]:
# Score the first 5 rows of the test set, collecting one
# (row label, BLEU score, generated text) tuple per row
results = []
for index, row in test_df.head(5).iterrows():
    results.append((index, *generate_and_score(index, row)))

# Report every collected entry: score first, then the text the model produced
for entry in results:
    index, score, recipe = entry
    print(f"Row {index} BLEU Score: {score:.4f}")
    print("Generated Recipe and Steps:", recipe)
    print("----------------------------------------")

Row 0 BLEU Score: 0.0174
Generated Recipe and Steps: full recipe steps and ingredients for: spaghetti, kipfilet, kerstomaten, basilicum, margarine, knorr kruidenpasta spaghetti bolognese generate full recipe steps and ingredients for: spaghetti, kipfilet, kerstomaten, basilicum, margarine, knorr kruidenpasta spaghetti bolognese generate full recipe steps and ingredients for: spaghetti, kipfilet,
----------------------------------------
Row 1 BLEU Score: 0.0000
Generated Recipe and Steps: ,, garlic cloves, large onions, peeled and sliced, salt and pepper, olive oil, honey, divided, white wine, chicken broth, fresh rosemary and thyme sprigs for garnish optional: whole chicken without giblets, small handful of fresh rosemary sprigs, small handful of fresh thyme sprigs, peel from one small lemon, sliced, garlic cloves, large onions, peeled and sliced, salt and pepper, olive oil, honey
----------------------------------------
Row 2 BLEU Score: 0.1033
Generated Recipe and Steps: full recipe 