<a href="https://colab.research.google.com/github/Galium-aparine/CS49200_ML_Recipe_Generator/blob/maddie/RecipeGenerator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Note: The base code comes from the Hugging Face model card for `flax-community/t5-recipe-generation` (https://huggingface.co/flax-community/t5-recipe-generation). Most of it has been rewritten to use PyTorch instead of Flax, which makes text generation much faster.

In [None]:
# Installing requirements
!pip install transformers



In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch

MODEL_NAME_OR_PATH = "flax-community/t5-recipe-generation"

# Use the GPU when one is available; otherwise fall back to the CPU so that
# loading the model does not fail on a runtime without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME_OR_PATH, use_fast=True)
model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_NAME_OR_PATH,
    # Half precision is requested for speed on the GPU only; the CPU path
    # keeps full precision.
    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
).to(DEVICE).eval()

# Text placed in front of every input string before it is tokenized.
prefix = "items: "

# Keyword arguments unpacked into `model.generate(...)`.
generation_kwargs = dict(
    max_length=512,
    min_length=64,
    no_repeat_ngram_size=3,
    do_sample=True,
    top_k=60,
    top_p=0.95,
)


# Literal marker -> replacement text, applied to decoded output after the
# special tokens have been removed.
tokens_map = {"<sep>": "--", "<section>": "\n"}

# All special tokens reported by the tokenizer; these are deleted from decoded output.
special_tokens = tokenizer.all_special_tokens
def skip_special_tokens(text, special_tokens):
    """Return `text` with every occurrence of each special token deleted.

    Args:
        text: The string to clean.
        special_tokens: Iterable of token strings; each is removed in the
            order given.

    Returns:
        The cleaned string.
    """
    cleaned = text
    for marker in special_tokens:
        cleaned = cleaned.replace(marker, "")
    return cleaned

def target_postprocessing(texts, special_tokens):
    """Clean decoded model output.

    Each text has the given special tokens removed (via `skip_special_tokens`)
    and then every key of the module-level `tokens_map` replaced by its value.

    Args:
        texts: A list of strings; any non-list value is treated as a single text.
        special_tokens: Token strings to delete from each text.

    Returns:
        A new list with one cleaned string per input text.
    """
    batch = texts if isinstance(texts, list) else [texts]

    def _clean(raw):
        # Drop special tokens first, then translate the structural markers.
        stripped = skip_special_tokens(raw, special_tokens)
        for marker, replacement in tokens_map.items():
            stripped = stripped.replace(marker, replacement)
        return stripped

    return [_clean(raw) for raw in batch]

def generation_function(texts):
    """Generate recipe text for one or more ingredient strings.

    Args:
        texts: A single input string (e.g. "bacon, bread, ginger") or a list
            of such strings. `prefix` is prepended to each before tokenization.

    Returns:
        A list of post-processed strings as returned by
        `target_postprocessing`. An empty input list yields an empty list.
    """
    _inputs = texts if isinstance(texts, list) else [texts]
    if not _inputs:
        # Nothing to tokenize or generate for an empty batch.
        return []

    inputs = [prefix + inp for inp in _inputs]
    # Send the encoded batch to whichever device the model is on instead of
    # assuming a GPU is present.
    encodings = tokenizer(
        inputs,
        max_length=256,
        padding="max_length",
        truncation=True,
        return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        output_ids = model.generate(
            input_ids=encodings.input_ids,
            attention_mask=encodings.attention_mask,
            **generation_kwargs
        )

    # Decode without dropping any tokens: target_postprocessing removes the
    # special tokens and rewrites the <sep>/<section> markers itself.
    decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=False)
    return target_postprocessing(decoded, special_tokens)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.56k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

In [None]:
items = [
    "macaroni, butter, salt, bacon, milk, flour, pepper, cream corn",
    "provolone cheese, bacon, bread, ginger"
]

# Marker that opens a section of the generated text -> headline printed for it.
SECTION_HEADLINES = {
    "title:": "TITLE",
    "ingredients:": "INGREDIENTS",
    "directions:": "DIRECTIONS",
}


def format_recipe(text):
    """Turn one post-processed recipe string into printable lines.

    The text is split on newlines into sections. A section that starts with a
    key of `SECTION_HEADLINES` switches the current headline; a section without
    a marker keeps the headline of the previous one. The title is rendered on a
    single line; every other section is rendered as a headline followed by one
    numbered bullet per "--"-separated entry.

    Args:
        text: One recipe string with sections separated by "\n".

    Returns:
        A list of output lines (without trailing newlines).
    """
    lines = []
    headline = ""
    for section in text.split("\n"):
        section = section.strip()
        if not section:
            # Blank sections would otherwise print an empty "- 1:" bullet.
            continue

        for marker, name in SECTION_HEADLINES.items():
            if section.startswith(marker):
                # Strip only the leading marker; the same word appearing later
                # in the section is real content and must be kept.
                section = section[len(marker):]
                headline = name
                break

        if headline == "TITLE":
            lines.append(f"[{headline}]: {section.strip().capitalize()}")
        else:
            lines.append(f"[{headline}]:")
            lines.extend(
                f"  - {i+1}: {info.strip().capitalize()}"
                for i, info in enumerate(section.split("--"))
            )
    return lines


generated = generation_function(items)
for text in generated:
    for line in format_recipe(text):
        print(line)
    print("-" * 130)

[TITLE]: Macaroni and corn
[INGREDIENTS]:
  - 1: 1 lb. box macaronis
  - 2: 2 tbsp. butter
  - 3: 1 tsp salt
  - 4: 1 to 1 1/2 lg bacon, chopped
  - 5: 2 1/2 c. milk
  - 6: 1/4 c flour
  - 7: 1 dash pepper
  - 8: 1 can cream corn
[DIRECTIONS]:
  - 1: Make a white sauce with the butter and flour.
  - 2: Add the milk. add the other seasonings.
  - 3: Cook and add corn.
  - 4: Pour in a casserole dish and top with grated cheese.
  - 5: Bake at 325 for 20 min.
----------------------------------------------------------------------------------------------------------------------------------
[TITLE]: Provolone bacon sandwich
[INGREDIENTS]:
  - 1: 1 slice provolo cheese or 2 slices sharp cheddar cheese
  - 2: 1 slice bacon, cooked and crumbled
  - 3: 1 piece sourdough bread, sliced lengthwise
  - 4: 2 slices crystallized ginger
  - 5: 1 dash tabasco optional
[DIRECTIONS]:
  - 1: Spread 1 tablespoon of tabassco on bread slices and place on a plate.
  - 2: Mix cheese and bacon and sprinkle evenl