In [1]:
# Mount Google Drive so that files stored there are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Checkpoint location on the mounted Drive; handed to `from_pretrained` as `model_name`.
# NOTE: the loading cell later rebinds `model` to the loaded model object.
model = '/content/drive/MyDrive/llama3'

In [3]:
!pip install datasets

Collecting datasets
  Downloading datasets-2.19.0-py3-none-any.whl (542 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub>=0.21.2 (from datasets)
  Downloading huggingface_hub-0.22.2-py3-none-a

In [4]:
%%capture
# `%%capture` swallows this cell's output (the pip logs below are long).
import torch
# CUDA compute capability of the current GPU as (major, minor); only the major
# number is used below to choose the package set.
major_version, minor_version = torch.cuda.get_device_capability()
# Must install separately since Colab has torch 2.2.1, which breaks packages
# NOTE(review): none of the installs below pin a version, so a fresh run may
# resolve to different releases than the ones this notebook was executed with.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    # `--no-deps`: install exactly these packages without pulling in their dependencies.
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    # Same set as above minus packaging, ninja, einops and flash-attn.
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
pass

In [5]:
import pandas as pd
from datasets import load_metric
from transformers import AutoTokenizer
from unsloth import FastLanguageModel


In [6]:
# Options forwarded unchanged to `FastLanguageModel.from_pretrained` in the loading cell.
load_in_4bit = True     # passed as `load_in_4bit`
dtype = None            # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
max_seq_length = 2048   # passed as `max_seq_length`

In [7]:
# Load the model and tokenizer, then switch the model to inference mode.
from unsloth import FastLanguageModel

# `model` initially holds the checkpoint path but is rebound to the loaded model
# below. Keep the path under its own name so that re-running this cell does not
# pass the model object back in as `model_name`.
if isinstance(model, str):
    model_path = model

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_path, # YOUR MODEL YOU USED FOR TRAINING
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
# Enable native 2x faster inference
# (the trailing `;` keeps the call's return value out of the cell output)
FastLanguageModel.for_inference(model);

config.json:   0%|          | 0.00/1.14k [00:00<?, ?B/s]

==((====))==  Unsloth: Fast Llama patching release 2024.4
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.2.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. Xformers = 0.0.25.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/131 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/449 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [8]:
# Load the dataset. The path is relative to the notebook's working directory,
# so the CSV has to be present there.
test_df = pd.read_csv('NLP_Recipe_test.csv')  # Ensure this path is correct

# Load the BLEU metric. Loading it runs a downloaded metric script;
# `trust_remote_code=True` states that explicitly, which silences the warning
# and is what the library announces as mandatory in its next major release.
bleu_metric = load_metric('bleu', trust_remote_code=True)

# Function to generate recipes and compute BLEU scores
def generate_and_score(index, row):
    """Generate a recipe for one test row and score it against the reference with BLEU.

    Relies on the notebook-level ``tokenizer``, ``model`` and ``bleu_metric``.

    Args:
        index: Row label as yielded by ``iterrows()``. Accepted so the call site
            can pass it through; it is not used in the computation.
        row: Mapping/Series with the keys ``'ner'`` (key ingredients inserted
            into the prompt), ``'ingredients'`` and ``'steps'`` (joined with a
            single space to form the reference text).

    Returns:
        tuple: ``(bleu, generated_recipe)``: the ``'bleu'`` value reported by
        the metric (``0.0`` when the candidate or the reference has no tokens)
        and the generated continuation with surrounding whitespace stripped.
    """
    ner_input = row['ner']  # Key ingredients input
    true_output = row['ingredients'] + " " + row['steps']  # True combined output

    # Prepare the prompt
    # NOTE(review): the continuation lines of this literal carry the function's
    # 4-space indent into the prompt text. Presumably this matches the prompt
    # used for fine-tuning; confirm before changing it.
    recipe_prompt = f"""Given the following key ingredients, generate the full ingredient list with quantities and cooking steps:

    ### Key Ingredients:
    {ner_input}

    ### Full Ingredients and Steps:
    """

    # Tokenize and generate the output
    inputs = tokenizer(
        [recipe_prompt],
        return_tensors="pt"
    ).to('cuda')
    prompt_length = inputs["input_ids"].shape[1]

    outputs = model.generate(**inputs, max_new_tokens=512, use_cache=True)

    # The returned sequence is prompt + continuation. Decode only the tokens
    # after the prompt instead of splitting the decoded text on the
    # "### Full Ingredients and Steps:" marker: the split raises IndexError when
    # decoding does not reproduce the marker verbatim and cuts the recipe short
    # when the model emits the marker again.
    generated_recipe = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    ).strip()

    # Compute BLEU score
    reference = [true_output.split()]
    candidate = generated_recipe.split()
    if not candidate or not reference[0]:
        # One side has no tokens, so there are no n-grams to compare; report 0.0
        # without calling the metric.
        return 0.0, generated_recipe
    bleu_score = bleu_metric.compute(predictions=[candidate], references=[reference])

    return bleu_score['bleu'], generated_recipe

# Preview: generate and score the first five test rows, printing each result.
preview_rows = test_df.head(5)
for index, row in preview_rows.iterrows():
    score, recipe = generate_and_score(index, row)
    print(f"Row {index} BLEU Score: {score}")
    print("Generated Recipe and Steps:", recipe)
    print("----------------------------------------")

  bleu_metric = load_metric('bleu')
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/2.48k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Row 0 BLEU Score: 0.181694029093083
Generated Recipe and Steps: 500.0 gram spaghetti, 1.0 kipfilet, 2.0 kerstomaten, 1.0 handful basilicum, 1.0 tablespoon margarine, 1.0 package knorr kruidenpasta spaghetti bolognese kook de spaghetti volgens de aanwijzingen op de verpakking al dente. snijd de kipfilet in kleine blokjes. snijd de kerstomaten in kleine blokjes. snijd de basilicum in kleine stukjes. schil en snipper de knoflook. verwarm de margarine in een wok. bak de kipfilet en de kerstomaten in de wok. voeg de knoflook en de basilicum toe. bak het geheel 1 minuut. voeg de knorr kruidenpasta spaghetti bolognese toe. roer goed door. schep de spaghetti door de saus. breng op smaak met peper en zout. verdeel de spaghetti over de borden. bestrooi met de geraspte parmezaanse kaas. serveer direct.
----------------------------------------


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Row 1 BLEU Score: 0.00680493553070567
Generated Recipe and Steps: 4.0 pound whole chicken without giblets, small handful of fresh rosemary sprigs, small handful of fresh thyme sprigs, peel from one small lemon, sliced, 4.0 clove garlic cloves, 2.0 large onions, peeled and sliced, salt and pepper, 0.2 cup olive oil, 0.2 cup honey, divided, 0.5 cup white wine, 1.0 cup chicken broth, fresh rosemary and thyme sprigs for garnish optional preheat oven to 375degf. place chicken in a large roasting pan. season with salt and pepper. place rosemary, thyme, lemon, garlic, and onions around the chicken. drizzle with olive oil and honey. roast for 1 hour. remove from oven and add wine and broth. roast for another 30 minutes or until chicken is cooked through. remove from oven and let rest for 10 minutes. garnish with fresh rosemary and thyme sprigs, if desired. serve with roasted vegetables or mashed potatoes.
----------------------------------------


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Row 2 BLEU Score: 0.10756476053445743
Generated Recipe and Steps: 4.0 fresh figs, rinsed and halved, 0.5 cup sugar, 1.0 cup filtered water, 0.5 cup freshly squeezed lemon juice, 1.0 cup fig puree, 0.5 cup honey, 1.0 cup ginger ale, 1.0 cup vodka, 0.5 cup freshly squeezed lemon juice, 1.0 cup ice, 1.0 cup fig puree, 0.5 cup honey, 1.0 cup ginger ale, 1.0 cup vodka, 0.5 cup freshly squeezed lemon juice in a small saucepan, combine the figs, sugar, water, and lemon juice. bring to a boil, then reduce the heat and simmer for 10 minutes. remove from the heat and let cool. puree the fig mixture in a blender or food processor. strain the puree through a fine mesh sieve into a bowl. discard the solids. cover and refrigerate until chilled, at least 2 hours. in a pitcher, combine the fig puree, honey, ginger ale, vodka, and lemon juice. stir to combine. pour the mixture over ice in a glass. garnish with a fig half and serve.
----------------------------------------


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Row 3 BLEU Score: 0.20548567130170534
Generated Recipe and Steps: 1.0 cup quick guacamole, 1.0 avocado, 1.0 tablespoon lime juice, 1.0 teaspoon kosher salt, veggies, 1.0 tablespoon extra virgin olive oil, 1.0 teaspoon red, /2 medium white onion, 1.0 teaspoon ground cumin, 1.0 teaspoon /2 teaspoon chili powder, of red pepper flakes, 1.0 teaspoon kosher salt, 1.0 clove garlic, 1.0 tablespoon fresh lime juice, fajitas and garnishes, 1.0 package corn tortillas, 1.0 tablespoon olive oil spray, 2.0 large eggs, 1.0 cup crumbled feta or queso fresco cheese, 1.0 handful of fresh cilantro, freshly ground black pepper, hot sauce and/or your favorite salsa in a small bowl, combine the guacamole, avocado, lime juice and salt. set aside. in a large bowl, combine the veggies, olive oil, red, onion, cumin, chili powder, red pepper flakes and salt. toss to coat. in a large skillet, heat 1 tablespoon of olive oil over medium high heat. add the veggies and cook, stirring occasionally, until they begin to