In [18]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install datasets
!pip install accelerate

In [2]:
%%capture
import torch
major_version, minor_version = torch.cuda.get_device_capability()
# Must install separately since Colab has torch 2.2.1, which breaks packages
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
pass

In [3]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
# fourbit_models = [
#     "unsloth/mistral-7b-bnb-4bit",
#     "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
#     "unsloth/llama-2-7b-bnb-4bit",
#     "unsloth/c",
#     "unsloth/gemma-7b-it-bnb-4bit", # Instruct version of Gemma 7b
#     "unsloth/gemma-2b-bnb-4bit",
#     "unsloth/gemma-2b-it-bnb-4bit", # Instruct version of Gemma 2b
#     "unsloth/llama-3-8b-bnb-4bit", # [NEW] 15 Trillion token Llama-3
# ] # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

==((====))==  Unsloth: Fast Mistral patching release 2024.4
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.2.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. Xformers = 0.0.25.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unused kwargs: ['quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


model.safetensors:   0%|          | 0.00/4.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/971 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

In [4]:
# Do model patching and add fast LoRA weights
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    use_gradient_checkpointing = True,
    random_state = 3407,
    max_seq_length = max_seq_length,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)


Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [5]:
%%capture
import torch
from transformers import TrainingArguments, Trainer
from datasets import load_dataset, Dataset, DatasetDict
from sklearn.model_selection import train_test_split
import json
import pandas as pd

In [6]:
%%capture
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
from datasets import Dataset, DatasetDict
import pandas as pd

In [7]:
# Load the datasets
train_df = pd.read_csv('NLP_Recipe_train.csv')
test_df = pd.read_csv('NLP_Recipe_test.csv')

In [8]:
recipe_prompt = """Given the following key ingredients and details, generate the full ingredient list with quantities and cooking steps:

### Key Ingredients and Details:
{}

### Full Ingredients and Steps:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    # Here 'ingredients' and 'steps' are assumed to be lists of strings
    key_ingredients = examples["ner"]
    ingredients = [' '.join(ing) if isinstance(ing, list) else ing for ing in examples["ingredients"]]
    steps = [' '.join(stp) if isinstance(stp, list) else stp for stp in examples["steps"]]
    full_recipe = [ing + " " + stp for ing, stp in zip(ingredients, steps)]
    prompts = []
    for key_ingredient, full in zip(key_ingredients, full_recipe):
        prompt = recipe_prompt.format(key_ingredient, full) + EOS_TOKEN
        prompts.append(prompt)
    return {"text": prompts}


In [9]:
from datasets import Dataset, DatasetDict
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)
datasets = DatasetDict({
    'train': train_dataset,
    'test': test_dataset
})
# Apply the formatting function to both train and test datasets
dataset = train_dataset.map(formatting_prompts_func, batched=True, remove_columns=datasets["train"].column_names)

Map:   0%|          | 0/6118 [00:00<?, ? examples/s]

In [12]:
from trl import SFTTrainer
from transformers import TrainingArguments

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

  self.pid = os.fork()


Map (num_proc=2):   0%|          | 0/6118 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [13]:
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla T4. Max memory = 14.748 GB.
4.625 GB of memory reserved.


In [14]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 6,118 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,1.4992
2,1.4194
3,1.2567
4,1.4599
5,1.3339
6,1.2231
7,1.0652
8,1.0395
9,1.0182
10,0.8555


In [15]:
from transformers import TextStreamer


In [16]:
# Sample input ingredients
ingredients_input = "chicken, garlic, onion, tomato, basil"

# Prepare the prompt according to the trained model's expected format
recipe_prompt = f"""Given the following key ingredients, generate the full ingredient list with quantities and cooking steps:

### Key Ingredients:
{ingredients_input}

### Full Ingredients and Steps:
"""

# Convert the prompt to model inputs
inputs = tokenizer(
    [recipe_prompt],
    return_tensors="pt"
).to('cuda')

# Initialize the TextStreamer for continuous output generation
text_streamer = TextStreamer(tokenizer)

# Generate output using the model
outputs = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128, use_cache=True)

# Decode the output
generated_steps = tokenizer.batch_decode(outputs, skip_special_tokens=True)

# Print the generated recipe and steps
print("Generated Recipe and Steps:", generated_steps[0])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<s> Given the following key ingredients, generate the full ingredient list with quantities and cooking steps:

### Key Ingredients:
chicken, garlic, onion, tomato, basil

### Full Ingredients and Steps:
1.0 pound chicken, 2.0 clove garlic, 1.0 onion, 1.0 tomato, 1.0 tablespoon basil in a large skillet, heat 1 tablespoon olive oil over medium heat . add chicken and cook, stirring occasionally, until chicken is no longer pink, about 5 minutes . add garlic, onion, tomato and basil . cook, stirring occasionally, until onion is translucent, about 5 minutes . add 1/2 cup water and bring to a boil . reduce heat to low and simmer, stir
Generated Recipe and Steps: Given the following key ingredients, generate the full ingredient list with quantities and cooking steps:

### Key Ingredients:
chicken, garlic, onion, tomato, basil

### Full Ingredients and Steps:
1.0 pound chicken, 2.0 clove garlic, 1.0 onion, 1.0 tomato, 1.0 tablespoon basil in a large skillet, heat 1 tablespoon olive oil over med

***Saving model***

In [17]:
model.save_pretrained("mistral_model")