In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

# Install Flash Attention 2 for softcapping support
import torch
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install --no-deps packaging ninja einops "flash-attn>=2.6.3"

In [3]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048
dtype = None
load_in_4bit = True

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [4]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-2-2b",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

==((====))==  Unsloth 2024.8: Fast Gemma2 patching. Transformers = 4.44.0.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [5]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

Unsloth 2024.8 patched 26 layers with 26 QKV layers, 26 O layers and 26 MLP layers.


In [6]:
from sklearn.model_selection import train_test_split
from datasets import Dataset

df = pd.read_csv('synthetic_dataset.csv')
train_df, test_df = train_test_split(df, test_size=0.2, random_state=10)

train_dataset = Dataset.from_dict({
    'instruction': ['You are a great expert in the field of vegetarianism and recipes for vegans. You can see how recipes are built, which flavor combinations are the most popular.' for _ in range(len(train_df))],
    'input': train_df['question'].tolist(),
    'output': train_df['answer'].tolist()
})

test_dataset = Dataset.from_dict({
    'instruction': ['You are a great expert in the field of vegetarianism and recipes for vegans. You can see how recipes are built, which flavor combinations are the most popular.' for _ in range(len(test_df))],
    'input': test_df['question'].tolist(),
    'output': test_df['answer'].tolist()
})

In [7]:
alpaca_prompt = """Below is an instruction that describe  s your role, paired with an input that provides question you need to answer. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN  # without this token generation goes on forever!
        texts.append(text)
    return {"text": texts}

train_dataset = train_dataset.map(formatting_prompts_func, batched=True)
test_dataset = test_dataset.map(formatting_prompts_func, batched=True)

Map:   0%|          | 0/80 [00:00<?, ? examples/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

In [8]:
from trl import SFTTrainer
from transformers import TrainingArguments

training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=512,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

Map (num_proc=2):   0%|          | 0/80 [00:00<?, ? examples/s]

In [9]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 80 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 30
 "-____-"     Number of trainable parameters = 20,766,720


Step,Training Loss
1,2.913
2,3.119
3,2.8335
4,2.5508
5,2.4371
6,2.2231
7,1.9466
8,1.8329
9,1.4001
10,1.3322


TrainOutput(global_step=30, training_loss=1.388383396466573, metrics={'train_runtime': 104.0159, 'train_samples_per_second': 2.307, 'train_steps_per_second': 0.288, 'total_flos': 763022048532480.0, 'train_loss': 1.388383396466573, 'epoch': 3.0})

In [10]:
responses = []
FastLanguageModel.for_inference(model)
for i in tqdm(test_dataset):
    inputs = tokenizer([alpaca_prompt.format(
        i['instruction'], # instruction
        i['input'], # input
        "") ], return_tensors = "pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)
    response = tokenizer.batch_decode(outputs)
    answ = response[0][response[0].index('Response:')+len('Response:')+1:]
    if '<eos>' in answ:
        answ = answ[:answ.index('<eos>')]
    responses.append(answ)

100%|██████████| 20/20 [02:03<00:00,  6.15s/it]


In [11]:
actual_answers = []
for i in test_dataset:
    actual_answers.append(i['output'])

In [None]:
%pip install sentence_transformers
from sentence_transformers import SentenceTransformer, util
encoding = SentenceTransformer('all-MiniLM-L6-v2')

In [13]:
mean_score = []
for i in range(20):
    reference_embeddings = encoding.encode(actual_answers[i], convert_to_tensor=True)
    generated_embeddings = encoding.encode(responses[i], convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(generated_embeddings, reference_embeddings)
    mean_score.append(cosine_scores.diag().mean().item())

print('Mean score STS: ', np.array(mean_score).mean())

Mean score STS:  0.7510703101754188


In [19]:
for i,v in enumerate(responses):
  print(i)
  print(v)
  print('---------------------------------------------------')

0
The recipe suggests cooking the stuffed roast in the pressure cooker for 45 minutes, but it's important to note that this is an estimate based on the size of the roast and the desired doneness.  Actual cooking times may vary depending on these factors.
---------------------------------------------------
1
There are various types of parasites, including bacteria, viruses, fungi, and protozoa. They can be transmitted through contaminated food, water, or direct contact with infected individuals.  These parasites can lead to various health issues, including digestive problems, weakened immune systems, and even more serious infections.
---------------------------------------------------
2
To ensure your family enjoys healthy meals while reducing calories, focus on flavor combinations that are naturally low in calories.  Use herbs, spices, and fresh vegetables to enhance the taste without adding extra fat or sugar.  Experiment with different flavor profiles to keep things interesting.
----