In [None]:
pip install unsloth transformers trl



In [None]:
from datasets import load_dataset
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template, standardize_sharegpt
from trl import SFTTrainer, SFTConfig
import torch

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
# Load the Llama 3.2 3B Instruct checkpoint in 4-bit together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct",
    max_seq_length=2048,
    load_in_4bit=True,
)

==((====))==  Unsloth 2025.7.3: Fast Llama patching. Transformers: 4.53.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.1
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.31.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
# Wrap the base model with rank-16 adapters on the attention and MLP projection modules.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=lora_target_modules,
)

Unsloth 2025.7.3 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [None]:
# Rebind the tokenizer to a copy configured with the "llama-3.1" chat template.
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

In [None]:
# Load the "train" split of the argilla/ultrafeedback-binarized-preferences dataset.
dataset = load_dataset("argilla/ultrafeedback-binarized-preferences", split="train")

In [None]:
def convert_format1_to_format2(example):
    """Turn one preference record into a two-turn chat.

    Reads ``example["instruction"]`` and ``example["chosen_response"]`` and
    returns a dict whose only key, ``"conversations"``, holds a user message
    followed by an assistant message (each a ``{"role", "content"}`` dict).
    """
    user_turn = dict(role="user", content=example["instruction"])
    assistant_turn = dict(role="assistant", content=example["chosen_response"])
    return {"conversations": [user_turn, assistant_turn]}

In [None]:
# Convert every row to the chat layout and drop all of the dataset's original columns.
original_columns = dataset.column_names
dataset_reformatted = dataset.map(
    convert_format1_to_format2,
    remove_columns=original_columns,
)

Map:   0%|          | 0/63619 [00:00<?, ? examples/s]

In [None]:
# Inspect the first converted record.
dataset_reformatted[0]

{'conversations': [{'content': 'Can you write a C++ program that prompts the user to enter the name of a country and checks if it borders the Mediterranean Sea? Here\'s some starter code to help you out:\n#include <iostream>\n#include <string>\nusing namespace std;\nint main() {\n    string country;\n    // prompt user for input\n    cout << "Enter the name of a country: ";\n    cin >> country;\n    // check if country borders the Mediterranean Sea\n    // [C++ code]\n    return 0;\n}',
   'role': 'user'},
  {'content': 'Here\'s a C++ program that prompts the user to enter the name of a country and checks if it borders the Mediterranean Sea:\n\n#include <iostream>\n#include <string>\n#include <set>\n#include <map>\n#include <algorithm>\n\nusing namespace std;\n\nint main() {\n    // store countries and their bordering seas in a map\n    map<string, set<string>> countries;\n    countries["Algeria"] = {"Mediterranean Sea", "North African Coast"};\n    countries["France"] = {"Mediterranea

In [None]:
# Pass the converted dataset through Unsloth's standardize_sharegpt helper.
sharegpt_dataset = standardize_sharegpt(dataset_reformatted)

Unsloth: Standardizing formats (num_proc=2):   0%|          | 0/63619 [00:00<?, ? examples/s]

In [None]:
# Inspect the first record after standardisation.
sharegpt_dataset[0]

{'conversations': [{'content': 'Can you write a C++ program that prompts the user to enter the name of a country and checks if it borders the Mediterranean Sea? Here\'s some starter code to help you out:\n#include <iostream>\n#include <string>\nusing namespace std;\nint main() {\n    string country;\n    // prompt user for input\n    cout << "Enter the name of a country: ";\n    cin >> country;\n    // check if country borders the Mediterranean Sea\n    // [C++ code]\n    return 0;\n}',
   'role': 'user'},
  {'content': 'Here\'s a C++ program that prompts the user to enter the name of a country and checks if it borders the Mediterranean Sea:\n\n#include <iostream>\n#include <string>\n#include <set>\n#include <map>\n#include <algorithm>\n\nusing namespace std;\n\nint main() {\n    // store countries and their bordering seas in a map\n    map<string, set<string>> countries;\n    countries["Algeria"] = {"Mediterranean Sea", "North African Coast"};\n    countries["France"] = {"Mediterranea

In [None]:
def _render_conversations(samples):
    """Render each conversation of a batch to a single string with the tokenizer's chat template."""
    rendered = []
    for convo in samples["conversations"]:
        rendered.append(tokenizer.apply_chat_template(convo, tokenize=False))
    return {"text": rendered}


# Batched map that produces the "text" column the trainer is pointed at.
new_dataset = sharegpt_dataset.map(_render_conversations, batched=True)

Map:   0%|          | 0/63619 [00:00<?, ? examples/s]

In [None]:
from transformers import TrainingArguments

In [None]:
# Build the supervised fine-tuning trainer over the pre-rendered "text" column.
trainer = SFTTrainer(
    model = model,
    # Hand over the tokenizer configured earlier in the notebook. In the recorded run,
    # where no tokenizer was passed, tokenizer files were downloaded at this step.
    tokenizer = tokenizer,
    train_dataset = new_dataset,
    dataset_text_field = "text",
    max_seq_length = 1024,
    # SFTConfig (imported with SFTTrainer) is the config class this trainer is
    # designed around; it accepts the same arguments used here.
    args = SFTConfig(
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 4,  # 1 x 4 = 4 sequences per optimiser step
        warmup_steps = 5,
        max_steps = 100,
        learning_rate = 2e-4,
        # Exactly one of fp16/bf16 is enabled, depending on GPU bf16 support.
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        output_dir = "outputs",
    ),
)

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

Unsloth: Tokenizing ["text"]:   0%|          | 0/63619 [00:00<?, ? examples/s]

In [None]:
# Run the fine-tuning loop; the returned TrainOutput is displayed as the cell result.
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 63,619 | Num Epochs = 1 | Total steps = 100
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 4 x 1) = 4
 "-____-"     Trainable parameters = 24,313,856 of 3,237,063,680 (0.75% trained)
[34m[1mwandb[0m: Currently logged in as: [33mabhipodila1[0m ([33mabhipodila1-student[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,2.4757
2,1.7586
3,1.8375
4,1.5614
5,1.1585
6,1.3675
7,1.6066
8,1.4575
9,1.6557
10,1.3417




TrainOutput(global_step=100, training_loss=1.4594554221630096, metrics={'train_runtime': 334.4654, 'train_samples_per_second': 1.196, 'train_steps_per_second': 0.299, 'total_flos': 3186178937776128.0, 'train_loss': 1.4594554221630096})

In [None]:
# Save the trained model and the tokenizer (including its chat template) into the same
# directory so that the directory can be reloaded on its own later.
save_dir = "fine_tuned_SIFT_model"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

In [None]:
import torch
# Put the model into evaluation mode before generating.
model.eval()
def generate_response(user_input, max_new_tokens=300, temperature=0.7, top_p=0.95):
    """Generate the assistant's reply to a single user message.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Args:
        user_input: Text of the user turn.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling cutoff passed to ``model.generate``.

    Returns:
        Only the newly generated text (the prompt is not echoed back),
        with surrounding whitespace stripped.
    """
    messages = [{"role": "user", "content": user_input}]

    # Render the chat prompt, ending with the header that cues the assistant turn.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # The rendered template text is expected to carry its own special tokens already,
    # so the tokenizer must not prepend another set (NOTE(review): confirm for this template).
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )

    # generate() returns prompt + continuation. Slice by prompt length instead of
    # splitting on a textual marker: the decoded text has no "Assistant:" marker,
    # so the old split returned the whole prompt along with the reply.
    prompt_length = inputs["input_ids"].shape[-1]
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()


In [None]:
# Smoke-test the fine-tuned model with a single prompt.
print(generate_response("Design a conversation between a customer and a customer service agent."))

system

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

user

Design a conversation between a customer and a customer service agent.assistant

Customer: Hi, I'm having some issues with my order. The product I received does not match the one I ordered.

Customer Service Agent: I apologize for the inconvenience. Can you please provide me with your order number so that I can look into this further?

Customer: My order number is #1234. I also ordered a different color, but I received a product that is a different color.

Customer Service Agent: I apologize for the mistake. I will go ahead and check on the status of your order. Can you please confirm the color you ordered, so I can ensure that the correct product is sent out to you?

Customer: The correct color is blue. I ordered a blue shirt, but I received a yellow shirt.

Customer Service Agent: Thank you for the confirmation. I have checked on the status of your order and I can see that there was an error in the shipping

In [None]:
!zip -r all_files_sift.zip /content

  adding: content/ (stored 0%)
  adding: content/.config/ (stored 0%)
  adding: content/.config/default_configs.db (deflated 98%)
  adding: content/.config/gce (stored 0%)
  adding: content/.config/logs/ (stored 0%)
  adding: content/.config/logs/2025.07.10/ (stored 0%)
  adding: content/.config/logs/2025.07.10/13.34.37.820504.log (deflated 58%)
  adding: content/.config/logs/2025.07.10/13.33.45.486303.log (deflated 92%)
  adding: content/.config/logs/2025.07.10/13.34.38.537881.log (deflated 57%)
  adding: content/.config/logs/2025.07.10/13.34.23.548658.log (deflated 86%)
  adding: content/.config/logs/2025.07.10/13.34.14.671755.log (deflated 58%)
  adding: content/.config/logs/2025.07.10/13.34.29.179991.log (deflated 58%)
  adding: content/.config/configurations/ (stored 0%)
  adding: content/.config/configurations/config_default (deflated 15%)
  adding: content/.config/.last_survey_prompt.yaml (stored 0%)
  adding: content/.config/config_sentinel (stored 0%)
  adding: content/.config

In [None]:
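# Disabled example: reload the saved model for inference in a fresh session.
# inference_model, inference_tokenizer = FastLanguageModel.from_pretrained(
#     model_name="fine_tuned_SIFT_model",  # directory written by model.save_pretrained(...)
#     max_seq_length=1024,
#     load_in_4bit=True
# )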

In [None]:
# text_prompts = [
#     "Design a conversation between a customer and a customer service agent."
# ]

# for prompt in text_prompts:
#   formatted_prompt = inference_tokenizer.apply_chat_template([{
#       "role": "user",
#       "content": prompt
#       }], tokenize=False, add_generation_prompt=True)

#   model_inputs = inference_tokenizer(formatted_prompt, return_tensors="pt").to("cuda")
#   generated_ids = inference_model.generate(
#       **model_inputs,
#       max_new_tokens=512,
#       temperature=0.7,
#       do_sample=True,
#       pad_token_id=inference_tokenizer.pad_token_id
#   )
#   response = inference_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
#   print(response)