In [1]:

# Install the fine-tuning stack. Versions are not pinned here; the logs captured further
# down in this notebook report Unsloth 2025.3.14, Transformers 4.48.3 and Torch 2.6.0+cu124.
!pip install unsloth transformers datasets trl --quiet


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.2/46.2 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m192.7/192.7 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m36.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.9/318.9 kB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.9/183.9 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# %% [code]
# Step 2: Import libraries and define our helper functions
import re
import torch
from datasets import Dataset
from unsloth import FastLanguageModel, is_bfloat16_supported
from transformers import TrainingArguments
from trl import SFTTrainer

# Alpaca-style prompt template (the one used in the unsloth notebook).
# It has three positional `{}` slots, filled in order with the instruction, the input
# context, and the response (the response slot is left empty when prompting for generation).
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# Fixed task instruction attached to every record (taken from the raw sample text).
INSTRUCTION = "Based on my book preferences and my current state, rank these movies from most to least likely to be enjoyed by me."

def format_sample(sample_text):
    """
    Turn one raw sample string into an instruction/input/output record.

    Everything before the first "###" is treated as the context and everything
    after it as the expected ranking; when there is no "###" the whole text is
    the context and the ranking is empty. Every occurrence of INSTRUCTION is
    removed from the context, and both parts are whitespace-stripped.

    Returns:
        dict with the keys "instruction", "input" and "output".
    """
    # partition() yields (text, "", "") when the delimiter is absent, which is
    # exactly the "no ranking" case.
    context, _, ranking = sample_text.partition("###")
    context = context.replace(INSTRUCTION, "")
    return dict(
        instruction=INSTRUCTION,
        input=context.strip(),
        output=ranking.strip(),
    )

def load_and_format_file(file_path):
    """
    Read a UTF-8 text file and return its samples as formatted records.

    The stripped file content is cut immediately before every occurrence of
    "I have rated the following books:" (a zero-width lookahead, so the marker
    stays at the start of its sample). Whitespace-only pieces are dropped and
    each remaining piece is passed through format_sample.

    Returns:
        list of the records produced by format_sample, in file order.
    """
    sample_start = re.compile(r"(?=I have rated the following books:)")

    with open(file_path, "r", encoding="utf-8") as handle:
        text = handle.read().strip()

    records = []
    for chunk in sample_start.split(text):
        if chunk.strip():
            records.append(format_sample(chunk))
    return records


In [None]:
# %% [code]
# Step 3: Load your dataset files (train and validation) and create a test set
# Make sure you upload your "train.txt" and "val.txt" files to Colab.
import random

TRAIN_FILE = "train.txt"
VAL_FILE = "val.txt"

# Fixed seed for the shuffle below. Without it every run produces a different
# train/val/test partition, so a model trained in one session could later be
# evaluated on samples it was trained on.
SPLIT_SEED = 3407

print("Loading and formatting training samples...")
train_samples = load_and_format_file(TRAIN_FILE)
print(f"Loaded {len(train_samples)} training samples.")

print("Loading and formatting validation samples...")
val_samples = load_and_format_file(VAL_FILE)
print(f"Loaded {len(val_samples)} validation samples.")

# Combine train and val samples to redistribute into train, val, and test
all_samples = train_samples + val_samples
total_samples = len(all_samples)
print(f"Total combined samples: {total_samples}")

# Define split ratios (e.g., 70% train, 15% val, 15% test)
train_ratio = 0.70
val_ratio = 0.15
test_ratio = 0.15
# The test split is "whatever is left", so make sure the three ratios are consistent.
assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, "split ratios must sum to 1"

# Calculate split indices
train_end = int(total_samples * train_ratio)
val_end = train_end + int(total_samples * val_ratio)

# Shuffle with a dedicated, seeded generator so the split is reproducible and
# independent of the global `random` state.
random.Random(SPLIT_SEED).shuffle(all_samples)

# Split into train, val, and test
train_samples_new = all_samples[:train_end]
val_samples_new = all_samples[train_end:val_end]
test_samples_new = all_samples[val_end:]

# Convert to Hugging Face Datasets
train_dataset = Dataset.from_list(train_samples_new)
val_dataset = Dataset.from_list(val_samples_new)
test_dataset = Dataset.from_list(test_samples_new)

# Every sample must land in exactly one split.
assert len(train_dataset) + len(val_dataset) + len(test_dataset) == total_samples

# Print sizes to confirm
print(f"New training set size: {len(train_dataset)}")
print(f"New validation set size: {len(val_dataset)}")
print(f"New test set size: {len(test_dataset)}")

# Optional: show the first record of each split to verify the formatting.
# Empty splits are skipped instead of raising an IndexError.
for prefix, split_name, split in (
    ("", "training", train_dataset),
    ("\n", "validation", val_dataset),
    ("\n", "test", test_dataset),
):
    if len(split) > 0:
        print(f"{prefix}Sample {split_name} record 0:")
        print(split[0])

In [None]:
# Spot-check one held-out record (index 5 appears to be an arbitrary pick).
test_dataset[5]

{'instruction': 'Based on my book preferences and my current state, rank these movies from most to least likely to be enjoyed by me.',
 'input': 'I have rated the following books:\n- "Adam Named the Animals from A to Z (Roma Downey\'s Little Angels)": 5.0 stars, I rated this when active with a heart rate of 83.0 bpm\n- "You Are My Sunshine (Caroline Jayne Church)": 5.0 stars, I rated this when active with a heart rate of 83.0 bpm\n- "Angels Watching Over Us (Roma Downey\'s Little Angels)": 5.0 stars, I rated this when active with a heart rate of 83.0 bpm\n- "Thankful For God\'s Blessings (Roma Downey\'s Little Angels)": 5.0 stars, I rated this when active with a heart rate of 83.0 bpm\n\nHere are the movies I want to watch, along with my current state:\n- "Little Angels: Abc\'s": I am currently active with a heart rate of 83.0 bpm\n- "Happy Days: Season 3": I am currently active with a heart rate of 83.0 bpm\n- "Happy Days: Season 4": I am currently active with a heart rate of 83.0 bpm

In [None]:
# %% [code]
# Step 4: Load the base model and its tokenizer through unsloth.
# The checkpoint is "unsloth/Llama-3.2-3B-Instruct"; MAX_SEQ_LENGTH is reused by
# the tokenization, trainer and evaluation cells further down.
MODEL_NAME = "unsloth/Llama-3.2-3B-Instruct"
MAX_SEQ_LENGTH = 2048

dtype = None          # None requests automatic dtype selection (author's note: torch.float16 also works on compatible GPUs)
load_in_4bit = False  # Switch to True to load with 4-bit quantization

print("Loading model and tokenizer...")
loader_options = {
    "model_name": MODEL_NAME,
    "max_seq_length": MAX_SEQ_LENGTH,
    "dtype": dtype,
    "load_in_4bit": load_in_4bit,
}
model, tokenizer = FastLanguageModel.from_pretrained(**loader_options)
print("Model and tokenizer loaded.")


Loading model and tokenizer...
==((====))==  Unsloth 2025.3.14: Fast Llama patching. Transformers: 4.48.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model and tokenizer loaded.


In [None]:
# Mount Google Drive at /content/drive (Colab-only API).
# NOTE(review): no other cell visible in this notebook reads from or writes to the
# mount point — confirm the mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# %% [code]
# Step 5: Wrap the base model with LoRA adapters for parameter-efficient fine-tuning.
# Only the attention projections (q/k/v/o) are targeted.
# NOTE(review): `model` is rebound to the wrapped model, so re-running this cell without
# re-running Step 4 would pass an already-wrapped model — confirm that is harmless.
print("Adding LoRA adapters...")
lora_settings = dict(
    r=16,  # LoRA rank; adjust as needed
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)
model = FastLanguageModel.get_peft_model(model, **lora_settings)
print("LoRA adapters added.")


Adding LoRA adapters...


Not an error, but Unsloth cannot patch MLP layers with our manual autograd engine since either LoRA adapters
are not enabled or a bias term (like in Qwen) is used.
Unsloth 2025.3.14 patched 28 layers with 28 QKV layers, 28 O layers and 0 MLP layers.


LoRA adapters added.


In [None]:
# %% [code]
# Step 6: Create a function to merge the instruction, input, and output into the final prompt.
def formatting_prompts_func(examples):
    """
    Build the training text for a batch of records.

    `examples` is a column-oriented batch with the keys 'instruction', 'input'
    and 'output'. Each row is rendered through `alpaca_prompt` and terminated
    with the tokenizer's EOS token.

    Returns:
        {"text": [...]} with one rendered string per row.
    """
    eos = tokenizer.eos_token
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    return {"text": [alpaca_prompt.format(*row) + eos for row in rows]}

# Add the rendered "text" column to the train and validation splits (batched map).
# The test split is not mapped here; the evaluation cell builds its prompts directly
# from each record's "instruction" and "input".
print("Formatting samples into the alpaca-style prompt...")
train_dataset = train_dataset.map(formatting_prompts_func, batched=True)
val_dataset = val_dataset.map(formatting_prompts_func, batched=True)
print("Formatting complete.")


Formatting samples into the alpaca-style prompt...


Map:   0%|          | 0/700 [00:00<?, ? examples/s]

Map:   0%|          | 0/150 [00:00<?, ? examples/s]

Formatting complete.


In [None]:
# %% [code]
# Step 7: Tokenize the dataset.
# Here we tokenize the new "text" field that contains our full prompt.
def tokenize_function(examples):
    """
    Tokenize the "text" column of a batch for causal-LM training.

    Every sequence is truncated or padded to MAX_SEQ_LENGTH. Labels mirror
    input_ids, except that padded positions (attention_mask == 0) are set to
    -100 so the loss ignores them. A plain copy of input_ids would treat the
    padding as prediction targets whenever the data collator keeps the labels
    it is given.

    Args:
        examples: batch dict containing a "text" list of strings.

    Returns:
        The tokenizer output with an added "labels" entry (one list per row).
    """
    ignore_index = -100  # label value skipped by the cross-entropy loss
    tokenized = tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=MAX_SEQ_LENGTH,
    )
    attention_masks = tokenized.get("attention_mask")
    if attention_masks is None:
        # No mask available: fall back to labels identical to input_ids.
        tokenized["labels"] = [list(ids) for ids in tokenized["input_ids"]]
    else:
        # Mask by attention_mask rather than by pad-token id, so real tokens are
        # kept even if the pad token shares an id with another token.
        tokenized["labels"] = [
            [token if keep else ignore_index for token, keep in zip(ids, mask)]
            for ids, mask in zip(tokenized["input_ids"], attention_masks)
        ]
    return tokenized

# Tokenize both splits in batches. `remove_columns` drops every pre-existing column,
# so only the fields returned by tokenize_function remain in the tokenized datasets.
print("Tokenizing training and validation datasets...")
tokenized_train = train_dataset.map(tokenize_function, batched=True, remove_columns=train_dataset.column_names)
tokenized_val = val_dataset.map(tokenize_function, batched=True, remove_columns=val_dataset.column_names)
print("Tokenization complete.")


Tokenizing training and validation datasets...


Map:   0%|          | 0/700 [00:00<?, ? examples/s]

Map:   0%|          | 0/150 [00:00<?, ? examples/s]

Tokenization complete.


In [None]:
# %% [code]
# Step 8: Define training arguments and initialize the Trainer.
# Mixed-precision mode: bf16 when the GPU supports it, fp16 otherwise.
_use_bf16 = is_bfloat16_supported()

# Effective batch size is per_device_train_batch_size * gradient_accumulation_steps = 8.
# Evaluation and checkpointing both run once per epoch, which load_best_model_at_end
# needs so that a checkpoint exists for every evaluated state.
training_args = TrainingArguments(
    output_dir='./results_unsloth',
    per_device_train_batch_size=2,       # Adjust based on your GPU memory
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    weight_decay=0.01,
    warmup_steps=5,
    num_train_epochs=3,                  # Set to train for 3 full epochs
    logging_steps=1,
    eval_strategy="epoch",               # current name of the deprecated `evaluation_strategy`
    save_strategy="epoch",
    load_best_model_at_end=True,
    optim="adamw_8bit",                  # Recommended by unsloth for efficiency
    fp16=not _use_bf16,
    bf16=_use_bf16,
)

# Build the trainer on the pre-tokenized splits from Step 7.
# Those datasets no longer contain a "text" column (it was dropped via remove_columns),
# so `dataset_text_field` is passed but, per the inline note, not relied upon.
print("Initializing SFTTrainer...")
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    dataset_text_field="text",  # Not used here because we already tokenized everything.
    max_seq_length=MAX_SEQ_LENGTH,
    dataset_num_proc=2,
    packing=False,              # Set to True if you wish to pack shorter sequences
    args=training_args,
)
print("Trainer initialized.")


Initializing SFTTrainer...




Trainer initialized.


In [None]:
# %% [code]
# Step 9: Run fine-tuning with the trainer configured in Step 8.
# Evaluation and checkpoint saving happen once per epoch, as set in training_args.
print("Starting training...")
trainer.train()
print("Training completed.")


Starting training...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 700 | Num Epochs = 3 | Total steps = 261
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 9,175,040/3,221,924,864 (0.28% trained)
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmustafamansoor[0m ([33mmustafamansoor-fast-nuces[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Unsloth: Will smartly offload gradients to save VRAM!


Epoch,Training Loss,Validation Loss
1,0.5146,0.670763
2,0.6486,0.6621


Unsloth: Not an error, but LlamaForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


Training completed.


In [None]:


# %% [code]
# Step 10: Save the fine-tuned model and tokenizer.
# The output directory is defined once so the two save calls and the log message
# cannot drift apart.
FINE_TUNED_DIR = './fine_tuned_llama_unsloth'

print("Saving the fine-tuned model and tokenizer...")
model.save_pretrained(FINE_TUNED_DIR)
tokenizer.save_pretrained(FINE_TUNED_DIR)
print(f"Model and tokenizer saved to '{FINE_TUNED_DIR}'.")


Saving the fine-tuned model and tokenizer...
Model and tokenizer saved to './fine_tuned_llama_unsloth'.


In [None]:
import torch
# Ask PyTorch's CUDA caching allocator to release cached memory before the model is
# reloaded in the next cell.
# NOTE(review): `model` and `trainer` from the training cells are still referenced at
# this point, so presumably only a small part of GPU memory can be returned — confirm.
torch.cuda.empty_cache()

In [None]:
import torch
from unsloth import FastLanguageModel

# Model directory and maximum sequence length
MODEL_DIR = './fine_tuned_llama_unsloth'  # Set this to your own model directory
MAX_SEQ_LENGTH = 2048

# Load the fine-tuned model and its tokenizer
print("Model aur tokenizer load ho raha hai...")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_DIR,
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=None,  # Auto-detect
    load_in_4bit=False,  # Set to True if the model was trained in 4-bit
)

# Switch the model to inference mode. The call is used for its in-place effect and its
# return value is deliberately not assigned: rebinding `model` to the result would leave
# `model` as None on unsloth versions where for_inference returns nothing.
FastLanguageModel.for_inference(model)
print("Model aur tokenizer successfully load ho gaya!")

Model aur tokenizer load ho raha hai...
==((====))==  Unsloth 2025.3.14: Fast Llama patching. Transformers: 4.48.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model aur tokenizer successfully load ho gaya!


In [None]:
import re
import math
from datasets import Dataset

# Define the Alpaca prompt template (identical to the template defined in the Step 2 cell).
# The three `{}` slots take the instruction, the input context and the response.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# Define the helper functions used for evaluation
def parse_ranked_list(text):
    """
    Extract the quoted titles from a numbered ranking such as::

        1. "First Movie"
        2. "Second Movie"

    Lines are whitespace-stripped before matching. Blank lines are ignored, and
    non-matching lines *before* the first ranked item (e.g. a short preamble)
    are skipped. Once at least one item has been read, the first non-blank line
    that is not a ranked item ends the list, so trailing text after the ranking
    is not parsed.

    Args:
        text: raw ranking text (model output or ground truth).

    Returns:
        list of titles in ranked order; empty if no ranked item is found.
    """
    movies = []
    for raw_line in text.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            # Blank separator lines do not terminate the list.
            continue
        match = re.match(r'\d+\.\s+"(.+?)"', line)
        if match:
            movies.append(match.group(1))
        elif movies:
            # The list has started and this line is not part of it: stop.
            break
    return movies

def compute_dcg(relevances):
    """
    Discounted cumulative gain of a relevance list given in ranked order.

    Uses the standard discount: the item at 1-based rank r contributes
    rel / log2(r + 1). The previous form divided the item at 0-based index i by
    log2(i + 1), which left the second item undiscounted (log2(2) == 1), so
    swapping the top two items did not change the score.

    Args:
        relevances: sequence of numeric gains, best-ranked first.

    Returns:
        The DCG as a float; 0.0 for an empty (or None) input.
    """
    if not relevances:
        return 0.0
    dcg = 0.0
    for rank, relevance in enumerate(relevances, start=1):
        dcg += relevance / math.log2(rank + 1)
    return dcg

def compute_ndcg(predicted_movies, ground_truth_movies):
    """
    Normalized DCG of a predicted ranking against a ground-truth ranking.

    With N ground-truth titles, the title at 0-based position i gets gain N - i.
    Predicted titles that are not in the ground truth get gain 0. A title that
    is predicted more than once earns its gain only at its first occurrence and
    0 afterwards; otherwise a repeated title would be counted several times and
    the score could exceed 1.

    Args:
        predicted_movies: predicted titles, best first.
        ground_truth_movies: reference titles, best first.

    Returns:
        DCG / ideal DCG as a float, or 0.0 when the ground truth is empty or the
        ideal DCG is not positive.
    """
    N = len(ground_truth_movies)
    if N == 0:
        return 0.0
    # Assign relevance scores: highest score to the top-ranked ground truth movie.
    relevance_scores = {movie: N - i for i, movie in enumerate(ground_truth_movies)}

    already_scored = set()
    predicted_relevances = []
    for movie in predicted_movies:
        if movie in already_scored:
            # Repeats keep their position in the ranking but earn nothing.
            predicted_relevances.append(0)
        else:
            already_scored.add(movie)
            predicted_relevances.append(relevance_scores.get(movie, 0))

    ideal_relevances = sorted(relevance_scores.values(), reverse=True)
    dcg = compute_dcg(predicted_relevances)
    idcg = compute_dcg(ideal_relevances)
    return dcg / idcg if idcg > 0 else 0.0

# Evaluation loop: generate a ranking for every test record and score it with NDCG.
ndcg_scores = []
failed_samples = 0  # records whose generation or scoring raised an exception
for idx, sample in enumerate(test_dataset):
    try:
        # Create the prompt using the sample's instruction and input (empty response slot).
        prompt = alpaca_prompt.format(sample["instruction"], sample["input"], "")

        # Tokenize the prompt.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=MAX_SEQ_LENGTH
        ).to("cuda")

        # Generate a response.
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=False  # Deterministic output for evaluation.
        )

        # Decode only the newly generated tokens. The returned sequence starts with the
        # prompt tokens, so slicing at the prompt length isolates the response. Searching
        # the decoded text for "### Response:" instead yields a bogus offset whenever
        # the marker is missing (e.g. when the prompt was truncated).
        prompt_length = inputs["input_ids"].shape[1]
        predicted_output = tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True
        ).strip()

        # Parse the predicted and ground truth ranked movie lists.
        predicted_movies = parse_ranked_list(predicted_output)
        ground_truth_movies = parse_ranked_list(sample["output"])

        # Compute NDCG for the current sample.
        ndcg = compute_ndcg(predicted_movies, ground_truth_movies)
        ndcg_scores.append(ndcg)

        # Print a note for partial matches.
        if set(predicted_movies) != set(ground_truth_movies):
            print(f"Sample {idx}: Partial or mismatched set.")
            print(f"Predicted: {predicted_movies}")
            print(f"Ground Truth: {ground_truth_movies}")
            print(f"NDCG: {ndcg:.4f}\n")

    except Exception as e:
        # Best-effort evaluation: keep going, but count the failure so the reported
        # average is not silently computed over fewer samples.
        failed_samples += 1
        print(f"Sample {idx} encountered an error: {e}")

    # Provide progress updates.
    if (idx + 1) % 10 == 0:
        print(f"{idx + 1} samples processed.")

# Calculate and print the average NDCG.
if ndcg_scores:
    average_ndcg = sum(ndcg_scores) / len(ndcg_scores)
    print(f"Average NDCG: {average_ndcg:.4f}")
else:
    print("No valid NDCG score calculated.")

if failed_samples:
    print(f"{failed_samples} samples raised an error and are excluded from the average.")
