In [2]:
!pip install datasets
!pip install transformers
!pip install evaluate
!pip install accelerate -U
!pip install transformers[torch]
!pip install peft

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
Installing collected packages: evaluate
Successfully installed evaluate-0.4.3
Collecting torch<2.7,>=2.1 (from transformers[torch])
  Downloading torch-2.6.0-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting sympy==1.13.1 (from torch<2.7,>=2.1->transformers[torch])
  Downloading sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Downloading torch-2.6.0-cp312-cp312-win_amd64.whl (204.1 MB)
   ---------------------------------------- 0.0/204.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/204.1 MB ? eta -:--:--
   ---------------------------------------- 0.5/204.1 MB 1.4 MB/s eta 0:02:27
   ---------------------------------------- 1.3/204.1 MB 2.6 MB/s eta 0:01:19
   ---------------------------------------- 2.1/204.1 MB 3.2 MB/s eta 0:01:04
    --------------------------------------- 2.9/204.1 MB 3.4 MB/s eta 0:00:59
    --------------

In [16]:
import torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'

from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, GenerationConfig
import evaluate
import pandas as pd
import numpy as np

In [18]:
huggingface_dataset_name = "allenai/sciq"
dataset = load_dataset(huggingface_dataset_name)

In [20]:
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
base_model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [22]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(base_model))

trainable model parameters: 1100048384
all model parameters: 1100048384
percentage of trainable model parameters: 100.00%


In [28]:
# i = 20
# question = dataset['test'][i]['question']
# support = dataset['test'][i]['support']
# correct_answer = dataset['test'][i]['correct_answer']

# # Create prompt for question answering
# prompt = f"Context: {support}\nQuestion: {question}\nAnswer:"

# # Tokenize and generate answer
# input_ids = tokenizer(prompt, return_tensors="pt").input_ids
# output_ids = base_model.generate(input_ids, max_new_tokens=50)
# output = tokenizer.decode(output_ids[0], skip_special_tokens=True)

# # Print results
# print(f"Input Prompt:\n{prompt}")
# print("--------------------------------------------------------------------")
# print("Correct answer (human-annotated):")
# print(correct_answer)
# print("--------------------------------------------------------------------")
# print("Model generated answer:")
# print(output)

i = 20
question = dataset['test'][i]['question']
support = dataset['test'][i]['support']
correct_answer = dataset['test'][i]['correct_answer']

# ✨ Refined prompt: Keep it direct and task-oriented
prompt = f"Answer the question based on the context.\nContext: {support}\nQuestion: {question}\nAnswer:"

input_ids = tokenizer(prompt, return_tensors="pt").input_ids
output_ids = base_model.generate(
    input_ids,
    max_new_tokens=20,
    do_sample=False,
    temperature=0.7,
    top_p=0.9,
    eos_token_id=tokenizer.eos_token_id
)
output = tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Only extract the final answer part (remove repeated prompt)
model_answer = output.replace(prompt, "").strip()

# 🖨️ Output the result
print(f"Input Prompt:\n{prompt}")
print("--------------------------------------------------------------------")
print("Correct answer (human-annotated):")
print(correct_answer)
print("--------------------------------------------------------------------")
print("Model generated answer:")
print(model_answer)

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Input Prompt:
Answer the question based on the context.
Context: Paracrine Signaling Signals that act locally between cells that are close together are called paracrine signals. Paracrine signals move by diffusion through the extracellular matrix. These types of signals usually elicit quick responses that last only a short amount of time. In order to keep the response localized, paracrine ligand molecules are normally quickly degraded by enzymes or removed by neighboring cells. Removing the signals will reestablish the concentration gradient for the signal, allowing them to quickly diffuse through the intracellular space if released again. One example of paracrine signaling is the transfer of signals across synapses between nerve cells. A nerve cell consists of a cell body, several short, branched extensions called dendrites that receive stimuli, and a long extension called an axon, which transmits signals to other nerve cells or muscle cells. The junction between nerve cells where sig

In [30]:
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

def tokenize_function(example):
    # Prompt format suitable for LLaMA
    start_prompt = "Answer the question based on the context.\n"
    context_template = "Context: {context}\nQuestion: {question}\nAnswer:"

    # Build prompt
    prompt = [
        context_template.format(context=support, question=question)
        for support, question in zip(example['support'], example['question'])
    ]

    # Tokenize input and output
    example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids
    example['labels'] = tokenizer(example['correct_answer'], padding="max_length", truncation=True, return_tensors="pt").input_ids

    return example

# Apply tokenizer
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Remove unused columns to prepare for training
tokenized_datasets = tokenized_datasets.remove_columns(['question', 'support', 'distractor1', 'distractor2', 'distractor3', 'correct_answer'])

# Optional: Subsample every 100th example for faster testing
tokenized_datasets = tokenized_datasets.filter(lambda example, index: index % 100 == 0, with_indices=True)

Map:   0%|          | 0/11679 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/11679 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [38]:
from peft import LoraConfig, get_peft_model, TaskType
from peft import prepare_model_for_kbit_training

def print_trainable_parameters(model):
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())
    print(f"Trainable params: {trainable_params} | Total params: {total_params} | Trainable%: {100 * trainable_params / total_params:.2f}%")


# Ensure base_model is compatible with PEFT
base_model = prepare_model_for_kbit_training(base_model)

# Define LoRA config for causal language modeling
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,  # ✅ Changed to causal
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none"
)

# Apply PEFT (LoRA)
peft_model_train = get_peft_model(base_model, lora_config)

# Print number of trainable parameters
print_trainable_parameters(peft_model_train)


The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers and GPU quantization are unavailable.


Trainable params: 1126400 | Total params: 1101174784 | Trainable%: 0.10%


In [42]:
from transformers import TrainingArguments

output_dir = "./peft-sciq-qa-training"

training_args = TrainingArguments(
    output_dir="./peft-sciq-cpu-test",
    per_device_train_batch_size=1,
    learning_rate=1e-3,
    num_train_epochs=1,
    auto_find_batch_size=True,
    logging_steps=10,
    report_to="none",
    fp16=False,
)

In [61]:
tokenized_datasets["train"] = tokenized_datasets["train"].select(range(1))


In [63]:
# import time

# trainer = Trainer(
#     model=peft_model_train,
#     args=training_args,
#     train_dataset=tokenized_datasets,
# )

# start = time.time()
# trainer.train()
# print("Training time:", time.time() - start)


import time

trainer = Trainer(
    model=peft_model_train,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
)

start = time.time()
trainer.train()
print("Training time:", time.time() - start)


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


Training time: 382.760062456131


In [65]:
peft_model_train.save_pretrained("lora-sciq-output")
tokenizer.save_pretrained("lora-sciq-output")

('lora-sciq-output\\tokenizer_config.json',
 'lora-sciq-output\\special_tokens_map.json',
 'lora-sciq-output\\chat_template.jinja',
 'lora-sciq-output\\tokenizer.json')

In [67]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig

# Load adapter config and base model
peft_config = PeftConfig.from_pretrained("lora-sciq-output")
base_model = AutoModelForCausalLM.from_pretrained(peft_config.base_model_name_or_path)
model = PeftModel.from_pretrained(base_model, "lora-sciq-output")

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("lora-sciq-output")


In [83]:
prompt = "Context: Paracrine signals move by what method through the extracellular matrix?\nAnswer:"

input_ids = tokenizer(prompt, return_tensors="pt").input_ids
output = tokenizer.decode(model.generate(input_ids, max_new_tokens=50)[0], skip_special_tokens=True)
print("Model output:\n", output)


Model output:
 Context: Paracrine signals move by what method through the extracellular matrix?
Answer: Paracrine signals move by diffusion through the extracellular matrix.


In [73]:
from datasets import load_dataset

dataset = load_dataset("allenai/sciq")
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support'],
        num_rows: 11679
    })
    validation: Dataset({
        features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support'],
        num_rows: 1000
    })
    test: Dataset({
        features: ['question', 'distractor3', 'distractor1', 'distractor2', 'correct_answer', 'support'],
        num_rows: 1000
    })
})


In [79]:
for i in range(10):
    data = dataset["train"][i]
    print(f"\n--- Example {i + 1} ---")
    print(f"Question: {data['question']}")
    print(f"Support: {data['support']}")
    print(f"Correct Answer: {data['correct_answer']}")
    print(f"Distractor 1: {data['distractor1']}")
    print(f"Distractor 2: {data['distractor2']}")
    print(f"Distractor 3: {data['distractor3']}")



--- Example 1 ---
Question: What type of organism is commonly used in preparation of foods such as cheese and yogurt?
Support: Mesophiles grow best in moderate temperature, typically between 25°C and 40°C (77°F and 104°F). Mesophiles are often found living in or on the bodies of humans or other animals. The optimal growth temperature of many pathogenic mesophiles is 37°C (98°F), the normal human body temperature. Mesophilic organisms have important uses in food preparation, including cheese, yogurt, beer and wine.
Correct Answer: mesophilic organisms
Distractor 1: protozoa
Distractor 2: gymnosperms
Distractor 3: viruses

--- Example 2 ---
Question: What phenomenon makes global winds blow northeast to southwest or the reverse in the northern hemisphere and northwest to southeast or the reverse in the southern hemisphere?
Support: Without Coriolis Effect the global winds would blow north to south or south to north. But Coriolis makes them blow northeast to southwest or the reverse in th