In [1]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments
)
from peft import LoraConfig, prepare_model_for_kbit_training
from trl import SFTTrainer
from datasets import load_dataset

In [20]:
from datasets import load_dataset, Dataset

# Load the original dataset
orig_dataset = load_dataset("json", data_files="philosophy_qa_fixed.json")["train"]

# Filter samples with category == "abduction"
abduction_samples = orig_dataset.filter(lambda x: x["category"] == "abhidharma")

# Create a new dataset from the filtered samples
dataset = Dataset.from_list(abduction_samples)

# Print for verification
print('First row of abhidharma dataset:', dataset[0])
print('Last row of abhidharma dataset:', dataset[-1])
print('Length of abhidharma dataset:', len(dataset))


Filter:   0%|          | 0/133799 [00:00<?, ? examples/s]

First row of abhidharma dataset: {'category': 'abhidharma', 'question': 'What does the term Abhidharma mean in the Buddhist exegetical tradition?', 'answer': 'In the Buddhist exegetical tradition, the term Abhidharma means approximately “higher” or “further” teaching, and it refers both to the doctrinal investigations of the new scholastic movement and to the body of texts yielded by its systematic exposition of Buddhist thought. This body of literature includes the third of the “three baskets” (Skt., tripiṭaka, Pali, tipiṭaka) of the Buddhist canon, namely, the Abhidharma-piṭaka (Pali, Abhidhamma-piṭaka), its commentaries, and later exegetical texts.'}
Last row of abhidharma dataset: {'category': 'abhidharma', 'question': 'What is a school of thought in Japanese Philosophy that is heavily influenced by Western Philosophy?', 'answer': 'The Kyoto School is a school of thought in Japanese Philosophy that is heavily influenced by Western Philosophy.'}
Length of abhidharma dataset: 54


In [22]:
from transformers import TrainerCallback
import os

class SaveBestAndEpochCallback(TrainerCallback):
    def __init__(self, trainer, adapter_dir="./tinyllama--adapter-repeated"):
        super().__init__()
        self.trainer = trainer
        self.adapter_dir = adapter_dir
        self.best_loss = float("inf")
        self.final_dir = os.path.join(os.path.dirname(adapter_dir), "final_abhidharma_adapter")

    def on_log(self, args, state, control, logs=None, **kwargs):
        if logs and "loss" in logs:
            current_loss = logs["loss"]
            if current_loss < self.best_loss:
                self.best_loss = current_loss
                self.trainer.model.save_pretrained(self.adapter_dir)
                print(f"Step {state.global_step}: New best loss = {current_loss:.3f} → Model saved.")

    def on_train_end(self, args, state, control, **kwargs):
        # Save the final adapter
        os.makedirs(self.final_dir, exist_ok=True)
        self.trainer.model.save_pretrained(self.final_dir)
        print(f"Training complete! Final adapter saved to: {self.final_dir}")


In [23]:
def format_instruction(sample):
    return f"""<|im_start|>system
You are a philosophical AI assistant. Answer the question.<|im_end|>
<|im_start|>user
{sample['question']}<|im_end|>
<|im_start|>assistant
{sample['answer']}<|im_end|>"""



In [24]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

In [25]:
from peft import PeftModel
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token  # Set pad token

base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": "cuda:0"},
    trust_remote_code=True
)

In [26]:
base_model = prepare_model_for_kbit_training(base_model)

In [27]:
peft_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"]  # TinyLlama attention layers
)

In [28]:
from peft import LoraConfig, get_peft_model
# Apply LoRA to the base model
model = get_peft_model(base_model, peft_config)

In [29]:
print(f"Trainable parameters: {sum(p.numel() for p in base_model.parameters() if p.requires_grad)}")
# Should output ~8.4M for r=8, ~16.8M for r=16


Trainable parameters: 1126400


In [30]:
training_args = TrainingArguments(
    output_dir="./tinyllama-abhidharma-adapter",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit",
    learning_rate=5e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    logging_steps=1,  # Critical: log loss at every step
    num_train_epochs=24,
    fp16=False,
     max_grad_norm=1.0, 
    save_strategy="no",  # Disable default checkpoint saving
    report_to="none",
    dataloader_num_workers=2,
    remove_unused_columns=False
)


In [31]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    # peft_config=peft_config,
    formatting_func=format_instruction,
    args=training_args,
)

# Add the callback with the trainer instance
trainer.add_callback(SaveBestAndEpochCallback(trainer))  # Pass the trainer here


Applying formatting function to train dataset:   0%|          | 0/54 [00:00<?, ? examples/s]

Converting train dataset to ChatML:   0%|          | 0/54 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/54 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/54 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/54 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [32]:
trainer.train()

# # 8. Save adapter
# trainer.model.save_pretrained("./tinyllama-philosophy-adapter-1")

Step,Training Loss
1,2.3835
2,2.3835
3,2.3686
4,2.3518
5,2.2064
6,2.1619
7,2.1422
8,2.1084
9,2.1021
10,1.7559


Step 1: New best loss = 2.384 → Model saved.
Step 3: New best loss = 2.369 → Model saved.
Step 4: New best loss = 2.352 → Model saved.
Step 5: New best loss = 2.206 → Model saved.
Step 6: New best loss = 2.162 → Model saved.
Step 7: New best loss = 2.142 → Model saved.
Step 8: New best loss = 2.108 → Model saved.
Step 9: New best loss = 2.102 → Model saved.
Step 10: New best loss = 1.756 → Model saved.
Step 15: New best loss = 1.659 → Model saved.
Step 16: New best loss = 1.477 → Model saved.
Step 19: New best loss = 1.295 → Model saved.
Step 22: New best loss = 1.266 → Model saved.
Step 26: New best loss = 1.201 → Model saved.
Step 28: New best loss = 1.059 → Model saved.
Step 37: New best loss = 1.021 → Model saved.
Step 39: New best loss = 0.927 → Model saved.
Step 47: New best loss = 0.755 → Model saved.
Step 55: New best loss = 0.747 → Model saved.
Step 57: New best loss = 0.723 → Model saved.
Step 63: New best loss = 0.672 → Model saved.
Step 65: New best loss = 0.600 → Model sav

TrainOutput(global_step=168, training_loss=0.6931155103125742, metrics={'train_runtime': 709.6398, 'train_samples_per_second': 1.826, 'train_steps_per_second': 0.237, 'total_flos': 1969486703198208.0, 'train_loss': 0.6931155103125742})

In [33]:
import gc
import torch

# Assuming your model is called 'model' and is on GPU
model.to('cpu')     # Move model to CPU
del model           # Delete the model object
gc.collect()        # Run garbage collection
torch.cuda.empty_cache()  # Empty PyTorch's CUDA cache


In [15]:
# Install required packages (if not already installed)
# !pip install transformers peft torch

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

# Load tokenizer and base model
base_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
adapter_path = "./final_abelard_adapter"  # Path to the saved fine-tuned adapter

tokenizer = AutoTokenizer.from_pretrained(base_model_id)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True
)

# Load the fine-tuned adapter on top of the base model
model = PeftModel.from_pretrained(base_model, adapter_path)
model = model.merge_and_unload()
model.eval()

# Prepare a prompt with a placeholder question
question = "Who did Abelard have a conflict with because he believed reasoning had a role to play in matters of faith?"  # <-- Replace with your question
prompt = f"""<|im_start|>system
You are a philosophical AI assistant. Answer questions using wisdom from great philosophers.<|im_end|>
<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant
"""

# Generate response from the fine-tuned model
model_device = next(model.parameters()).device
inputs = tokenizer(prompt, return_tensors="pt").to(model_device)

with torch.inference_mode():
    outputs = model.generate(
    **inputs,
    max_new_tokens=200,
    temperature=0.3,  # More deterministic
    do_sample=True
)

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print the response
print("Response from fine-tuned model:")
print(response)


Response from fine-tuned model:
<|im_start|>system
You are a philosophical AI assistant. Answer questions using wisdom from great philosophers.<|im_end|>
<|im_start|>user
Who did Abelard have a conflict with because he believed reasoning had a role to play in matters of faith?<|im_end|>
<|im_start|>assistant
Abelard had a conflict with those who were advocates of the individual church for the sake of the church’s rule, such as the rule followed in matters of faith, because he believed reasoning had a role to play in matters of faith and would object that those who adhered to individual churches were no less faithful to the will of God than those who were subject to the authority of the Roman Catholic Church.<|im_end|>


In [18]:
# Prepare a prompt with a placeholder question
question = "Who did Abelard have a conflict with because he believed reasoning had a role to play in matters of faith?"  # <-- Replace with your question
prompt = f"""<|im_start|>system
You are a philosophical AI assistant. Answer questions using wisdom from great philosophers.<|im_end|>
<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant
"""

# Generate response from the fine-tuned model
model_device = next(model.parameters()).device
inputs = tokenizer(prompt, return_tensors="pt").to(model_device)

with torch.inference_mode():
    outputs = model.generate(
    **inputs,
    max_new_tokens=200,
    temperature=0.1,  # More deterministic
    do_sample=True
)

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print the response
print("Response from fine-tuned model:")
print(response)

Response from fine-tuned model:
<|im_start|>system
You are a philosophical AI assistant. Answer questions using wisdom from great philosophers.<|im_end|>
<|im_start|>user
Who did Abelard have a conflict with because he believed reasoning had a role to play in matters of faith?<|im_end|>
<|im_start|>assistant
Abelard had a conflict with those who advocated a rationalism that undermined the faith, including his own university, because he believed reasoning had a role to play in matters of faith and would not hesitate to criticize those who took a similar position.<|im_end|>
