In [1]:
!pip install -q transformers bitsandbytes accelerate peft trl

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m564.7/564.7 kB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [6]:
import json
import torch
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig
from trl import SFTTrainer

# Load your processed data
with open('processed_chunks.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Convert to Hugging Face Dataset
# We'll just use the 'text' field for now as our training data.
# The SFTTrainer will format it correctly for us.
dataset = Dataset.from_list(data)

In [7]:
def format_instruction(example):
    # This is a simple instruction format. We're telling the model to act as a Q&A bot.
    # The 'context' is our documentation chunk, and the 'response' is the information
    # we want it to learn to provide.
    return f"""<s>[INST] As a technical expert, please answer the question based on the following context.

### Context:
{example['text']}

### Question:
What is the key information in the provided text? [/INST] The key information is that {example['text']} </s>"""

# Apply the formatting to the dataset
formatted_dataset = dataset.map(lambda x: {"text": format_instruction(x)})

Map:   0%|          | 0/1513 [00:00<?, ? examples/s]

In [8]:
# Model and tokenizer names
base_model = "mistralai/Mistral-7B-Instruct-v0.2"
new_model = "pandas-mistral-7b-finetuned"

# Quantization configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto" # Automatically uses the GPU
)
model.config.use_cache = False

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [16]:
# PEFT (LoRA) configuration
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

# Training arguments
training_arguments = TrainingArguments(
    output_dir="./results",
    report_to="none",
    num_train_epochs=1,
    per_device_train_batch_size=1, # Reduced batch size
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    gradient_checkpointing=True, # Enabled gradient checkpointing
)

In [11]:
import trl
print(trl.__version__)


0.23.0


In [12]:
import inspect
from trl import SFTTrainer
print(inspect.signature(SFTTrainer.__init__))


(self, model: Union[str, torch.nn.modules.module.Module, transformers.modeling_utils.PreTrainedModel], args: Union[trl.trainer.sft_config.SFTConfig, transformers.training_args.TrainingArguments, NoneType] = None, data_collator: Optional[transformers.data.data_collator.DataCollator] = None, train_dataset: Union[datasets.arrow_dataset.Dataset, datasets.iterable_dataset.IterableDataset, NoneType] = None, eval_dataset: Union[datasets.arrow_dataset.Dataset, dict[str, datasets.arrow_dataset.Dataset], NoneType] = None, processing_class: Union[transformers.tokenization_utils_base.PreTrainedTokenizerBase, transformers.processing_utils.ProcessorMixin, NoneType] = None, compute_loss_func: Optional[Callable] = None, compute_metrics: Optional[Callable[[transformers.trainer_utils.EvalPrediction], dict]] = None, callbacks: Optional[list[transformers.trainer_callback.TrainerCallback]] = None, optimizers: tuple[typing.Optional[torch.optim.optimizer.Optimizer], typing.Optional[torch.optim.lr_scheduler.L

In [18]:
from trl import SFTTrainer
import torch

# ✅ Define how to extract the text field from your dataset
def formatting_func(example):
    return example["text"]

# ✅ Build the trainer
trainer = SFTTrainer(
    model=model,
    train_dataset=formatted_dataset,
    peft_config=peft_config,
    max_seq_length=512, # Added max_seq_length
    formatting_func=formatting_func,
    processing_class=tokenizer,
    args=training_arguments,
)

# 🚀 Start training
print("🚀 Starting fine-tuning...")
trainer.train()
print("✅ Fine-tuning complete!")

# 💾 Save the fine-tuned adapters
trainer.model.save_pretrained(new_model)
print(f"✅ Model adapters saved to '{new_model}'")



Applying formatting function to train dataset:   0%|          | 0/1513 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/1513 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1513 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/1513 [00:00<?, ? examples/s]

🚀 Starting fine-tuning...


  return fn(*args, **kwargs)


Step,Training Loss
25,0.9834
50,0.8921
75,0.7288
100,0.7938
125,1.6604
150,0.8307
175,0.592
200,0.8307
225,0.5746
250,0.8085


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*a

✅ Fine-tuning complete!
✅ Model adapters saved to 'pandas-mistral-7b-finetuned'


In [20]:
import shutil

# Zip the folder
shutil.make_archive("pandas-mistral-7b-finetuned", 'zip', "pandas-mistral-7b-finetuned")


'/content/pandas-mistral-7b-finetuned.zip'