In [1]:
pip install -q -U transformers accelerate peft bitsandbytes trl datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.0/12.0 MB[0m [31m51.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m465.5/465.5 kB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.6/511.6 kB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7 MB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from trl import SFTTrainer
from datasets import load_dataset
import json

In [5]:
# Model configuration
MODEL_NAME = "microsoft/Phi-3.5-mini-instruct"
OUTPUT_DIR = "./phi-bose-finetuned"
DATASET_FILE = "/content/drive/MyDrive/Bose/data/train.jsonl"

# QLoRA configuration
LORA_R = 16  # Rank
LORA_ALPHA = 32  # Alpha scaling
LORA_DROPOUT = 0.05

# Training configuration
BATCH_SIZE = 1
GRADIENT_ACCUMULATION_STEPS = 8
LEARNING_RATE = 2e-4
MAX_STEPS = 500
MAX_SEQ_LENGTH = 512

In [6]:
print("Loading dataset...")
dataset = load_dataset('json', data_files=DATASET_FILE, split='train')

# Format the dataset to combine prompt and completion into a single text field
def format_instruction(example):
    """Convert prompt/completion format to instruction format"""
    text = f"### User: {example['prompt']}\n### Assistant: {example['completion']}"
    return {"text": text}

# Apply formatting to dataset
dataset = dataset.map(format_instruction, remove_columns=dataset.column_names)

# Split into train and eval (90/10 split)
dataset = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = dataset['train']
eval_dataset = dataset['test']

print(f"Training samples: {len(train_dataset)}")
print(f"Evaluation samples: {len(eval_dataset)}")
print(f"\nSample formatted data:\n{train_dataset[0]}")

Loading dataset...


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/101 [00:00<?, ? examples/s]

Training samples: 90
Evaluation samples: 11

Sample formatted data:
{'text': '### User: How does the overload protection work?\n### Assistant: It uses resistor-network power reduction with automatic reset.'}


In [7]:
print("\nConfiguring 4-bit quantization...")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)


Configuring 4-bit quantization...


In [8]:
print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16
)

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

configuration_phi3.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3.5-mini-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
`torch_dtype` is deprecated! Use `dtype` instead!


modeling_phi3.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3.5-mini-instruct:
- modeling_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/195 [00:00<?, ?B/s]

Loading tokenizer...


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

In [9]:
print("Preparing model for k-bit training...")
model = prepare_model_for_kbit_training(model)

# Configure LoRA
peft_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"]
)

Preparing model for k-bit training...


In [10]:
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    gradient_checkpointing=True,
    optim="paged_adamw_8bit",
    learning_rate=LEARNING_RATE,
    lr_scheduler_type="cosine",
    warmup_steps=50,
    logging_steps=10,
    save_strategy="steps",
    save_steps=100,
    eval_strategy="steps",
    eval_steps=100,
    fp16=True,
    max_grad_norm=0.3,
    weight_decay=0.01,
    group_by_length=True,
    report_to="none",
    max_steps=MAX_STEPS,
)

In [12]:
print("Initializing trainer...")

# Define formatting function to extract text from dataset
def formatting_func(example):
    return example["text"]

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    formatting_func=formatting_func,
    processing_class=tokenizer,
    args=training_args,
)

Initializing trainer...


Applying formatting function to train dataset:   0%|          | 0/90 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/90 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/90 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/90 [00:00<?, ? examples/s]

Applying formatting function to eval dataset:   0%|          | 0/11 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/11 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/11 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/11 [00:00<?, ? examples/s]

In [13]:
print("\n" + "="*60)
print("Starting training...")
print("="*60)

# Check GPU memory before training
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory allocated: {torch.cuda.memory_allocated(0)/1024**3:.2f} GB")
    print(f"Memory reserved: {torch.cuda.memory_reserved(0)/1024**3:.2f} GB")

trainer.train()

print("\nTraining completed!")


Starting training...
GPU: Tesla T4
Memory allocated: 2.51 GB
Memory reserved: 3.56 GB


  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
100,0.4035,1.104422,0.422025,31401.0,0.804538
200,0.1514,1.295109,0.273665,62760.0,0.814689
300,0.1345,1.467985,0.24423,93875.0,0.806917
400,0.1209,1.528854,0.228629,125282.0,0.801476
500,0.1184,1.54898,0.227315,156602.0,0.800774


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)



Training completed!


In [14]:
print("\nSaving model...")
trainer.model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print(f"Model saved to {OUTPUT_DIR}")



Saving model...
Model saved to ./phi-bose-finetuned


In [24]:
print("\n" + "="*60)
print("Testing the fine-tuned model...")
print("="*60)

# Test prompts based on your data format
test_prompts = [
    "### User: What type of loudspeaker is the DesignMax DM8SE?\n### Assistant:",
    "### User: What kind of drivers does the DM8SE use?\n### Assistant:",
    "### User: What is the IP rating of the DM8SE loudspeaker, and what does it indicate?\n### Assistant:",
    "### User: What is the Net Weight of a single DM8SE loudspeaker?\n### Assistant:",
    "### User: What material is the speaker's grille made from?\n### Assistant:",
    "### User: What is the maximum 70V transformer tap setting in Watts?\n### Assistant:",
    "### User: What is the size of the Low-Frequency woofer in both inches and millimeters?\n### Assistant:",
    "### User: What is the peak power handling of the speaker?\n### Assistant:",
    "### User: If an installer bypasses the transformer, what is the Nominal Impedance of thespeaker?\n### Assistant:",
]

# Disable cache to avoid compatibility issues
model.config.use_cache = False

for prompt in test_prompts:
    print(f"\n")

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            use_cache=False
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(response)
    print("-" * 60)

# Re-enable cache after testing
model.config.use_cache = True



Testing the fine-tuned model...


### User: What type of loudspeaker is the DesignMax DM8SE?
### Assistant: The DesignMax DM8SE is a surface-mount loudspeaker.
------------------------------------------------------------


### User: What kind of drivers does the DM8SE use?
### Assistant: It uses coaxial two-way drivers.
------------------------------------------------------------


### User: What is the IP rating of the DM8SE loudspeaker, and what does it indicate?
### Assistant: It is rated IP55, indicating it is suitable for outdoor use in wet locations.
------------------------------------------------------------


### User: What is the Net Weight of a single DM8SE loudspeaker?
### Assistant: The Net Weight is 10.3 kg (22.8 lb).
------------------------------------------------------------


### User: What material is the speaker's grille made from?
### Assistant: The grille is made from solid brass.
------------------------------------------------------------


### User: What is th

In [None]:
import shutil
from google.colab import files

shutil.make_archive('phi3-mini_bose_qa_model', 'zip', 'phi-bose-finetuned')

# Trigger the download
files.download('phi3-mini_bose_qa_model.zip')