# 🎯 TrialPulse Nexus - Fine-Tuning

Train your custom clinical trial AI model.

**IMPORTANT:** Go to Runtime > Change runtime type > GPU (T4)

In [None]:
# Step 1: Install dependencies (takes 2-3 minutes)
# A leading `!` hands the line to the notebook's shell, so these run pip
# rather than Python code.
!pip install unsloth
# --no-deps installs only the four packages named here without pulling in their
# own dependencies.
# NOTE(review): presumably this keeps the versions chosen by the unsloth install
# above from being replaced - confirm.
!pip install --no-deps trl peft accelerate bitsandbytes

In [None]:
# Step 2: Load the base model
from unsloth import FastLanguageModel
import torch

# Loader settings collected in one place: which checkpoint to fetch, the
# maximum sequence length to configure, and the 4-bit loading flag.
base_model_options = dict(
    model_name='unsloth/llama-3.1-8b-bnb-4bit',
    max_seq_length=2048,
    load_in_4bit=True,
)

# from_pretrained hands back the model together with its tokenizer; later
# steps use both names.
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)
print('Model loaded!')

In [None]:
# Step 3: Add LoRA adapters for efficient training
# Names of the modules inside the model that receive an adapter.
adapter_targets = [
    'q_proj', 'k_proj', 'v_proj', 'o_proj',
    'gate_proj', 'up_proj', 'down_proj',
]

# Rebind `model` to the adapter-wrapped version so every later step trains,
# saves and runs the wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=adapter_targets,
    bias='none',
)
print('LoRA adapters added!')

In [None]:
# Step 4: Upload your training data
# Running this cell shows a "Choose Files" button below it; pick
# training_data_chat.jsonl there. (A file already placed in the working
# directory through the folder icon on the left is accepted as well.)
import os
from google.colab import files

uploaded = files.upload()

# Step 5 opens 'training_data_chat.jsonl' by exact name, so confirm that the
# file is really on disk instead of reporting success unconditionally. A
# cancelled picker or a differently named upload is reported here, where the
# cause is obvious, rather than as a missing-file error in the next step.
if os.path.exists('training_data_chat.jsonl'):
    print('Data uploaded!')
else:
    received = ', '.join(sorted(uploaded)) or 'nothing'
    print(
        "Warning: 'training_data_chat.jsonl' was not found "
        f"(received: {received}). Rename the file or re-run this cell "
        "before continuing to Step 5."
    )

In [None]:
# Step 5: Load the training data
from datasets import load_dataset

# Read the uploaded JSON-lines file with the 'json' loader and keep its
# 'train' split as `dataset` for the training step.
dataset = load_dataset(
    'json',
    data_files='training_data_chat.jsonl',
    split='train',
)

example_count = len(dataset)
print(f'Loaded {example_count} training examples')

In [None]:
# Step 6: Train the model (takes ~20-30 minutes)
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported


def render_chat(example):
    """Render one example's chat turns into a single training string.

    Uses the tokenizer's chat template (the same one Step 9 uses to build the
    inference prompt) and returns the result under the 'text' key.
    """
    text = tokenizer.apply_chat_template(example['messages'], tokenize=False)
    return {'text': text}


if len(dataset) == 0:
    raise ValueError('training_data_chat.jsonl contains no training examples')

# SFTTrainer tokenizes the column named by `dataset_text_field`, so that column
# has to hold plain strings. When 'messages' already is a string it is used
# unchanged; otherwise it is treated as a list of chat turns and rendered to
# text first. The rendered dataset keeps only the 'text' column so the trainer
# sees a single, unambiguous text field. A separate name is used (instead of
# rebinding `dataset`) so this cell can be re-run safely.
if isinstance(dataset[0]['messages'], str):
    train_dataset, text_field = dataset, 'messages'
else:
    train_dataset = dataset.map(render_chat, remove_columns=dataset.column_names)
    text_field = 'text'

# Pick the mixed-precision mode the GPU supports: bf16 where available,
# fp16 otherwise (fp16 is what the original hard-coded value selected).
use_bf16 = is_bfloat16_supported()

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field=text_field,
    max_seq_length=2048,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        num_train_epochs=3,
        learning_rate=2e-4,
        fp16=not use_bf16,
        bf16=use_bf16,
        logging_steps=10,
        output_dir='outputs',
        optim='adamw_8bit',
    ),
)

print('Starting training...')
trainer.train()
print('Training complete!')

In [None]:
# Step 7: Save the model
# Both the model and the tokenizer are written to the same directory, model
# first; Step 8 zips this directory.
save_dir = 'trialpulse-nexus-v1'
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_dir)
print('Model saved!')

In [None]:
# Step 8: Download your trained model
# A leading `!` runs the line in the notebook's shell: `zip -r` recursively
# packs the trialpulse-nexus-v1/ directory written in Step 7 into
# trialpulse_model.zip.
!zip -r trialpulse_model.zip trialpulse-nexus-v1/
from google.colab import files
# Send the archive to the browser as a file download.
files.download('trialpulse_model.zip')
print('Download started!')

In [None]:
# Step 9: Test your model
# Switch the model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

messages = [
    {'role': 'system', 'content': 'You are a clinical trial data quality expert.'},
    {'role': 'user', 'content': 'What is a DQI score and why is it important?'}
]

# add_generation_prompt=True makes the chat template end the prompt with the
# opening of an assistant turn. Without it the prompt stops after the user
# message, so the model is not cued to answer and may continue the user's
# text instead.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors='pt',
).to('cuda')

# Generate up to 200 new tokens; the decoded output contains the prompt
# followed by the model's reply.
outputs = model.generate(inputs, max_new_tokens=200)
print(tokenizer.decode(outputs[0]))