# microsoft/Phi-3-mini-4k-instruct

In [None]:
import pandas as pd
import torch
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import (
    get_peft_model,
    LoraConfig,
    TaskType,
    prepare_model_for_kbit_training
)

# Step 1: Load the training conversations from the local CSV export.
# The path is a raw string, so backslashes are written once; doubling them
# inside r"..." would put literal "\\" pairs into the path. This now matches
# how the same file is referenced in the other model sections of this file.
df = pd.read_csv(r"C:\Graduate courses\s2\Gen AI\therpist\train.csv")
print(f"Dataset loaded with {len(df)} examples")
# Plain string: there are no placeholders to interpolate here.
print("Original model parameters that would be trained: millions")

# Expose each row's conversation text under the key the tokenization step reads.
def process_conversations(example):
    """Return ``{"formatted_text": ...}`` holding the row's ``conversations`` value unchanged."""
    raw_text = example["conversations"]
    return {"formatted_text": raw_text}

# Convert the DataFrame into a Hugging Face Dataset, then add the
# "formatted_text" column produced by process_conversations.
dataset = Dataset.from_pandas(df)
processed_dataset = dataset.map(process_conversations)

# Tokenizer matching the Phi-3 checkpoint that is fine-tuned below.
# NOTE(review): trust_remote_code=True permits code from the model repository
# to execute locally — confirm the checkpoint source is trusted.
tokenizer = AutoTokenizer.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    trust_remote_code=True,
)

def tokenize_function(examples):
    """Tokenize a batch's ``formatted_text`` entries, truncated and padded to 512 tokens.

    Relies on the module-level ``tokenizer``.
    """
    texts = examples["formatted_text"]
    encode_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 512,
    }
    return tokenizer(texts, **encode_options)

# Tokenize every row and drop all pre-tokenization columns in the same pass, so
# only the tokenizer outputs remain. Using the dataset's own column list avoids
# a hard-coded name list that raises if, for example, the CSV has no "id" column.
tokenized_dataset = processed_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=processed_dataset.column_names,
)
# Fixed seed: the 90/10 split is reproducible, so evaluation losses from
# different runs (and different model sections) refer to the same held-out rows.
tokenized_dataset = tokenized_dataset.train_test_split(test_size=0.1, seed=42)

# Load model in 8-bit precision to save memory
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    trust_remote_code=True,
    load_in_8bit=True,
)

# Baseline for the efficiency statistics printed later in this section.
# `original_params` is reported there as "Full model parameters" and used as
# the denominator of the reduction percentage, so it must be the TOTAL
# parameter count, not only the tensors that currently require gradients.
# The trainable subset is tracked separately, as in the other model sections.
original_params = sum(p.numel() for p in model.parameters())
trainable_original = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total parameters in model: {original_params:,}")
print(f"Trainable parameters before LoRA: {trainable_original:,}")

# Configure LoRA
# Phi-3 fuses the query/key/value projections into a single `qkv_proj` linear
# layer, so the Llama-style names q_proj/k_proj/v_proj match no module in this
# architecture. Because `o_proj` does match, PEFT would not raise and the
# adapter would silently cover the output projection only. Target the fused
# projection explicitly so all attention projections are adapted.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,                     # Rank of the update matrices
    lora_alpha=32,           # Parameter for scaling
    lora_dropout=0.1,        # Dropout probability for LoRA layers
    target_modules=["qkv_proj", "o_proj"],  # Attention projections (Phi-3 naming)
    bias="none",
)

# Prepare the quantized model for training, then wrap it with the LoRA adapters
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

# Count LoRA trainable parameters (everything that still requires gradients)
lora_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"LoRA trainable parameters: {lora_params:,}")
print(f"Parameter reduction: {100 * (1 - lora_params / original_params):.2f}%")

# Training arguments
# `eval_strategy` is the current name of the option formerly spelled
# `evaluation_strategy`; newer transformers releases reject the old keyword.
# This file also fine-tunes Gemma 2, which already requires a transformers
# release that accepts the new name.
training_args = TrainingArguments(
    output_dir="./results/phi3-lora-mental-health",
    per_device_train_batch_size=8,   # Can use larger batches with LoRA
    gradient_accumulation_steps=4,
    warmup_steps=100,
    max_steps=1000,
    learning_rate=3e-4,
    fp16=True,
    logging_steps=20,
    save_steps=200,
    eval_strategy="steps",
    eval_steps=200,                  # Same cadence as save_steps, needed for best-model loading
    save_total_limit=2,
    load_best_model_at_end=True,
)

# Collator for causal (not masked) language modeling: mlm=False.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Wire the LoRA-wrapped model, the training arguments and both splits together.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    eval_dataset=tokenized_dataset["test"],
    train_dataset=tokenized_dataset["train"],
)

# Run fine-tuning.
trainer.train()

# Persist the adapter via the PEFT-wrapped model's save_pretrained.
model.save_pretrained("./lora-phi3-mental-health-adapter")
print("LoRA adapter saved successfully")

# Report the parameter counts gathered before and after applying LoRA.
for summary_line in (
    f"Full model parameters: {original_params:,}",
    f"LoRA-only parameters: {lora_params:,}",
    f"Storage reduction: {(original_params - lora_params) / 1_000_000:.2f}M parameters",
):
    print(summary_line)

# google/gemma-2-2b-it

In [None]:
import pandas as pd
import torch
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import (
    get_peft_model,
    LoraConfig,
    TaskType,
    prepare_model_for_kbit_training
)

# Step 1: Read the training conversations from the local CSV file.
print("Loading dataset...")
df = pd.read_csv(
    r"C:\Graduate courses\s2\Gen AI\therpist\train.csv",
)
print(f"Dataset loaded with {len(df)} examples")

# Step 2: Expose the conversation text under the key used for tokenization.
def process_conversations(example):
    """Return ``{"formatted_text": ...}`` holding the row's ``conversations`` value unchanged.

    NOTE(review): no Gemma turn markers (``<start_of_turn>`` / ``<end_of_turn>``)
    are added here; the text is passed through as-is. Confirm the CSV already
    stores conversations in the format the model should learn.
    """
    return {"formatted_text": example["conversations"]}

# Convert the DataFrame into a Hugging Face Dataset and add "formatted_text".
dataset = Dataset.from_pandas(df)
processed_dataset = dataset.map(process_conversations)

# Step 3: Tokenize dataset
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")
# Only fall back to EOS when the tokenizer defines no pad token of its own.
# The language-modeling collator used for training ignores pad-token positions
# in the labels, so unconditionally aliasing pad to EOS also hides every real
# end-of-sequence token from the loss and the model is never trained to stop.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    """Tokenize a batch's ``formatted_text`` entries, truncated and padded to 512 tokens.

    Relies on the module-level ``tokenizer``.
    """
    texts = examples["formatted_text"]
    encode_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 512,
    }
    return tokenizer(texts, **encode_options)

print("Tokenizing dataset...")
# Tokenize every row and drop all pre-tokenization columns in the same pass, so
# only the tokenizer outputs remain. Using the dataset's own column list avoids
# a hard-coded name list that raises if, for example, the CSV has no "id" column.
tokenized_dataset = processed_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=processed_dataset.column_names,
)
# Fixed seed: the 90/10 split is reproducible, so evaluation losses from
# different runs (and different model sections) refer to the same held-out rows.
tokenized_dataset = tokenized_dataset.train_test_split(test_size=0.1, seed=42)

# Step 4: Load the base checkpoint with 8-bit weights.
print("Loading base model...")
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-2b-it",
    device_map="auto",
    load_in_8bit=True,
)

# Tally total and currently-trainable parameter counts in a single pass.
original_params = 0
trainable_original = 0
for _param in model.parameters():
    _count = _param.numel()
    original_params += _count
    if _param.requires_grad:
        trainable_original += _count
print(f"Total parameters in model: {original_params:,}")
print(f"Trainable parameters before LoRA: {trainable_original:,}")

# Step 5: Describe and attach the LoRA adapters.
print("Applying LoRA configuration...")
# Module names that receive adapters: attention projections, then MLP projections.
target_modules = [
    *("q_proj", "k_proj", "v_proj", "o_proj"),
    *("gate_proj", "up_proj", "down_proj"),
]

lora_config = LoraConfig(
    target_modules=target_modules,
    task_type=TaskType.CAUSAL_LM,
    r=16,               # rank of the low-rank update matrices
    lora_alpha=32,      # scaling parameter
    lora_dropout=0.05,  # dropout applied inside the LoRA layers
    bias="none",
)

# Prepare the quantized model for training, then wrap it with the adapters.
model = get_peft_model(prepare_model_for_kbit_training(model), lora_config)
model.print_trainable_parameters()  # PEFT's own parameter report

# Recount the tensors that still require gradients after wrapping.
lora_params = sum(
    tensor.numel() for tensor in model.parameters() if tensor.requires_grad
)
print(f"Trainable parameters with LoRA: {lora_params:,}")
print(f"Parameter reduction: {100 * (1 - lora_params / original_params):.4f}%")
print(f"Memory savings: ~{(original_params - lora_params) / 1_000_000:.2f}M parameters")

# Step 6: Training arguments
print("Setting up training...")
# `eval_strategy` is the current name of the option formerly spelled
# `evaluation_strategy`; newer transformers releases reject the old keyword.
# Gemma 2 itself requires a transformers release that accepts the new name.
training_args = TrainingArguments(
    output_dir="./results/gemma-lora-mental-health",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    warmup_steps=100,
    max_steps=1000,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=20,
    save_steps=200,
    eval_strategy="steps",
    eval_steps=200,                  # Same cadence as save_steps, needed for best-model loading
    save_total_limit=2,
    load_best_model_at_end=True,
)

# Step 7: Collator for causal (not masked) language modeling: mlm=False.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Wire the LoRA-wrapped model, the training arguments and both splits together.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    eval_dataset=tokenized_dataset["test"],
    train_dataset=tokenized_dataset["train"],
)

# Step 8: Run fine-tuning.
print("Starting training...")
trainer.train()

# Step 9: Write the adapter and the tokenizer files to one directory.
adapter_path = "./gemma-lora-mental-health-adapter"
for _artifact in (model, tokenizer):
    _artifact.save_pretrained(adapter_path)
print(f"LoRA adapter saved to {adapter_path}")

# Step 10: Print the parameter counts gathered before and after applying LoRA.
for summary_line in (
    "\n==== PARAMETER EFFICIENCY SUMMARY ====",
    f"Total model parameters: {original_params:,}",
    f"Parameters if fully fine-tuned: {trainable_original:,}",
    f"Parameters with LoRA: {lora_params:,}",
    f"Parameter reduction: {100 * (1 - lora_params / original_params):.4f}%",
    f"Storage size reduction: ~{(original_params - lora_params) * 4 / (1024*1024):.2f} MB",
    "=====================================",
):
    print(summary_line)

# Test the model with a sample input, given as a single-turn chat.
test_input = "I've been feeling really anxious lately and can't sleep."
messages = [{"role": "user", "content": test_input}]

# Reload the base checkpoint named in the saved adapter config and attach the
# trained adapter to it for inference.
from peft import PeftModel, PeftConfig
config = PeftConfig.from_pretrained(adapter_path)
base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    device_map="auto"
)
fine_tuned_model = PeftModel.from_pretrained(base_model, adapter_path)

from transformers import pipeline
pipe = pipeline("text-generation", model=fine_tuned_model, tokenizer=tokenizer)
# Give an explicit generation budget instead of relying on the library default
# length, which can cut the reply off after a handful of tokens. 256 matches
# the budget used for the TinyLlama sample elsewhere in this file.
response = pipe(messages, max_new_tokens=256)
print("\nSample response:")
print(response)

# TinyLlama/TinyLlama-1.1B-Chat-v1.0

In [None]:
import pandas as pd
import torch
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import (
    get_peft_model,
    LoraConfig,
    TaskType,
    prepare_model_for_kbit_training
)

# Step 1: Read the training conversations from the local CSV file.
print("Loading dataset...")
df = pd.read_csv(
    r"C:\Graduate courses\s2\Gen AI\therpist\train.csv",
)
print(f"Dataset loaded with {len(df)} examples")

# Step 2: Expose the conversation text under the key used for tokenization.
def process_conversations(example):
    """Return ``{"formatted_text": ...}`` holding the row's ``conversations`` value unchanged.

    NOTE(review): no chat markers (``<|system|>`` / ``<|user|>`` /
    ``<|assistant|>``) are added here; the text is passed through as-is.
    Confirm the CSV already stores conversations in the format the model
    should learn.
    """
    return {"formatted_text": example["conversations"]}

# Convert the DataFrame into a Hugging Face Dataset and add "formatted_text".
dataset = Dataset.from_pandas(df)
processed_dataset = dataset.map(process_conversations)

# Step 3: Load the tokenizer for the TinyLlama chat checkpoint.
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
)
# Reuse the end-of-sequence token for padding.
# NOTE(review): the language-modeling collator used for training ignores
# pad-token positions in the labels; with pad == EOS the real end-of-sequence
# targets would be ignored as well — confirm this is intended.
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    """Tokenize a batch's ``formatted_text`` entries, truncated and padded to 512 tokens.

    Relies on the module-level ``tokenizer``.
    """
    texts = examples["formatted_text"]
    encode_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 512,
    }
    return tokenizer(texts, **encode_options)

print("Tokenizing dataset...")
# Tokenize every row and drop all pre-tokenization columns in the same pass, so
# only the tokenizer outputs remain. Using the dataset's own column list avoids
# a hard-coded name list that raises if, for example, the CSV has no "id" column.
tokenized_dataset = processed_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=processed_dataset.column_names,
)
# Fixed seed: the 90/10 split is reproducible, so evaluation losses from
# different runs (and different model sections) refer to the same held-out rows.
tokenized_dataset = tokenized_dataset.train_test_split(test_size=0.1, seed=42)

# Step 4: Load the base checkpoint with 8-bit weights to save memory.
print("Loading base model...")
model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    device_map="auto",
    load_in_8bit=True,
)

# Tally total and currently-trainable parameter counts in a single pass.
original_params = 0
trainable_original = 0
for _param in model.parameters():
    _count = _param.numel()
    original_params += _count
    if _param.requires_grad:
        trainable_original += _count
print(f"Total parameters in model: {original_params:,}")
print(f"Trainable parameters before LoRA: {trainable_original:,}")

# Step 5: Describe and attach the LoRA adapters.
print("Applying LoRA configuration...")
# Module names that receive adapters: attention projections, then MLP projections.
target_modules = [
    *("q_proj", "k_proj", "v_proj", "o_proj"),
    *("gate_proj", "up_proj", "down_proj"),
]

lora_config = LoraConfig(
    target_modules=target_modules,
    task_type=TaskType.CAUSAL_LM,
    r=8,                # rank of the low-rank update matrices
    lora_alpha=16,      # scaling parameter
    lora_dropout=0.05,  # dropout applied inside the LoRA layers
    bias="none",
)

# Prepare the quantized model for training, then wrap it with the adapters.
model = get_peft_model(prepare_model_for_kbit_training(model), lora_config)
model.print_trainable_parameters()  # PEFT's own parameter report

# Recount the tensors that still require gradients after wrapping.
lora_params = sum(
    tensor.numel() for tensor in model.parameters() if tensor.requires_grad
)
print(f"Trainable parameters with LoRA: {lora_params:,}")
print(f"Parameter reduction: {100 * (1 - lora_params / original_params):.4f}%")
print(f"Memory savings: ~{(original_params - lora_params) / 1_000_000:.2f}M parameters")

# Step 6: Training arguments
print("Setting up training...")
# `eval_strategy` is the current name of the option formerly spelled
# `evaluation_strategy`; newer transformers releases reject the old keyword.
# This file also fine-tunes Gemma 2, which already requires a transformers
# release that accepts the new name.
training_args = TrainingArguments(
    output_dir="./results/tinyllama-lora-mental-health",
    per_device_train_batch_size=8,    # Can use larger batches for this smaller model
    gradient_accumulation_steps=4,
    warmup_steps=100,
    max_steps=1000,          # Adjust based on dataset size
    learning_rate=3e-4,
    fp16=True,
    logging_steps=20,
    save_steps=200,
    eval_strategy="steps",
    eval_steps=200,          # Same cadence as save_steps, needed for best-model loading
    save_total_limit=2,
    load_best_model_at_end=True,
)

# Step 7: Collator for causal (not masked) language modeling: mlm=False.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Wire the LoRA-wrapped model, the training arguments and both splits together.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    eval_dataset=tokenized_dataset["test"],
    train_dataset=tokenized_dataset["train"],
)

# Step 8: Run fine-tuning.
print("Starting training...")
trainer.train()

# Step 9: Write the adapter and the tokenizer files to one directory.
adapter_path = "./tinyllama-lora-mental-health-adapter"
for _artifact in (model, tokenizer):
    _artifact.save_pretrained(adapter_path)
print(f"LoRA adapter saved to {adapter_path}")

# Step 10: Print the parameter counts gathered before and after applying LoRA.
for summary_line in (
    "\n==== PARAMETER EFFICIENCY SUMMARY ====",
    f"Total model parameters: {original_params:,}",
    f"Parameters if fully fine-tuned: {trainable_original:,}",
    f"Parameters with LoRA: {lora_params:,}",
    f"Parameter reduction: {100 * (1 - lora_params / original_params):.4f}%",
    f"Storage size reduction: ~{(original_params - lora_params) * 2 / (1024*1024):.2f} MB",
    "=====================================",
):
    print(summary_line)

# Test the model
test_input = "I've been feeling overwhelmed lately with work and personal life."

# Build the prompt with the tokenizer's own chat template rather than by hand.
# The hand-written "<|user|>\n...<|assistant|>\n" string left out the
# end-of-turn token that the template places after the user message.
messages = [{"role": "user", "content": test_input}]
formatted_prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Reload the base checkpoint named in the saved adapter config and attach the
# trained adapter to it for inference.
from peft import PeftModel, PeftConfig
config = PeftConfig.from_pretrained(adapter_path)
base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    device_map="auto"
)
fine_tuned_model = PeftModel.from_pretrained(base_model, adapter_path)

# Generate response
inputs = tokenizer(formatted_prompt, return_tensors="pt").to(fine_tuned_model.device)
outputs = fine_tuned_model.generate(
    **inputs,            # pass attention_mask along with input_ids
    max_new_tokens=256,
    do_sample=True,      # temperature/top_p only take effect when sampling is enabled
    temperature=0.7,
    top_p=0.9,
)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("\nSample response:")
print(response)