In [None]:
# Keep session alive
import time
from datetime import datetime

def keep_alive(interval_seconds=1800):
    """Print a timestamp at a fixed interval, forever.

    Meant to be run as the target of a background (daemon) thread so that the
    notebook keeps producing output while long cells run.

    Args:
        interval_seconds: Seconds to sleep before each heartbeat line.
            Defaults to 1800 (30 minutes), the value that used to be
            hard-coded, so existing callers behave exactly as before.

    Returns:
        Never returns; the loop only ends when the process (or daemon thread)
        is torn down or ``time.sleep`` raises.
    """
    while True:
        # Sleep first: the first heartbeat appears one full interval after start.
        time.sleep(interval_seconds)
        print(f"Keep-alive: {datetime.now().strftime('%H:%M:%S')}")

# Start keep-alive in background
import threading

# Re-running this cell must not stack up additional heartbeat threads, so the
# thread is given a fixed name and only started when no live thread carries it.
KEEP_ALIVE_THREAD_NAME = "keep-alive"

if any(t.name == KEEP_ALIVE_THREAD_NAME for t in threading.enumerate()):
    print("Keep-alive already running!")
else:
    # daemon=True so the endless loop never blocks kernel shutdown.
    thread = threading.Thread(
        target=keep_alive,
        name=KEEP_ALIVE_THREAD_NAME,
        daemon=True,
    )
    thread.start()
    print("Keep-alive started!")

## Step 1: Install Dependencies

In [None]:
%%capture
!pip uninstall -y diffusers bitsandbytes
!pip install -q torch==2.1.2
!pip install -q transformers==4.37.2
!pip install -q datasets==2.16.1
!pip install -q accelerate==0.27.0
!pip install -q peft==0.9.0
!pip install -q trl==0.8.1

print("Packages installed!")

## Step 2: Import Libraries

In [None]:
import torch
import json
import os
import gc
from pathlib import Path
from datetime import datetime

from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    DataCollatorForLanguageModeling,
)
from peft import (
    LoraConfig,
    get_peft_model,
    PeftModel,
)
from trl import SFTTrainer

# Report the framework version and, when a CUDA device is visible, which GPU
# (device 0) is in use and how much memory it has.
cuda_available = torch.cuda.is_available()
print(f"PyTorch: {torch.__version__}")
print(f"CUDA: {cuda_available}")
if cuda_available:
    gpu_name = torch.cuda.get_device_name(0)
    vram_gib = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f" GPU: {gpu_name}")
    print(f"VRAM: {vram_gib:.1f} GB")

## Step 3: Configuration

In [None]:
print("Configuration\n")

# --- Locations: input JSON files, local output folder, Hub repo id ---
TRAIN_DATA_PATH = "/kaggle/input/freud-2-0/freud_training_data/train.json"
VAL_DATA_PATH = "/kaggle/input/freud-2-0/freud_training_data/validation.json"
OUTPUT_DIR = "./freud_phi2_finetuned"
HF_MODEL_NAME = "Dalton-Khatri/freud-phi2"

# --- Base checkpoint that gets fine-tuned ---
BASE_MODEL = "microsoft/phi-2"

# --- Optimisation schedule (sized for a P100 running FP32) ---
NUM_EPOCHS = 3
BATCH_SIZE = 1  # per-device batch; kept small because weights stay in FP32
GRADIENT_ACCUMULATION_STEPS = 8  # micro-batches accumulated per optimizer step
LEARNING_RATE = 2e-4
WARMUP_RATIO = 0.03
MAX_SEQ_LENGTH = 512  # tokenizer truncation length

# --- LoRA adapter hyper-parameters ---
LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0.05

# --- Logging / checkpoint cadence, in optimizer steps ---
LOGGING_STEPS = 50
SAVE_STEPS = 500

print("Config loaded")
print("Effective batch: {}".format(BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS))
print("Max sequence: {}".format(MAX_SEQ_LENGTH))

## Step 4: Load Training Data

In [None]:
print("Loading data...\n")


def _load_json_records(path):
    """Read one JSON split and return its list of records.

    Args:
        path: Path to a JSON file holding a list of objects, each with a
            ``"text"`` field (the field the rest of the notebook tokenizes).

    Returns:
        The parsed list of record dicts.

    Raises:
        ValueError: If the file is not a non-empty list of objects that all
            carry a ``"text"`` key.
    """
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(path, 'r', encoding='utf-8') as f:
        records = json.load(f)

    if not isinstance(records, list) or not records:
        raise ValueError(f"{path} must contain a non-empty JSON list of records")
    # Fail here with a clear message instead of later inside tokenization.
    if not all(isinstance(rec, dict) and 'text' in rec for rec in records):
        raise ValueError(f"Every record in {path} must be an object with a 'text' field")
    return records


train_data = _load_json_records(TRAIN_DATA_PATH)
val_data = _load_json_records(VAL_DATA_PATH)

train_dataset = Dataset.from_list(train_data)
val_dataset = Dataset.from_list(val_data)

print(f"Train: {len(train_dataset):,} samples")
print(f"Val: {len(val_dataset):,} samples\n")

# Preview the first 400 characters of the first training example.
print(" Sample:")
print("="*80)
print(train_dataset[0]['text'][:400])
print("="*80)

## Step 5: Load Model in FP32

In [None]:
print(f"Loading {BASE_MODEL} in FP32...\n")

# Full-precision (FP32) weights; device placement is delegated to
# device_map="auto".
model_load_options = dict(
    torch_dtype=torch.float32,
    device_map="auto",
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **model_load_options)

print("Model loaded!")
print(f"Parameters: {model.num_parameters():,}")

# The first parameter is used as a representative for dtype and device.
first_param = next(model.parameters())
print(f"Data type: {first_param.dtype}")
print(f"Device: {first_param.device}")

## Step 6: Load Tokenizer

In [None]:
print(" Loading tokenizer...\n")

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

# When the tokenizer ships without a pad token, reuse EOS for padding and
# mirror that choice in the model config.
pad_token_missing = tokenizer.pad_token is None
if pad_token_missing:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.eos_token_id

print(" Tokenizer ready!")
print(f"Vocab: {len(tokenizer):,}")
for label, token_text, token_id in (
    ("EOS", tokenizer.eos_token, tokenizer.eos_token_id),
    ("PAD", tokenizer.pad_token, tokenizer.pad_token_id),
):
    print(f"{label}: {token_text} ({token_id})")

## Step 7: Apply LoRA

In [None]:
print("Applying LoRA...\n")
# Enable gradient checkpointing (before wrapping the model with PEFT)
model.gradient_checkpointing_enable()

# Resolve the attention projection names from the model that was actually
# loaded. "Wqkv" is the fused projection of the older remote-code Phi layout;
# the Phi implementation built into transformers uses separate
# q_proj / k_proj / v_proj layers instead. PEFT only raises when *no* target
# matches, so a stale "Wqkv" next to a matching "fc1"/"fc2" would silently
# leave attention without adapters.
module_leaf_names = {name.rsplit(".", 1)[-1] for name, _ in model.named_modules()}
if "Wqkv" in module_leaf_names:
    attention_targets = ["Wqkv"]
else:
    attention_targets = [
        name for name in ("q_proj", "k_proj", "v_proj") if name in module_leaf_names
    ]
if not attention_targets:
    raise ValueError(
        "No attention projection (Wqkv or q_proj/k_proj/v_proj) found in "
        f"{BASE_MODEL}; set LoRA target_modules explicitly for this architecture."
    )

# Attention projections plus both MLP linear layers, as originally intended.
lora_target_modules = attention_targets + ["fc1", "fc2"]

lora_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=lora_target_modules,
)

model = get_peft_model(model, lora_config)

# Count parameters that will receive gradients vs. all parameters.
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())

print("LoRA applied!")
print(f"Target modules: {lora_target_modules}")
print(f"Trainable: {trainable:,} ({100*trainable/total:.2f}%)")
print(f"Total: {total:,}")

## Step 8: Configure Training Arguments

In [None]:
print("Setting up training...\n")

# Batch sizing and learning-rate schedule, taken from the configuration cell.
schedule_options = dict(
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    learning_rate=LEARNING_RATE,
    warmup_ratio=WARMUP_RATIO,
)

# Both mixed-precision switches are off.
precision_options = dict(fp16=False, bf16=False)

# Optimizer
optimizer_options = dict(
    optim="adamw_torch",
    weight_decay=0.01,
    max_grad_norm=1.0,
)

# Logging
logging_options = dict(
    logging_steps=LOGGING_STEPS,
    logging_dir=f"{OUTPUT_DIR}/logs",
)

# Saving: checkpoint every SAVE_STEPS steps, keeping at most two on disk.
checkpoint_options = dict(
    save_strategy="steps",
    save_steps=SAVE_STEPS,
    save_total_limit=2,
)

# Evaluation runs on the same step interval as saving; the checkpoint with the
# lowest loss is reloaded when training ends.
evaluation_options = dict(
    evaluation_strategy="steps",
    eval_steps=SAVE_STEPS,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    greater_is_better=False,
)

# Misc
misc_options = dict(
    report_to="none",
    dataloader_num_workers=0,
    remove_unused_columns=False,
    seed=42,
)

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    **schedule_options,
    **precision_options,
    **optimizer_options,
    **logging_options,
    **checkpoint_options,
    **evaluation_options,
    **misc_options,
)

effective_batch = BATCH_SIZE*GRADIENT_ACCUMULATION_STEPS
print("Training args set!")
print("\n Settings:")
print("   - Precision: FP32 (no mixed precision)")
print(f"   - Batch: {BATCH_SIZE} x {GRADIENT_ACCUMULATION_STEPS} = {effective_batch}")
print(f"   - Learning rate: {LEARNING_RATE}")

## Step 9: Tokenize Data and Create Trainer

In [None]:
print(" Creating trainer...\n")

# Set environment variable
import os
# Turns off the tokenizers library's own parallelism for this process.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Clear GPU cache: collect Python garbage first so freed tensors can be
# released from the CUDA caching allocator.
gc.collect()
torch.cuda.empty_cache()

# Memory figures are converted from bytes to GiB. The icon in these messages
# was mis-encoded (UTF-8 bytes read as cp1252) and is restored here.
print(f"💾 GPU allocated: {torch.cuda.memory_allocated()/1024**3:.2f} GB")
print(f"💾 GPU reserved: {torch.cuda.memory_reserved()/1024**3:.2f} GB")

def tokenize_function(examples):
    """Tokenize the "text" column of a batch with the notebook's tokenizer.

    Sequences are truncated to MAX_SEQ_LENGTH tokens and left unpadded.
    Returns whatever the tokenizer call returns for the batch.
    """
    encode_options = {
        "truncation": True,
        "max_length": MAX_SEQ_LENGTH,
        "padding": False,
    }
    batch_texts = examples["text"]
    return tokenizer(batch_texts, **encode_options)

print("Tokenizing datasets...")
# Both splits go through the same batched map; the original columns are
# dropped so only the tokenizer's outputs remain.
tokenized_train, tokenized_val = [
    split.map(
        tokenize_function,
        batched=True,
        remove_columns=split.column_names,
        desc=progress_label,
    )
    for split, progress_label in (
        (train_dataset, "Tokenizing train"),
        (val_dataset, "Tokenizing validation"),
    )
]
print("Tokenization complete!\n")

# Create data collator and trainer
from transformers import DataCollatorForLanguageModeling, Trainer

# mlm=False selects causal-LM collation instead of masked-LM.
# NOTE(review): the tokenizer cell reuses EOS as the pad token; this collator
# is understood to exclude pad-token positions from the labels, which would
# also exclude any EOS tokens present in the data — confirm that is acceptable.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer_options = dict(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    data_collator=data_collator,
)
trainer = Trainer(**trainer_options)

print("Trainer ready!\n")

## Step 10: Start Training 🚀

In [None]:
banner = "="*80

print(banner)
print("STARTING TRAINING")
print(banner)
print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("\nExpected time: ~4-5 hours")
# Report the configured interval so the message cannot drift from SAVE_STEPS.
print(f"Checkpoints every {SAVE_STEPS} steps\n")

# Train! The returned object carries the aggregate training loss.
train_result = trainer.train()

print("\n" + banner)
print("TRAINING COMPLETE!")
print(banner)
print(f"Finished: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"\nFinal loss: {train_result.training_loss:.4f}")

## Step 11: Save Model

In [None]:
print("\nSaving model...\n")

# Write the trained model and the tokenizer into the same output folder.
for save_artifact in (trainer.save_model, tokenizer.save_pretrained):
    save_artifact(OUTPUT_DIR)

print(f"Saved to: {OUTPUT_DIR}/")
print("\nFiles:")
# List the top-level entries of the output folder.
for entry in Path(OUTPUT_DIR).glob("*"):
    print(f"   - {entry.name}")

## Step 12: Test the Model

In [None]:
print("Testing model...\n")

def test_model(user_input, emotion="neutral"):
    """Generate one assistant reply for a user message.

    Builds the system / user / assistant prompt used by this notebook, samples
    a continuation from the fine-tuned model and returns only the assistant's
    reply text.

    Args:
        user_input: The user's message.
        emotion: Emotion tag inserted as ``[emotion: ...]`` ahead of the
            message. Defaults to "neutral".

    Returns:
        The generated reply as a stripped string, cut off at the first role
        marker the model starts to produce on its own.
    """
    prompt = (
        "<|system|>: You are Freud, a calm, empathetic therapeutic AI assistant. "
        "You respond thoughtfully, kindly, and supportively. "
        "You ask gentle follow-up questions and never judge the user.\n"
        f"<|user|>:\n"
        f"[emotion: {emotion}]\n"
        f"{user_input}\n"
        f"<|assistant|>:\n"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # After training the model may still be in train mode, where LoRA dropout
    # is active; switch to eval mode before sampling.
    model.eval()
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            repetition_penalty=1.2,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on "<|assistant|>:" breaks when the user input or the continuation
    # contains that marker.
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Keep just the first assistant turn if the model continues the dialogue.
    for role_marker in ("<|user|>", "<|assistant|>", "<|system|>"):
        response = response.split(role_marker)[0]

    return response.strip()

# Test cases: (user message, emotion tag) pairs
tests = [
    ("Hi", "greeting"),
    ("I feel sad today", "sad"),
    ("I'm anxious about my exam", "anxious"),
    ("I had a great day!", "happy"),
]

# The speaker icons below were mis-encoded (UTF-8 bytes read as cp1252) and
# printed as garbage; they are restored to the intended emoji.
print("="*80)
for user, emotion in tests:
    print(f"\n👤 User ({emotion}): {user}")
    resp = test_model(user, emotion)
    print(f"🤖 Freud: {resp}")
    print("-"*80)

print("\nTesting complete!")

## Step 13: Merge LoRA Adapter 

In [None]:
print("Merging LoRA adapter...\n")

# Reload base
# The fine-tuned model from the training cells is still held on the GPU, so the
# second copy is loaded on the CPU rather than letting device_map="auto"
# compete for the same GPU memory or offload layers. The dtype is explicit so
# the merge happens in the same FP32 precision used for training.
base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float32,
    device_map={"": "cpu"},
    trust_remote_code=True,
)

# Load the saved adapter on top of the base weights, then fold it into them.
merged = PeftModel.from_pretrained(base, OUTPUT_DIR)
merged = merged.merge_and_unload()

# Save the merged model next to the adapter folder, along with the tokenizer.
MERGED_DIR = f"{OUTPUT_DIR}_merged"
merged.save_pretrained(MERGED_DIR)
tokenizer.save_pretrained(MERGED_DIR)

print(f"Merged model: {MERGED_DIR}/")

In [None]:
import shutil

# Zip the folder the save step actually wrote to. Using OUTPUT_DIR instead of a
# hard-coded absolute path keeps the archive in sync with the configuration and
# does not depend on the working directory being /kaggle/working.
shutil.make_archive('freud_model_new_phi', 'zip', OUTPUT_DIR)

In [None]:
# Recursively zip a model folder into freud_model_1.zip using the shell.
# NOTE(review): /kaggle/working/third_sem_project/freud_model is not created by
# any cell visible in this notebook — confirm the path exists, or whether this
# cell is a leftover from another project layout.
!zip -r freud_model_1.zip /kaggle/working/third_sem_project/freud_model

In [None]:
from IPython.display import FileLink

# Display a link to the archive produced by the shutil.make_archive cell; the
# path is relative to the notebook's working directory.
FileLink(r'freud_model_new_phi.zip')