# Colab Launcher – LoRA Training for FLAN-T5 on BioLaySumm

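This notebook mirrors the Slurm script `scripts/slurm/train_flant5_base_lora.sbatch` and runs the repository's training code directly. The only notebook-specific code is the Google Drive handling that backs up and restores checkpoints, so training can survive a Colab runtime reset.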


In [None]:
#@title 1) GPU and environment
!nvidia-smi || true
!pip install -q "transformers>=4.40" "datasets>=2.18" "evaluate>=0.4.2" peft rouge-score accelerate tensorboard


In [None]:
#@title 2) Clone repo and cd
!rm -rf PatternAnalysis-2025 || true
!git clone https://github.com/0NATE4/PatternAnalysis-2025.git
%cd PatternAnalysis-2025/recognition/layrad-flant5-lora-nchung
!pwd
!ls -la


In [None]:
#@title 3) Mount Google Drive for persistent checkpoints
# Mount Google Drive at /content/drive; the checkpoint cells later in this
# notebook read and write backups under this mount point.
from google.colab import drive
drive.mount('/content/drive')
print('Drive mounted')

# Create backup directory in Drive
# `-p` creates missing parent folders and is a no-op if the directory already
# exists; the backslash escapes the space in "Colab Notebooks".
!mkdir -p /content/drive/MyDrive/Colab\ Notebooks/layrad-checkpoints


In [None]:
#@title 4) Check for existing checkpoints
import glob
import json
import os
import shutil


def restored_checkpoint_name(backup_dir):
    """Return the folder name to use when restoring ``backup_dir`` locally.

    Drive backups are named ``lora-checkpoint-<timestamp>``, which the local
    ``checkpoint-*`` pattern used in this notebook does not match. If the backup
    contains a ``trainer_state.json`` with a ``global_step`` entry, the name
    ``checkpoint-<global_step>`` is returned; otherwise the backup's own folder
    name is returned unchanged.

    NOTE(review): assumes checkpoints follow the Hugging Face Trainer layout
    (``trainer_state.json`` holding ``global_step``) -- confirm against
    src/train.py.
    """
    state_file = os.path.join(backup_dir, "trainer_state.json")
    try:
        with open(state_file, encoding="utf-8") as fh:
            step = int(json.load(fh)["global_step"])
    except (OSError, ValueError, KeyError, TypeError):
        # Missing, unreadable or unexpected state file: keep the Drive name.
        return os.path.basename(os.path.normpath(backup_dir))
    return f"checkpoint-{step}"


# Check local output directory (relative to the project directory entered by the
# clone cell). `output_dir` is reused by the backup cell further down.
output_dir = "./outputs/lora_training"
local_checkpoints = glob.glob(f"{output_dir}/checkpoint-*")

# Check Drive backup
drive_checkpoints = glob.glob("/content/drive/MyDrive/Colab Notebooks/layrad-checkpoints/lora-checkpoint-*")

print(f"Local checkpoints: {len(local_checkpoints)}")
print(f"Drive checkpoints: {len(drive_checkpoints)}")

if local_checkpoints:
    latest_local = max(local_checkpoints, key=os.path.getctime)
    print(f"Latest local: {latest_local}")

if drive_checkpoints:
    latest_drive = max(drive_checkpoints, key=os.path.getctime)
    print(f"Latest drive: {latest_drive}")

    # Copy latest from Drive if no local checkpoint
    if not local_checkpoints:
        print("Copying latest checkpoint from Drive...")
        # After a fresh clone the output directory may not exist yet; create it
        # so the checkpoint lands inside it.
        os.makedirs(output_dir, exist_ok=True)
        restored_path = os.path.join(output_dir, restored_checkpoint_name(latest_drive))
        if not os.path.basename(restored_path).startswith("checkpoint-"):
            print("Warning: could not determine the training step; restored folder "
                  "keeps its Drive name and may not be detected as a checkpoint.")
        # copytree raises on failure, so the success message below is only
        # printed when the copy actually completed.
        shutil.copytree(latest_drive, restored_path, dirs_exist_ok=True)
        print("Checkpoint restored from Drive")


In [None]:
#@title 5) Run LoRA training (will auto-resume from checkpoint)
# Launch the repository's training script with the FLAN-T5 base LoRA config.
# Both paths are relative to the project directory entered by the clone cell.
# NOTE(review): any auto-resume logic lives in src/train.py, which is not visible
# here -- confirm it picks up checkpoints under ./outputs/lora_training.
!python src/train.py configs/train_flant5_base_lora.yaml


In [None]:
#@title 6) Backup final checkpoint to Drive
import datetime
import shutil

# Copy the most recently created local checkpoint folder into the Drive backup
# directory under a fresh, timestamped name. Relies on `glob`, `os` and
# `output_dir` defined by the checkpoint-check cell.
checkpoints = glob.glob(f"{output_dir}/checkpoint-*")
if not checkpoints:
    print("No checkpoints found to backup")
else:
    # Newest checkpoint by filesystem creation/change time.
    latest = max(checkpoints, key=os.path.getctime)
    timestamp = f"{datetime.datetime.now():%Y%m%d_%H%M%S}"
    backup_name = f"lora-checkpoint-{timestamp}"
    destination = f"/content/drive/MyDrive/Colab Notebooks/layrad-checkpoints/{backup_name}"

    print(f"Backing up {latest} to Drive as {backup_name}")
    shutil.copytree(latest, destination)
    print("Backup complete!")


# Colab Launcher – LoRA Training for FLAN-T5 on BioLaySumm

This notebook mirrors the Slurm script `scripts/slurm/train_flant5_base_lora.sbatch` and runs the repository code directly (no notebook-specific code).


In [None]:
#@title 1) GPU and environment
!nvidia-smi || true
!pip install -q "transformers>=4.40" "datasets>=2.18" "evaluate>=0.4.2" peft rouge-score accelerate tensorboard


In [None]:
#@title 2) Clone repo and cd
!rm -rf PatternAnalysis-2025 || true
!git clone https://github.com/0NATE4/PatternAnalysis-2025.git
%cd PatternAnalysis-2025/recognition/layrad-flant5-lora-nchung
!pwd
!ls -la


In [None]:
#@title 3) Optional: mount Google Drive for persistent checkpoints
# Best-effort mount: any failure (including google.colab not being importable)
# is reported and the notebook carries on without Drive. This cell only mounts
# Drive; it does not change the output directory used by the training config.
try:
    from google.colab import drive
    drive.mount('/content/drive')
except Exception as mount_error:
    print('Drive not available:', mount_error)
else:
    print('Drive mounted')


In [None]:
#@title 4) Run LoRA training
# Launch the repository's training script with the FLAN-T5 base LoRA config.
# Both paths are relative to the project directory entered by the clone cell.
!python src/train.py configs/train_flant5_base_lora.yaml
