# ACIE Training on Google Colab

This notebook trains the ACIE model using the project folder from your Google Drive.

## Quick Start
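1. **Drive Setup**: Upload your project folder to Google Drive as a folder named `ACIE` at the top level of My Drive. The first cell searches `/content/drive/My Drive/ACIE` (and its subfolders) for `setup.py`; if you upload somewhere else, edit `SEARCH_ROOT` in that cell.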
2. **Runtime**: Ensure you are using a GPU runtime (`Runtime` > `Change runtime type` > `T4 GPU`).

In [None]:
# 1. Mount Google Drive
import os
import shlex
import sys

from google.colab import drive

drive.mount('/content/drive')

# 2. Find Project Root (Auto-detect setup.py)
SEARCH_ROOT = "/content/drive/My Drive/ACIE"
PROJECT_ROOT = None

print(f"Searching for setup.py in {SEARCH_ROOT}...")
for root, dirs, files in os.walk(SEARCH_ROOT):
    if "setup.py" in files:
        PROJECT_ROOT = root
        print(f"✅ Found project at: {PROJECT_ROOT}")
        break

if PROJECT_ROOT:
    os.chdir(PROJECT_ROOT)
    sys.path.append(PROJECT_ROOT)
    print(f"Current working directory set to: {os.getcwd()}")
else:
    print(f"❌ Error: Could not find setup.py in {SEARCH_ROOT} or its subdirectories.")
    print("Please check where you uploaded the files.")
    # List directories to help debug
    !ls -F "{SEARCH_ROOT}"

In [None]:
# 3. Install Dependencies
# We are now inside the project folder containing setup.py
import os
if os.path.exists("setup.py"):
    print("Installing dependencies...")
    !pip install -q pytorch-lightning torchmetrics python-dotenv
    !pip install -q "numpy<2.0" pandas scipy networkx
    !pip install -q "bcrypt<4.0.0" passlib python-jose[cryptography]
    # Install project in editable mode
    !pip install -e .
else:
    print("CRITICAL ERROR: setup.py not found in current directory!")
    !ls -F

In [None]:
# 4. Run Training
# Configuration
DATASET_SIZE = "10k"
MAX_EPOCHS = 20
BATCH_SIZE = 64
OUTPUT_DIR = "outputs/colab_run1"

# Check for data
DATA_DIR = "data" if os.path.exists("data") else "."
print(f"Using data directory: {DATA_DIR}")

cmd = f"python acie/training/train.py --data_dir {DATA_DIR} --output_dir {OUTPUT_DIR} --dataset_size {DATASET_SIZE} --max_epochs {MAX_EPOCHS} --batch_size {BATCH_SIZE} --gpus 1"

print(f"Starting training command: {cmd}")
!{cmd}

In [None]:
# 5. TensorBoard (Optional)
%load_ext tensorboard
%tensorboard --logdir outputs/colab_run1/logs