# AGV2 Colab Pipeline
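Use this notebook to run the Stage-1 encoder and PPO policy training pipeline on Colab. When prompted in step 2, upload your `.env` file and the parquet feature files (e.g. `encoder_windows_eth.parquet`).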


In [None]:
#@title 1. Environment setup (Colab-safe pins)

!pip install --quiet --index-url https://download.pytorch.org/whl/cpu torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0

!pip install --quiet stable-baselines3==2.4.0 gymnasium==0.29.1

!pip install --quiet polars==1.15.0 numpy==2.1.1 python-dotenv==1.0.0 PyYAML==6.0.2 pandas==2.2.2 requests==2.32.4 joblib==1.4.2 PyJWT==2.8.0

!pip install --quiet tradelocker==0.56.2 --no-deps  # package pins requests==2.32.2, but we keep Colab's 2.32.4


In [None]:
#@title 2. Clone repo & upload secrets

import os, shutil

if os.path.exists('AGV2'):
    shutil.rmtree('AGV2')

!git clone https://github.com/Harmonyone1/AGV2.git

%cd AGV2

from google.colab import files

print('Upload .env and parquet files now...')

uploaded = files.upload()

uploaded.keys()


In [None]:

#@title 3. Train Stage-1 encoder

import os, subprocess, torch, sys

REPO_ROOT = '/content/AGV2'
SRC_ROOT = f"{REPO_ROOT}/src"
UPLOAD_PATH = f"{REPO_ROOT}/encoder_windows_eth.parquet"
DATA_PATH = UPLOAD_PATH if os.path.exists(UPLOAD_PATH) else f"{REPO_ROOT}/data/features/encoder_windows_eth.parquet"
if not os.path.exists(DATA_PATH):
    raise FileNotFoundError('Upload encoder_windows_eth.parquet via cell 2 or update DATA_PATH.')

ENV_PATH = f"{REPO_ROOT}/.env"
CONFIG_PATH = f"{REPO_ROOT}/config/encoder.yaml"

%cd /content/AGV2

env = os.environ.copy()
existing_path = env.get('PYTHONPATH', '')
paths = [SRC_ROOT, REPO_ROOT]
if existing_path:
    paths.append(existing_path)
env['PYTHONPATH'] = ':'.join(paths)

train_cmd = [sys.executable,'scripts/train_encoder.py','--data',DATA_PATH,'--config',CONFIG_PATH,'--env',ENV_PATH,'--device','cuda' if torch.cuda.is_available() else 'cpu','--seed','0']
print('Running:', ' '.join(train_cmd))
result = subprocess.run(train_cmd, capture_output=True, text=True, cwd=REPO_ROOT, env=env)
print(result.stdout)
print(result.stderr)
result.check_returncode()


In [None]:
#@title 4. Generate embeddings, train PPO, backtest

import subprocess, sys, torch

# DATA_PATH, REPO_ROOT and env are defined by step 3, which must have been run first.
EMBED_INPUT = DATA_PATH
EMBED_OUTPUT = '/content/AGV2/data/features/encoder_windows_eth_emb.parquet'
CHECKPOINT = 'models/encoders/encoder_best.pt'
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# All three scripts are launched the same way as the encoder training in step 3:
# with the kernel's interpreter, from the repo root (the relative script, config and
# model paths below are resolved against it), and with the PYTHONPATH-extended env.

# 1) Embed the encoder windows with the trained checkpoint.
subprocess.run(
    [
        sys.executable, 'scripts/embed_windows.py',
        '--input', EMBED_INPUT,
        '--checkpoint', CHECKPOINT,
        '--output', EMBED_OUTPUT,
        '--batch-size', '256',
        '--device', DEVICE,
    ],
    check=True, cwd=REPO_ROOT, env=env,
)

# 2) Train the PPO policy on the embeddings. The statement below previously contained
# literal "\n" escape sequences instead of real line breaks, which made the whole
# cell a SyntaxError.
train_cmd = [
    sys.executable, 'scripts/train_policy.py',
    '--config', 'config/rl_policy.yaml',
    '--data', EMBED_OUTPUT,
    '--timesteps', '200000',
]
# --device is only passed when a GPU is available; otherwise the script's own
# default is used.
if torch.cuda.is_available():
    train_cmd.extend(['--device', 'cuda'])
subprocess.run(train_cmd, check=True, cwd=REPO_ROOT, env=env)

# 3) Backtest the trained policy.
subprocess.run(
    [
        sys.executable, 'scripts/backtest_policy.py',
        '--config', 'config/backtest.yaml',
        '--model', 'models/policies/ppo_trading_env.zip',
        '--episodes', '20',
    ],
    check=True, cwd=REPO_ROOT, env=env,
)
