# SOKE Setup and Data Preparation
Prepares the Google Colab environment, downloads dependencies and pretrained models to Google Drive, and organizes the data directory structure for training.


In [None]:
# Clone GitHub repo
!git clone https://github.com/SattamAltwaim/SaSOKE.git
%cd SaSOKE

# Mount Google Drive for data and models
from google.colab import drive
drive.mount('/content/drive')

import os
print("Working directory:", os.getcwd())
print("Drive data path: /content/drive/MyDrive/SOKE_data/")


In [None]:
# Install dependencies
# `%pip` (rather than `!pip`) targets the environment of the running kernel;
# `-q` keeps pip's output short.
# NOTE(review): no versions are pinned, so every run installs whatever is
# current at that moment - consider pinning for reproducibility.
%pip install -q pytorch_lightning torchmetrics omegaconf shortuuid transformers diffusers einops wandb rich matplotlib
%pip install -q smplx h5py scikit-image spacy ftfy more-itertools natsort tensorboard sentencepiece
# gdown is the CLI that the download cells of this notebook invoke.
%pip install -q gdown pandas


In [None]:
# Report whether CUDA is usable and, if so, which device(s) are visible
import torch

has_cuda = torch.cuda.is_available()
print(f"CUDA available: {has_cuda}")
if not has_cuda:
    print("No GPU detected")
else:
    # Device 0 is named; the total count covers every visible GPU.
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Number of GPUs: {torch.cuda.device_count()}")


## Download Required Models


In [None]:
# Download SMPL models to Drive (one-time setup)
drive_data = '/content/drive/MyDrive/SOKE_data'
os.makedirs(f'{drive_data}/deps', exist_ok=True)

!gdown 1YIXddvvBJPQVRuKON2Xc9EEDXikRTteo -O /tmp/smpl_models.zip
!unzip -q /tmp/smpl_models.zip -d {drive_data}/deps/
!rm /tmp/smpl_models.zip
print("SMPL models downloaded to Drive")


In [None]:
# Download t2m evaluators (required for evaluation metrics)
# Download to Drive location
# -p makes mkdir a no-op when the folder already exists.
!mkdir -p {drive_data}/deps/t2m
# Every `!` line runs in its own subshell, so the `cd` has to be chained with
# `&&` for the script to start inside the Drive deps folder.
# NOTE(review): the script itself is not visible here; where it writes its
# output relative to the working directory should be confirmed against the
# deps/t2m/t2m path that the verification step of this notebook checks.
!cd {drive_data}/deps && bash /content/SaSOKE/prepare/download_t2m_evaluators.sh
# NOTE(review): printed unconditionally - it does not prove the script succeeded.
print("t2m evaluators downloaded to Drive")


In [None]:
# Download SMPL-X normalization statistics to Drive
os.makedirs(f'{drive_data}/smpl-x', exist_ok=True)
!gdown 1NH-eVtS0nNjMjCwae-A1ii5sxj44C3bo -O {drive_data}/smpl-x/mean.pt
!gdown 1FHHWS0GPM2s6S2PB2JHv4ufdEbzezuKW -O {drive_data}/smpl-x/std.pt
print("SMPL-X mean/std downloaded to Drive")


In [None]:
# Download pretrained tokenizer to Drive (optional)
os.makedirs(f'{drive_data}/checkpoints/vae', exist_ok=True)
!gdown 18HdPeXwz4O6LY4FZMC5BZ9rja4pcUTFk -O {drive_data}/checkpoints/vae/tokenizer.ckpt
print("Pretrained tokenizer downloaded to Drive")


In [None]:
# Download fine-tuned mBART model to Drive
os.makedirs(f'{drive_data}/deps/mbart-h2s-csl-phoenix', exist_ok=True)
!gdown --folder 1GnaHrI0PC4ZRr-GK3FS2GXcQwlrpA5Gi -O {drive_data}/deps/
print("mBART model downloaded to Drive")


## Verify Installation


In [None]:
# Verify required files in Drive
required_files = [
    f'{drive_data}/deps/smpl_models',
    f'{drive_data}/deps/t2m/t2m',
    f'{drive_data}/deps/mbart-h2s-csl-phoenix',
    f'{drive_data}/smpl-x/mean.pt',
    f'{drive_data}/smpl-x/std.pt',
    f'{drive_data}/checkpoints/vae/tokenizer.ckpt'
]


def _path_status(path):
    """Classify a required path.

    Returns "MISSING" when the path does not exist, "EMPTY" when it is a
    directory with no entries or a zero-byte file, and "OK" otherwise.
    Some target directories are created before their download starts, so
    existence alone would also report OK after a failed download.
    """
    if not os.path.exists(path):
        return "MISSING"
    if os.path.isdir(path):
        return "OK" if os.listdir(path) else "EMPTY"
    return "OK" if os.path.getsize(path) > 0 else "EMPTY"


print("Verification (in Drive):")
for path in required_files:
    print(f"[{_path_status(path)}] {path}")
