<a href="https://colab.research.google.com/github/SattamAltwaim/SaSOKE/blob/main/notebooks/1_setup_and_data_prep.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SOKE Setup and Data Preparation
Prepares the environment, downloads dependencies, and organizes the data directory structure for training.


In [1]:
# Clone GitHub repo
!git clone https://github.com/SattamAltwaim/SaSOKE.git
%cd SaSOKE

# Mount Google Drive for data and models
from google.colab import drive
drive.mount('/content/drive')

import os
print("Working directory:", os.getcwd())
print("Drive data path: /content/drive/MyDrive/GraduationProject/CodeFiles/SaSOKE")


Cloning into 'SaSOKE'...
remote: Enumerating objects: 331, done.[K
remote: Counting objects: 100% (331/331), done.[K
remote: Compressing objects: 100% (244/244), done.[K
remote: Total 331 (delta 89), reused 320 (delta 80), pack-reused 0 (from 0)[K
Receiving objects: 100% (331/331), 2.42 MiB | 7.43 MiB/s, done.
Resolving deltas: 100% (89/89), done.
/content/SaSOKE
Mounted at /content/drive
Working directory: /content/SaSOKE
Drive data path: /content/drive/MyDrive/GraduationProject/CodeFiles/SaSOKE


In [2]:
# Install dependencies
# %pip (rather than !pip) installs into the environment of the running kernel.
# -q keeps pip's output short.
# NOTE(review): no versions are pinned here, so the installed set depends on
# what pip resolves on the day the cell is run - consider pinning once a
# known-good combination is identified.
%pip install -q pytorch_lightning torchmetrics omegaconf shortuuid transformers diffusers einops wandb rich matplotlib
%pip install -q smplx h5py scikit-image spacy ftfy more-itertools natsort tensorboard sentencepiece
# gdown is the command-line tool the download cells below call.
%pip install -q gdown pandas


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/831.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━[0m [32m593.9/831.6 kB[0m [31m17.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m831.6/831.6 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m39.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
# Report whether PyTorch can see a CUDA device, and which one
import torch

has_cuda = torch.cuda.is_available()
print(f"CUDA available: {has_cuda}")
if not has_cuda:
    print("No GPU detected")
else:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Number of GPUs: {torch.cuda.device_count()}")


CUDA available: False
No GPU detected


## Download Required Models


In [4]:
# Download SMPL models to Drive (one-time setup)
drive_data = '/content/drive/MyDrive/GraduationProject/CodeFiles/SaSOKE'
os.makedirs(f'{drive_data}/deps', exist_ok=True)

!gdown 1YIXddvvBJPQVRuKON2Xc9EEDXikRTteo -O /tmp/smpl_models.zip
!unzip -q /tmp/smpl_models.zip -d {drive_data}/deps/
!rm /tmp/smpl_models.zip
print("SMPL models downloaded to Drive")


Downloading...
From (original): https://drive.google.com/uc?id=1YIXddvvBJPQVRuKON2Xc9EEDXikRTteo
From (redirected): https://drive.google.com/uc?id=1YIXddvvBJPQVRuKON2Xc9EEDXikRTteo&confirm=t&uuid=89b24980-cebc-4f93-b69f-56ca925d3f2a
To: /tmp/smpl_models.zip
100% 841M/841M [00:09<00:00, 89.3MB/s]
SMPL models downloaded to Drive


In [5]:
# Download t2m evaluators (required for evaluation metrics)
# Download to Drive location
!mkdir -p {drive_data}/deps/t2m
!cd {drive_data}/deps && bash /content/SaSOKE/prepare/download_t2m_evalutors.sh
print("t2m evaluators downloaded to Drive")


The t2m evaluators will be stored in the './deps' folder
Downloading
Downloading...
From (original): https://drive.google.com/uc?id=1AYsmEG8I3fAAoraT4vau0GnesWBWyeT8
From (redirected): https://drive.google.com/uc?id=1AYsmEG8I3fAAoraT4vau0GnesWBWyeT8&confirm=t&uuid=c14c9d14-2452-4d13-8969-3844ff9ef6d0
To: /content/drive/.shortcut-targets-by-id/1Zt_YZTaC0EPNRRRhXB6M7T7uuWzDikpk/GraduationProject/CodeFiles/SaSOKE/deps/deps/t2m.tar.gz
100% 2.16G/2.16G [00:40<00:00, 52.8MB/s]
Extracting
._t2m
tar: Ignoring unknown extended header keyword 'LIBARCHIVE.xattr.com.apple.quarantine'
t2m/
t2m/._.DS_Store
tar: Ignoring unknown extended header keyword 'SCHILY.fflags'
tar: Ignoring unknown extended header keyword 'LIBARCHIVE.xattr.com.apple.FinderInfo'
t2m/.DS_Store
t2m/._glove
tar: Ignoring unknown extended header keyword 'LIBARCHIVE.xattr.com.apple.quarantine'
t2m/glove/
t2m/._t2m
tar: Ignoring unknown extended header keyword 'LIBARCHIVE.xattr.com.apple.quarantine'
t2m/t2m/
t2m/t2m/._.DS_Store
tar:

In [6]:
# Download SMPL-X normalization statistics to Drive
os.makedirs(f'{drive_data}/smpl-x', exist_ok=True)
!gdown 1NH-eVtS0nNjMjCwae-A1ii5sxj44C3bo -O {drive_data}/smpl-x/mean.pt
!gdown 1FHHWS0GPM2s6S2PB2JHv4ufdEbzezuKW -O {drive_data}/smpl-x/std.pt
print("SMPL-X mean/std downloaded to Drive")


Downloading...
From: https://drive.google.com/uc?id=1NH-eVtS0nNjMjCwae-A1ii5sxj44C3bo
To: /content/drive/MyDrive/GraduationProject/CodeFiles/SaSOKE/smpl-x/mean.pt
100% 1.91k/1.91k [00:00<00:00, 4.84MB/s]
Downloading...
From: https://drive.google.com/uc?id=1FHHWS0GPM2s6S2PB2JHv4ufdEbzezuKW
To: /content/drive/MyDrive/GraduationProject/CodeFiles/SaSOKE/smpl-x/std.pt
100% 1.90k/1.90k [00:00<00:00, 7.80MB/s]
SMPL-X mean/std downloaded to Drive


In [7]:
# Download pretrained tokenizer to Drive (optional)
os.makedirs(f'{drive_data}/checkpoints/vae', exist_ok=True)
!gdown 18HdPeXwz4O6LY4FZMC5BZ9rja4pcUTFk -O {drive_data}/checkpoints/vae/tokenizer.ckpt
print("Pretrained tokenizer downloaded to Drive")


Downloading...
From (original): https://drive.google.com/uc?id=18HdPeXwz4O6LY4FZMC5BZ9rja4pcUTFk
From (redirected): https://drive.google.com/uc?id=18HdPeXwz4O6LY4FZMC5BZ9rja4pcUTFk&confirm=t&uuid=01d0cdd9-71ae-408f-b9d7-a90f69509195
To: /content/drive/MyDrive/GraduationProject/CodeFiles/SaSOKE/checkpoints/vae/tokenizer.ckpt
100% 1.77G/1.77G [00:24<00:00, 72.7MB/s]
Pretrained tokenizer downloaded to Drive


In [8]:
# Download fine-tuned mBART model to Drive
os.makedirs(f'{drive_data}/deps/mbart-h2s-csl-phoenix', exist_ok=True)
!gdown --folder 1GnaHrI0PC4ZRr-GK3FS2GXcQwlrpA5Gi -O {drive_data}/deps/
print("mBART model downloaded to Drive")


Retrieving folder contents
Processing file 189e3DNUiOdfJNgTmxUX6nm5DlVLtk3dp config.json
Processing file 1RvyRV7kVlfw866AnUtgxNhNYTZtuoW4p map_ids.pkl
Processing file 1iVGE5R5FgR5CkpHQCgUR_8aqujhpQ2DF pytorch_model.bin
Processing file 1TRfdAdXKaGPlLCt1cOWm3whU3YreKwnG sentencepiece.bpe.model
Processing file 1tSGMTpSRZror-Sqhiz_-tmM10f_DC6YX tokenizer.json
Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From: https://drive.google.com/uc?id=189e3DNUiOdfJNgTmxUX6nm5DlVLtk3dp
To: /content/drive/MyDrive/GraduationProject/CodeFiles/SaSOKE/deps/mbart-h2s-csl-phoenix/config.json
100% 1.06k/1.06k [00:00<00:00, 4.00MB/s]
Downloading...
From: https://drive.google.com/uc?id=1RvyRV7kVlfw866AnUtgxNhNYTZtuoW4p
To: /content/drive/MyDrive/GraduationProject/CodeFiles/SaSOKE/deps/mbart-h2s-csl-phoenix/map_ids.pkl
100% 131k/131k [00:00<00:00, 37.8MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1iVGE5R5FgR5CkpHQCgU

## Verify Installation


In [10]:
# Check that every artifact the later notebooks rely on is present in Drive
required_rel_paths = (
    'deps/smpl_models',
    'deps/t2m/t2m',
    'deps/mbart-h2s-csl-phoenix',
    'smpl-x/mean.pt',
    'smpl-x/std.pt',
    'checkpoints/vae/tokenizer.ckpt',
)
required_files = [f'{drive_data}/{rel_path}' for rel_path in required_rel_paths]

# Map the existence check straight to the label that gets printed
status_labels = {True: "OK", False: "MISSING"}

print("Verification (in Drive):")
for path in required_files:
    exists = os.path.exists(path)
    status = status_labels[exists]
    print(f"[{status}] {path}")


Verification (in Drive):
[OK] /content/drive/MyDrive/GraduationProject/CodeFiles/SaSOKE/deps/smpl_models
[OK] /content/drive/MyDrive/GraduationProject/CodeFiles/SaSOKE/deps/t2m/t2m
[OK] /content/drive/MyDrive/GraduationProject/CodeFiles/SaSOKE/deps/mbart-h2s-csl-phoenix
[OK] /content/drive/MyDrive/GraduationProject/CodeFiles/SaSOKE/smpl-x/mean.pt
[OK] /content/drive/MyDrive/GraduationProject/CodeFiles/SaSOKE/smpl-x/std.pt
[OK] /content/drive/MyDrive/GraduationProject/CodeFiles/SaSOKE/checkpoints/vae/tokenizer.ckpt
