# Setting up data and env

## Upload Your Project

In [None]:
from google.colab import files

# Ask the user for the project archive through Colab's upload widget.
print("Upload your project645.zip file...")
uploaded = files.upload()

# Echo back every received file with its size so the upload can be checked.
# `uploaded` is used as a mapping of file name -> file content.
for uploaded_name, uploaded_content in uploaded.items():
  print(f'User uploaded file "{uploaded_name}" with length {len(uploaded_content)} bytes')

Upload your project645.zip file...


Saving project645.zip to project645.zip
User uploaded file "project645.zip" with length 24112955 bytes


## Unzip the project

In [None]:
# Extract the uploaded archive into /content/ (-q keeps unzip quiet). The listings
# below inspect /content/project645/, which is where the archive content is expected.
# NOTE(review): when /content/project645 already exists from an earlier run, unzip
# presumably asks before overwriting — confirm, and consider -o / -n for re-runs.
!unzip -q project645.zip -d /content/
# Verify unzip by listing contents
!ls /content/project645/
!ls /content/project645/code/

code  LICENSE  mai645.yml  README.md  train_data_bvh
fix_feet.py			 read_bvh.py
generate_training_euler_data.py  read_bvh.pyc
generate_training_pos_data.py	 rotation2xyz.py
generate_training_quad_data.py	 rotation2xyz.pyc
__pycache__			 rotation_conversions.py
pytorch_train_euler_aclstm.py	 synthesize_euler_motion.py
pytorch_train_pos_aclstm.py	 synthesize_pos_motion_original_colab.py
pytorch_train_quad_aclstm.py	 synthesize_pos_motion.py
read_bvh_hierarchy.py		 synthesize_quad_motion.py
read_bvh_hierarchy.pyc		 test_encodings.py


## Mount Google Drive

In [None]:
# Mount Google Drive at /content/drive; later cells store their outputs under
# /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Create Output Folder

*   **Create an Output Folder in Google Drive:** It's highly recommended to create a dedicated folder in your Google Drive to store the outputs (trained models, generated data/BVH files). For example, you could create a folder named `mai645_project_outputs` directly in your "My Drive".
*   **Point Colab at that folder:** The next cell defines the variable `GDRIVE_OUTPUT_DIR`, which holds the path to this Drive folder. **Make sure the path matches the folder you created.**

In [None]:
import os

# IMPORTANT: Adjust this path if you named your Drive folder differently!
# Exactly one of the assignments below should be active: uncomment the one that
# belongs to the experiment you are running and comment out the others.
GDRIVE_OUTPUT_DIR = "/content/drive/MyDrive/mai645_project_outputs_con5"
# GDRIVE_OUTPUT_DIR = "/content/drive/MyDrive/mai645_project_outputs_con30"
# GDRIVE_OUTPUT_DIR = "/content/drive/MyDrive/mai645_project_outputs_con45"

# Make sure the folder exists on Drive; exist_ok=True makes re-running this cell harmless.
os.makedirs(GDRIVE_OUTPUT_DIR, exist_ok=True)
print(f"Outputs will be saved to: {GDRIVE_OUTPUT_DIR}")

Outputs will be saved to: /content/drive/MyDrive/mai645_project_outputs_con30


## Install Dependencies

In [None]:
# Install the latest compatible PyTorch, torchvision, torchaudio for this Colab environment
!pip install -q torch torchvision torchaudio

# Install other libraries, specifically targeting numpy in the 2.0.x range.
# This aims to satisfy thinc (>=2.0.0) AND tensorflow/numba (<2.1.0).
!pip install -q contourpy==1.3.1 cycler==0.12.1 fonttools==4.56.0 kiwisolver==1.4.8 matplotlib==3.10.1 "numpy>=2.0.0,<2.1.0" opencv-python==4.11.0.86 packaging==24.2 pyparsing==3.2.3 python-dateutil==2.9.0.post0 six==1.17.0 transforms3d==0.4.2

# Verify installation
!pip show torch torchvision torchaudio transforms3d numpy opencv-python tensorflow numba

## Change the current working directory to where your Python scripts are located

In [None]:
cd /content/project645/code/

/content/project645/code


# Quaternion Preprocessing

In [None]:
# Marker file whose presence is treated as "preprocessing succeeded and the data is there".
# It is expected to be written by generate_training_quad_data.py.
PREPROCESSED_QUAD_DONE_FLAG_PATH = "/content/project645/train_data_quad/salsa/metadata_quad.json"

# Where the archive of preprocessed data is cached on Google Drive.
PREPROCESSED_QUAD_ARCHIVE_GDRIVE_PATH = os.path.join(GDRIVE_OUTPUT_DIR, "preprocessed_quad_data.tar.gz")

# Start from "preprocessing is required"; the cache-loading cell flips this to
# False once cached data has been restored and verified.
preprocessing_needed = True

In [None]:

# Load Cached Preprocessed Quaternion Data (if available)

if os.path.exists(PREPROCESSED_QUAD_ARCHIVE_GDRIVE_PATH):
  print(f"Found cached preprocessed data archive at {PREPROCESSED_QUAD_ARCHIVE_GDRIVE_PATH}.")
  print("Attempting to load...")
  # Ensure the base target directory for extraction exists
  !mkdir -p /content/project645
  # Copy from Drive to local Colab environment
  !cp "{PREPROCESSED_QUAD_ARCHIVE_GDRIVE_PATH}" /tmp/preprocessed_quad_data.tar.gz
  # Extract the archive into /content/project645/
  # The archive was created with paths relative to /content/project645
  print("Extracting data...")
  !tar -xzf /tmp/preprocessed_quad_data.tar.gz -C /content/project645/

  # Verify if a key file (flag path) now exists after extraction
  if os.path.exists(PREPROCESSED_QUAD_DONE_FLAG_PATH):
    print("Successfully loaded and verified cached preprocessed data.")
    preprocessing_needed = False
  else:
    print(f"Cached data archive found and extracted, but key file {PREPROCESSED_QUAD_DONE_FLAG_PATH} is missing.")
    print("This might indicate an issue with the cache or the extraction process. Will re-preprocess.")
    # Attempt to clean up potentially incomplete/corrupted extraction to avoid issues
    !rm -rf /content/project645/train_data_quad
    !rm -rf /content/project645/reconstructed_bvh_data_quad
    preprocessing_needed = True # Ensure it's true if verification fails
else:
  print(f"No cached preprocessed data archive found at {PREPROCESSED_QUAD_ARCHIVE_GDRIVE_PATH}.")
  print("Proceeding with fresh preprocessing.")
  preprocessing_needed = True # Explicitly set, though it's the default


Found cached preprocessed data archive at /content/drive/MyDrive/mai645_project_outputs_v2t1/preprocessed_quad_data.tar.gz.
Attempting to load...
Extracting data...
Successfully loaded and verified cached preprocessed data.


In [None]:
if preprocessing_needed:
  print("Running Quaternion preprocessing script as cached data was not used...")
  # Preprocess Quaternion data
  !python generate_training_quad_data.py

  # Verify that preprocessing script created the expected outputs and the flag file
  if os.path.exists(PREPROCESSED_QUAD_DONE_FLAG_PATH):
    print("Preprocessing script completed. Expected output flag file found.")
    print("Checking output directories:")
    !ls /content/project645/train_data_quad/
    !ls /content/project645/train_data_quad/salsa/
    !ls /content/project645/reconstructed_bvh_data_quad/salsa/

    # After successful preprocessing, save the results to Google Drive
    print("\nPreprocessing finished. Saving data to Google Drive for future runs...")
    # Create the archive.
    # -C /content/project645 makes paths in archive relative (e.g., train_data_quad, reconstructed_bvh_data_quad)
    # The directories to archive are /content/project645/train_data_quad and /content/project645/reconstructed_bvh_data_quad
    !tar -czf /tmp/preprocessed_quad_data.tar.gz -C /content/project645 train_data_quad reconstructed_bvh_data_quad
    !cp /tmp/preprocessed_quad_data.tar.gz "{PREPROCESSED_QUAD_ARCHIVE_GDRIVE_PATH}"
    print(f"Preprocessed data successfully saved to {PREPROCESSED_QUAD_ARCHIVE_GDRIVE_PATH}")
  else:
    print(f"Preprocessing script ran, but the key output flag file ({PREPROCESSED_QUAD_DONE_FLAG_PATH}) was not found.")
    print("Data will not be cached. Please check the preprocessing script and its outputs.")
    print("Listing contents of relevant directories for debugging (if they exist):")
    !ls -ld /content/project645/train_data_quad/ # Use -ld to show directory info or error
    !ls -ld /content/project645/train_data_quad/salsa/
    !ls -ld /content/project645/reconstructed_bvh_data_quad/
    !ls -ld /content/project645/reconstructed_bvh_data_quad/salsa/

else:
  print("Skipping Quaternion preprocessing as cached data was successfully loaded.")
  print("Verifying existence of loaded data directories:")
  # Confirm the directories expected from cache are present
  !ls /content/project645/train_data_quad/
  !ls /content/project645/train_data_quad/salsa/
  !ls /content/project645/reconstructed_bvh_data_quad/salsa/


Skipping Quaternion preprocessing as cached data was successfully loaded.
Verifying existence of loaded data directories:
salsa
01.npy	05.npy	09.npy	13.npy	17.npy	21.npy	25.npy	29.npy
02.npy	06.npy	10.npy	14.npy	18.npy	22.npy	26.npy	30.npy
03.npy	07.npy	11.npy	15.npy	19.npy	23.npy	27.npy	metadata_quad.json
04.npy	08.npy	12.npy	16.npy	20.npy	24.npy	28.npy
01.bvh	04.bvh	07.bvh	10.bvh	13.bvh	16.bvh	19.bvh	22.bvh	25.bvh	28.bvh
02.bvh	05.bvh	08.bvh	11.bvh	14.bvh	17.bvh	20.bvh	23.bvh	26.bvh	29.bvh
03.bvh	06.bvh	09.bvh	12.bvh	15.bvh	18.bvh	21.bvh	24.bvh	27.bvh	30.bvh


# Quaternion training

In [None]:

# Define paths for training outputs
QUAD_DATA_DIR = "/content/project645/train_data_quad/salsa/" # Created by preprocessing script
METADATA_QUAD_PATH = os.path.join(QUAD_DATA_DIR, "metadata_quad.json")
QUAD_WEIGHTS_DIR = os.path.join(GDRIVE_OUTPUT_DIR, "weights_quad")
QUAD_TRAIN_BVH_DIR = os.path.join(GDRIVE_OUTPUT_DIR, "train_output_bvh_quad")
os.makedirs(QUAD_WEIGHTS_DIR, exist_ok=True)
os.makedirs(QUAD_TRAIN_BVH_DIR, exist_ok=True)

# Training command for pytorch_train_quad_aclstm.py
# The --in_frame and --out_frame arguments are now determined by metadata inside the script.
# Standard BVH path is relative to the /code directory.
# Set total_iterations to a reasonable number for demonstration/testing.
# For full training, use a higher value like 50000 as per instructions.
TOTAL_ITERATIONS_QUAD = 50000 # Adjust as needed
BATCH_SIZE_QUAD = 32

!python pytorch_train_quad_aclstm.py \
    --dances_folder "{QUAD_DATA_DIR}" \
    --metadata_path "{METADATA_QUAD_PATH}" \
    --write_weight_folder "{QUAD_WEIGHTS_DIR}/" \
    --write_bvh_motion_folder "{QUAD_TRAIN_BVH_DIR}/" \
    --standard_bvh_file "../train_data_bvh/standard.bvh" \
    --batch_size {BATCH_SIZE_QUAD} \
    --seq_len 100 \
    --total_iterations {TOTAL_ITERATIONS_QUAD} \
    --print_loss_iter 100 \
    --save_model_iter 1000 \
    --save_bvh_iter 1000 \
    # --read_weight_path "{QUAD_WEIGHTS_DIR}/0015000.weight" # Optional: to resume training


print(f"\n--- Training for Quaternion script finished. Check {QUAD_WEIGHTS_DIR} for models. ---")

Loaded 42 non-end-site bones from hierarchy.
Loaded metadata from /content/project645/train_data_quad/salsa/metadata_quad.json
Loading motion files from /content/project645/train_data_quad/salsa/...
30 motion files loaded.
Training on: cuda
########### iter 0000000 ######################
loss: 2.463281
########### iter 0000100 ######################
loss: 0.191560
########### iter 0000200 ######################
loss: 0.158475
########### iter 0000300 ######################
loss: 0.146552
########### iter 0000400 ######################
loss: 0.139894
########### iter 0000500 ######################
loss: 0.127827
########### iter 0000600 ######################
loss: 0.120742
########### iter 0000700 ######################
loss: 0.112335
########### iter 0000800 ######################
loss: 0.104727
########### iter 0000900 ######################
loss: 0.094029
########### iter 0001000 ######################
loss: 0.091160
Saved model weights to /content/drive/MyDrive/mai645_project_outpu

# Synthesis

In [None]:
# Define paths for training outputs
QUAD_DATA_DIR = "/content/project645/train_data_quad/salsa/" # Created by preprocessing script
METADATA_QUAD_PATH = os.path.join(QUAD_DATA_DIR, "metadata_quad.json")
QUAD_WEIGHTS_DIR = os.path.join(GDRIVE_OUTPUT_DIR, "weights_quad")
QUAD_TRAIN_BVH_DIR = os.path.join(GDRIVE_OUTPUT_DIR, "train_output_bvh_quad")
os.makedirs(QUAD_WEIGHTS_DIR, exist_ok=True)
os.makedirs(QUAD_TRAIN_BVH_DIR, exist_ok=True)

# Define paths and parameters for synthesis
QUAD_SYNTH_BVH_DIR = os.path.join(GDRIVE_OUTPUT_DIR, "synthesis_output_bvh_quad")
os.makedirs(QUAD_SYNTH_BVH_DIR, exist_ok=True)

# Determine the latest weight file based on TOTAL_ITERATIONS_QUAD
# Assuming weight files are saved with leading zeros, e.g., 0050000.weight
LATEST_QUAD_WEIGHT_FILE_NAME = f"{22000:07d}.weight"
LATEST_QUAD_WEIGHT_PATH = os.path.join(QUAD_WEIGHTS_DIR, LATEST_QUAD_WEIGHT_FILE_NAME)

# Standard BVH path is relative to the /code directory.
STANDARD_BVH_PATH_SYNTH = "../train_data_bvh/standard.bvh"

# Synthesis script parameters
SYNTH_BATCH_SIZE = 1
SYNTH_INITIAL_SEQ_LEN = 300
SYNTH_GENERATE_FRAMES = 400
SYNTH_HIDDEN_SIZE = 1024 # Should match the trained model's hidden size
SYNTH_QUANT_COMPARE_LEN = 20 # For quantitative evaluation if seed data allows

# Check if the specific weight file exists, otherwise, it might cause an error.
print(f"Attempting to use weight file: {LATEST_QUAD_WEIGHT_PATH}")
if not os.path.exists(LATEST_QUAD_WEIGHT_PATH):
    print(f"WARNING: Weight file {LATEST_QUAD_WEIGHT_PATH} not found. Synthesis might fail or use an uninitialized model if the script handles this.")
    print(f"Please check the QUAD_WEIGHTS_DIR ({QUAD_WEIGHTS_DIR}) for available .weight files.")
    print("You might need to adjust LATEST_QUAD_WEIGHT_FILE_NAME or ensure training completed successfully and saved the model.")

!python synthesize_quad_motion.py \
    --read_weight_path "{LATEST_QUAD_WEIGHT_PATH}" \
    --dances_folder "{QUAD_DATA_DIR}" \
    --metadata_path "{METADATA_QUAD_PATH}" \
    --write_bvh_motion_folder "{QUAD_SYNTH_BVH_DIR}/" \
    --standard_bvh_file "{STANDARD_BVH_PATH_SYNTH}" \
    --batch_size {SYNTH_BATCH_SIZE} \
    --initial_seq_len {SYNTH_INITIAL_SEQ_LEN} \
    --generate_frames_number {SYNTH_GENERATE_FRAMES} \
    --hidden_size {SYNTH_HIDDEN_SIZE} \
    --quantitative_comparison_len {SYNTH_QUANT_COMPARE_LEN}

print(f"\n--- Quaternion synthesis finished. Check {QUAD_SYNTH_BVH_DIR} for generated BVH files. ---")


Attempting to use weight file: /content/drive/MyDrive/mai645_project_outputs_v2t1/weights_quad/0022000.weight
Using device: cuda
Loaded metadata from /content/project645/train_data_quad/salsa/metadata_quad.json
Loading motion files from /content/project645/train_data_quad/salsa/...
Loaded 01.npy, frames: 2243
Loaded 02.npy, frames: 2102
Loaded 03.npy, frames: 1831
Loaded 04.npy, frames: 1872
Loaded 05.npy, frames: 1679
Loaded 06.npy, frames: 1773
Loaded 07.npy, frames: 2072
Loaded 08.npy, frames: 3422
Loaded 09.npy, frames: 2576
Loaded 10.npy, frames: 1198
Loaded 11.npy, frames: 2109
Loaded 12.npy, frames: 1691
Loaded 13.npy, frames: 2272
Loaded 14.npy, frames: 1950
Loaded 15.npy, frames: 2141
Loaded 16.npy, frames: 2243
Loaded 17.npy, frames: 2102
Loaded 18.npy, frames: 1831
Loaded 19.npy, frames: 1872
Loaded 20.npy, frames: 1679
Loaded 21.npy, frames: 1773
Loaded 22.npy, frames: 2072
Loaded 23.npy, frames: 3422
Loaded 24.npy, frames: 2576
Loaded 25.npy, frames: 1198
Loaded 26.npy, fr