Exploration of the AIST++ and FineDance Datasets

In [1]:
# List the contents of the DanceGen workspace directory.
!ls /root/dev/DanceGen

Bailando  EDGE	LODGE  MambaDance  dataset


In [2]:
import os

# Make the DanceGen workspace the working directory so relative paths resolve against it.
os.chdir("/root/dev/DanceGen")
# Echo the working directory to confirm the change took effect.
os.getcwd()

'/root/dev/DanceGen'

In [3]:
import torch

# Preferred GPU for this notebook. It is only pinned when CUDA is available and
# the index exists, so the cell also runs on CPU-only hosts or hosts with fewer GPUs.
gpu_index = 5
if torch.cuda.is_available():
    if gpu_index < torch.cuda.device_count():
        torch.cuda.set_device(gpu_index)
    # "cuda" without an index refers to the current CUDA device.
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Show the active CUDA device index, or the CPU device when CUDA is unavailable.
torch.cuda.current_device() if device.type == "cuda" else device

5

In [4]:
# Show per-GPU temperature, utilisation and memory usage via the gpustat CLI.
!gpustat

[1m[37m86e12131a702              [m  Fri Oct 25 19:27:13 2024  [1m[30m535.154.05[m
[36m[0][m [34mNVIDIA GeForce RTX 4090[m |[31m 29°C[m, [32m  0 %[m | [36m[1m[33m    3[m / [33m24564[m MB |
[36m[1][m [34mNVIDIA GeForce RTX 4090[m |[31m 32°C[m, [32m  0 %[m | [36m[1m[33m    3[m / [33m24564[m MB |
[36m[2][m [34mNVIDIA GeForce RTX 4090[m |[31m 28°C[m, [32m  0 %[m | [36m[1m[33m    3[m / [33m24564[m MB |
[36m[3][m [34mNVIDIA GeForce RTX 4090[m |[31m 32°C[m, [32m  0 %[m | [36m[1m[33m    3[m / [33m24564[m MB |
[36m[4][m [34mNVIDIA GeForce RTX 4090[m |[31m 35°C[m, [32m  0 %[m | [36m[1m[33m    3[m / [33m24564[m MB |
[36m[5][m [34mNVIDIA GeForce RTX 4090[m |[31m 36°C[m, [32m  0 %[m | [36m[1m[33m  778[m / [33m24564[m MB |


# AIST++ Exploration

In [6]:
# List the AIST++ data directory inside the workspace.
!ls /root/dev/DanceGen/dataset/AIST/aistpp

motions  wavs


In [7]:
# List the AIST directory under /root/dataset (a different root from the workspace copy).
!ls /root/dataset/AIST/

edge_aistpp


In [8]:
import os

# Switch into the EDGE checkout so relative paths resolve against it.
os.chdir("/root/dev/DanceGen/EDGE")
# Echo the working directory to confirm the change took effect.
os.getcwd()

'/root/dev/DanceGen/EDGE'

## Preprocessing of AIST++ from EDGE

In [9]:
from pathlib import Path

data_dir = Path("/root/dev/DanceGen/dataset/AIST/aistpp")
wave_dir = data_dir / "wavs"
motion_dir = data_dir / "motions"

# glob("*.ext") matches files directly inside the directory only;
# glob("**/*.ext") would also descend into all subdirectories.
wav_files = sorted(wave_dir.glob("*.wav"))
motion_files = sorted(motion_dir.glob("*.pkl"))
if not wav_files:
    raise FileNotFoundError(f"no .wav files found in {wave_dir}")
if not motion_files:
    raise FileNotFoundError(f"no .pkl files found in {motion_dir}")

# Use the alphabetically first file of each kind as the running example.
wav_file = wav_files[0]
motion_file = motion_files[0]
print(f"wave file: {wav_file}\nmotion file: {motion_file}")

wave file: /root/dev/DanceGen/dataset/AIST/aistpp/wavs/gBR_sBM_cAll_d04_mBR0_ch01.wav
motion file: /root/dev/DanceGen/dataset/AIST/aistpp/motions/gBR_sBM_cAll_d04_mBR0_ch01.pkl


### slicing audio with librosa

In [10]:
import librosa as lr
import soundfile as sf

# Load the audio file as a floating-point time series.
# librosa resamples to sr=22050 by default; passing sr=None keeps the file's
# native sampling rate instead.
audio, sr = lr.load(wav_file, sr=None) # sr: sampling rate (samples per second)

In [11]:
import IPython.display as ipd

# Render an inline audio player for the full clip.
ipd.display(ipd.Audio(audio, rate=sr))

In [12]:
# Inspect the loaded waveform: array shape, sampling rate and resulting duration.
print(audio.shape)
print(sr)
# duration [s] = number of samples / samples per second
print("Total duration of the audio in seconds: ", len(audio) / sr)

(575488,)
48000
Total duration of the audio in seconds:  11.989333333333333


In [13]:
# Slicing parameters, both in seconds: each slice is 5 s long and a new slice starts every 0.5 s.
stride = 0.5
length = 5
# Convert both durations from seconds to sample counts at the file's sampling rate.
window = int(length * sr)
stride_step = int(stride * sr)
print(window, stride_step)

240000 24000


- sampling rate (`sr`): number of audio samples per second
- length: duration (in seconds) of each slice
- stride: offset (in seconds) between the start of one slice and the start of the next
- window: number of samples in one slice, i.e. `int(length * sr)`
- stride_step: number of samples between consecutive slice starts, i.e. `int(stride * sr)`

In [14]:
# Collect every full-length window of the waveform, advancing by one stride each time.
audio_slices = []
last_start = len(audio) - window  # last offset that still yields a full window
for start_idx in range(0, last_start + 1, stride_step):
    # To persist a slice instead of keeping it in memory:
    # sf.write(f"{out_dir}/{file_name}_slice{idx}.wav", audio_slice, sr)
    audio_slices.append(audio[start_idx : start_idx + window])

# Leave the same counters behind as a manual while-loop would: the number of
# slices taken and the first start offset that no longer fits.
idx = len(audio_slices)
start_idx = idx * stride_step
print(len(audio))
print(stride_step)
print(len(audio_slices))
print(start_idx, idx)

575488
24000
14
336000 14


In [15]:
# Listen to one of the slices (index 10) to sanity-check the slicing.
ipd.display(ipd.Audio(audio_slices[10], rate=sr))

In [16]:
def slice_audio(audio_file, stride, length, out_dir):
    """Cut an audio file into fixed-length, overlapping clips and save each as a WAV.

    Only full-length clips are written; a trailing remainder shorter than
    ``length`` is dropped.

    Args:
        audio_file: Path (str or Path) of the audio file to slice.
        stride: Offset in seconds between the starts of consecutive clips; must be > 0.
        length: Duration in seconds of every clip; must be > 0.
        out_dir: Directory that receives ``<stem>_slice<i>.wav``. It is not
            created by this function.

    Returns:
        The number of clips written (0 when the audio is shorter than ``length``).

    Raises:
        ValueError: If ``stride`` or ``length`` is not positive, or rounds down
            to zero samples at the file's sampling rate. A zero stride would
            otherwise never advance and the loop would write files forever.
    """
    if stride <= 0 or length <= 0:
        raise ValueError(
            f"stride and length must be positive, got stride={stride}, length={length}"
        )

    audio_file = Path(audio_file)
    # sr=None keeps the file's native sampling rate.
    audio, sr = lr.load(audio_file, sr=None)

    # Convert seconds to sample counts.
    window = int(length * sr)
    stride_step = int(stride * sr)
    if window <= 0 or stride_step <= 0:
        raise ValueError(
            f"stride={stride} / length={length} round to zero samples at sr={sr}"
        )

    file_name = audio_file.stem
    idx = 0
    for start_idx in range(0, len(audio) - window + 1, stride_step):
        audio_slice = audio[start_idx : start_idx + window]
        sf.write(f"{out_dir}/{file_name}_slice{idx}.wav", audio_slice, sr)
        idx += 1
    return idx

### slicing motion

In [17]:
# Show which motion pickle is used as the running example.
print(motion_file)

/root/dev/DanceGen/dataset/AIST/aistpp/motions/gBR_sBM_cAll_d04_mBR0_ch01.pkl


In [18]:
import pickle

# Load the motion pickle once, through a context manager so the file handle is closed.
# NOTE: pickle.load can execute arbitrary code; only open files from a trusted source.
with open(motion_file, "rb") as rf:
    motion = pickle.load(rf)
print(motion.keys()) # smpl_trans → pos, smpl_poses → q, smpl_scaling → scale
print(motion["smpl_trans"].shape)
print(motion["smpl_poses"].shape)
print(motion["smpl_scaling"])

dict_keys(['smpl_loss', 'smpl_poses', 'smpl_scaling', 'smpl_trans'])
(720, 3)
(720, 72)
[93.77886]


In [19]:
# stride: 0.5, length: 5 (seconds, reused from the audio slicing cells)
q, scale = motion["smpl_poses"], motion["smpl_scaling"][0]

# Normalize root position into a NEW array. An in-place `pos /= scale` would also
# rescale motion["smpl_trans"], so re-running this cell would divide the data again.
pos = motion["smpl_trans"] / scale

# Frames per second used to convert seconds to frame counts; matches raw_fps = 60
# in AISTPPDataset further down this notebook.
motion_fps = 60
window = int(length * motion_fps)
stride_step = int(stride * motion_fps)
print(window, stride_step)

300 30


In [20]:
# Slice the motion with the same stride/length as the audio, capped at the number
# of audio slices so the two modalities stay paired one-to-one.
num_slices = idx  # slice count left behind by the audio slicing cell
motion_slices = []

start_idx = 0
slice_count = 0
# slice until done or until matching audio slices
while start_idx <= len(pos) - window and slice_count < num_slices:
    out = {
        "pos": pos[start_idx : start_idx + window],
        "q": q[start_idx : start_idx + window],
    }
    # To persist a slice instead of keeping it in memory:
    # with open(f"{out_dir}/{file_name}_slice{slice_count}.pkl", "wb") as wf:
    #     pickle.dump(out, wf)
    motion_slices.append(out)
    start_idx += stride_step
    slice_count += 1
print(len(motion["smpl_trans"]))
print(stride_step)
print(len(motion_slices))
# Report the motion slice counter; `idx` is the audio counter and would hide a mismatch.
print(start_idx, slice_count)

720
30
14
420 14


In [21]:
# Each motion slice is a dict holding the root positions ("pos") and joint rotations ("q").
motion_slices[0].keys()

dict_keys(['pos', 'q'])

In [1]:
# convert motion into fbx file
import os

# Switch into the SMPL-to-FBX checkout so its modules and "ybot.fbx" resolve relative to it.
os.chdir("/root/dev/DanceGen/EDGE/SMPL-to-FBX")
# Echo the working directory to confirm the change took effect.
os.getcwd()

'/root/dev/DanceGen/EDGE/SMPL-to-FBX'

In [2]:
from SmplObject import SmplObjects
from FbxReadWriter import FbxReadWrite  # was misspelled "FbxREadWrite", which never matched the name used below

# Draft of an in-memory SmplObjects variant, kept for reference (not used yet):
# class SmplObject_(SmplObjects):
#     def __init__(self, motion):
#         self.files[filename] = {
#             "smpl_poses": motion["q"],
#             "smpl_trans": motion["pos"]
#         }
#         self.keys = [key for key in self.files.keys()]
#
#     def __len__(self):
#         return len(self.keys)
#
#     def __getitem__(self, idx: int):
#         key = self.keys[idx]
#         return key, self.files[key]

fbx_source_path = "ybot.fbx"
# smplObjects = SmplObject_(motion_slices[0])

# NOTE(review): the manual InitializeSdkObjects()/LoadScene() calls were removed.
# Neither name is imported anywhere in this notebook and their results were never
# used; FbxReadWrite(fbx_source_path) is presumably responsible for loading the
# scene itself — confirm against FbxReadWriter.py.
fbxReadWrite = FbxReadWrite(fbx_source_path)

# addAnimation(pkl_name, smpl_params)
# NOTE(review): smpl_params is assumed to be a dict with "smpl_poses" and
# "smpl_trans", mirroring the draft class above — confirm against FbxReadWriter.py.
# "pos" here has already been divided by the SMPL scaling factor; verify that is
# the unit the exporter expects.
smpl_params = {
    "smpl_poses": motion_slices[0]["q"],
    "smpl_trans": motion_slices[0]["pos"],
}
fbxReadWrite.addAnimation("file_name", smpl_params)
fbxReadWrite.writeFbx("res", "file_name")

Error: module FbxCommon failed to import.

Copy the files located in the compatible sub-folder lib/python<version> into your python interpreter site-packages folder.
For example: cp ../../lib/Python37_x64/* /usr/local/lib/python3.7/site-packages


NameError: name 'FbxAnimCurve' is not defined

In [None]:
import FbxReadWriter

## Dataset and Dataloader

In [None]:
# Root of the AIST++ data; read by the load_aistpp scratch cell further down.
data_path = "/root/dev/DanceGen/dataset/AIST/aistpp"

In [24]:
import glob
import os
import pickle
from typing import Any

import numpy as np
from pytorch3d.transforms import (RotateAxisAngle, axis_angle_to_quaternion,
                                  quaternion_multiply, quaternion_to_axis_angle)
from torch.utils.data import Dataset

# NOTE: load_aistpp and process_dataset functions are in the class in .py file!
class AISTPPDataset(Dataset):
    """Dataset pairing processed AIST++ motion clips with per-clip feature files.

    The raw data is obtained from ``self.load_aistpp()`` and cached as a pickle
    in ``backup_path``; later constructions reuse that cache unless
    ``force_reload`` is set. ``load_aistpp`` and ``process_dataset`` are NOT
    defined in this notebook cell (they live in the .py version of the class)
    and must be available on the class before it is instantiated.

    Args:
        data_path: Root directory of the dataset; stored as ``self.data_path``.
        backup_path: Directory for the cached pickle and ``normalizer.pkl``;
            created if missing.
        train: Selects the train (True) or test (False) split and cache file.
        feature_type: Name of the audio feature set; stored for ``load_aistpp``.
        normalizer: Object stored as ``self.normalizer``; for the test split it
            is also pickled to ``backup_path/normalizer.pkl``.
        data_len: Stored as ``self.data_len``; not otherwise used in this cell.
        include_contacts: Accepted for interface compatibility; unused in this cell.
        force_reload: Ignore an existing cache and rebuild it from the raw data.
    """

    def __init__(
        self, data_path: str, backup_path: str, train: bool,
        feature_type: str = "jukebox", normalizer: Any = None, data_len: int = -1,
        include_contacts: bool = True, force_reload: bool = False
    ):
        self.data_path = data_path
        self.raw_fps = 60
        self.data_fps = 30
        assert self.data_fps <= self.raw_fps
        # Ratio between the raw and target frame rates (2 with the values above).
        self.data_stride = self.raw_fps // self.data_fps

        self.train = train
        self.split = "Train" if self.train else "Test"
        self.feature_type = feature_type

        self.normalizer = normalizer
        self.data_len = data_len

        pickle_name = "processed_train_data.pkl" if train else "processed_test_data.pkl"

        backup_path = Path(backup_path)
        backup_path.mkdir(parents=True, exist_ok=True)
        cache_file = backup_path / pickle_name

        # save normalizer (context manager so the handle is flushed and closed)
        if not train:
            with open(backup_path / "normalizer.pkl", "wb") as f:
                pickle.dump(normalizer, f)

        # load raw data
        if not force_reload and cache_file.exists():
            print("Using cached dataset...")
            # NOTE: pickle.load can execute arbitrary code; the cache directory must be trusted.
            with open(cache_file, "rb") as f:
                data = pickle.load(f)
        else:
            print("Loading dataset...")
            data = self.load_aistpp() # Call this last
            with open(cache_file, "wb") as f:
                pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

        print(f"Loaded {self.split} Dataset with Dimensions: Pos: {data['pos'].shape}, Q: {data['q'].shape}")

        # process data, convert to 6-DoF etc
        pose_input = self.process_dataset(data["pos"], data["q"])
        self.data = {
            "pose": pose_input,
            "filenames": data["filenames"],
            "wavs": data["wavs"],
        }
        assert len(pose_input) == len(data["filenames"])
        self.length = len(pose_input)

    def __len__(self):
        """Return the number of processed pose entries."""
        return self.length

    def __getitem__(self, idx):
        """Return ``(pose, feature, feature_filename, wav)`` for entry ``idx``.

        ``feature`` is loaded on demand with ``np.load`` from the path stored in
        ``self.data["filenames"][idx]`` and converted to a torch tensor.
        """
        filename_ = self.data["filenames"][idx]
        feature = torch.from_numpy(np.load(filename_))
        return self.data["pose"][idx], feature, filename_, self.data["wavs"][idx]

In [22]:
# def load_aistpp(self):
# NOTE(review): scratch copy of the method body. `self` is not defined at cell
# scope, so these lines only run once they are placed back inside a method of
# AISTPPDataset (or `self` is bound to an instance first). They also read the
# notebook-level `data_path` rather than `self.data_path`.
# Build the per-split directories: sliced motions, audio features, sliced wavs.
split_data_path = os.path.join(data_path, "train" if self.train else "test")
motion_path = os.path.join(split_data_path, "motions_sliced")
sound_path = os.path.join(split_data_path, f"{self.feature_type}_feats")
wav_path = os.path.join(split_data_path, "wavs_sliced")

# sort motions and sounds
# TODO: not implemented yet in this draft.


'0.7.8'