Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

12_zhao_2_1_1数据音频和动作序列好像不同步 #137

Open
gh786137611 opened this issue May 13, 2024 · 8 comments
Open

12_zhao_2_1_1数据音频和动作序列好像不同步 #137

gh786137611 opened this issue May 13, 2024 · 8 comments

Comments

@gh786137611
Copy link

我可视化出来12_zhao_2_1_1这个中文数据, 动作都开始动1秒了,音频才开始播放。
而且看动作好像不同步

@gh786137611
Copy link
Author

公布的数据中12_zhao_2_2_2,声音和动作也不同步

@gh786137611
Copy link
Author

12_zhao_2_2_2.mp4

这是可视化数据12_zhao_2_2_2的结果,感觉不同步

@gh786137611
Copy link
Author

我只可视化了上半身

@Lcococi
Copy link

Lcococi commented May 23, 2024

请问怎么可视化呢?

@gh786137611
Copy link
Author

按照smplx的骨骼定义,只画了上半身,1秒30帧,合成视频。然后用ffmpeg把音频和视频合成起来. 最终结果看起来动作和音频不同步

这是可视化的源码:
import os.path

import numpy as np
from smplx import *
import smplx
import torch
import matplotlib.pyplot as plt

from tqdm import tqdm

# Upper-body bone list as [parent_joint, child_joint] index pairs into the
# SMPL-X joint array returned by the model's forward pass.
# NOTE(review): the name has a typo ("uppper"); kept as-is because the rest
# of the script references it by this spelling.
uppper_body_bones = [
# spine chain: pelvis -> spine1 -> spine2 -> spine3 -> neck -> head
[0, 3], [3, 6], [6, 9], [9, 12], [12, 15],
# left arm: spine3 -> collar -> shoulder -> elbow -> wrist
[9, 13], [13, 16], [16, 18], [18, 20],
# right arm: spine3 -> collar -> shoulder -> elbow -> wrist
[9, 14], [14, 17], [17, 19], [19, 21],
# left hand finger chains (wrist 20 down to fingertip indices 66-70)
[20, 37], [37, 38], [38, 39], [39, 66], [20, 25], [25, 26], [26, 27], [27, 67],
[20, 28], [28, 29], [29, 30], [30, 68], [20, 34], [34, 35], [35, 36], [36, 69],
[20, 31], [31, 32], [32, 33], [33, 70],
# right hand finger chains (wrist 21 down to fingertip indices 71-75)
[21, 52], [52, 53], [53, 54], [54, 71],
[21, 40], [40, 41], [41, 42], [42, 72],
[21, 43], [43, 44], [44, 45], [45, 73],
[21, 49], [49, 50], [50, 51], [51, 74],
[21, 46], [46, 47], [47, 48], [48, 75]
]

def CreateSMPLXModel(model_folder='/media/mm/ssd3T/data/models_smplx_v1_1/models'):
    """Construct a neutral-gender SMPL-X body model.

    model_folder: directory containing the SMPL-X .npz model files.
    Returns the smplx model object ready for a forward pass.
    """
    return smplx.create(
        model_folder,
        model_type='smplx',
        gender='neutral',
        use_face_contour=True,
        num_betas=300,
        num_expression_coeffs=100,
        ext='npz',
        use_pca=False,  # full 45-dim axis-angle hand poses, no PCA compression
    )

def LoadPoseVecFile(npz_file: str, smplx_model):
    """Load a BEAT SMPL-X .npz sequence and return unit bone directions.

    npz_file: path to a BEAT v2 motion file (keys: betas, expressions, poses).
    smplx_model: model from CreateSMPLXModel().
    Returns a tensor of shape (frames, len(uppper_body_bones), 3) holding the
    normalized child-minus-parent direction of each upper-body bone per frame.
    """
    data = np.load(npz_file, allow_pickle=True)

    expression = torch.from_numpy(data['expressions']).float()
    n_frames = expression.shape[0]
    # Single shape vector shared by the whole sequence, repeated per frame.
    betas = torch.tile(torch.from_numpy(data['betas'])[None], (n_frames, 1))

    poses = torch.from_numpy(data['poses']).float()
    # Pose layout: global(0:3) | body(3:66) | jaw(66:69) | leye(69:72) |
    # reye(72:75) | left hand(75:120) | right hand(120:165)
    output = smplx_model(
        betas=betas,
        expression=expression,
        global_orient=poses[:, :3],
        body_pose=poses[:, 3:66],
        jaw_pose=poses[:, 66:69],
        leye_pose=poses[:, 69:72],
        reye_pose=poses[:, 72:75],
        left_hand_pose=poses[:, 75:120],
        right_hand_pose=poses[:, 120:165],
        return_verts=True,
    )
    joints = output.joints.detach()

    # Gather [parent, child] joint pairs, take child - parent, then normalize
    # each bone vector to unit length.
    bone_pairs = joints[:, uppper_body_bones]
    directions = bone_pairs[:, :, 1] - bone_pairs[:, :, 0]
    norms = torch.sqrt(torch.sum(directions * directions, dim=2, keepdim=True))
    return directions / norms

def RenderPoseToFolder(pose_vec, folder):
    """Render every frame of *pose_vec* as a 3D skeleton image.

    pose_vec: tensor (frames, bones, 3) of unit bone directions, one row per
        entry in uppper_body_bones (as produced by LoadPoseVecFile).
    folder: output directory; frame i is written as '{i}_.jpg' (dpi=100),
        the naming scheme the later ffmpeg 'tmp/%d_.jpg' pattern expects.
    """
    os.makedirs(folder, exist_ok=True)
    # Fixed per-bone lengths matching uppper_body_bones order
    # (presumably meters from a reference SMPL-X body — TODO confirm).
    # Hoisted out of the frame loop: it is loop-invariant, and the original
    # rebuilt this 53-element literal once per frame.
    length = [0.1185, 0.1405, 0.0668, 0.1794, 0.1779, 0.1048, 0.1524, 0.2660, 0.2628,
              0.1058, 0.1333, 0.2743, 0.2589, 0.0562, 0.0327, 0.0287, 0.0363, 0.1134,
              0.0345, 0.0244, 0.0260, 0.1195, 0.0331, 0.0256, 0.0274, 0.1107, 0.0310,
              0.0256, 0.0240, 0.1042, 0.0213, 0.0207, 0.0215, 0.0561, 0.0327, 0.0287,
              0.0351, 0.1119, 0.0345, 0.0244, 0.0243, 0.1178, 0.0331, 0.0256, 0.0267,
              0.1090, 0.0310, 0.0256, 0.0233, 0.1028, 0.0213, 0.0207, 0.0210]
    for i in tqdm(range(len(pose_vec)), total=len(pose_vec)):
        vec = pose_vec[i].numpy()
        fig = plt.figure(figsize=(5, 5))
        ax = fig.add_subplot(111, projection='3d')
        # Forward kinematics: place each child joint relative to its parent
        # using the fixed bone length and the frame's unit direction.
        joints = np.zeros((144, 3))
        for bone, l, v in zip(uppper_body_bones, length, vec):
            b1, b2 = bone
            joints[b2] = joints[b1] + v * l
            ax.plot([joints[b1][0], joints[b2][0]],
                    [joints[b1][1], joints[b2][1]],
                    [joints[b1][2], joints[b2][2]])

        # Fixed camera and axis limits so all frames share one viewpoint.
        ax.view_init(elev=100, azim=-91)
        plt.axis('equal')
        plt.xlim(-0.4, 0.4)
        plt.ylim(0.0, 0.8)
        ax.set_zlim(0.5, 1)

        plt.grid(False)
        plt.axis(False)
        plt.gca().set_aspect('equal')
        plt.savefig(os.path.join(folder, f'{i}_.jpg'), dpi=100)
        plt.close(fig)  # release figure memory; critical over many frames

def PlotSkeleton(vec):
    """Show one skeleton frame in an interactive 3D matplotlib window.

    vec: array (bones, 3) of unit bone directions, one row per entry in
    uppper_body_bones.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    # Fixed per-bone lengths matching uppper_body_bones order
    # (presumably meters — TODO confirm against the source body model).
    length = [0.1185, 0.1405, 0.0668, 0.1794, 0.1779, 0.1048, 0.1524, 0.2660, 0.2628,
              0.1058, 0.1333, 0.2743, 0.2589, 0.0562, 0.0327, 0.0287, 0.0363, 0.1134,
              0.0345, 0.0244, 0.0260, 0.1195, 0.0331, 0.0256, 0.0274, 0.1107, 0.0310,
              0.0256, 0.0240, 0.1042, 0.0213, 0.0207, 0.0215, 0.0561, 0.0327, 0.0287,
              0.0351, 0.1119, 0.0345, 0.0244, 0.0243, 0.1178, 0.0331, 0.0256, 0.0267,
              0.1090, 0.0310, 0.0256, 0.0233, 0.1028, 0.0213, 0.0207, 0.0210]
    joints = np.zeros((144, 3))
    for (parent, child), bone_len, direction in zip(uppper_body_bones, length, vec):
        # Forward kinematics: child joint = parent joint + length * direction.
        joints[child] = joints[parent] + direction * bone_len

        xs = [joints[parent][0], joints[child][0]]
        ys = [joints[parent][1], joints[child][1]]
        zs = [joints[parent][2], joints[child][2]]
        ax.plot(xs, ys, zs)

    ax.view_init(elev=100, azim=-91)
    plt.axis('equal')
    plt.xlabel('X轴')
    plt.ylabel('Y轴')
    plt.show()

def main():
    """Render one BEAT sequence to per-frame skeleton images under 'tmp'."""
    npz_file = '/media/mm/ssd3T/data/EMAGE/beat_v2.0.0/beat_english_v2.0.0/smplxflame_30/1_wayne_0_1_1.npz'
    smplx_model = CreateSMPLXModel()
    pose_vec = LoadPoseVecFile(npz_file, smplx_model)
    # PlotSkeleton(pose_vec[0].numpy())

    RenderPoseToFolder(pose_vec, 'tmp')

# Bug fix: the pasted code read `if name == 'main':`, which raises NameError
# (markdown stripped the dunder underscores). The correct entry-point guard is:
if __name__ == '__main__':
    main()

# --- Encode the rendered frames into a video, then mux in the audio track. ---

video_path = 'tmp.mp4'
# -r 30/1 must match the motion data's 30 fps, otherwise audio drifts.
cmd = '/usr/bin/ffmpeg -r 30/1 -i tmp/%d_.jpg -c:v libx264 -pix_fmt yuv420p -vf "scale=500:500" ' + video_path
os.system(cmd)

merged_video_path = 'merged.mp4'
cmd = ['/usr/bin/ffmpeg', '-loglevel', 'panic', '-y', '-i', 'tmp.mp4', '-i',
       '/media/mm/ssd3T/data/EMAGE/beat_v2.0.0/beat_english_v2.0.0/wave16k/1_wayne_0_1_1.wav',
       merged_video_path]
# str.join replaces the original manual (quadratic) concatenation loop.
os.system(' '.join(cmd))

@gh786137611
Copy link
Author

12_zhao_2_1_1和这个12_zhao_2_2_2数据有明显的动作和音频不同步现象,是不是我中间对数据理解上有问题啊?

@H-Liu1997
Copy link
Contributor

Hi 谢谢提醒 是批处理时候把zhao 的audio motion offset误设置为了0。 这几天我们会修正offset。 另一个issue中说到wayne的数据也有这个问题吗? wayne的数据是对齐的

@gh786137611
Copy link
Author

另外的应该是没有问题的。 nice work!!!!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants