In [None]:
import torch
from model.wan.pipeline_wan_ttv import AutoencoderKLWan, WanPipeline
from diffusers.utils import export_to_video
from model.wan.wan_transformer_for_video import WanTransformer3DModel


# Checkpoint directory on local disk. Hub equivalents:
#   Wan-AI/Wan2.1-T2V-14B-Diffusers, Wan-AI/Wan2.1-T2V-1.3B-Diffusers
model_id = "./assets/Wan2.1-T2V-1.3B-Diffusers"
load_dtype = torch.float32

# Loader options collected in one place so the call below stays short.
# `low_cpu_mem_usage` is intentionally left at the library default here.
transformer_load_kwargs = dict(
    subfolder="transformer",
    torch_dtype=load_dtype,
    local_files_only=True,
    use_safetensors=True,
    ignore_mismatched_sizes=True,  # setting for model structure changes
)
transformer = WanTransformer3DModel.from_pretrained(model_id, **transformer_load_kwargs)

In [4]:
import torch
from diffusers.utils import export_to_video
from diffusers import AutoencoderKLWan, WanPipeline
from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler

# Reference run: text-to-video with the stock diffusers WanPipeline.
# Checkpoint directory on local disk. Hub equivalents:
#   Wan-AI/Wan2.1-T2V-14B-Diffusers, Wan-AI/Wan2.1-T2V-1.3B-Diffusers
model_id = "./assets/Wan2.1-T2V-1.3B-Diffusers"

# Output resolution of this run; the scheduler shift below is derived from it.
gen_height, gen_width = 480, 832

vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float16)

# Flow shift guidance carried over from the original cell: 5.0 for 720P, 3.0 for 480P.
# The original hard-coded 5.0 while generating at 480P, contradicting its own note,
# so the value is now chosen from the requested height.
flow_shift = 5.0 if gen_height >= 720 else 3.0
scheduler = UniPCMultistepScheduler(
    prediction_type='flow_prediction',
    use_flow_sigmas=True,
    num_train_timesteps=1000,
    flow_shift=flow_shift,
)
pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.float16)
pipe.scheduler = scheduler
pipe.to("cuda")

prompt = "A cat and a dog baking a cake together in a kitchen. The cat is carefully measuring flour, while the dog is stirring the batter with a wooden spoon. The kitchen is cozy, with sunlight streaming through the window."
negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"

# `.frames[0]` selects the first (and only) generated video of the batch.
output = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    height=gen_height,
    width=gen_width,
    num_frames=81,
    guidance_scale=5.0,
).frames[0]
# Last expression of the cell: the returned file path is displayed as the cell output.
export_to_video(output, "output.mp4", fps=16)

Loading checkpoint shards: 100%|██████████| 5/5 [00:01<00:00,  3.48it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  8.21it/s]s/it]
Loading pipeline components...: 100%|██████████| 5/5 [00:02<00:00,  2.30it/s]
100%|██████████| 50/50 [02:30<00:00,  3.01s/it]
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


'output.mp4'

In [None]:
import gc
import os
from tqdm import tqdm
from diffusers import AutoModel
from diffusers.training_utils import cast_training_params, free_memory
import torch
from torch.cuda.amp import autocast

from diffusers.utils.torch_utils import randn_tensor
from diffusers.video_processor import VideoProcessor
from omegaconf import OmegaConf
from model.wan.wan_transformer_for_video import WanTransformer3DModel
from model.wan.pipeline_wan_ttv import FlowMatchEulerDiscreteScheduler, AutoencoderKLWan, WanPipeline
from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler
from utils.text_encoding import get_t5_prompt_embeds, encode_prompt
from diffusers.utils import check_min_version, convert_unet_state_dict_to_peft, export_to_video, is_wandb_available



# Run configuration and precision/device choices for the manual validation cell.
args = OmegaConf.load("/home/chengxin/chengxin/vagen/config/ttv_wan.yaml")
infer_dtype = torch.float16  # dtype used for latents / autocast during sampling
load_dtype = torch.float32   # dtype the weights are loaded in
device = "cuda:1"

# Shorthand for the checkpoint root and for the options shared by several loaders.
pretrained_root = args.pretrained_model_name_or_path
offline_safetensors = dict(local_files_only=True, use_safetensors=True)

vae = AutoencoderKLWan.from_pretrained(pretrained_root, subfolder="vae", torch_dtype=load_dtype)
vae = vae.to(device)

# The tokenizer is not moved to a device.
tokenizer = AutoModel.from_pretrained(pretrained_root, subfolder="tokenizer", **offline_safetensors)

transformer = WanTransformer3DModel.from_pretrained(
    pretrained_root,
    subfolder="transformer",
    torch_dtype=load_dtype,
    low_cpu_mem_usage=False,
    ignore_mismatched_sizes=True,  # setting for model structure changes
    **offline_safetensors,
)
transformer = transformer.to(device)

text_encoder = AutoModel.from_pretrained(
    pretrained_root,
    subfolder="text_encoder",
    torch_dtype=load_dtype,
    **offline_safetensors,
)
text_encoder = text_encoder.to(device)

# Sampler and post-processor; the flow shift comes from the validation section of the config.
scheduler = UniPCMultistepScheduler(
    prediction_type='flow_prediction',
    use_flow_sigmas=True,
    num_train_timesteps=1000,
    flow_shift=args.validation.flow_shift,
)
video_processor = VideoProcessor(vae_scale_factor=vae.config.scale_factor_spatial)





# Manual validation: for every prompt in the config, sample latents with the
# transformer + scheduler loaded above, decode them with the VAE and write .mp4 files.
config = args.validation

# Directory used when the config has no `save_dir` entry. The original fallback was
# the float 5.0, which makes os.makedirs raise TypeError.
default_output_dir = "./validation_outputs"

with torch.no_grad():
    print("Start log_validation")
    prompt_list = config.prompt.split(config.prompt_separator)
    negative_prompt = config.negetive_prompt  # key spelling (sic) is what the config uses
    output_dir = getattr(config, "save_dir", None) or default_output_dir
    os.makedirs(output_dir, exist_ok=True)
    fps = config.fps if hasattr(config, "fps") else 16

    # Values that do not change between prompts are computed once.
    # If the transformer is wrapped and exposes the real model as `.module`, unwrap it.
    current_model = transformer.module if hasattr(transformer, "module") else transformer
    use_cfg = config.guidance_scale > 1.0
    num_latent_frames = (config.num_frames - 1) // vae.config.scale_factor_temporal + 1
    latent_shape = (
        config.num_videos_per_prompt,
        current_model.config.in_channels,
        num_latent_frames,
        config.height // vae.config.scale_factor_spatial,
        config.width // vae.config.scale_factor_spatial,
    )
    autocast_device_type = torch.device(device).type

    for prompt_idx, prompt in enumerate(prompt_list):

        # Release cached memory left over from the previous prompt.
        torch.cuda.empty_cache()
        gc.collect()
        free_memory()

        # Optionally start from a negative embedding stored on disk; it is handed to
        # encode_prompt together with the raw negative prompt text.
        prompt_embeds, negative_prompt_embeds = None, None
        if config.negetive_prompt_embed is not None:
            negative_prompt_embeds = torch.load(config.negetive_prompt_embed).to(device).unsqueeze(0)

        prompt_embeds, negative_prompt_embeds = encode_prompt(
            prompt=prompt,
            negative_prompt=negative_prompt,
            tokenizer=tokenizer,
            text_encoder=text_encoder,
            do_classifier_free_guidance=use_cfg,
            num_videos_per_prompt=config.num_videos_per_prompt,
            prompt_embeds=prompt_embeds,
            negative_prompt_embeds=negative_prompt_embeds,
            max_sequence_length=512,
            device=device,
            dtype=load_dtype,
        )  # type: ignore

        # torch.autocast replaces the deprecated torch.cuda.amp.autocast context manager.
        with torch.autocast(device_type=autocast_device_type, dtype=infer_dtype):
            latents = randn_tensor(latent_shape, device=device, dtype=infer_dtype)
            scheduler.set_timesteps(config.num_inference_steps, device=device)

            # Wrapping the timestep tensor directly lets tqdm know the total step count
            # (tqdm(enumerate(...)) cannot, so the bar showed no progress).
            for t in tqdm(scheduler.timesteps):
                timestep = t.expand(latents.shape[0])

                with current_model.cache_context("cond"):
                    noise_pred = current_model(
                        hidden_states=latents,
                        timestep=timestep,
                        encoder_hidden_states=prompt_embeds,
                        attention_kwargs=None,
                        return_dict=False,
                    )[0]

                if use_cfg:
                    with current_model.cache_context("uncond"):
                        noise_uncond = current_model(
                            hidden_states=latents,
                            timestep=timestep,
                            encoder_hidden_states=negative_prompt_embeds,
                            attention_kwargs=None,
                            return_dict=False,
                        )[0]
                    # Classifier-free guidance: push the prediction away from the
                    # unconditional branch by guidance_scale.
                    noise_pred = noise_uncond + config.guidance_scale * (noise_pred - noise_uncond)

                latents = scheduler.step(noise_pred, t, latents, return_dict=False)[0]

            # Undo the latent normalisation before decoding. `latents_std` holds the
            # reciprocal of the configured std, so dividing by it multiplies by the std.
            latents = latents.to(vae.dtype)
            latents_mean = torch.tensor(vae.config.latents_mean).view(1, vae.config.z_dim, 1, 1, 1).to(vae.device, vae.dtype)
            latents_std = 1 / torch.tensor(vae.config.latents_std).view(1, vae.config.z_dim, 1, 1, 1).to(vae.device, vae.dtype)
            latents = latents / latents_std + latents_mean
            video = vae.decode(latents, return_dict=False)[0]
            video = video_processor.postprocess_video(video, output_type='np')

            # One file per generated video: output<prompt index>_<video index>.mp4
            for video_idx in range(config.num_videos_per_prompt):
                export_to_video(video[video_idx], f"{output_dir}/output{prompt_idx}_{video_idx}.mp4", fps=fps)
            

: 

In [2]:
import gc
import os
from tqdm import tqdm
from diffusers import AutoModel
from diffusers.training_utils import cast_training_params, free_memory
import torch
from torch.cuda.amp import autocast

from diffusers.utils.torch_utils import randn_tensor
from diffusers.video_processor import VideoProcessor
from omegaconf import OmegaConf
from model.wan.wan_transformer_for_video import WanTransformer3DModel
from model.wan.pipeline_wan_ttv import FlowMatchEulerDiscreteScheduler, AutoencoderKLWan, WanPipeline
from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler
from utils.text_encoding import get_t5_prompt_embeds, encode_prompt
from diffusers.utils import check_min_version, convert_unet_state_dict_to_peft, export_to_video, is_wandb_available
from peft import LoraConfig, get_peft_model



# Configuration for the tuning experiment; the transformer is kept on the CPU in this cell.
args = OmegaConf.load("/home/chengxin/chengxin/vagen/config/tta_tuning.yaml")
infer_dtype = torch.float16
load_dtype = torch.float32
device = "cpu"

# Loader options gathered in a dict, then the loaded model is moved to `device`.
transformer_load_kwargs = dict(
    subfolder="transformer",
    torch_dtype=load_dtype,
    local_files_only=True,
    low_cpu_mem_usage=False,
    use_safetensors=True,
    ignore_mismatched_sizes=True,  # setting for model structure changes
)
transformer = WanTransformer3DModel.from_pretrained(
    args.pretrained_model_name_or_path, **transformer_load_kwargs
)
transformer = transformer.to(device)



def set_requires_grad(transformer, target_params, print_param = False):
    """Enable gradients for every parameter whose name contains a target substring.

    Args:
        transformer: object exposing ``named_parameters()``.
        target_params: iterable of substrings matched against parameter names.
        print_param: when true, print each matching substring (followed by a space)
            once per parameter it matches.

    Returns:
        The same ``transformer`` object, modified in place. Parameters that match
        no substring are left untouched; nothing is ever frozen here.
    """
    for param_name, parameter in transformer.named_parameters():
        matches = [pattern for pattern in target_params if pattern in param_name]
        if not matches:
            continue
        parameter.requires_grad = True  # mark as trainable
        if print_param:
            for pattern in matches:
                print(f"{pattern}", end = " ")
    # Terminates the space-separated listing and leaves a blank line after it.
    print("\n")
    return transformer

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  9.87it/s]


In [7]:
# Module-name patterns that receive LoRA adapters.
lora_target_modules = ["to_q", "to_k", "to_v", "ffn.net.0.proj", "ffn.net.2"]

lora_config = LoraConfig(
    r=args.rank,                 # LoRA rank; typically 4, 8, 16 or 32
    lora_alpha=args.lora_alpha,  # scaling factor controlling the magnitude of the LoRA update
    target_modules=lora_target_modules,
    lora_dropout=0.1,            # dropout probability
    bias="none",                 # whether biases are adapted
    # task_type is deliberately not set (e.g. CAUSAL_LM or SEQ_CLS)
)

# NOTE(review): this rebinds `transformer`, so re-running the cell appears to wrap an
# already wrapped model again - confirm, and reload the base model before re-running.
transformer = get_peft_model(transformer, lora_config)

# In addition to the LoRA weights, switch these parameter groups back to trainable.
extra_trainable = ['patch_embedding', 'proj_out', 'scale_shift_table', 'text_embedder']
transformer = set_requires_grad(transformer, extra_trainable, True)
transformer.print_trainable_parameters()  # show the trainable parameter count


# print("=== 可训练参数 ===")
# trainable_params = 0
# total_params = 0
# for name, param in transformer.named_parameters():
#     if param.requires_grad:
#         print(f"Layer: {name}, Shape: {param.shape}")
#         trainable_params += param.numel()
#     total_params += param.numel()



scale_shift_table patch_embedding patch_embedding text_embedder text_embedder text_embedder text_embedder scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table scale_shift_table proj_out proj_out 

trainable params: 160,519,744 || all params: 1,570,384,960 || trainable%: 10.2217


# Stable Audio

In [3]:
from diffusers import AutoModel
from diffusers.pipelines.stable_audio.modeling_stable_audio import StableAudioProjectionModel
from model.stable_audio.stable_audio_transformer import StableAudioDiTModel
import torch
from diffusers.utils.torch_utils import randn_tensor
from diffusers.models.embeddings import get_1d_rotary_pos_embed
import inspect
from tqdm import tqdm
import soundfile as sf


# Load every Stable Audio Open component from one local checkpoint directory.
load_dtype = torch.float16
device = "cuda:0"

stable_audio_root = '/home/chengxin/chengxin/vagen/assets/stable-audio-open-1.0'
# Options shared by most loaders below.
offline_safetensors = dict(local_files_only=True, use_safetensors=True)

transformer = StableAudioDiTModel.from_pretrained(
    stable_audio_root,
    subfolder="transformer",
    torch_dtype=load_dtype,
    low_cpu_mem_usage=False,
    ignore_mismatched_sizes=True,
    # num_layers=10,
    **offline_safetensors,
)
transformer = transformer.to(device)

projection_model = StableAudioProjectionModel.from_pretrained(
    stable_audio_root,
    subfolder="projection_model",
    torch_dtype=load_dtype,
    **offline_safetensors,
)
projection_model = projection_model.to(device)

vae = AutoModel.from_pretrained(
    stable_audio_root,
    subfolder="vae",
    torch_dtype=load_dtype,
    **offline_safetensors,
)
vae = vae.to(device)

text_encoder = AutoModel.from_pretrained(
    stable_audio_root,
    subfolder="text_encoder",
    torch_dtype=load_dtype,
    **offline_safetensors,
)
text_encoder = text_encoder.to(device)

# Tokenizer and scheduler are loaded without a dtype and are not moved to a device.
tokenizer = AutoModel.from_pretrained(
    stable_audio_root,
    subfolder="tokenizer",
    **offline_safetensors,
)

scheduler = AutoModel.from_pretrained(
    stable_audio_root,
    subfolder="scheduler",
)


In [6]:
from utils.text_encoding import encode_prompt_sd, encode_duration_sd, prepare_extra_step_kwargs


# Text-to-audio sampling written out by hand with the components loaded above.
prompt = ["The sharp, resonant sound of a bowling ball striking the pins, followed by the clattering and scattering of the pins in a chaotic dance, fills the air with a mix of impact and rolling echoes."]
negative_prompt = ["Low quality."]
batch_size = len(prompt)

do_classifier_free_guidance = True
num_waveforms_per_prompt = 1
num_inference_steps = 200
eta = 0.0
guidance_scale = 7
negative_prompt_embeds = None

audio_start_in_s = 0.0
audio_end_in_s   = 5.0

with torch.no_grad():
    # 1. Encode the text prompt(s)
    prompt_embeds = encode_prompt_sd(
        prompt,
        tokenizer,
        text_encoder,
        projection_model,
        device,
        do_classifier_free_guidance,
        negative_prompt,
    )

    # 2. Encode the start/end duration conditioning
    seconds_start_hidden_states, seconds_end_hidden_states = encode_duration_sd(
        projection_model,
        audio_start_in_s,
        audio_end_in_s,
        device,
        do_classifier_free_guidance and (negative_prompt is not None or negative_prompt_embeds is not None),
        batch_size,
    )

    # 3. Build the cross-attention input (text + durations, joined along dim 1) and the
    #    global conditioning (durations only, joined along dim 2)
    text_audio_duration_embeds = torch.cat([prompt_embeds, seconds_start_hidden_states, seconds_end_hidden_states], dim=1)
    audio_duration_embeds = torch.cat([seconds_start_hidden_states, seconds_end_hidden_states], dim=2)

    # Classifier-free guidance without any negative prompt: use all-zero unconditional
    # embeddings and duplicate the duration embeddings to match.
    if do_classifier_free_guidance and negative_prompt_embeds is None and negative_prompt is None:
        negative_text_audio_duration_embeds = torch.zeros_like(text_audio_duration_embeds, device=text_audio_duration_embeds.device)
        text_audio_duration_embeds = torch.cat([negative_text_audio_duration_embeds, text_audio_duration_embeds], dim=0)
        audio_duration_embeds = torch.cat([audio_duration_embeds, audio_duration_embeds], dim=0)

    bs_embed, seq_len, hidden_size = text_audio_duration_embeds.shape
    # Duplicate both embeddings for each generation per prompt, using an mps-friendly method
    text_audio_duration_embeds = text_audio_duration_embeds.repeat(1, num_waveforms_per_prompt, 1)
    text_audio_duration_embeds = text_audio_duration_embeds.view(bs_embed * num_waveforms_per_prompt, seq_len, hidden_size)

    audio_duration_embeds = audio_duration_embeds.repeat(1, num_waveforms_per_prompt, 1)
    audio_duration_embeds = audio_duration_embeds.view(bs_embed * num_waveforms_per_prompt, -1, audio_duration_embeds.shape[-1])

    # 4. Prepare timesteps
    scheduler.set_timesteps(num_inference_steps, device=device)
    timesteps = scheduler.timesteps

    # 5. Prepare latent variables
    num_channels_vae = transformer.config.in_channels
    waveform_length = int(transformer.config.sample_size)
    # waveform_length = int(audio_end_in_s * 22.5)
    shape = (batch_size * num_waveforms_per_prompt, num_channels_vae, waveform_length)
    # Seed on the same device the latents are created on; the original pinned the
    # generator to "cuda", which only matches while `device` is the default GPU.
    generator = torch.Generator(device).manual_seed(0)
    # generator = None
    latents = randn_tensor(shape, generator=generator, device=device, dtype=load_dtype)

    # 6. Prepare extra step kwargs and the rotary embedding
    extra_step_kwargs = prepare_extra_step_kwargs(generator, eta, scheduler)
    rotary_embed_dim = transformer.config.attention_head_dim // 2
    rotary_embedding = get_1d_rotary_pos_embed(
        rotary_embed_dim,
        latents.shape[2] + audio_duration_embeds.shape[1],
        use_real=True,
        repeat_interleave_real=False,
    )

    # 7. Denoising loop. tqdm wraps the timestep tensor directly so it knows the total
    #    (tqdm(enumerate(...)) printed "0it ... 200it" without a progress bar).
    for t in tqdm(timesteps):
        # expand the latents if we are doing classifier free guidance
        latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
        latent_model_input = scheduler.scale_model_input(latent_model_input, t)

        # The transformer gets one timestep per batch row. This is kept under its own
        # name so the scheduler below still receives the scalar `t`; the original
        # overwrote `t` and passed the batched tensor to scheduler.step.
        timestep_batch = torch.stack([t for _ in range(latent_model_input.shape[0])])

        # predict the noise residual
        noise_pred = transformer(
            latent_model_input,
            timestep_batch,
            encoder_hidden_states=text_audio_duration_embeds,
            global_hidden_states=audio_duration_embeds,
            rotary_embedding=rotary_embedding,
            return_dict=False,
        )[0]

        # perform guidance
        if do_classifier_free_guidance:
            noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
            noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
        latents = scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample

    # 8. Decode latents to waveforms
    audio = vae.decode(latents).sample

import torchaudio

# One .wav per generated waveform; 44100 is passed as the sample rate.
# NOTE(review): presumably this matches the VAE's sampling rate - confirm against the checkpoint config.
for i in range(len(audio)):
    torchaudio.save(f'test{i}.wav', audio[i].to(torch.float32).cpu(), 44100)

0it [00:00, ?it/s]

200it [00:06, 28.58it/s]


# Stable Audio FT (fine-tuned transformer)

In [1]:
from diffusers import AutoModel
from diffusers.pipelines.stable_audio.modeling_stable_audio import StableAudioProjectionModel
from model.stable_audio.stable_audio_transformer import StableAudioDiTModel
import torch
from diffusers.utils.torch_utils import randn_tensor
from diffusers.models.embeddings import get_1d_rotary_pos_embed
import inspect
from tqdm import tqdm
import soundfile as sf
from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler


# Same components as the baseline Stable Audio cell, except that the transformer
# weights come from the "transformer_ft" subfolder.
load_dtype = torch.float16
device = "cuda:0"

stable_audio_root = '/home/chengxin/chengxin/vagen/assets/stable-audio-open-1.0'
# Options shared by most loaders below.
offline_safetensors = dict(local_files_only=True, use_safetensors=True)

transformer = StableAudioDiTModel.from_pretrained(
    stable_audio_root,
    subfolder="transformer_ft",
    torch_dtype=load_dtype,
    low_cpu_mem_usage=False,
    ignore_mismatched_sizes=True,
    # num_layers=10,
    **offline_safetensors,
)
transformer = transformer.to(device)

# sd_tta_pt_16 ckpt1
projection_model = StableAudioProjectionModel.from_pretrained(
    stable_audio_root,
    subfolder="projection_model",
    torch_dtype=load_dtype,
    **offline_safetensors,
)
projection_model = projection_model.to(device)

vae = AutoModel.from_pretrained(
    stable_audio_root,
    subfolder="vae",
    torch_dtype=load_dtype,
    **offline_safetensors,
)
vae = vae.to(device)

text_encoder = AutoModel.from_pretrained(
    stable_audio_root,
    subfolder="text_encoder",
    torch_dtype=load_dtype,
    **offline_safetensors,
)
text_encoder = text_encoder.to(device)

# The tokenizer is loaded without a dtype and is not moved to a device.
tokenizer = AutoModel.from_pretrained(
    stable_audio_root,
    subfolder="tokenizer",
    **offline_safetensors,
)

# Flow-prediction sampler built in code rather than loaded from the checkpoint.
step_scheduler = UniPCMultistepScheduler(
    prediction_type='flow_prediction',
    use_flow_sigmas=True,
    num_train_timesteps=1000,
)


  from .autonotebook import tqdm as notebook_tqdm
  WeightNorm.apply(module, name, dim)


In [6]:
from utils.text_encoding import encode_prompt_sd, encode_duration_sd, prepare_extra_step_kwargs
import json
import torchaudio


# Sampling settings for the fine-tuned transformer.
# NOTE(review): eta, negative_prompt_embeds and audio_start_in_s are assigned here but
# not referenced anywhere else in this cell.
do_classifier_free_guidance = True
num_waveforms_per_prompt = 1
num_inference_steps = 200
eta = 0.0
guidance_scale = 7
negative_prompt_embeds = None
audio_start_in_s = 0.0
audio_end_in_s   = 10.0


# Test-set metadata; each value is read below through its 'label' entry.
with open('/home/chengxin/chengxin/vagen/data/tta/test_avsync_recap.json', 'r') as f:
    data = json.load(f)

# NOTE(review): the loop ends with an unconditional `break`, so only the first JSON
# entry is visited, and its label is immediately replaced by the hard-coded prompt
# list below. The loop therefore acts as a single debugging pass.
for path, info in data.items():
    prompt = [info['label']] 
    # Lions Roaring Deeply, Bowling striking
    prompt = [ "Sharpen knife", "chicken crowing", "Bowling rolling and striking", "hammering", "Lions Roaring Deeply", 'Frog Croaking'] # ['Lions Roaring Deeply', 'Frog Croaking', 'Dog barking', 'Bowling striking']
            #  [ "Sharpen knife", "chicken crowing", "toilet flushing", "hammering"]
    # One empty negative prompt per prompt.
    negative_prompt = ["" for _ in range(len(prompt))]
    batch_size = len(prompt)

    with torch.no_grad():
        prompt_embeds = encode_prompt_sd(
            prompt,
            tokenizer,
            text_encoder,
            projection_model,
            device,
            do_classifier_free_guidance,
            negative_prompt,
        )

        # 5. Prepare latent variables
        num_channels_vae = transformer.config.in_channels
        # waveform_length = int(transformer.config.sample_size)
        # Latent length is derived from the requested duration instead of the model's
        # sample_size. NOTE(review): 22.5 is presumably latent frames per second - TODO confirm.
        waveform_length = int(audio_end_in_s * 22.5)
        shape = (batch_size * num_waveforms_per_prompt, num_channels_vae, waveform_length)
        # generator = torch.Generator("cuda").manual_seed(0)
        # No generator is passed, so the initial noise is not seeded in this cell.
        latents = randn_tensor(shape, device=device, dtype=load_dtype)

        # 6. Prepare extra step kwargs and rotary_embed_dim
        rotary_embed_dim = transformer.config.attention_head_dim // 2
        # Rotary table covers the latent length plus one extra position.
        # NOTE(review): the extra position presumably corresponds to a prepended
        # conditioning token inside the transformer - confirm against the model code.
        rotary_embedding = get_1d_rotary_pos_embed(
            rotary_embed_dim,
            latents.shape[2] + 1,
            use_real=True,
            repeat_interleave_real=False,
        )

        # 8. Denoising loop
        # TODO: Check scheduler, t = 1 for noise;  UniPCMultistepScheduler timesteps [1-1000]
        step_scheduler.set_timesteps(num_inference_steps, device=device)
        timesteps = step_scheduler.timesteps
        # tqdm wraps an enumerate object here, so it has no total to display.
        for i, t in tqdm(enumerate(timesteps)):
            # Duplicate the latents so a single forward pass yields both guidance branches.
            latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
            latent_model_input = step_scheduler.scale_model_input(latent_model_input, t)
            # `t` is rebound to a 1-D tensor holding one copy of the timestep per batch row.
            t = torch.stack([t for _ in range(latent_model_input.shape[0])])

            # predict the noise residual
            noise_pred = transformer(
                latent_model_input,
                t,
                encoder_hidden_states=prompt_embeds, # text_audio_duration_embeds,
                # global_hidden_states=audio_duration_embeds,
                rotary_embedding=rotary_embedding,
                return_dict=False,
            )[0]

            # perform guidance
            if do_classifier_free_guidance:
                # Drop the duplicated half of the timestep batch, then blend the two
                # halves of the prediction (first chunk is treated as unconditional).
                t = t[:len(t)//2]
                noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
                noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
            # The scheduler is given a single timestep value (first element of the batch).
            latents = step_scheduler.step(noise_pred, t[0], latents).prev_sample

        audio = vae.decode(latents).sample


    # Write one file per generated waveform into the working directory; 44100 is
    # passed as the sample rate. `i` is reused here after serving as the step index above.
    for i in range(len(audio)):
        # torchaudio.save(f"/home/chengxin/chengxin/vagen/log/predict/{path.split('/')[-1][:-4]}.wav", audio[i].to(torch.float32).cpu(), 44100)
        torchaudio.save(f"./test{i}.wav", audio[i].to(torch.float32).cpu(), 44100)

    # Stop after the first JSON entry.
    break

0it [00:00, ?it/s]

200it [00:55,  3.58it/s]


In [2]:
import os, shutil
import torchaudio

# Trim generated audio clips to a fixed duration and write them into `target_path`.
# The file names to process are taken from what already exists in `target_path`;
# each one is read from `gen_path` and written back under the same name, which
# overwrites the existing file in `target_path`.

# Source run to read from. Earlier candidates are kept for reference only (the
# original assigned them one after another, so only the last assignment had any effect):
#   /home/chengxin/chengxin/vagen/log/sd_tta_pt_16/logging/10000        (checkpoints/checkpoint_1)
#   /home/chengxin/chengxin/vagen/log/sd_tta_ft_16/logging/7000         (checkpoints/checkpoint_13; 7000 6500 7500)
#   /home/chengxin/chengxin/vagen/log/sd_tta_ft_recap_16/logging/8000   (5500 4500 7000 8000)
gen_path = '/home/chengxin/chengxin/vagen/log/wan_sd_ttva_55/logging/9000'

target_path = '/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync'

duration = 5.4  # clip length in seconds

i = 0         # number of clips successfully trimmed and written
skipped = []  # (file name, reason) for every entry that could not be processed

for file in os.listdir(target_path):
    input_file = f'{gen_path}/{file}'
    output_file = f'{target_path}/{file}'

    # Entries without a generated counterpart are skipped on purpose (best effort),
    # but they are recorded instead of being hidden by a bare `except: pass`.
    if not os.path.isfile(input_file):
        skipped.append((file, "no generated counterpart"))
        continue

    try:
        waveform, sample_rate = torchaudio.load(input_file)
        num_samples = int(duration * sample_rate)
        # Keep only the first `duration` seconds along the last (sample) axis.
        torchaudio.save(output_file, waveform[:, :num_samples], sample_rate)
    except Exception as exc:
        # `Exception` rather than a bare except, so KeyboardInterrupt still stops the loop.
        skipped.append((file, repr(exc)))
        continue

    i += 1

if skipped:
    print(f"Skipped {len(skipped)} file(s); first few: {skipped[:5]}")
i

149

In [1]:
from utils.va_processing import add_audio_to_video
from glob import glob
from tqdm import tqdm
import os
import shutil

# Mux each generated video with its same-named .wav and write the result to output_dir.
input_dir = '/home/chengxin/chengxin/vagen/log/wan_sd_ttva_55/logging/9000'
output_dir = '/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video'

for path in tqdm(glob(f'{input_dir}/*.mp4')):
    # File name without its directory and without the 4-character ".mp4" suffix.
    stem = path.rsplit('/', 1)[-1][:-4]

    v_path = f"{input_dir}/{stem}.mp4"
    a_path = f"{input_dir}/{stem}.wav"
    o_path = f"{output_dir}/{stem}.mp4"

    # Videos without a matching audio file are left out of output_dir.
    if not os.path.exists(a_path):
        continue

    add_audio_to_video(
        video_path=v_path,
        audio_path=a_path,
        output_path=o_path,
    )


 40%|███▉      | 119/300 [00:50<01:19,  2.28it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Ft93ncEBpGo_000184.mp4



 40%|████      | 121/300 [00:50<01:16,  2.35it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Ft93ncEBpGo_000184.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Ft93ncEBpGo_000184.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/J2L9eJ-h9DY_000030.mp4.


 40%|████      | 121/300 [00:50<01:16,  2.35it/s]

MoviePy - Writing audio in J2L9eJ-h9DY_000030TEMP_MPY_wvf_snd.mp4


 40%|████      | 121/300 [00:51<01:16,  2.35it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/J2L9eJ-h9DY_000030.mp4



 41%|████      | 123/300 [00:51<01:20,  2.21it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/J2L9eJ-h9DY_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/J2L9eJ-h9DY_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/WKY-QgdWR1Y_000070.mp4.
MoviePy - Writing audio in WKY-QgdWR1Y_000070TEMP_MPY_wvf_snd.mp4


 41%|████      | 123/300 [00:52<01:20,  2.21it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/WKY-QgdWR1Y_000070.mp4



 42%|████▏     | 125/300 [00:52<01:18,  2.23it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/WKY-QgdWR1Y_000070.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/WKY-QgdWR1Y_000070.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/IWr6t7Z1mvg_000000.mp4.
MoviePy - Writing audio in IWr6t7Z1mvg_000000TEMP_MPY_wvf_snd.mp4


 42%|████▏     | 125/300 [00:52<01:18,  2.23it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/IWr6t7Z1mvg_000000.mp4



 42%|████▏     | 127/300 [00:53<01:14,  2.31it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/IWr6t7Z1mvg_000000.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/IWr6t7Z1mvg_000000.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/l0loFh-e25Y_000000.mp4.
MoviePy - Writing audio in l0loFh-e25Y_000000TEMP_MPY_wvf_snd.mp4


 42%|████▏     | 127/300 [00:53<01:14,  2.31it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/l0loFh-e25Y_000000.mp4



 43%|████▎     | 129/300 [00:54<01:09,  2.47it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/l0loFh-e25Y_000000.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/l0loFh-e25Y_000000.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/GL5tAz0bzSU_000495.mp4.
MoviePy - Writing audio in GL5tAz0bzSU_000495TEMP_MPY_wvf_snd.mp4


 43%|████▎     | 129/300 [00:54<01:09,  2.47it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/GL5tAz0bzSU_000495.mp4



 44%|████▎     | 131/300 [00:54<01:07,  2.52it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/GL5tAz0bzSU_000495.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/GL5tAz0bzSU_000495.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Il9qAhbbeBw_000013.mp4.
MoviePy - Writing audio in Il9qAhbbeBw_000013TEMP_MPY_wvf_snd.mp4


 44%|████▎     | 131/300 [00:55<01:07,  2.52it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Il9qAhbbeBw_000013.mp4



 44%|████▍     | 133/300 [00:55<01:07,  2.49it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Il9qAhbbeBw_000013.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Il9qAhbbeBw_000013.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/KzK6d6Qpu_o_000010.mp4.
MoviePy - Writing audio in KzK6d6Qpu_o_000010TEMP_MPY_wvf_snd.mp4


 44%|████▍     | 133/300 [00:56<01:07,  2.49it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/KzK6d6Qpu_o_000010.mp4



 45%|████▌     | 135/300 [00:56<01:07,  2.46it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/KzK6d6Qpu_o_000010.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/KzK6d6Qpu_o_000010.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/NRWlHRvaDcQ_000788.mp4.
MoviePy - Writing audio in NRWlHRvaDcQ_000788TEMP_MPY_wvf_snd.mp4


 45%|████▌     | 135/300 [00:56<01:07,  2.46it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/NRWlHRvaDcQ_000788.mp4



 46%|████▌     | 137/300 [00:57<01:02,  2.59it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/NRWlHRvaDcQ_000788.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/NRWlHRvaDcQ_000788.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/MTL8-cVoP64_000169.mp4.
MoviePy - Writing audio in MTL8-cVoP64_000169TEMP_MPY_wvf_snd.mp4


 46%|████▌     | 137/300 [00:57<01:02,  2.59it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/MTL8-cVoP64_000169.mp4



 46%|████▋     | 139/300 [00:57<01:03,  2.53it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/MTL8-cVoP64_000169.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/MTL8-cVoP64_000169.mp4


 46%|████▋     | 139/300 [00:58<01:03,  2.53it/s]

5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/kq3sz5uOvns_000208.mp4.
MoviePy - Writing audio in kq3sz5uOvns_000208TEMP_MPY_wvf_snd.mp4


 46%|████▋     | 139/300 [00:58<01:03,  2.53it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/kq3sz5uOvns_000208.mp4



 47%|████▋     | 141/300 [00:59<01:08,  2.34it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/kq3sz5uOvns_000208.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/kq3sz5uOvns_000208.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/FWxMWNFsdqk_000448.mp4.
MoviePy - Writing audio in FWxMWNFsdqk_000448TEMP_MPY_wvf_snd.mp4


 47%|████▋     | 141/300 [00:59<01:08,  2.34it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/FWxMWNFsdqk_000448.mp4



 48%|████▊     | 143/300 [01:00<01:08,  2.28it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/FWxMWNFsdqk_000448.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/FWxMWNFsdqk_000448.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/fp6pBJ1Iygk_000026.mp4.
MoviePy - Writing audio in fp6pBJ1Iygk_000026TEMP_MPY_wvf_snd.mp4


 48%|████▊     | 143/300 [01:00<01:08,  2.28it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/fp6pBJ1Iygk_000026.mp4



 48%|████▊     | 145/300 [01:00<01:05,  2.35it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/fp6pBJ1Iygk_000026.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/fp6pBJ1Iygk_000026.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/lptpDgCE0N4_000083.mp4.
MoviePy - Writing audio in lptpDgCE0N4_000083TEMP_MPY_wvf_snd.mp4


 48%|████▊     | 145/300 [01:01<01:05,  2.35it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/lptpDgCE0N4_000083.mp4



 49%|████▉     | 147/300 [01:01<01:07,  2.28it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/lptpDgCE0N4_000083.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/lptpDgCE0N4_000083.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/pYsT6PjPaFY_000030.mp4.
MoviePy - Writing audio in pYsT6PjPaFY_000030TEMP_MPY_wvf_snd.mp4


 49%|████▉     | 147/300 [01:01<01:07,  2.28it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/pYsT6PjPaFY_000030.mp4



 50%|████▉     | 149/300 [01:02<01:04,  2.33it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/pYsT6PjPaFY_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/pYsT6PjPaFY_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_2hpQeNbpR4_000020.mp4.
MoviePy - Writing audio in _2hpQeNbpR4_000020TEMP_MPY_wvf_snd.mp4


 50%|████▉     | 149/300 [01:02<01:04,  2.33it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_2hpQeNbpR4_000020.mp4



 50%|█████     | 151/300 [01:03<01:00,  2.47it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_2hpQeNbpR4_000020.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_2hpQeNbpR4_000020.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/NqLyRSDvVE0_000224.mp4.
MoviePy - Writing audio in NqLyRSDvVE0_000224TEMP_MPY_wvf_snd.mp4


 50%|█████     | 151/300 [01:03<01:00,  2.47it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/NqLyRSDvVE0_000224.mp4



 51%|█████     | 153/300 [01:04<00:59,  2.49it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/NqLyRSDvVE0_000224.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/NqLyRSDvVE0_000224.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/OIGvCw4UUYk_000616.mp4.
MoviePy - Writing audio in OIGvCw4UUYk_000616TEMP_MPY_wvf_snd.mp4


 51%|█████     | 153/300 [01:04<00:59,  2.49it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/OIGvCw4UUYk_000616.mp4



 52%|█████▏    | 155/300 [01:04<00:55,  2.60it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/OIGvCw4UUYk_000616.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/OIGvCw4UUYk_000616.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/JwJAh2X6btc_000010.mp4.


 52%|█████▏    | 155/300 [01:04<00:55,  2.60it/s]

MoviePy - Writing audio in JwJAh2X6btc_000010TEMP_MPY_wvf_snd.mp4


 52%|█████▏    | 155/300 [01:05<00:55,  2.60it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/JwJAh2X6btc_000010.mp4



 52%|█████▏    | 157/300 [01:05<00:58,  2.43it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/JwJAh2X6btc_000010.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/JwJAh2X6btc_000010.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/L_ucgLAe-TA_000599.mp4.
MoviePy - Writing audio in L_ucgLAe-TA_000599TEMP_MPY_wvf_snd.mp4


 52%|█████▏    | 157/300 [01:05<00:58,  2.43it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/L_ucgLAe-TA_000599.mp4



 53%|█████▎    | 159/300 [01:06<00:55,  2.53it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/L_ucgLAe-TA_000599.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/L_ucgLAe-TA_000599.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/akjqOhO0IM0_000010.mp4.
MoviePy - Writing audio in akjqOhO0IM0_000010TEMP_MPY_wvf_snd.mp4


 53%|█████▎    | 159/300 [01:06<00:55,  2.53it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/akjqOhO0IM0_000010.mp4



 54%|█████▎    | 161/300 [01:07<00:57,  2.43it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/akjqOhO0IM0_000010.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/akjqOhO0IM0_000010.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/krEzQuVHoKk_000001.mp4.
MoviePy - Writing audio in krEzQuVHoKk_000001TEMP_MPY_wvf_snd.mp4


 54%|█████▎    | 161/300 [01:07<00:57,  2.43it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/krEzQuVHoKk_000001.mp4



 54%|█████▍    | 163/300 [01:08<00:58,  2.33it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/krEzQuVHoKk_000001.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/krEzQuVHoKk_000001.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/O8CrIlFXN1I_000030.mp4.
MoviePy - Writing audio in O8CrIlFXN1I_000030TEMP_MPY_wvf_snd.mp4


 54%|█████▍    | 163/300 [01:08<00:58,  2.33it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/O8CrIlFXN1I_000030.mp4



 55%|█████▌    | 165/300 [01:09<00:57,  2.36it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/O8CrIlFXN1I_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/O8CrIlFXN1I_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_52ntwwQyv4_000070.mp4.
MoviePy - Writing audio in _52ntwwQyv4_000070TEMP_MPY_wvf_snd.mp4


 55%|█████▌    | 165/300 [01:09<00:57,  2.36it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_52ntwwQyv4_000070.mp4



 56%|█████▌    | 167/300 [01:09<00:54,  2.43it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_52ntwwQyv4_000070.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_52ntwwQyv4_000070.mp4


 56%|█████▌    | 167/300 [01:10<00:54,  2.43it/s]

5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/QIT6l8y0_cE_000039.mp4.
MoviePy - Writing audio in QIT6l8y0_cE_000039TEMP_MPY_wvf_snd.mp4


 56%|█████▌    | 167/300 [01:11<00:54,  2.43it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/QIT6l8y0_cE_000039.mp4



 56%|█████▋    | 169/300 [01:11<01:12,  1.80it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/QIT6l8y0_cE_000039.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/QIT6l8y0_cE_000039.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/dK6eZGeDjZg_000057.mp4.
MoviePy - Writing audio in dK6eZGeDjZg_000057TEMP_MPY_wvf_snd.mp4


 56%|█████▋    | 169/300 [01:11<01:12,  1.80it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/dK6eZGeDjZg_000057.mp4



 57%|█████▋    | 171/300 [01:12<01:05,  1.96it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/dK6eZGeDjZg_000057.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/dK6eZGeDjZg_000057.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/NF1lwZ24RYI_000030.mp4.
MoviePy - Writing audio in NF1lwZ24RYI_000030TEMP_MPY_wvf_snd.mp4


 57%|█████▋    | 171/300 [01:12<01:05,  1.96it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/NF1lwZ24RYI_000030.mp4



 58%|█████▊    | 173/300 [01:13<01:01,  2.08it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/NF1lwZ24RYI_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/NF1lwZ24RYI_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/OpJZ-KVF-K8_000030.mp4.
MoviePy - Writing audio in OpJZ-KVF-K8_000030TEMP_MPY_wvf_snd.mp4


 58%|█████▊    | 173/300 [01:13<01:01,  2.08it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/OpJZ-KVF-K8_000030.mp4



 58%|█████▊    | 175/300 [01:14<00:56,  2.19it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/OpJZ-KVF-K8_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/OpJZ-KVF-K8_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/V0QCCLD_0s8_000130.mp4.
MoviePy - Writing audio in V0QCCLD_0s8_000130TEMP_MPY_wvf_snd.mp4


 58%|█████▊    | 175/300 [01:14<00:56,  2.19it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/V0QCCLD_0s8_000130.mp4



 59%|█████▉    | 177/300 [01:14<00:53,  2.31it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/V0QCCLD_0s8_000130.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/V0QCCLD_0s8_000130.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/rsrWBquU8bc_000050.mp4.
MoviePy - Writing audio in rsrWBquU8bc_000050TEMP_MPY_wvf_snd.mp4


 59%|█████▉    | 177/300 [01:15<00:53,  2.31it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/rsrWBquU8bc_000050.mp4



 60%|█████▉    | 179/300 [01:15<00:52,  2.29it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/rsrWBquU8bc_000050.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/rsrWBquU8bc_000050.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/mdLD3Khu5k4_000030.mp4.
MoviePy - Writing audio in mdLD3Khu5k4_000030TEMP_MPY_wvf_snd.mp4


 60%|█████▉    | 179/300 [01:15<00:52,  2.29it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/mdLD3Khu5k4_000030.mp4



 60%|██████    | 181/300 [01:16<00:52,  2.27it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/mdLD3Khu5k4_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/mdLD3Khu5k4_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/iuozNmF5TcI_000037.mp4.
MoviePy - Writing audio in iuozNmF5TcI_000037TEMP_MPY_wvf_snd.mp4


 60%|██████    | 181/300 [01:16<00:52,  2.27it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/iuozNmF5TcI_000037.mp4



 61%|██████    | 183/300 [01:17<00:50,  2.29it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/iuozNmF5TcI_000037.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/iuozNmF5TcI_000037.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/T6sSJ75v9wE_000027.mp4.
MoviePy - Writing audio in T6sSJ75v9wE_000027TEMP_MPY_wvf_snd.mp4


 61%|██████    | 183/300 [01:17<00:50,  2.29it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/T6sSJ75v9wE_000027.mp4



 62%|██████▏   | 185/300 [01:18<00:49,  2.31it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/T6sSJ75v9wE_000027.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/T6sSJ75v9wE_000027.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/UR0l-ggYOfg_000041.mp4.
MoviePy - Writing audio in UR0l-ggYOfg_000041TEMP_MPY_wvf_snd.mp4


 62%|██████▏   | 185/300 [01:18<00:49,  2.31it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/UR0l-ggYOfg_000041.mp4



 62%|██████▏   | 187/300 [01:19<00:47,  2.37it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/UR0l-ggYOfg_000041.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/UR0l-ggYOfg_000041.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/g6kGCt7G2-w_000080.mp4.
MoviePy - Writing audio in g6kGCt7G2-w_000080TEMP_MPY_wvf_snd.mp4


 62%|██████▏   | 187/300 [01:19<00:47,  2.37it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/g6kGCt7G2-w_000080.mp4



 63%|██████▎   | 189/300 [01:20<00:48,  2.31it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/g6kGCt7G2-w_000080.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/g6kGCt7G2-w_000080.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/UPVn_85FRCw_000020.mp4.
MoviePy - Writing audio in UPVn_85FRCw_000020TEMP_MPY_wvf_snd.mp4


 63%|██████▎   | 189/300 [01:20<00:48,  2.31it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/UPVn_85FRCw_000020.mp4



 64%|██████▎   | 191/300 [01:20<00:45,  2.38it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/UPVn_85FRCw_000020.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/UPVn_85FRCw_000020.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_RB9txkXq1o_000053.mp4.
MoviePy - Writing audio in _RB9txkXq1o_000053TEMP_MPY_wvf_snd.mp4


 64%|██████▎   | 191/300 [01:21<00:45,  2.38it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_RB9txkXq1o_000053.mp4



 64%|██████▍   | 193/300 [01:21<00:44,  2.38it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_RB9txkXq1o_000053.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_RB9txkXq1o_000053.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YOrImbuhsQ8_000049.mp4.
MoviePy - Writing audio in YOrImbuhsQ8_000049TEMP_MPY_wvf_snd.mp4


 64%|██████▍   | 193/300 [01:21<00:44,  2.38it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YOrImbuhsQ8_000049.mp4



 65%|██████▌   | 195/300 [01:22<00:41,  2.53it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YOrImbuhsQ8_000049.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YOrImbuhsQ8_000049.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/OJqJgotD8D4_000038.mp4.
MoviePy - Writing audio in OJqJgotD8D4_000038TEMP_MPY_wvf_snd.mp4


 65%|██████▌   | 195/300 [01:22<00:41,  2.53it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/OJqJgotD8D4_000038.mp4



 66%|██████▌   | 197/300 [01:23<00:43,  2.39it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/OJqJgotD8D4_000038.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/OJqJgotD8D4_000038.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Vwz4vOzWnLE_000078.mp4.
MoviePy - Writing audio in Vwz4vOzWnLE_000078TEMP_MPY_wvf_snd.mp4


 66%|██████▌   | 197/300 [01:23<00:43,  2.39it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Vwz4vOzWnLE_000078.mp4



 66%|██████▋   | 199/300 [01:24<00:42,  2.39it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Vwz4vOzWnLE_000078.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Vwz4vOzWnLE_000078.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Rl0aaF32hV4_000220.mp4.
MoviePy - Writing audio in Rl0aaF32hV4_000220TEMP_MPY_wvf_snd.mp4


 66%|██████▋   | 199/300 [01:24<00:42,  2.39it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Rl0aaF32hV4_000220.mp4



 67%|██████▋   | 201/300 [01:24<00:41,  2.39it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Rl0aaF32hV4_000220.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Rl0aaF32hV4_000220.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/iLo-V2zvcZE_000030.mp4.
MoviePy - Writing audio in iLo-V2zvcZE_000030TEMP_MPY_wvf_snd.mp4


 67%|██████▋   | 201/300 [01:25<00:41,  2.39it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/iLo-V2zvcZE_000030.mp4



 68%|██████▊   | 203/300 [01:25<00:42,  2.31it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/iLo-V2zvcZE_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/iLo-V2zvcZE_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/SFnVnnubE0M_000030.mp4.
MoviePy - Writing audio in SFnVnnubE0M_000030TEMP_MPY_wvf_snd.mp4


 68%|██████▊   | 203/300 [01:26<00:42,  2.31it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/SFnVnnubE0M_000030.mp4



 68%|██████▊   | 205/300 [01:26<00:38,  2.48it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/SFnVnnubE0M_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/SFnVnnubE0M_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/vRfRQJfTHiE_000030.mp4.
MoviePy - Writing audio in vRfRQJfTHiE_000030TEMP_MPY_wvf_snd.mp4


 68%|██████▊   | 205/300 [01:26<00:38,  2.48it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/vRfRQJfTHiE_000030.mp4



 69%|██████▉   | 207/300 [01:27<00:39,  2.38it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/vRfRQJfTHiE_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/vRfRQJfTHiE_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/dDbYcXgoR9A_000030.mp4.
MoviePy - Writing audio in dDbYcXgoR9A_000030TEMP_MPY_wvf_snd.mp4


 69%|██████▉   | 207/300 [01:27<00:39,  2.38it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/dDbYcXgoR9A_000030.mp4



 70%|██████▉   | 209/300 [01:28<00:38,  2.37it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/dDbYcXgoR9A_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/dDbYcXgoR9A_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YX4MzUOPA8U_000094.mp4.
MoviePy - Writing audio in YX4MzUOPA8U_000094TEMP_MPY_wvf_snd.mp4


 70%|██████▉   | 209/300 [01:28<00:38,  2.37it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YX4MzUOPA8U_000094.mp4



 70%|███████   | 211/300 [01:29<00:36,  2.41it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YX4MzUOPA8U_000094.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YX4MzUOPA8U_000094.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ol71q9DuY1Y_000128.mp4.
MoviePy - Writing audio in ol71q9DuY1Y_000128TEMP_MPY_wvf_snd.mp4


 70%|███████   | 211/300 [01:29<00:36,  2.41it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ol71q9DuY1Y_000128.mp4



 71%|███████   | 213/300 [01:29<00:37,  2.34it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ol71q9DuY1Y_000128.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ol71q9DuY1Y_000128.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YKLqVHlgSiw_000176.mp4.
MoviePy - Writing audio in YKLqVHlgSiw_000176TEMP_MPY_wvf_snd.mp4


 71%|███████   | 213/300 [01:30<00:37,  2.34it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YKLqVHlgSiw_000176.mp4



 72%|███████▏  | 215/300 [01:30<00:34,  2.47it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YKLqVHlgSiw_000176.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YKLqVHlgSiw_000176.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ZtKvvhRBs44_000247.mp4.
MoviePy - Writing audio in ZtKvvhRBs44_000247TEMP_MPY_wvf_snd.mp4


 72%|███████▏  | 215/300 [01:31<00:34,  2.47it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ZtKvvhRBs44_000247.mp4



 72%|███████▏  | 217/300 [01:31<00:35,  2.35it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ZtKvvhRBs44_000247.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ZtKvvhRBs44_000247.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/irN1qoE3-uQ_000004.mp4.
MoviePy - Writing audio in irN1qoE3-uQ_000004TEMP_MPY_wvf_snd.mp4


 72%|███████▏  | 217/300 [01:31<00:35,  2.35it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/irN1qoE3-uQ_000004.mp4



 73%|███████▎  | 219/300 [01:32<00:32,  2.48it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/irN1qoE3-uQ_000004.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/irN1qoE3-uQ_000004.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/l_SEPV8JB1U_000010.mp4.
MoviePy - Writing audio in l_SEPV8JB1U_000010TEMP_MPY_wvf_snd.mp4


 73%|███████▎  | 219/300 [01:32<00:32,  2.48it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/l_SEPV8JB1U_000010.mp4



 74%|███████▎  | 221/300 [01:33<00:31,  2.52it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/l_SEPV8JB1U_000010.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/l_SEPV8JB1U_000010.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/WJV6Ey6hXEw_000005.mp4.
MoviePy - Writing audio in WJV6Ey6hXEw_000005TEMP_MPY_wvf_snd.mp4


 74%|███████▎  | 221/300 [01:33<00:31,  2.52it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/WJV6Ey6hXEw_000005.mp4



 74%|███████▍  | 223/300 [01:33<00:31,  2.46it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/WJV6Ey6hXEw_000005.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/WJV6Ey6hXEw_000005.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_JS6F_WP17w_000010.mp4.
MoviePy - Writing audio in _JS6F_WP17w_000010TEMP_MPY_wvf_snd.mp4


 74%|███████▍  | 223/300 [01:34<00:31,  2.46it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_JS6F_WP17w_000010.mp4



 75%|███████▌  | 225/300 [01:34<00:28,  2.65it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_JS6F_WP17w_000010.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_JS6F_WP17w_000010.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YX_dIRBxMLg_000023.mp4.
MoviePy - Writing audio in YX_dIRBxMLg_000023TEMP_MPY_wvf_snd.mp4


 75%|███████▌  | 225/300 [01:35<00:28,  2.65it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YX_dIRBxMLg_000023.mp4



 76%|███████▌  | 227/300 [01:35<00:33,  2.20it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YX_dIRBxMLg_000023.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/YX_dIRBxMLg_000023.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/lDTrnXAu4WY_000025.mp4.
MoviePy - Writing audio in lDTrnXAu4WY_000025TEMP_MPY_wvf_snd.mp4


 76%|███████▌  | 227/300 [01:36<00:33,  2.20it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/lDTrnXAu4WY_000025.mp4



 76%|███████▋  | 229/300 [01:36<00:31,  2.22it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/lDTrnXAu4WY_000025.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/lDTrnXAu4WY_000025.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/nEgGc3Zg85E_000030.mp4.
MoviePy - Writing audio in nEgGc3Zg85E_000030TEMP_MPY_wvf_snd.mp4


 76%|███████▋  | 229/300 [01:36<00:31,  2.22it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/nEgGc3Zg85E_000030.mp4



 77%|███████▋  | 231/300 [01:37<00:29,  2.36it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/nEgGc3Zg85E_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/nEgGc3Zg85E_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ZDQSxYmgyAo_000499.mp4.
MoviePy - Writing audio in ZDQSxYmgyAo_000499TEMP_MPY_wvf_snd.mp4


 77%|███████▋  | 231/300 [01:37<00:29,  2.36it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ZDQSxYmgyAo_000499.mp4



 78%|███████▊  | 233/300 [01:38<00:29,  2.26it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ZDQSxYmgyAo_000499.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ZDQSxYmgyAo_000499.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/qe3eHr5Q01U_000098.mp4.
MoviePy - Writing audio in qe3eHr5Q01U_000098TEMP_MPY_wvf_snd.mp4


 78%|███████▊  | 233/300 [01:38<00:29,  2.26it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/qe3eHr5Q01U_000098.mp4



 78%|███████▊  | 235/300 [01:39<00:27,  2.33it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/qe3eHr5Q01U_000098.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/qe3eHr5Q01U_000098.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/lm8M8aEoa3c_000081.mp4.
MoviePy - Writing audio in lm8M8aEoa3c_000081TEMP_MPY_wvf_snd.mp4


 78%|███████▊  | 235/300 [01:39<00:27,  2.33it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/lm8M8aEoa3c_000081.mp4



 79%|███████▉  | 237/300 [01:40<00:26,  2.36it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/lm8M8aEoa3c_000081.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/lm8M8aEoa3c_000081.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/n_YeYjbyJu8_000000.mp4.
MoviePy - Writing audio in n_YeYjbyJu8_000000TEMP_MPY_wvf_snd.mp4


 79%|███████▉  | 237/300 [01:40<00:26,  2.36it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/n_YeYjbyJu8_000000.mp4



 80%|███████▉  | 239/300 [01:41<00:26,  2.28it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/n_YeYjbyJu8_000000.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/n_YeYjbyJu8_000000.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Yi9Xdiq579o_000240.mp4.
MoviePy - Writing audio in Yi9Xdiq579o_000240TEMP_MPY_wvf_snd.mp4


 80%|███████▉  | 239/300 [01:41<00:26,  2.28it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Yi9Xdiq579o_000240.mp4



 80%|████████  | 241/300 [01:41<00:23,  2.49it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Yi9Xdiq579o_000240.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/Yi9Xdiq579o_000240.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_f7qCwXJQmQ_000035.mp4.
MoviePy - Writing audio in _f7qCwXJQmQ_000035TEMP_MPY_wvf_snd.mp4


 80%|████████  | 241/300 [01:41<00:23,  2.49it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_f7qCwXJQmQ_000035.mp4



 81%|████████  | 243/300 [01:42<00:21,  2.70it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_f7qCwXJQmQ_000035.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_f7qCwXJQmQ_000035.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_5An8FmGzxw_000009.mp4.
MoviePy - Writing audio in _5An8FmGzxw_000009TEMP_MPY_wvf_snd.mp4


 81%|████████  | 243/300 [01:42<00:21,  2.70it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_5An8FmGzxw_000009.mp4



 82%|████████▏ | 245/300 [01:42<00:19,  2.76it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_5An8FmGzxw_000009.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_5An8FmGzxw_000009.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/czaj1HwZFYk_000180.mp4.
MoviePy - Writing audio in czaj1HwZFYk_000180TEMP_MPY_wvf_snd.mp4


 82%|████████▏ | 245/300 [01:43<00:19,  2.76it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/czaj1HwZFYk_000180.mp4



 82%|████████▏ | 247/300 [01:43<00:19,  2.70it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/czaj1HwZFYk_000180.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/czaj1HwZFYk_000180.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/pLlHIfTP5R4_000030.mp4.
MoviePy - Writing audio in pLlHIfTP5R4_000030TEMP_MPY_wvf_snd.mp4


 82%|████████▏ | 247/300 [01:43<00:19,  2.70it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/pLlHIfTP5R4_000030.mp4



 83%|████████▎ | 249/300 [01:44<00:18,  2.70it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/pLlHIfTP5R4_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/pLlHIfTP5R4_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/hhKsZ_uP-m8_000072.mp4.
MoviePy - Writing audio in hhKsZ_uP-m8_000072TEMP_MPY_wvf_snd.mp4


 83%|████████▎ | 249/300 [01:44<00:18,  2.70it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/hhKsZ_uP-m8_000072.mp4



 84%|████████▎ | 251/300 [01:45<00:17,  2.83it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/hhKsZ_uP-m8_000072.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/hhKsZ_uP-m8_000072.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/jvIuht_NQmI_000089.mp4.
MoviePy - Writing audio in jvIuht_NQmI_000089TEMP_MPY_wvf_snd.mp4


 84%|████████▎ | 251/300 [01:45<00:17,  2.83it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/jvIuht_NQmI_000089.mp4



 84%|████████▍ | 253/300 [01:45<00:17,  2.64it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/jvIuht_NQmI_000089.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/jvIuht_NQmI_000089.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/q_sB0ur0mEA_000016.mp4.
MoviePy - Writing audio in q_sB0ur0mEA_000016TEMP_MPY_wvf_snd.mp4


 84%|████████▍ | 253/300 [01:46<00:17,  2.64it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/q_sB0ur0mEA_000016.mp4



 85%|████████▌ | 255/300 [01:46<00:16,  2.76it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/q_sB0ur0mEA_000016.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/q_sB0ur0mEA_000016.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/rMGOF-XHU58_000010.mp4.
MoviePy - Writing audio in rMGOF-XHU58_000010TEMP_MPY_wvf_snd.mp4


 85%|████████▌ | 255/300 [01:46<00:16,  2.76it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/rMGOF-XHU58_000010.mp4



 86%|████████▌ | 257/300 [01:47<00:16,  2.62it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/rMGOF-XHU58_000010.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/rMGOF-XHU58_000010.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/dIIiaO12I5Q_000006.mp4.
MoviePy - Writing audio in dIIiaO12I5Q_000006TEMP_MPY_wvf_snd.mp4


 86%|████████▌ | 257/300 [01:47<00:16,  2.62it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/dIIiaO12I5Q_000006.mp4



 86%|████████▋ | 259/300 [01:48<00:15,  2.59it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/dIIiaO12I5Q_000006.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/dIIiaO12I5Q_000006.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_tzXSoaZ644_000021.mp4.
MoviePy - Writing audio in _tzXSoaZ644_000021TEMP_MPY_wvf_snd.mp4


 86%|████████▋ | 259/300 [01:48<00:15,  2.59it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_tzXSoaZ644_000021.mp4



 87%|████████▋ | 261/300 [01:49<00:15,  2.49it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_tzXSoaZ644_000021.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/_tzXSoaZ644_000021.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/i60k4LXneng_000016.mp4.
MoviePy - Writing audio in i60k4LXneng_000016TEMP_MPY_wvf_snd.mp4


 87%|████████▋ | 261/300 [01:49<00:15,  2.49it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/i60k4LXneng_000016.mp4



 88%|████████▊ | 263/300 [01:49<00:14,  2.60it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/i60k4LXneng_000016.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/i60k4LXneng_000016.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/sxqKIm4LlF0_000024.mp4.
MoviePy - Writing audio in sxqKIm4LlF0_000024TEMP_MPY_wvf_snd.mp4


 88%|████████▊ | 263/300 [01:50<00:14,  2.60it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/sxqKIm4LlF0_000024.mp4



 88%|████████▊ | 265/300 [01:50<00:14,  2.37it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/sxqKIm4LlF0_000024.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/sxqKIm4LlF0_000024.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/r1JpF0ovMFA_000034.mp4.
MoviePy - Writing audio in r1JpF0ovMFA_000034TEMP_MPY_wvf_snd.mp4


 88%|████████▊ | 265/300 [01:51<00:14,  2.37it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/r1JpF0ovMFA_000034.mp4



                                                 

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/r1JpF0ovMFA_000034.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/r1JpF0ovMFA_000034.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/fWqXK1PQfOc_000134.mp4.


 89%|████████▉ | 267/300 [01:51<00:14,  2.34it/s]

MoviePy - Writing audio in fWqXK1PQfOc_000134TEMP_MPY_wvf_snd.mp4


 89%|████████▉ | 267/300 [01:52<00:14,  2.34it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/fWqXK1PQfOc_000134.mp4



 90%|████████▉ | 269/300 [01:52<00:13,  2.24it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/fWqXK1PQfOc_000134.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/fWqXK1PQfOc_000134.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ao6-Npm2Cbw_000030.mp4.
MoviePy - Writing audio in ao6-Npm2Cbw_000030TEMP_MPY_wvf_snd.mp4


 90%|████████▉ | 269/300 [01:52<00:13,  2.24it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ao6-Npm2Cbw_000030.mp4



 90%|█████████ | 271/300 [01:53<00:12,  2.30it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ao6-Npm2Cbw_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/ao6-Npm2Cbw_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/sxJjC9HC1Xs_000310.mp4.
MoviePy - Writing audio in sxJjC9HC1Xs_000310TEMP_MPY_wvf_snd.mp4


 90%|█████████ | 271/300 [01:53<00:12,  2.30it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/sxJjC9HC1Xs_000310.mp4



 91%|█████████ | 273/300 [01:54<00:11,  2.29it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/sxJjC9HC1Xs_000310.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/sxJjC9HC1Xs_000310.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/tBQf0NWgLbg_000040.mp4.
MoviePy - Writing audio in tBQf0NWgLbg_000040TEMP_MPY_wvf_snd.mp4


 91%|█████████ | 273/300 [01:54<00:11,  2.29it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/tBQf0NWgLbg_000040.mp4



 92%|█████████▏| 275/300 [01:55<00:10,  2.48it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/tBQf0NWgLbg_000040.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/tBQf0NWgLbg_000040.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/qHqTC2CL0cQ_000016.mp4.
MoviePy - Writing audio in qHqTC2CL0cQ_000016TEMP_MPY_wvf_snd.mp4


 92%|█████████▏| 275/300 [01:55<00:10,  2.48it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/qHqTC2CL0cQ_000016.mp4



 92%|█████████▏| 277/300 [01:55<00:09,  2.40it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/qHqTC2CL0cQ_000016.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/qHqTC2CL0cQ_000016.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/stnD0ZE3bAQ_000030.mp4.
MoviePy - Writing audio in stnD0ZE3bAQ_000030TEMP_MPY_wvf_snd.mp4


 92%|█████████▏| 277/300 [01:56<00:09,  2.40it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/stnD0ZE3bAQ_000030.mp4



 93%|█████████▎| 279/300 [01:56<00:09,  2.23it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/stnD0ZE3bAQ_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/stnD0ZE3bAQ_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/x3ytjRxhNf4_000028.mp4.
MoviePy - Writing audio in x3ytjRxhNf4_000028TEMP_MPY_wvf_snd.mp4


 93%|█████████▎| 279/300 [01:57<00:09,  2.23it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/x3ytjRxhNf4_000028.mp4



 94%|█████████▎| 281/300 [01:57<00:08,  2.27it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/x3ytjRxhNf4_000028.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/x3ytjRxhNf4_000028.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/uA5__Dby1C0_000170.mp4.
MoviePy - Writing audio in uA5__Dby1C0_000170TEMP_MPY_wvf_snd.mp4


 94%|█████████▎| 281/300 [01:57<00:08,  2.27it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/uA5__Dby1C0_000170.mp4



 94%|█████████▍| 283/300 [01:58<00:06,  2.46it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/uA5__Dby1C0_000170.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/uA5__Dby1C0_000170.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/tx7XKy_4f4c_000005.mp4.
MoviePy - Writing audio in tx7XKy_4f4c_000005TEMP_MPY_wvf_snd.mp4


 94%|█████████▍| 283/300 [01:58<00:06,  2.46it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/tx7XKy_4f4c_000005.mp4



 95%|█████████▌| 285/300 [01:59<00:06,  2.38it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/tx7XKy_4f4c_000005.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/tx7XKy_4f4c_000005.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/tEfjBCYLEVI_000011.mp4.
MoviePy - Writing audio in tEfjBCYLEVI_000011TEMP_MPY_wvf_snd.mp4


 95%|█████████▌| 285/300 [01:59<00:06,  2.38it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/tEfjBCYLEVI_000011.mp4



 96%|█████████▌| 287/300 [02:00<00:05,  2.28it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/tEfjBCYLEVI_000011.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/tEfjBCYLEVI_000011.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/vcT8gClKYTs_000013.mp4.
MoviePy - Writing audio in vcT8gClKYTs_000013TEMP_MPY_wvf_snd.mp4


 96%|█████████▌| 287/300 [02:00<00:05,  2.28it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/vcT8gClKYTs_000013.mp4



 96%|█████████▋| 289/300 [02:01<00:04,  2.41it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/vcT8gClKYTs_000013.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/vcT8gClKYTs_000013.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/zm-1vgWuexY_000030.mp4.
MoviePy - Writing audio in zm-1vgWuexY_000030TEMP_MPY_wvf_snd.mp4


 96%|█████████▋| 289/300 [02:01<00:04,  2.41it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/zm-1vgWuexY_000030.mp4



 97%|█████████▋| 291/300 [02:01<00:03,  2.36it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/zm-1vgWuexY_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/zm-1vgWuexY_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/uxEdTjXT-rs_000030.mp4.
MoviePy - Writing audio in uxEdTjXT-rs_000030TEMP_MPY_wvf_snd.mp4


 97%|█████████▋| 291/300 [02:02<00:03,  2.36it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/uxEdTjXT-rs_000030.mp4



 98%|█████████▊| 293/300 [02:02<00:03,  2.33it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/uxEdTjXT-rs_000030.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/uxEdTjXT-rs_000030.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/yUZMpGwS-OI_000230.mp4.
MoviePy - Writing audio in yUZMpGwS-OI_000230TEMP_MPY_wvf_snd.mp4


 98%|█████████▊| 293/300 [02:03<00:03,  2.33it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/yUZMpGwS-OI_000230.mp4



 98%|█████████▊| 295/300 [02:03<00:02,  2.30it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/yUZMpGwS-OI_000230.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/yUZMpGwS-OI_000230.mp4


 98%|█████████▊| 295/300 [02:04<00:02,  2.30it/s]

5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/yFuDYRwZ3EA_000237.mp4.
MoviePy - Writing audio in yFuDYRwZ3EA_000237TEMP_MPY_wvf_snd.mp4


 98%|█████████▊| 295/300 [02:04<00:02,  2.30it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/yFuDYRwZ3EA_000237.mp4



 99%|█████████▉| 297/300 [02:05<00:01,  1.73it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/yFuDYRwZ3EA_000237.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/yFuDYRwZ3EA_000237.mp4
5.4 5.39
MoviePy - Building video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/weSfQmkG35I_000002.mp4.
MoviePy - Writing audio in weSfQmkG35I_000002TEMP_MPY_wvf_snd.mp4


 99%|█████████▉| 297/300 [02:05<00:01,  1.73it/s]

MoviePy - Done.
MoviePy - Writing video /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/weSfQmkG35I_000002.mp4



100%|██████████| 300/300 [02:06<00:00,  2.38it/s]

MoviePy - Done !
MoviePy - video ready /home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/weSfQmkG35I_000002.mp4
视频已保存至：/home/chengxin/chengxin/Dataset_Sound/VGGSound/generated_audios/veo3/avsync_video/weSfQmkG35I_000002.mp4



