In [None]:
# Mount Google Drive at /content/drive; later cells read checkpoints and data
# from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# List the checkpoint files stored on the mounted Drive.
!ls "/content/drive/My Drive/sjsu/298B/298A/cogvideox_checkpoints"

test.pth  transformer_lora_epoch1.pth  transformer_lora_epoch2.pth  transformer_lora_epoch3.pth


In [None]:
import torch
import torch.nn as nn
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video
import os

from PIL import Image
from torchvision.utils import save_image
from torchvision.transforms import ToTensor
from tqdm import tqdm


import cv2
import numpy as np

# Default text prompt; a later cell assigns the same value again right before
# the single-video generation call.
prompt = "Crack two eggs into a bowl."

In [None]:
# pipe = CogVideoXPipeline.from_pretrained(
#     "THUDM/CogVideoX-2b",
#     torch_dtype=torch.float16
# )

# pipe.enable_model_cpu_offload()
# pipe.enable_sequential_cpu_offload()
# # pipe.to("cuda")
# pipe.vae.enable_slicing()
# pipe.vae.enable_tiling()
# video = pipe(
#     prompt=prompt,
#     num_videos_per_prompt=1,
#     num_inference_steps=50,
#     num_frames=49,
#     guidance_scale=6,
#     generator=torch.manual_seed(42),
# ).frames[0]

# export_to_video(video, "output.mp4", fps=8)

In [None]:
# Run the Python garbage collector, then ask PyTorch to release its cached
# CUDA memory, before the pipeline is loaded in the next cell.
import gc
import torch
gc.collect()
torch.cuda.empty_cache()

In [None]:
class LoRALinear(nn.Module):
    """Wrap an ``nn.Linear`` with a low-rank (LoRA) residual branch.

    The forward pass returns ``base(x) + lora_b(lora_a(x)) * (alpha / r)``.
    ``lora_b`` is zero-initialised, so a freshly wrapped layer reproduces
    ``base`` exactly until adapter weights are trained or loaded.

    The attribute names ``base``, ``lora_a`` and ``lora_b`` determine the
    ``state_dict`` keys, so they must stay as they are for saved checkpoints
    to keep loading.

    Args:
        base (nn.Linear): Layer to wrap; kept as ``self.base``.
        r (int): Rank of the low-rank update. Must be positive.
        alpha (float): Scaling numerator; the branch is multiplied by
            ``alpha / r``.

    Raises:
        ValueError: If ``r`` is not positive.
    """

    def __init__(self, base, r=8, alpha=32):
        super().__init__()
        if r <= 0:
            raise ValueError(f"LoRA rank r must be positive, got {r}")
        self.base = base
        # Build the adapters on the wrapped layer's device and dtype so the
        # module is usable immediately (e.g. a float16 CUDA base layer) without
        # relying on the caller to re-cast the whole model afterwards.
        factory_kwargs = {"device": base.weight.device, "dtype": base.weight.dtype}
        self.lora_a = nn.Linear(base.in_features, r, bias=False, **factory_kwargs)
        self.lora_b = nn.Linear(r, base.out_features, bias=False, **factory_kwargs)
        self.scaling = alpha / r
        nn.init.kaiming_uniform_(self.lora_a.weight, a=5 ** 0.5)
        # Zero "B" matrix => the LoRA branch contributes nothing at start.
        nn.init.zeros_(self.lora_b.weight)

    def forward(self, x):
        """Return the base projection plus the scaled low-rank update."""
        return self.base(x) + self.lora_b(self.lora_a(x)) * self.scaling

def inject_lora(transformer, keys=("to_q", "to_k", "to_v", "to_out.0")):
    """Replace matching ``nn.Linear`` sub-modules of ``transformer`` with ``LoRALinear``.

    A sub-module is wrapped when it is an ``nn.Linear`` and any entry of
    ``keys`` occurs as a substring of its dotted module name. The replacement
    happens in place on the parent module.

    Linear layers that already live inside a ``LoRALinear`` (its ``base``,
    ``lora_a`` and ``lora_b`` children) are skipped, so calling this function
    again on an already-injected model is a no-op instead of nesting wrappers
    and changing the ``state_dict`` key names.

    Args:
        transformer (nn.Module): Model to modify in place.
        keys (Iterable[str]): Substrings selecting which module names to wrap.
    """
    # Snapshot the module list first: the tree is mutated inside the loop and
    # should not be changed underneath the live named_modules() generator.
    for name, module in list(transformer.named_modules()):
        if not isinstance(module, nn.Linear):
            continue
        if not any(k in name for k in keys):
            continue

        *parent_path, leaf = name.split(".")
        parent = transformer
        inside_lora = False
        for part in parent_path:
            parent = getattr(parent, part)
            if isinstance(parent, LoRALinear):
                inside_lora = True
                break
        if inside_lora:
            continue

        setattr(parent, leaf, LoRALinear(module))

# === Load base pipeline ===
# Fetch the pretrained CogVideoX-2b pipeline in half precision, move it to the
# GPU, then wrap the transformer's attention projections with LoRA layers.
pipe = CogVideoXPipeline.from_pretrained(
    "THUDM/CogVideoX-2b",
    torch_dtype=torch.float16,
)
pipe = pipe.to("cuda")
inject_lora(pipe.transformer)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
ckpt_path = "/content/drive/My Drive/sjsu/298B/298A/cogvideox_checkpoints/transformer_lora_epoch3.pth"

# Deserialize onto the CPU so the checkpoint does not occupy a second copy of
# GPU memory while it is copied into the model, and restrict unpickling to
# tensors/plain containers (weights_only=True) instead of arbitrary objects.
state_dict = torch.load(ckpt_path, map_location="cpu", weights_only=True)
# strict loading (the default) raises if the keys do not match the
# LoRA-injected transformer.
pipe.transformer.load_state_dict(state_dict)
del state_dict

# Make sure every component used at inference time is on the GPU in float16.
pipe.transformer.to("cuda", dtype=torch.float16)
pipe.vae.to("cuda", dtype=torch.float16)
pipe.text_encoder.to("cuda", dtype=torch.float16)
# pipe.scheduler.to("cuda")  # if applicable

print("✅ Loaded fine-tuned weights!")

✅ Loaded fine-tuned weights!


In [None]:
# Generate one video for a single prompt with the LoRA-loaded pipeline and
# write it to lora_epoch3.mp4 in the current working directory.
prompt = "Crack two eggs into a bowl."
# Earlier, shorter trial run kept for reference:
# output = pipe(prompt=prompt, num_inference_steps=25, num_frames=8)
# video = output.frames[0]  # List of PIL.Image

# Fixed seed (42) is passed as the generator; .frames[0] selects the frames of
# the first (and only) requested video.
video = pipe(
    prompt=prompt,
    num_videos_per_prompt=1,
    num_inference_steps=100,
    num_frames=49,
    guidance_scale=6,
    generator=torch.manual_seed(42),
).frames[0]


# Last expression of the cell: its return value is displayed as the cell output.
export_to_video(video, "lora_epoch3.mp4", fps=8)

# # Visualize
# import matplotlib.pyplot as plt
# fig, axs = plt.subplots(1, len(video), figsize=(20, 3))
# for i, frame in enumerate(video):
#     axs[i].imshow(frame)
#     axs[i].axis("off")
# plt.show()

  0%|          | 0/100 [00:00<?, ?it/s]

'lora_epoch3.mp4'

# Prepare evaluation data

## Generate videos from the fine-tuned model

### v1: outputs named by prompt index

In [None]:
def save_video_with_gt_fps_and_jpgs(frames, save_path, gt_video_path, save_frames_dir=None):
    """
    Saves a list of frames into an .mp4 file using the FPS of the ground
    truth video and the resolution of the first frame.
    Optionally saves individual JPG frames.

    NOTE: a later cell in this notebook defines a function with the same name
    but a different signature; once that cell has run, this definition is
    shadowed.

    Args:
        frames (List[PIL.Image] | List[np.ndarray]): Frames to write. Arrays
            must be uint8, H x W x 3, in RGB channel order.
        save_path (str): Output path to save video (.mp4).
        gt_video_path (str): Path to ground truth video (to extract FPS).
        save_frames_dir (str): Optional. If provided, will save each frame as
            ``frame_XXXX.jpg`` (1-based index) in this directory.

    Raises:
        AssertionError: If ``frames`` is empty.
        ValueError: If the ground truth video cannot be opened or reports a
            non-positive FPS.
        RuntimeError: If OpenCV cannot open a writer for ``save_path``.
    """
    assert len(frames) > 0, "No frames to save!"

    def _to_rgb_array(frame):
        # PIL images are converted to RGB arrays; anything else is taken as an
        # RGB array already.
        if isinstance(frame, Image.Image):
            return np.array(frame.convert("RGB"))
        return np.asarray(frame)

    # === Get resolution from first frame (works for PIL and ndarray frames) ===
    height, width = _to_rgb_array(frames[0]).shape[:2]

    # === Get FPS from ground truth video ===
    cap = cv2.VideoCapture(gt_video_path)
    try:
        if not cap.isOpened():
            raise ValueError(f"❌ Cannot open ground truth video: {gt_video_path}")
        fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        cap.release()
    # `not fps > 0` also catches NaN, which some containers report.
    if not fps > 0:
        raise ValueError(f"❌ Ground truth video reports invalid FPS ({fps}): {gt_video_path}")

    # === Define video writer ===
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    video_dir = os.path.dirname(save_path)
    if video_dir:  # a bare filename has no directory component to create
        os.makedirs(video_dir, exist_ok=True)
    out = cv2.VideoWriter(save_path, fourcc, fps, (width, height))
    if not out.isOpened():
        out.release()
        raise RuntimeError(f"❌ Cannot open video writer for: {save_path}")

    # === Optionally save frames to disk ===
    if save_frames_dir:
        os.makedirs(save_frames_dir, exist_ok=True)

    try:
        for i, frame in enumerate(frames):
            np_frame = _to_rgb_array(frame)
            # OpenCV expects BGR channel order.
            out.write(cv2.cvtColor(np_frame, cv2.COLOR_RGB2BGR))

            if save_frames_dir:
                frame_path = os.path.join(save_frames_dir, f"frame_{i+1:04d}.jpg")
                if isinstance(frame, Image.Image):
                    frame.save(frame_path)
                else:
                    Image.fromarray(np_frame).save(frame_path)
    finally:
        # Always finalize the container, even if a frame fails mid-way.
        out.release()

    print(f"✅ Saved video to {save_path} with FPS={fps}")
    if save_frames_dir:
        print(f"🖼️  Saved frames to {save_frames_dir}")

In [None]:
output_dir = "/content/drive/MyDrive/cogvideo_ft/cogvideo_eval"
os.makedirs(output_dir, exist_ok=True)

# prompts.txt and videos.txt are read line by line and paired by position.
with open("/content/drive/MyDrive/cogvideo_ft/prompts.txt") as f:
    prompts = [line.strip() for line in f]

with open("/content/drive/MyDrive/cogvideo_ft/videos.txt") as f:
    # Keep only the file name of each listed video path.
    video_names = [line.strip().split("/")[-1] for line in f]

# Only the first 10 prompt/video pairs are used for this evaluation run.
for i, (prompt, gt_name) in enumerate(zip(prompts[:10], video_names[:10])):
    print(f"🌀 Generating for prompt: {prompt}")
    output = pipe(prompt=prompt, num_frames=49, num_inference_steps=25)
    video_frames = output.frames[0]  # List of PIL.Image

    save_path = os.path.join(output_dir, f"generated_{i}.mp4")
    gt_path = f"/content/drive/MyDrive/cogvideo_ft/videos/{gt_name}"
    # One frame directory per video: the saver always names files
    # frame_0001.jpg, frame_0002.jpg, ..., so a directory shared by all videos
    # would keep only the frames of the last one.
    save_frames_dir = os.path.join(output_dir, "generated_frames", f"generated_{i}")

    save_video_with_gt_fps_and_jpgs(
        video_frames,
        save_path,
        gt_path,
        save_frames_dir
    )

🌀 Generating for prompt: spread margarine on two slices of white bread


  0%|          | 0/25 [00:00<?, ?it/s]

✅ Saved video to /content/generated_videos/generated_0.mp4 with FPS=30.0
🖼️  Saved frames to /content/generated_videos/generated_frames
🌀 Generating for prompt: place a slice of cheese on the bread


  0%|          | 0/25 [00:00<?, ?it/s]

✅ Saved video to /content/generated_videos/generated_1.mp4 with FPS=30.0
🖼️  Saved frames to /content/generated_videos/generated_frames
🌀 Generating for prompt: place the bread slices on top of each other and place in a hot pan


  0%|          | 0/25 [00:00<?, ?it/s]

✅ Saved video to /content/generated_videos/generated_2.mp4 with FPS=30.0
🖼️  Saved frames to /content/generated_videos/generated_frames
🌀 Generating for prompt: flip the sandwich over and press down


  0%|          | 0/25 [00:00<?, ?it/s]

✅ Saved video to /content/generated_videos/generated_3.mp4 with FPS=30.0
🖼️  Saved frames to /content/generated_videos/generated_frames
🌀 Generating for prompt: cut the sandwich in half diagonally


  0%|          | 0/25 [00:00<?, ?it/s]

✅ Saved video to /content/generated_videos/generated_4.mp4 with FPS=30.0
🖼️  Saved frames to /content/generated_videos/generated_frames
🌀 Generating for prompt: pick the ends off the verdalago


  0%|          | 0/25 [00:00<?, ?it/s]

✅ Saved video to /content/generated_videos/generated_5.mp4 with FPS=29.97
🖼️  Saved frames to /content/generated_videos/generated_frames
🌀 Generating for prompt: combine lemon juice sumac garlic salt and oil in a bowl


  0%|          | 0/25 [00:00<?, ?it/s]

✅ Saved video to /content/generated_videos/generated_6.mp4 with FPS=29.97
🖼️  Saved frames to /content/generated_videos/generated_frames
🌀 Generating for prompt: chop lettuce and place it in a bowl


  0%|          | 0/25 [00:00<?, ?it/s]

✅ Saved video to /content/generated_videos/generated_7.mp4 with FPS=29.97
🖼️  Saved frames to /content/generated_videos/generated_frames
🌀 Generating for prompt: add verdalago pepper cucumbers tomatoes herbs and onions to the lettuce in the bowl


  0%|          | 0/25 [00:00<?, ?it/s]

✅ Saved video to /content/generated_videos/generated_8.mp4 with FPS=29.97
🖼️  Saved frames to /content/generated_videos/generated_frames
🌀 Generating for prompt: pour the dressing over the salad and mix


  0%|          | 0/25 [00:00<?, ?it/s]

✅ Saved video to /content/generated_videos/generated_9.mp4 with FPS=29.97
🖼️  Saved frames to /content/generated_videos/generated_frames


In [None]:
import shutil
import os

# Copy every .mp4 found directly in the local output folder to Google Drive.
src_dir = "/content/generated_videos"
dst_dir = "/content/drive/MyDrive/cogvideo_ft/cogvideo_eval/generated_videos"

os.makedirs(dst_dir, exist_ok=True)

for filename in filter(lambda name: name.endswith(".mp4"), os.listdir(src_dir)):
    source_file = os.path.join(src_dir, filename)
    target_file = os.path.join(dst_dir, filename)
    shutil.copy(source_file, target_file)

print("✅ Videos copied to Google Drive.")

✅ Videos copied to Google Drive.


In [None]:
from pathlib import Path
import shutil

# Copy every .jpg found directly in the local frames folder to Google Drive.
src_dir = Path("/content/generated_videos/generated_frames")
dst_dir = Path("/content/drive/MyDrive/cogvideo_ft/cogvideo_eval/generated_videos/generated_frames")
dst_dir.mkdir(parents=True, exist_ok=True)

for filename in filter(lambda name: name.endswith(".jpg"), os.listdir(src_dir)):
    shutil.copy(src_dir / filename, dst_dir / filename)

print("✅ JPGs copied to Google Drive.")

### v2: outputs named after the ground-truth clip

In [None]:
def save_video_with_gt_fps_and_jpgs(frames, gt_video_path, save_root_dir):
    """
    Saves generated frames into a video and individual JPGs using the same basename as the ground truth video.

    Output layout under ``save_root_dir``:
        generated_videos/<basename>.mp4
        generated_jpgs/<basename>/frame_XXXX.jpg   (1-based index)

    The video uses the FPS read from the ground truth video and the resolution
    of the first generated frame.

    Args:
        frames (List[PIL.Image] | List[np.ndarray]): Frames to write. Arrays
            must be uint8, H x W x 3, in RGB channel order.
        gt_video_path (str): Ground truth video path to extract FPS and name.
        save_root_dir (str): Root directory to save output video and frames.

    Raises:
        AssertionError: If ``frames`` is empty.
        ValueError: If the ground truth video cannot be opened or reports a
            non-positive FPS.
        RuntimeError: If OpenCV cannot open a writer for the output video.
    """
    assert len(frames) > 0, "No frames to save!"
    basename = os.path.splitext(os.path.basename(gt_video_path))[0]  # e.g., '1k64hSAAAd4_clip0'

    def _to_rgb_array(frame):
        # PIL images are converted to RGB arrays; anything else is taken as an
        # RGB array already.
        if isinstance(frame, Image.Image):
            return np.array(frame.convert("RGB"))
        return np.asarray(frame)

    # === Paths ===
    save_video_path = os.path.join(save_root_dir, "generated_videos", f"{basename}.mp4")
    save_frames_dir = os.path.join(save_root_dir, "generated_jpgs", basename)

    # === Create directories ===
    os.makedirs(os.path.dirname(save_video_path), exist_ok=True)
    os.makedirs(save_frames_dir, exist_ok=True)

    # === Get resolution (from the first frame) and FPS (from the GT video) ===
    height, width = _to_rgb_array(frames[0]).shape[:2]
    cap = cv2.VideoCapture(gt_video_path)
    try:
        if not cap.isOpened():
            raise ValueError(f"❌ Cannot open GT video: {gt_video_path}")
        fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        cap.release()
    # `not fps > 0` also catches NaN, which some containers report.
    if not fps > 0:
        raise ValueError(f"❌ GT video reports invalid FPS ({fps}): {gt_video_path}")

    # === Video writer ===
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(save_video_path, fourcc, fps, (width, height))
    if not out.isOpened():
        out.release()
        raise RuntimeError(f"❌ Cannot open video writer for: {save_video_path}")

    try:
        for i, frame in enumerate(frames):
            np_frame = _to_rgb_array(frame)
            # OpenCV expects BGR channel order.
            out.write(cv2.cvtColor(np_frame, cv2.COLOR_RGB2BGR))

            frame_path = os.path.join(save_frames_dir, f"frame_{i+1:04d}.jpg")
            if isinstance(frame, Image.Image):
                frame.save(frame_path)
            else:
                Image.fromarray(np_frame).save(frame_path)
    finally:
        # Always finalize the container, even if a frame fails mid-way.
        out.release()

    print(f"✅ Saved aligned video to: {save_video_path}")
    print(f"🖼️  Saved aligned frames to: {save_frames_dir}")


In [None]:
output_dir = "/content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50"
os.makedirs(output_dir, exist_ok=True)

# prompts.txt and videos.txt are read line by line and paired by position.
with open("/content/drive/MyDrive/cogvideo_ft/prompts.txt") as f:
    prompts = [line.strip() for line in f]

with open("/content/drive/MyDrive/cogvideo_ft/videos.txt") as f:
    # Keep only the file name of each listed video path.
    video_names = [line.strip().split("/")[-1] for line in f]

# Only the first 10 prompt/video pairs are used for this evaluation run.
for prompt, gt_name in zip(prompts[:10], video_names[:10]):
    print(f"🌀 Generating for prompt: {prompt}")
    output = pipe(prompt=prompt, num_frames=49, num_inference_steps=50)
    video_frames = output.frames[0]  # List of PIL.Image

    gt_path = f"/content/drive/MyDrive/cogvideo_ft/videos/{gt_name}"

    # The saver derives the output video/frame locations from the ground-truth
    # file name, so only the root directory is passed.
    save_video_with_gt_fps_and_jpgs(video_frames, gt_path, output_dir)

🌀 Generating for prompt: spread margarine on two slices of white bread


  0%|          | 0/50 [00:00<?, ?it/s]

✅ Saved aligned video to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_videos/GLd3aX16zBg_clip0.mp4
🖼️  Saved aligned frames to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_jpgs/GLd3aX16zBg_clip0
🌀 Generating for prompt: place a slice of cheese on the bread


  0%|          | 0/50 [00:00<?, ?it/s]

✅ Saved aligned video to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_videos/GLd3aX16zBg_clip1.mp4
🖼️  Saved aligned frames to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_jpgs/GLd3aX16zBg_clip1
🌀 Generating for prompt: place the bread slices on top of each other and place in a hot pan


  0%|          | 0/50 [00:00<?, ?it/s]

✅ Saved aligned video to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_videos/GLd3aX16zBg_clip2.mp4
🖼️  Saved aligned frames to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_jpgs/GLd3aX16zBg_clip2
🌀 Generating for prompt: flip the sandwich over and press down


  0%|          | 0/50 [00:00<?, ?it/s]

✅ Saved aligned video to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_videos/GLd3aX16zBg_clip3.mp4
🖼️  Saved aligned frames to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_jpgs/GLd3aX16zBg_clip3
🌀 Generating for prompt: cut the sandwich in half diagonally


  0%|          | 0/50 [00:00<?, ?it/s]

✅ Saved aligned video to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_videos/GLd3aX16zBg_clip4.mp4
🖼️  Saved aligned frames to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_jpgs/GLd3aX16zBg_clip4
🌀 Generating for prompt: pick the ends off the verdalago


  0%|          | 0/50 [00:00<?, ?it/s]

✅ Saved aligned video to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_videos/xHr8X2Wpmno_clip0.mp4
🖼️  Saved aligned frames to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_jpgs/xHr8X2Wpmno_clip0
🌀 Generating for prompt: combine lemon juice sumac garlic salt and oil in a bowl


  0%|          | 0/50 [00:00<?, ?it/s]

✅ Saved aligned video to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_videos/xHr8X2Wpmno_clip1.mp4
🖼️  Saved aligned frames to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_jpgs/xHr8X2Wpmno_clip1
🌀 Generating for prompt: chop lettuce and place it in a bowl


  0%|          | 0/50 [00:00<?, ?it/s]

✅ Saved aligned video to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_videos/xHr8X2Wpmno_clip2.mp4
🖼️  Saved aligned frames to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_jpgs/xHr8X2Wpmno_clip2
🌀 Generating for prompt: add verdalago pepper cucumbers tomatoes herbs and onions to the lettuce in the bowl


  0%|          | 0/50 [00:00<?, ?it/s]

✅ Saved aligned video to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_videos/xHr8X2Wpmno_clip3.mp4
🖼️  Saved aligned frames to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_jpgs/xHr8X2Wpmno_clip3
🌀 Generating for prompt: pour the dressing over the salad and mix


  0%|          | 0/50 [00:00<?, ?it/s]

✅ Saved aligned video to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_videos/xHr8X2Wpmno_clip4.mp4
🖼️  Saved aligned frames to: /content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_jpgs/xHr8X2Wpmno_clip4


## Extract JPG frames from the ground-truth videos

In [None]:
def extract_frames_from_gt_videos(gt_video_dir, output_root_dir):
    """Dump every frame of each ``.mp4`` in ``gt_video_dir`` as a JPG.

    For a video ``<name>.mp4`` the frames are written to
    ``output_root_dir/<name>/frame_XXXX.jpg`` with a 1-based, zero-padded
    index. Entries of ``gt_video_dir`` that do not end in ``.mp4`` are ignored.
    Videos that cannot be opened are reported and skipped.

    Args:
        gt_video_dir (str): Directory containing the ground truth videos.
        output_root_dir (str): Directory under which one sub-directory per
            video is created.
    """
    os.makedirs(output_root_dir, exist_ok=True)

    for filename in tqdm(os.listdir(gt_video_dir)):
        if not filename.endswith(".mp4"):
            continue

        video_path = os.path.join(gt_video_dir, filename)
        video_name = os.path.splitext(filename)[0]

        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            cap.release()
            # tqdm.write prints without breaking the active progress bar.
            tqdm.write(f"❌ Failed to open {video_path}")
            continue

        # Create the per-video directory only once the video is known to open,
        # so unreadable videos do not leave empty directories behind.
        output_dir = os.path.join(output_root_dir, video_name)
        os.makedirs(output_dir, exist_ok=True)

        num_frames = 0
        num_failed_writes = 0
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                num_frames += 1
                frame_path = os.path.join(output_dir, f"frame_{num_frames:04d}.jpg")
                # cv2.imwrite signals failure through its return value.
                if not cv2.imwrite(frame_path, frame):
                    num_failed_writes += 1
        finally:
            cap.release()

        if num_failed_writes:
            tqdm.write(f"⚠️ Failed to write {num_failed_writes} frames for {video_name}")
        tqdm.write(f"✅ Extracted {num_frames} frames for {video_name}")

In [None]:


# === Run it ===
# Extract JPG frames for every ground-truth .mp4 in gt_video_dir; each video
# gets its own sub-directory under output_root_dir.
gt_video_dir = "/content/drive/MyDrive/cogvideo_ft/videos"
output_root_dir = "/content/drive/MyDrive/cogvideo_ft/gt_jpgs"

extract_frames_from_gt_videos(gt_video_dir, output_root_dir)

  0%|          | 1/300 [00:00<02:02,  2.45it/s]

✅ Extracted 49 frames for GLd3aX16zBg_clip0


  1%|          | 2/300 [00:00<01:52,  2.66it/s]

✅ Extracted 49 frames for GLd3aX16zBg_clip1


  1%|          | 3/300 [00:01<01:45,  2.82it/s]

✅ Extracted 49 frames for GLd3aX16zBg_clip2


  1%|▏         | 4/300 [00:03<04:51,  1.02it/s]

✅ Extracted 49 frames for GLd3aX16zBg_clip3


  2%|▏         | 5/300 [00:03<03:42,  1.32it/s]

✅ Extracted 49 frames for GLd3aX16zBg_clip4


  2%|▏         | 6/300 [00:03<03:01,  1.62it/s]

✅ Extracted 49 frames for xHr8X2Wpmno_clip0


  2%|▏         | 7/300 [00:04<02:35,  1.89it/s]

✅ Extracted 49 frames for xHr8X2Wpmno_clip1


  3%|▎         | 8/300 [00:04<02:17,  2.12it/s]

✅ Extracted 49 frames for xHr8X2Wpmno_clip2


  3%|▎         | 9/300 [00:04<02:04,  2.34it/s]

✅ Extracted 49 frames for xHr8X2Wpmno_clip3


  3%|▎         | 10/300 [00:05<02:10,  2.23it/s]

✅ Extracted 49 frames for xHr8X2Wpmno_clip4


  4%|▎         | 11/300 [00:05<02:02,  2.35it/s]

✅ Extracted 49 frames for xHr8X2Wpmno_clip5


  4%|▍         | 12/300 [00:06<02:02,  2.35it/s]

✅ Extracted 49 frames for V53XmPeyjIU_clip0


  4%|▍         | 13/300 [00:06<01:54,  2.51it/s]

✅ Extracted 49 frames for V53XmPeyjIU_clip1


  5%|▍         | 14/300 [00:06<01:48,  2.63it/s]

✅ Extracted 49 frames for V53XmPeyjIU_clip2


  5%|▌         | 15/300 [00:07<01:43,  2.77it/s]

✅ Extracted 49 frames for V53XmPeyjIU_clip3


  5%|▌         | 16/300 [00:07<01:46,  2.67it/s]

✅ Extracted 49 frames for V53XmPeyjIU_clip4


  6%|▌         | 17/300 [00:07<01:45,  2.69it/s]

✅ Extracted 49 frames for V53XmPeyjIU_clip5


  6%|▌         | 18/300 [00:08<01:44,  2.69it/s]

✅ Extracted 49 frames for V53XmPeyjIU_clip6


  6%|▋         | 19/300 [00:08<02:05,  2.23it/s]

✅ Extracted 49 frames for V53XmPeyjIU_clip7


  7%|▋         | 20/300 [00:09<01:57,  2.38it/s]

✅ Extracted 49 frames for mZwK0TBI1iY_clip0


  7%|▋         | 21/300 [00:09<01:50,  2.52it/s]

✅ Extracted 49 frames for mZwK0TBI1iY_clip1


  7%|▋         | 22/300 [00:09<01:48,  2.57it/s]

✅ Extracted 49 frames for mZwK0TBI1iY_clip2


  8%|▊         | 23/300 [00:10<01:44,  2.65it/s]

✅ Extracted 49 frames for mZwK0TBI1iY_clip3


  8%|▊         | 24/300 [00:10<01:47,  2.56it/s]

✅ Extracted 49 frames for mZwK0TBI1iY_clip4


  8%|▊         | 25/300 [00:11<01:43,  2.65it/s]

✅ Extracted 49 frames for mZwK0TBI1iY_clip5


  9%|▊         | 26/300 [00:11<01:41,  2.71it/s]

✅ Extracted 49 frames for mZwK0TBI1iY_clip6


  9%|▉         | 27/300 [00:11<01:45,  2.59it/s]

✅ Extracted 49 frames for mZwK0TBI1iY_clip7


  9%|▉         | 28/300 [00:12<01:45,  2.58it/s]

✅ Extracted 49 frames for H_9oM1Y60lU_clip0


 10%|▉         | 29/300 [00:12<01:46,  2.54it/s]

✅ Extracted 49 frames for H_9oM1Y60lU_clip1


 10%|█         | 30/300 [00:12<01:46,  2.53it/s]

✅ Extracted 49 frames for H_9oM1Y60lU_clip2


 10%|█         | 31/300 [00:13<01:45,  2.56it/s]

✅ Extracted 49 frames for H_9oM1Y60lU_clip3


 11%|█         | 32/300 [00:13<01:47,  2.50it/s]

✅ Extracted 49 frames for H_9oM1Y60lU_clip4


 11%|█         | 33/300 [00:14<01:43,  2.58it/s]

✅ Extracted 49 frames for XOwypmUT5cc_clip0


 11%|█▏        | 34/300 [00:14<01:38,  2.71it/s]

✅ Extracted 49 frames for XOwypmUT5cc_clip1


 12%|█▏        | 35/300 [00:14<01:38,  2.70it/s]

✅ Extracted 49 frames for XOwypmUT5cc_clip2


 12%|█▏        | 36/300 [00:15<01:36,  2.73it/s]

✅ Extracted 49 frames for XOwypmUT5cc_clip3


 12%|█▏        | 37/300 [00:15<01:36,  2.72it/s]

✅ Extracted 49 frames for XOwypmUT5cc_clip4


 13%|█▎        | 38/300 [00:15<01:37,  2.69it/s]

✅ Extracted 49 frames for XOwypmUT5cc_clip5


 13%|█▎        | 39/300 [00:16<01:38,  2.65it/s]

✅ Extracted 49 frames for XOwypmUT5cc_clip6


 13%|█▎        | 40/300 [00:16<01:39,  2.61it/s]

✅ Extracted 49 frames for XOwypmUT5cc_clip7


 14%|█▎        | 41/300 [00:17<01:36,  2.69it/s]

✅ Extracted 49 frames for XOwypmUT5cc_clip8


 14%|█▍        | 42/300 [00:17<01:34,  2.74it/s]

✅ Extracted 49 frames for nfYzqyureLo_clip0


 14%|█▍        | 43/300 [00:17<01:33,  2.75it/s]

✅ Extracted 49 frames for nfYzqyureLo_clip1


 15%|█▍        | 44/300 [00:18<01:32,  2.78it/s]

✅ Extracted 49 frames for nfYzqyureLo_clip2


 15%|█▌        | 45/300 [00:18<01:31,  2.78it/s]

✅ Extracted 49 frames for nfYzqyureLo_clip3


 15%|█▌        | 46/300 [00:18<01:30,  2.82it/s]

✅ Extracted 49 frames for nfYzqyureLo_clip4


 16%|█▌        | 47/300 [00:19<01:28,  2.84it/s]

✅ Extracted 49 frames for nfYzqyureLo_clip5


 16%|█▌        | 48/300 [00:19<01:30,  2.79it/s]

✅ Extracted 49 frames for nfYzqyureLo_clip6


 16%|█▋        | 49/300 [00:19<01:27,  2.87it/s]

✅ Extracted 49 frames for nfYzqyureLo_clip7


 17%|█▋        | 50/300 [00:20<01:31,  2.72it/s]

✅ Extracted 49 frames for EjHxMfOdl1k_clip0


 17%|█▋        | 51/300 [00:20<01:32,  2.69it/s]

✅ Extracted 49 frames for EjHxMfOdl1k_clip1


 17%|█▋        | 52/300 [00:21<01:37,  2.56it/s]

✅ Extracted 49 frames for EjHxMfOdl1k_clip2


 18%|█▊        | 53/300 [00:21<01:33,  2.63it/s]

✅ Extracted 49 frames for EjHxMfOdl1k_clip3


 18%|█▊        | 54/300 [00:21<01:34,  2.60it/s]

✅ Extracted 49 frames for EjHxMfOdl1k_clip4


 18%|█▊        | 55/300 [00:22<01:33,  2.63it/s]

✅ Extracted 49 frames for MIXkGU4TJp0_clip0


 19%|█▊        | 56/300 [00:22<01:33,  2.60it/s]

✅ Extracted 49 frames for MIXkGU4TJp0_clip1


 19%|█▉        | 57/300 [00:22<01:29,  2.72it/s]

✅ Extracted 49 frames for MIXkGU4TJp0_clip2


 19%|█▉        | 58/300 [00:23<01:26,  2.80it/s]

✅ Extracted 49 frames for 7nH0Y3agsas_clip0


 20%|█▉        | 59/300 [00:23<01:27,  2.75it/s]

✅ Extracted 49 frames for 7nH0Y3agsas_clip1


 20%|██        | 60/300 [00:23<01:23,  2.87it/s]

✅ Extracted 49 frames for 7nH0Y3agsas_clip2


 20%|██        | 61/300 [00:24<01:24,  2.82it/s]

✅ Extracted 49 frames for 7nH0Y3agsas_clip3


 21%|██        | 62/300 [00:24<01:26,  2.77it/s]

✅ Extracted 49 frames for 7nH0Y3agsas_clip4


 21%|██        | 63/300 [00:25<01:26,  2.73it/s]

✅ Extracted 49 frames for 7nH0Y3agsas_clip5


 21%|██▏       | 64/300 [00:25<01:26,  2.73it/s]

✅ Extracted 49 frames for wXA-1ZmvuEA_clip0


 22%|██▏       | 65/300 [00:25<01:28,  2.67it/s]

✅ Extracted 49 frames for wXA-1ZmvuEA_clip1


 22%|██▏       | 66/300 [00:26<01:29,  2.61it/s]

✅ Extracted 49 frames for wXA-1ZmvuEA_clip2


 22%|██▏       | 67/300 [00:26<01:26,  2.70it/s]

✅ Extracted 49 frames for wXA-1ZmvuEA_clip3


 23%|██▎       | 68/300 [00:26<01:23,  2.78it/s]

✅ Extracted 49 frames for wXA-1ZmvuEA_clip4


 23%|██▎       | 69/300 [00:27<01:21,  2.82it/s]

✅ Extracted 49 frames for pluBtIfxUvE_clip0


 23%|██▎       | 70/300 [00:27<01:25,  2.68it/s]

✅ Extracted 49 frames for pluBtIfxUvE_clip1


 24%|██▎       | 71/300 [00:28<01:23,  2.74it/s]

✅ Extracted 49 frames for pluBtIfxUvE_clip2


 24%|██▍       | 72/300 [00:28<01:25,  2.67it/s]

✅ Extracted 49 frames for pluBtIfxUvE_clip3


 24%|██▍       | 73/300 [00:28<01:24,  2.70it/s]

✅ Extracted 49 frames for pOP5Zzm19kc_clip0


 25%|██▍       | 74/300 [00:29<01:23,  2.70it/s]

✅ Extracted 49 frames for pOP5Zzm19kc_clip1


 25%|██▌       | 75/300 [00:29<01:23,  2.69it/s]

✅ Extracted 49 frames for pOP5Zzm19kc_clip2


 25%|██▌       | 76/300 [00:29<01:22,  2.70it/s]

✅ Extracted 49 frames for pOP5Zzm19kc_clip3


 26%|██▌       | 77/300 [00:30<01:21,  2.73it/s]

✅ Extracted 49 frames for pOP5Zzm19kc_clip4


 26%|██▌       | 78/300 [00:30<01:20,  2.77it/s]

✅ Extracted 49 frames for pOP5Zzm19kc_clip5


 26%|██▋       | 79/300 [00:30<01:18,  2.82it/s]

✅ Extracted 49 frames for pOP5Zzm19kc_clip6


 27%|██▋       | 80/300 [00:31<01:17,  2.85it/s]

✅ Extracted 49 frames for pOP5Zzm19kc_clip7


 27%|██▋       | 81/300 [00:31<01:17,  2.84it/s]

✅ Extracted 49 frames for dxSSb3tv5ZM_clip0


 27%|██▋       | 82/300 [00:32<01:16,  2.84it/s]

✅ Extracted 49 frames for dxSSb3tv5ZM_clip1


 28%|██▊       | 83/300 [00:32<01:17,  2.81it/s]

✅ Extracted 49 frames for dxSSb3tv5ZM_clip2


 28%|██▊       | 84/300 [00:32<01:17,  2.78it/s]

✅ Extracted 49 frames for dxSSb3tv5ZM_clip3


 28%|██▊       | 85/300 [00:33<01:17,  2.77it/s]

✅ Extracted 49 frames for dxSSb3tv5ZM_clip4


 29%|██▊       | 86/300 [00:33<01:17,  2.77it/s]

✅ Extracted 49 frames for dxSSb3tv5ZM_clip5


 29%|██▉       | 87/300 [00:33<01:18,  2.73it/s]

✅ Extracted 49 frames for dxSSb3tv5ZM_clip6


 29%|██▉       | 88/300 [00:34<01:20,  2.62it/s]

✅ Extracted 49 frames for dxSSb3tv5ZM_clip7


 30%|██▉       | 89/300 [00:34<01:39,  2.11it/s]

✅ Extracted 49 frames for dxSSb3tv5ZM_clip8


 30%|███       | 90/300 [00:35<01:33,  2.24it/s]

✅ Extracted 49 frames for dxSSb3tv5ZM_clip9


 30%|███       | 91/300 [00:35<01:25,  2.44it/s]

✅ Extracted 49 frames for acMP2DOpWeA_clip0


 31%|███       | 92/300 [00:35<01:18,  2.63it/s]

✅ Extracted 49 frames for acMP2DOpWeA_clip1


 31%|███       | 93/300 [00:36<01:17,  2.66it/s]

✅ Extracted 49 frames for acMP2DOpWeA_clip2


 31%|███▏      | 94/300 [00:36<01:16,  2.70it/s]

✅ Extracted 49 frames for acMP2DOpWeA_clip3


 32%|███▏      | 95/300 [00:37<01:11,  2.85it/s]

✅ Extracted 49 frames for acMP2DOpWeA_clip4


 32%|███▏      | 96/300 [00:37<01:09,  2.93it/s]

✅ Extracted 49 frames for acMP2DOpWeA_clip5


 32%|███▏      | 97/300 [00:37<01:10,  2.86it/s]

✅ Extracted 49 frames for acMP2DOpWeA_clip6


 33%|███▎      | 98/300 [00:38<01:11,  2.83it/s]

✅ Extracted 49 frames for acMP2DOpWeA_clip7


 33%|███▎      | 99/300 [00:38<01:10,  2.86it/s]

✅ Extracted 49 frames for acMP2DOpWeA_clip8


 33%|███▎      | 100/300 [00:38<01:12,  2.78it/s]

✅ Extracted 49 frames for acMP2DOpWeA_clip9


 34%|███▎      | 101/300 [00:39<01:10,  2.83it/s]

✅ Extracted 49 frames for acMP2DOpWeA_clip10


 34%|███▍      | 102/300 [00:39<01:07,  2.92it/s]

✅ Extracted 49 frames for acMP2DOpWeA_clip11


 34%|███▍      | 103/300 [00:39<01:04,  3.04it/s]

✅ Extracted 49 frames for acMP2DOpWeA_clip12


 35%|███▍      | 104/300 [00:40<01:09,  2.82it/s]

✅ Extracted 49 frames for Gs3OGfQbPjc_clip0


 35%|███▌      | 105/300 [00:40<01:04,  3.00it/s]

✅ Extracted 49 frames for Gs3OGfQbPjc_clip1


 35%|███▌      | 106/300 [00:40<01:03,  3.07it/s]

✅ Extracted 49 frames for Gs3OGfQbPjc_clip2


 36%|███▌      | 107/300 [00:41<01:02,  3.10it/s]

✅ Extracted 49 frames for Gs3OGfQbPjc_clip3


 36%|███▌      | 108/300 [00:41<01:04,  2.96it/s]

✅ Extracted 49 frames for Gs3OGfQbPjc_clip4


 36%|███▋      | 109/300 [00:41<01:07,  2.84it/s]

✅ Extracted 49 frames for _bICKhr9AGo_clip0


 37%|███▋      | 110/300 [00:42<01:06,  2.84it/s]

✅ Extracted 49 frames for _bICKhr9AGo_clip1


 37%|███▋      | 111/300 [00:42<01:05,  2.90it/s]

✅ Extracted 49 frames for _bICKhr9AGo_clip2


 37%|███▋      | 112/300 [00:42<01:04,  2.89it/s]

✅ Extracted 49 frames for _bICKhr9AGo_clip3


 38%|███▊      | 113/300 [00:43<01:05,  2.85it/s]

✅ Extracted 49 frames for _bICKhr9AGo_clip4


 38%|███▊      | 114/300 [00:43<01:04,  2.89it/s]

✅ Extracted 49 frames for _bICKhr9AGo_clip5


 38%|███▊      | 115/300 [00:43<01:05,  2.83it/s]

✅ Extracted 49 frames for _bICKhr9AGo_clip6


 39%|███▊      | 116/300 [00:44<01:03,  2.90it/s]

✅ Extracted 49 frames for _bICKhr9AGo_clip7


 39%|███▉      | 117/300 [00:44<01:07,  2.72it/s]

✅ Extracted 49 frames for _bICKhr9AGo_clip8


 39%|███▉      | 118/300 [00:45<01:06,  2.72it/s]

✅ Extracted 49 frames for _bICKhr9AGo_clip9


 40%|███▉      | 119/300 [00:45<01:12,  2.48it/s]

✅ Extracted 49 frames for _bICKhr9AGo_clip10


 40%|████      | 120/300 [00:45<01:09,  2.59it/s]

✅ Extracted 49 frames for _bICKhr9AGo_clip11


 40%|████      | 121/300 [00:46<01:08,  2.62it/s]

✅ Extracted 49 frames for _bICKhr9AGo_clip12


 41%|████      | 122/300 [00:46<01:06,  2.66it/s]

✅ Extracted 49 frames for frCFxOt9390_clip0


 41%|████      | 123/300 [00:46<01:05,  2.72it/s]

✅ Extracted 49 frames for frCFxOt9390_clip1


 41%|████▏     | 124/300 [00:47<01:07,  2.62it/s]

✅ Extracted 49 frames for frCFxOt9390_clip2


 42%|████▏     | 125/300 [00:47<01:06,  2.61it/s]

✅ Extracted 49 frames for frCFxOt9390_clip3


 42%|████▏     | 126/300 [00:48<01:37,  1.79it/s]

✅ Extracted 49 frames for frCFxOt9390_clip4


 42%|████▏     | 127/300 [00:49<01:25,  2.02it/s]

✅ Extracted 49 frames for frCFxOt9390_clip5


 43%|████▎     | 128/300 [00:49<01:21,  2.12it/s]

✅ Extracted 49 frames for frCFxOt9390_clip6


 43%|████▎     | 129/300 [00:49<01:16,  2.23it/s]

✅ Extracted 49 frames for frCFxOt9390_clip7


 43%|████▎     | 130/300 [00:50<01:13,  2.31it/s]

✅ Extracted 49 frames for frCFxOt9390_clip8


 44%|████▎     | 131/300 [00:50<01:16,  2.21it/s]

✅ Extracted 49 frames for frCFxOt9390_clip9


 44%|████▍     | 132/300 [00:51<01:14,  2.26it/s]

✅ Extracted 49 frames for frCFxOt9390_clip10


 44%|████▍     | 133/300 [00:51<01:09,  2.40it/s]

✅ Extracted 49 frames for HXqC_xWsKkY_clip0


 45%|████▍     | 134/300 [00:51<01:06,  2.48it/s]

✅ Extracted 49 frames for HXqC_xWsKkY_clip1


 45%|████▌     | 135/300 [00:52<01:02,  2.63it/s]

✅ Extracted 49 frames for HXqC_xWsKkY_clip2


 45%|████▌     | 136/300 [00:52<00:59,  2.75it/s]

✅ Extracted 49 frames for HXqC_xWsKkY_clip3


 46%|████▌     | 137/300 [00:52<00:56,  2.87it/s]

✅ Extracted 49 frames for HXqC_xWsKkY_clip4


 46%|████▌     | 138/300 [00:53<00:57,  2.83it/s]

✅ Extracted 49 frames for HXqC_xWsKkY_clip5


 46%|████▋     | 139/300 [00:53<00:57,  2.79it/s]

✅ Extracted 49 frames for HXqC_xWsKkY_clip6


 47%|████▋     | 140/300 [00:53<00:57,  2.76it/s]

✅ Extracted 49 frames for HXqC_xWsKkY_clip7


 47%|████▋     | 141/300 [00:54<00:58,  2.74it/s]

✅ Extracted 49 frames for HXqC_xWsKkY_clip8


 47%|████▋     | 142/300 [00:54<00:59,  2.65it/s]

✅ Extracted 49 frames for fn9anlEL4FI_clip0


 48%|████▊     | 143/300 [00:55<00:59,  2.64it/s]

✅ Extracted 49 frames for fn9anlEL4FI_clip1


 48%|████▊     | 144/300 [00:55<00:58,  2.65it/s]

✅ Extracted 49 frames for fn9anlEL4FI_clip2


 48%|████▊     | 145/300 [00:55<00:56,  2.73it/s]

✅ Extracted 49 frames for fn9anlEL4FI_clip3


 49%|████▊     | 146/300 [00:56<00:59,  2.61it/s]

✅ Extracted 49 frames for fn9anlEL4FI_clip4


 49%|████▉     | 147/300 [00:56<00:58,  2.63it/s]

✅ Extracted 49 frames for fn9anlEL4FI_clip5


 49%|████▉     | 148/300 [00:57<00:57,  2.64it/s]

✅ Extracted 49 frames for fn9anlEL4FI_clip6


 50%|████▉     | 149/300 [00:57<00:54,  2.75it/s]

✅ Extracted 49 frames for fn9anlEL4FI_clip7


 50%|█████     | 150/300 [00:57<00:55,  2.70it/s]

✅ Extracted 49 frames for JxCBGlPgr5o_clip0


 50%|█████     | 151/300 [00:58<01:12,  2.05it/s]

✅ Extracted 49 frames for JxCBGlPgr5o_clip1


 51%|█████     | 152/300 [00:58<01:05,  2.25it/s]

✅ Extracted 49 frames for JxCBGlPgr5o_clip2


 51%|█████     | 153/300 [00:59<01:01,  2.39it/s]

✅ Extracted 49 frames for JxCBGlPgr5o_clip3


 51%|█████▏    | 154/300 [00:59<00:56,  2.58it/s]

✅ Extracted 49 frames for JxCBGlPgr5o_clip4


 52%|█████▏    | 155/300 [00:59<00:55,  2.64it/s]

✅ Extracted 49 frames for JxCBGlPgr5o_clip5


 52%|█████▏    | 156/300 [01:00<00:53,  2.68it/s]

✅ Extracted 49 frames for JxCBGlPgr5o_clip6


 52%|█████▏    | 157/300 [01:00<00:57,  2.50it/s]

✅ Extracted 49 frames for JxCBGlPgr5o_clip7


 53%|█████▎    | 158/300 [01:01<01:00,  2.34it/s]

✅ Extracted 49 frames for JxCBGlPgr5o_clip8


 53%|█████▎    | 159/300 [01:01<00:56,  2.50it/s]

✅ Extracted 49 frames for JxCBGlPgr5o_clip9


 53%|█████▎    | 160/300 [01:01<00:53,  2.62it/s]

✅ Extracted 49 frames for JxCBGlPgr5o_clip10


 54%|█████▎    | 161/300 [01:02<00:52,  2.66it/s]

✅ Extracted 49 frames for JxCBGlPgr5o_clip11


 54%|█████▍    | 162/300 [01:02<00:51,  2.70it/s]

✅ Extracted 49 frames for JxCBGlPgr5o_clip12


 54%|█████▍    | 163/300 [01:02<00:51,  2.64it/s]

✅ Extracted 49 frames for UA7wqVe8Kpo_clip0


 55%|█████▍    | 164/300 [01:03<00:53,  2.52it/s]

✅ Extracted 49 frames for UA7wqVe8Kpo_clip1


 55%|█████▌    | 165/300 [01:03<00:54,  2.50it/s]

✅ Extracted 49 frames for UA7wqVe8Kpo_clip2


 55%|█████▌    | 166/300 [01:04<00:52,  2.54it/s]

✅ Extracted 49 frames for UA7wqVe8Kpo_clip3


 56%|█████▌    | 167/300 [01:04<00:50,  2.64it/s]

✅ Extracted 49 frames for NTyhMGmuWik_clip0


 56%|█████▌    | 168/300 [01:04<00:50,  2.62it/s]

✅ Extracted 49 frames for NTyhMGmuWik_clip1


 56%|█████▋    | 169/300 [01:05<00:48,  2.69it/s]

✅ Extracted 49 frames for NTyhMGmuWik_clip2


 57%|█████▋    | 170/300 [01:05<00:48,  2.67it/s]

✅ Extracted 49 frames for NTyhMGmuWik_clip3


 57%|█████▋    | 171/300 [01:06<00:47,  2.71it/s]

✅ Extracted 49 frames for NTyhMGmuWik_clip4


 57%|█████▋    | 172/300 [01:06<00:46,  2.78it/s]

✅ Extracted 49 frames for NTyhMGmuWik_clip5


 58%|█████▊    | 173/300 [01:06<00:49,  2.57it/s]

✅ Extracted 49 frames for Moh7iYf2rGo_clip0


 58%|█████▊    | 174/300 [01:07<00:47,  2.65it/s]

✅ Extracted 49 frames for Moh7iYf2rGo_clip1


 58%|█████▊    | 175/300 [01:07<01:00,  2.07it/s]

✅ Extracted 49 frames for Moh7iYf2rGo_clip2


 59%|█████▊    | 176/300 [01:08<00:55,  2.25it/s]

✅ Extracted 49 frames for Moh7iYf2rGo_clip3


 59%|█████▉    | 177/300 [01:08<00:50,  2.41it/s]

✅ Extracted 49 frames for Moh7iYf2rGo_clip4


 59%|█████▉    | 178/300 [01:08<00:47,  2.56it/s]

✅ Extracted 49 frames for ntiGX3X-spA_clip0


 60%|█████▉    | 179/300 [01:09<00:45,  2.63it/s]

✅ Extracted 49 frames for ntiGX3X-spA_clip1


 60%|██████    | 180/300 [01:09<00:44,  2.68it/s]

✅ Extracted 49 frames for ntiGX3X-spA_clip2


 60%|██████    | 181/300 [01:10<00:43,  2.72it/s]

✅ Extracted 49 frames for ntiGX3X-spA_clip3


 61%|██████    | 182/300 [01:10<00:43,  2.70it/s]

✅ Extracted 49 frames for ntiGX3X-spA_clip4


 61%|██████    | 183/300 [01:10<00:43,  2.71it/s]

✅ Extracted 49 frames for ntiGX3X-spA_clip5


 61%|██████▏   | 184/300 [01:11<00:42,  2.75it/s]

✅ Extracted 49 frames for ntiGX3X-spA_clip6


 62%|██████▏   | 185/300 [01:11<00:40,  2.84it/s]

✅ Extracted 49 frames for ntiGX3X-spA_clip7


 62%|██████▏   | 186/300 [01:11<00:40,  2.85it/s]

✅ Extracted 49 frames for ntiGX3X-spA_clip8


 62%|██████▏   | 187/300 [01:12<00:42,  2.63it/s]

✅ Extracted 49 frames for ntiGX3X-spA_clip9


 63%|██████▎   | 188/300 [01:12<00:41,  2.71it/s]

✅ Extracted 49 frames for ntiGX3X-spA_clip10


 63%|██████▎   | 189/300 [01:12<00:40,  2.76it/s]

✅ Extracted 49 frames for GmWb7W7m2vs_clip0


 63%|██████▎   | 190/300 [01:13<00:39,  2.77it/s]

✅ Extracted 49 frames for GmWb7W7m2vs_clip1


 64%|██████▎   | 191/300 [01:13<00:40,  2.72it/s]

✅ Extracted 49 frames for GmWb7W7m2vs_clip2


 64%|██████▍   | 192/300 [01:13<00:39,  2.77it/s]

✅ Extracted 49 frames for GmWb7W7m2vs_clip3


 64%|██████▍   | 193/300 [01:14<00:39,  2.73it/s]

✅ Extracted 49 frames for GmWb7W7m2vs_clip4


 65%|██████▍   | 194/300 [01:14<00:38,  2.78it/s]

✅ Extracted 49 frames for GmWb7W7m2vs_clip5


 65%|██████▌   | 195/300 [01:15<00:37,  2.77it/s]

✅ Extracted 49 frames for GmWb7W7m2vs_clip6


 65%|██████▌   | 196/300 [01:15<00:36,  2.87it/s]

✅ Extracted 49 frames for GmWb7W7m2vs_clip7


 66%|██████▌   | 197/300 [01:15<00:36,  2.84it/s]

✅ Extracted 49 frames for GmWb7W7m2vs_clip8


 66%|██████▌   | 198/300 [01:16<00:38,  2.64it/s]

✅ Extracted 49 frames for GmWb7W7m2vs_clip9


 66%|██████▋   | 199/300 [01:16<00:39,  2.58it/s]

✅ Extracted 49 frames for GmWb7W7m2vs_clip10


 67%|██████▋   | 200/300 [01:17<00:39,  2.52it/s]

✅ Extracted 49 frames for GmWb7W7m2vs_clip11


 67%|██████▋   | 201/300 [01:17<00:37,  2.65it/s]

✅ Extracted 49 frames for GmWb7W7m2vs_clip12


 67%|██████▋   | 202/300 [01:17<00:36,  2.72it/s]

✅ Extracted 49 frames for GmWb7W7m2vs_clip13


 68%|██████▊   | 203/300 [01:18<00:34,  2.81it/s]

✅ Extracted 49 frames for 2p_h3s1CPoc_clip0


 68%|██████▊   | 204/300 [01:18<00:35,  2.72it/s]

✅ Extracted 49 frames for 2p_h3s1CPoc_clip1


 68%|██████▊   | 205/300 [01:18<00:33,  2.80it/s]

✅ Extracted 49 frames for 2p_h3s1CPoc_clip2


 69%|██████▊   | 206/300 [01:19<00:34,  2.73it/s]

✅ Extracted 49 frames for 2p_h3s1CPoc_clip3


 69%|██████▉   | 207/300 [01:19<00:33,  2.79it/s]

✅ Extracted 49 frames for 2p_h3s1CPoc_clip4


 69%|██████▉   | 208/300 [01:19<00:31,  2.92it/s]

✅ Extracted 49 frames for 2p_h3s1CPoc_clip5


 70%|██████▉   | 209/300 [01:20<00:32,  2.82it/s]

✅ Extracted 49 frames for 2p_h3s1CPoc_clip6


 70%|███████   | 210/300 [01:20<00:31,  2.85it/s]

✅ Extracted 49 frames for 2p_h3s1CPoc_clip7


 70%|███████   | 211/300 [01:20<00:32,  2.74it/s]

✅ Extracted 49 frames for 2p_h3s1CPoc_clip8


 71%|███████   | 212/300 [01:21<00:41,  2.11it/s]

✅ Extracted 49 frames for 2p_h3s1CPoc_clip9


 71%|███████   | 213/300 [01:21<00:37,  2.31it/s]

✅ Extracted 49 frames for 2p_h3s1CPoc_clip10


 71%|███████▏  | 214/300 [01:22<00:37,  2.32it/s]

✅ Extracted 49 frames for DXYCXcuifM8_clip0


 72%|███████▏  | 215/300 [01:23<00:44,  1.89it/s]

✅ Extracted 49 frames for DXYCXcuifM8_clip1


 72%|███████▏  | 216/300 [01:24<00:53,  1.58it/s]

✅ Extracted 49 frames for DXYCXcuifM8_clip2


 72%|███████▏  | 217/300 [01:24<00:47,  1.76it/s]

✅ Extracted 49 frames for DXYCXcuifM8_clip3


 73%|███████▎  | 218/300 [01:24<00:40,  2.02it/s]

✅ Extracted 49 frames for DXYCXcuifM8_clip4


 73%|███████▎  | 219/300 [01:25<00:36,  2.22it/s]

✅ Extracted 49 frames for DXYCXcuifM8_clip5


 73%|███████▎  | 220/300 [01:25<00:33,  2.41it/s]

✅ Extracted 49 frames for DXYCXcuifM8_clip6


 74%|███████▎  | 221/300 [01:25<00:31,  2.51it/s]

✅ Extracted 49 frames for UfWND-lc4sc_clip0


 74%|███████▍  | 222/300 [01:26<00:30,  2.59it/s]

✅ Extracted 49 frames for UfWND-lc4sc_clip1


 74%|███████▍  | 223/300 [01:26<00:30,  2.52it/s]

✅ Extracted 49 frames for UfWND-lc4sc_clip2


 75%|███████▍  | 224/300 [01:26<00:29,  2.56it/s]

✅ Extracted 49 frames for UfWND-lc4sc_clip3


 75%|███████▌  | 225/300 [01:27<00:29,  2.53it/s]

✅ Extracted 49 frames for UfWND-lc4sc_clip4


 75%|███████▌  | 226/300 [01:27<00:28,  2.58it/s]

✅ Extracted 49 frames for UfWND-lc4sc_clip5


 76%|███████▌  | 227/300 [01:28<00:28,  2.57it/s]

✅ Extracted 49 frames for UfWND-lc4sc_clip6


 76%|███████▌  | 228/300 [01:28<00:28,  2.55it/s]

✅ Extracted 49 frames for UfWND-lc4sc_clip7


 76%|███████▋  | 229/300 [01:28<00:26,  2.68it/s]

✅ Extracted 49 frames for y-Jxli-JfPg_clip0


 77%|███████▋  | 230/300 [01:29<00:24,  2.81it/s]

✅ Extracted 49 frames for y-Jxli-JfPg_clip1


 77%|███████▋  | 231/300 [01:29<00:25,  2.74it/s]

✅ Extracted 49 frames for y-Jxli-JfPg_clip2


 77%|███████▋  | 232/300 [01:29<00:24,  2.80it/s]

✅ Extracted 49 frames for y-Jxli-JfPg_clip3


 78%|███████▊  | 233/300 [01:30<00:23,  2.85it/s]

✅ Extracted 49 frames for y-Jxli-JfPg_clip4


 78%|███████▊  | 234/300 [01:30<00:23,  2.80it/s]

✅ Extracted 49 frames for y-Jxli-JfPg_clip5


 78%|███████▊  | 235/300 [01:31<00:31,  2.03it/s]

✅ Extracted 49 frames for y-Jxli-JfPg_clip6


 79%|███████▊  | 236/300 [01:31<00:29,  2.20it/s]

✅ Extracted 49 frames for C_6yMh3tEQk_clip0


 79%|███████▉  | 237/300 [01:32<00:26,  2.41it/s]

✅ Extracted 49 frames for C_6yMh3tEQk_clip1


 79%|███████▉  | 238/300 [01:32<00:26,  2.36it/s]

✅ Extracted 49 frames for C_6yMh3tEQk_clip2


 80%|███████▉  | 239/300 [01:32<00:24,  2.48it/s]

✅ Extracted 49 frames for C_6yMh3tEQk_clip3


 80%|████████  | 240/300 [01:33<00:22,  2.63it/s]

✅ Extracted 49 frames for C_6yMh3tEQk_clip4


 80%|████████  | 241/300 [01:33<00:21,  2.72it/s]

✅ Extracted 49 frames for C_6yMh3tEQk_clip5


 81%|████████  | 242/300 [01:33<00:20,  2.78it/s]

✅ Extracted 49 frames for C_6yMh3tEQk_clip6


 81%|████████  | 243/300 [01:34<00:20,  2.80it/s]

✅ Extracted 49 frames for C_6yMh3tEQk_clip7


 81%|████████▏ | 244/300 [01:34<00:19,  2.81it/s]

✅ Extracted 49 frames for C_6yMh3tEQk_clip8


 82%|████████▏ | 245/300 [01:34<00:19,  2.80it/s]

✅ Extracted 49 frames for 7-WEdqJBXoQ_clip0


 82%|████████▏ | 246/300 [01:35<00:19,  2.81it/s]

✅ Extracted 49 frames for 7-WEdqJBXoQ_clip1


 82%|████████▏ | 247/300 [01:35<00:18,  2.85it/s]

✅ Extracted 49 frames for 7-WEdqJBXoQ_clip2


 83%|████████▎ | 248/300 [01:36<00:18,  2.83it/s]

✅ Extracted 49 frames for 7-WEdqJBXoQ_clip3


 83%|████████▎ | 249/300 [01:36<00:18,  2.77it/s]

✅ Extracted 49 frames for 7-WEdqJBXoQ_clip4


 83%|████████▎ | 250/300 [01:36<00:16,  3.02it/s]

✅ Extracted 49 frames for DDRzeomsyzU_clip0


 84%|████████▎ | 251/300 [01:37<00:16,  2.96it/s]

✅ Extracted 49 frames for DDRzeomsyzU_clip1


 84%|████████▍ | 252/300 [01:37<00:15,  3.01it/s]

✅ Extracted 49 frames for DDRzeomsyzU_clip2


 84%|████████▍ | 253/300 [01:37<00:17,  2.71it/s]

✅ Extracted 49 frames for DDRzeomsyzU_clip3


 85%|████████▍ | 254/300 [01:38<00:17,  2.68it/s]

✅ Extracted 49 frames for DDRzeomsyzU_clip4


 85%|████████▌ | 255/300 [01:38<00:16,  2.73it/s]

✅ Extracted 49 frames for DDRzeomsyzU_clip5


 85%|████████▌ | 256/300 [01:38<00:15,  2.82it/s]

✅ Extracted 49 frames for a4RwXrA1hiE_clip0


 86%|████████▌ | 257/300 [01:39<00:15,  2.77it/s]

✅ Extracted 49 frames for a4RwXrA1hiE_clip1


 86%|████████▌ | 258/300 [01:39<00:15,  2.64it/s]

✅ Extracted 49 frames for a4RwXrA1hiE_clip2


 86%|████████▋ | 259/300 [01:40<00:15,  2.70it/s]

✅ Extracted 49 frames for a4RwXrA1hiE_clip3


 87%|████████▋ | 260/300 [01:40<00:14,  2.78it/s]

✅ Extracted 49 frames for a4RwXrA1hiE_clip4


 87%|████████▋ | 261/300 [01:40<00:14,  2.74it/s]

✅ Extracted 49 frames for a4RwXrA1hiE_clip5


 87%|████████▋ | 262/300 [01:41<00:14,  2.71it/s]

✅ Extracted 49 frames for a4RwXrA1hiE_clip6


 88%|████████▊ | 263/300 [01:41<00:13,  2.75it/s]

✅ Extracted 49 frames for a4RwXrA1hiE_clip7


 88%|████████▊ | 264/300 [01:41<00:12,  2.84it/s]

✅ Extracted 49 frames for Krmi9Is522c_clip0


 88%|████████▊ | 265/300 [01:42<00:12,  2.88it/s]

✅ Extracted 49 frames for Krmi9Is522c_clip1


 89%|████████▊ | 266/300 [01:42<00:11,  3.00it/s]

✅ Extracted 49 frames for Krmi9Is522c_clip2


 89%|████████▉ | 267/300 [01:42<00:11,  2.81it/s]

✅ Extracted 49 frames for Krmi9Is522c_clip3


 89%|████████▉ | 268/300 [01:43<00:10,  2.94it/s]

✅ Extracted 49 frames for Krmi9Is522c_clip4


 90%|████████▉ | 269/300 [01:43<00:10,  2.97it/s]

✅ Extracted 49 frames for Krmi9Is522c_clip5


 90%|█████████ | 270/300 [01:43<00:10,  2.99it/s]

✅ Extracted 49 frames for Krmi9Is522c_clip6


 90%|█████████ | 271/300 [01:44<00:09,  3.08it/s]

✅ Extracted 49 frames for Krmi9Is522c_clip7


 91%|█████████ | 272/300 [01:44<00:09,  2.97it/s]

✅ Extracted 49 frames for 1k64hSAAAd4_clip0


 91%|█████████ | 273/300 [01:44<00:09,  2.79it/s]

✅ Extracted 49 frames for 1k64hSAAAd4_clip1


 91%|█████████▏| 274/300 [01:45<00:09,  2.78it/s]

✅ Extracted 49 frames for 1k64hSAAAd4_clip2


 92%|█████████▏| 275/300 [01:45<00:08,  2.78it/s]

✅ Extracted 49 frames for 1k64hSAAAd4_clip3


 92%|█████████▏| 276/300 [01:45<00:08,  2.74it/s]

✅ Extracted 49 frames for 1k64hSAAAd4_clip4


 92%|█████████▏| 277/300 [01:46<00:08,  2.63it/s]

✅ Extracted 49 frames for 1k64hSAAAd4_clip5


 93%|█████████▎| 278/300 [01:47<00:10,  2.15it/s]

✅ Extracted 49 frames for 1k64hSAAAd4_clip6


 93%|█████████▎| 279/300 [01:47<00:08,  2.34it/s]

✅ Extracted 49 frames for 1k64hSAAAd4_clip7


 93%|█████████▎| 280/300 [01:47<00:08,  2.38it/s]

✅ Extracted 49 frames for 1k64hSAAAd4_clip8


 94%|█████████▎| 281/300 [01:48<00:07,  2.40it/s]

✅ Extracted 49 frames for 1k64hSAAAd4_clip9


 94%|█████████▍| 282/300 [01:48<00:07,  2.50it/s]

✅ Extracted 49 frames for Vi9BxQvpdpQ_clip0


 94%|█████████▍| 283/300 [01:48<00:06,  2.61it/s]

✅ Extracted 49 frames for Vi9BxQvpdpQ_clip1


 95%|█████████▍| 284/300 [01:49<00:05,  2.72it/s]

✅ Extracted 49 frames for Vi9BxQvpdpQ_clip2


 95%|█████████▌| 285/300 [01:49<00:05,  2.70it/s]

✅ Extracted 49 frames for Vi9BxQvpdpQ_clip3


 95%|█████████▌| 286/300 [01:49<00:05,  2.79it/s]

✅ Extracted 49 frames for Vi9BxQvpdpQ_clip4


 96%|█████████▌| 287/300 [01:50<00:04,  2.76it/s]

✅ Extracted 49 frames for Vi9BxQvpdpQ_clip5


 96%|█████████▌| 288/300 [01:50<00:04,  2.76it/s]

✅ Extracted 49 frames for Vi9BxQvpdpQ_clip6


 96%|█████████▋| 289/300 [01:51<00:03,  2.81it/s]

✅ Extracted 49 frames for Vi9BxQvpdpQ_clip7


 97%|█████████▋| 290/300 [01:51<00:03,  2.79it/s]

✅ Extracted 49 frames for Vi9BxQvpdpQ_clip8


 97%|█████████▋| 291/300 [01:51<00:03,  2.84it/s]

✅ Extracted 49 frames for Vi9BxQvpdpQ_clip9


 97%|█████████▋| 292/300 [01:52<00:02,  2.88it/s]

✅ Extracted 49 frames for Vi9BxQvpdpQ_clip10


 98%|█████████▊| 293/300 [01:52<00:02,  2.81it/s]

✅ Extracted 49 frames for 3r8A4xCZgPQ_clip0


 98%|█████████▊| 294/300 [01:52<00:02,  2.57it/s]

✅ Extracted 49 frames for 3r8A4xCZgPQ_clip1


 98%|█████████▊| 295/300 [01:53<00:01,  2.57it/s]

✅ Extracted 49 frames for 3r8A4xCZgPQ_clip2


 99%|█████████▊| 296/300 [01:53<00:01,  2.62it/s]

✅ Extracted 49 frames for 3r8A4xCZgPQ_clip3


 99%|█████████▉| 297/300 [01:53<00:01,  2.78it/s]

✅ Extracted 49 frames for 3r8A4xCZgPQ_clip4


 99%|█████████▉| 298/300 [01:54<00:00,  2.82it/s]

✅ Extracted 49 frames for c9eELn4axpg_clip0


100%|█████████▉| 299/300 [01:54<00:00,  2.80it/s]

✅ Extracted 49 frames for c9eELn4axpg_clip1


100%|██████████| 300/300 [01:55<00:00,  2.61it/s]

✅ Extracted 49 frames for c9eELn4axpg_clip2





### Generate JPG frames for the model-generated videos

In [None]:
# === Run it ===
# Extract JPG frames from the videos under `generated_videos` into `generated_jpgs`.
# NOTE: despite the `gt_` prefix, `gt_video_dir` points at the *generated* videos in
# this cell, not at ground-truth clips.
gt_video_dir = "/content/drive/MyDrive/cogvideo_ft/cogvideo_eval/generated_videos"
output_root_dir = "/content/drive/MyDrive/cogvideo_ft/cogvideo_eval/generated_jpgs"

extract_frames_from_gt_videos(gt_video_dir, output_root_dir)

  9%|▉         | 1/11 [00:00<00:02,  3.46it/s]

✅ Extracted 49 frames for generated_3


 18%|█▊        | 2/11 [00:00<00:02,  3.34it/s]

✅ Extracted 49 frames for generated_6


 27%|██▋       | 3/11 [00:00<00:02,  3.22it/s]

✅ Extracted 49 frames for generated_4


 36%|███▋      | 4/11 [00:01<00:02,  3.21it/s]

✅ Extracted 49 frames for generated_2


 45%|████▌     | 5/11 [00:01<00:01,  3.16it/s]

✅ Extracted 49 frames for generated_5


 55%|█████▍    | 6/11 [00:01<00:01,  3.30it/s]

✅ Extracted 49 frames for generated_0


 64%|██████▎   | 7/11 [00:02<00:01,  3.00it/s]

✅ Extracted 49 frames for generated_8


 73%|███████▎  | 8/11 [00:02<00:00,  3.21it/s]

✅ Extracted 49 frames for generated_1


 82%|████████▏ | 9/11 [00:02<00:00,  3.16it/s]

✅ Extracted 49 frames for generated_7


100%|██████████| 11/11 [00:03<00:00,  3.44it/s]

✅ Extracted 49 frames for generated_9





# Metrics

In [None]:
!pip install lpips

Collecting lpips
  Downloading lpips-0.1.4-py3-none-any.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=0.4.0->lpips)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=0.4.0->lpips)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=0.4.0->lpips)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=0.4.0->lpips)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=0.4.0->lpips)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=0.4.0->lpips)
  Downloading nvidia_cufft

## v1: SSIM / PSNR / LPIPS on the first 10 samples

In [None]:
import os
import glob
import torch
import lpips
import numpy as np
import pandas as pd
import cv2
from PIL import Image
from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr
from torchvision import transforms

In [None]:
# === Paths ===
gt_jpg_dir = "/content/drive/MyDrive/cogvideo_ft/gt_jpgs"
gen_jpg_dir = "/content/drive/MyDrive/cogvideo_ft/cogvideo_eval/generated_jpgs"

# === Setup ===
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])
lpips_fn = lpips.LPIPS(net='alex')

def load_image(path):
    """Load one image file as a batched tensor.

    The file is opened, forced to RGB, passed through the module-level
    `transform`, and given a leading batch axis via `unsqueeze(0)`.
    """
    rgb_image = Image.open(path).convert("RGB")
    image_tensor = transform(rgb_image)
    return image_tensor.unsqueeze(0)

def calc_metrics(gt_folder, gen_folder):
    """Average SSIM, PSNR and LPIPS over the paired frames of one video.

    Frames are matched by sorted filename (`frame_*.jpg`) in both folders and
    loaded through `load_image`.

    Args:
        gt_folder: Folder with the ground-truth frames.
        gen_folder: Folder with the generated frames.

    Returns:
        A dict with the mean "SSIM", "PSNR" and "LPIPS" over all frame pairs,
        or None when the folders hold a different number of frames or no
        frames at all (a message is printed in both cases).
    """
    gt_paths = sorted(glob.glob(os.path.join(gt_folder, "frame_*.jpg")))
    gen_paths = sorted(glob.glob(os.path.join(gen_folder, "frame_*.jpg")))

    if len(gt_paths) != len(gen_paths):
        print(f"❌ Frame count mismatch: {len(gt_paths)} vs {len(gen_paths)}")
        return None

    n = len(gt_paths)
    if n == 0:
        # Two empty folders pass the count check above; bail out here instead
        # of dividing by zero when averaging.
        print(f"❌ No frames found in {gt_folder} or {gen_folder}")
        return None

    ssim_total, psnr_total, lpips_total = 0.0, 0.0, 0.0
    for gt_p, gen_p in zip(gt_paths, gen_paths):
        gt_img = load_image(gt_p)
        gen_img = load_image(gen_p)

        # skimage expects channel-last arrays: drop the batch axis, then CHW -> HWC.
        gt_np = gt_img.squeeze(0).permute(1, 2, 0).numpy()
        gen_np = gen_img.squeeze(0).permute(1, 2, 0).numpy()

        ssim_val = ssim(gt_np, gen_np, channel_axis=2, data_range=1.0)
        psnr_val = psnr(gt_np, gen_np, data_range=1.0)
        # LPIPS expects inputs in [-1, 1]; the loaded tensors are in [0, 1], so
        # ask LPIPS to rescale them. Without normalize=True the scores are not
        # comparable to published LPIPS values.
        with torch.no_grad():  # evaluation only, no autograd graph needed
            lpips_val = lpips_fn(gt_img, gen_img, normalize=True).item()

        ssim_total += ssim_val
        psnr_total += psnr_val
        lpips_total += lpips_val

    return {
        "SSIM": ssim_total / n,
        "PSNR": psnr_total / n,
        "LPIPS": lpips_total / n
    }


Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]


Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:01<00:00, 153MB/s]


Loading model from: /usr/local/lib/python3.11/dist-packages/lpips/weights/v0.1/alex.pth


In [None]:
# === Run for first 10 samples ===
# List the source videos once: the listing is loop-invariant and each call is a
# round-trip to the mounted Drive.
# NOTE(review): os.listdir returns entries in arbitrary order, so pairing
# entry i with `generated_{i}` assumes the generated videos were numbered from
# the same listing order. Confirm against the generation cell.
video_names = os.listdir("/content/drive/MyDrive/cogvideo_ft/videos")

results = []
# Cap at the number of available videos so a short folder does not raise IndexError.
for i in range(min(10, len(video_names))):
    gt_name = os.path.splitext(video_names[i])[0]
    gt_path = os.path.join(gt_jpg_dir, gt_name)
    gen_path = os.path.join(gen_jpg_dir, f"generated_{i}")

    if not (os.path.exists(gt_path) and os.path.exists(gen_path)):
        # Report the gap rather than dropping the sample silently.
        print(f"Skipping missing: {gt_name}")
        continue

    metrics = calc_metrics(gt_path, gen_path)
    if metrics:
        results.append({"sample": gt_name, **metrics})

# === Show table ===
df = pd.DataFrame(results)
df

Unnamed: 0,sample,SSIM,PSNR,LPIPS
0,GLd3aX16zBg_clip0,0.337731,6.417313,0.777678
1,GLd3aX16zBg_clip1,0.371166,6.631341,0.839214
2,GLd3aX16zBg_clip2,0.152496,6.07647,0.736084
3,GLd3aX16zBg_clip3,0.381072,8.934414,0.670342
4,GLd3aX16zBg_clip4,0.339789,8.887317,0.707281
5,xHr8X2Wpmno_clip0,0.258513,7.664188,0.797911
6,xHr8X2Wpmno_clip1,0.454138,8.373821,0.572199
7,xHr8X2Wpmno_clip2,0.375904,8.708074,0.667186
8,xHr8X2Wpmno_clip3,0.232518,9.414665,0.717656
9,xHr8X2Wpmno_clip4,0.158187,10.276742,0.511281


## v2: SSIM / PSNR / LPIPS / CLIP over every video in videos.txt

In [None]:
!pip uninstall -y clip

Found existing installation: clip 0.2.0
Uninstalling clip-0.2.0:
  Successfully uninstalled clip-0.2.0


In [None]:
!pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-a3a8oj4d
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-a3a8oj4d
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting ftfy (from clip==1.0)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l[?25hdone
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369490 sha256=84ebcb0ca0b09977943a62de5ed51b20018385b301ac431fc3ff4dc62e2728b5
  Stored in directory: /tmp/pip-ephem-wheel-cache-il9i8c3r/wheels/3f/7c/a4/9b490845988bf7a4d

In [None]:
import os
import torch
import numpy as np
from PIL import Image
from torchvision import transforms
from torchvision.utils import save_image
from tqdm import tqdm
import lpips
import cv2
import clip

from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr

In [None]:
# === Metric setup ===
# Both models are placed on the GPU, so this cell needs a CUDA runtime.
# LPIPS uses the AlexNet trunk; clip.load returns the ViT-B/32 model together
# with the image preprocessing callable that goes with it.
lpips_model = lpips.LPIPS(net="alex").cuda()
clip_model, clip_preprocess = clip.load("ViT-B/32", device="cuda")

# === Utility Functions ===

def load_image_tensor(path):
    """Load an image as a batched 224x224 RGB tensor on the GPU.

    The image is opened, forced to RGB, resized to 224x224, converted to a
    tensor, given a leading batch axis, and moved to CUDA.
    """
    rgb_image = Image.open(path).convert("RGB")
    resize_and_tensorize = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    batched = resize_and_tensorize(rgb_image).unsqueeze(0)
    return batched.cuda()

def compute_ssim(img1, img2):
    """Return the SSIM between two PIL images, compared in grayscale ("L" mode)."""
    gray_first, gray_second = (
        np.array(picture.convert("L")) for picture in (img1, img2)
    )
    return ssim(gray_first, gray_second)

def compute_psnr(img1, img2):
    """Return the PSNR between two images after converting both to NumPy arrays."""
    reference_pixels = np.array(img1)
    candidate_pixels = np.array(img2)
    return psnr(reference_pixels, candidate_pixels)

def compute_lpips(img1_path, img2_path):
    """Return the LPIPS distance between two image files.

    Both files are read with the lpips loader, converted with
    `lpips.im2tensor`, moved to the GPU and scored by the module-level
    `lpips_model`.

    Args:
        img1_path: Path of the first image.
        img2_path: Path of the second image.

    Returns:
        The LPIPS distance as a Python float.
    """
    img1 = lpips.im2tensor(lpips.load_image(img1_path)).cuda()
    img2 = lpips.im2tensor(lpips.load_image(img2_path)).cuda()
    # Evaluation only: skip autograd bookkeeping, as compute_clip_similarity does.
    with torch.no_grad():
        return lpips_model(img1, img2).item()

def compute_clip_similarity(img_path, prompt):
    """Return the cosine similarity between CLIP embeddings of an image and a prompt.

    The image goes through `clip_preprocess`, the prompt through
    `clip.tokenize`; both are encoded by `clip_model` on the GPU with
    gradients disabled.
    """
    image_batch = clip_preprocess(Image.open(img_path)).unsqueeze(0).cuda()
    text_tokens = clip.tokenize([prompt]).cuda()
    with torch.no_grad():
        image_embedding = clip_model.encode_image(image_batch)
        text_embedding = clip_model.encode_text(text_tokens)
        return torch.cosine_similarity(image_embedding, text_embedding).item()


Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]




Loading model from: /usr/local/lib/python3.11/dist-packages/lpips/weights/v0.1/alex.pth


100%|███████████████████████████████████████| 338M/338M [00:11<00:00, 29.7MiB/s]


In [None]:
# === Main Evaluation Loop ===
# For every video listed in videos.txt, compare its ground-truth frames with the
# generated frames of the same name (SSIM / PSNR / LPIPS averaged per video) and
# score the last comparable generated frame against the video's prompt with CLIP.

gt_jpg_root = "/content/drive/MyDrive/cogvideo_ft/gt_jpgs"
# gen_jpg_root = "/content/drive/MyDrive/cogvideo_ft/cogvideo_eval/generated_jpgs"
gen_jpg_root = "/content/drive/MyDrive/cogvideo_ft/cogvideo_eval_epoch3_50/generated_jpgs"
prompt_file = "/content/drive/MyDrive/cogvideo_ft/prompts.txt"
video_list_file = "/content/drive/MyDrive/cogvideo_ft/videos.txt"

results = []

# Read both lists up front so the files are closed before the long-running loop.
# Line i of prompts.txt is used as the prompt for line i of videos.txt.
with open(prompt_file, "r") as pf, open(video_list_file, "r") as vf:
    prompts = [p.strip() for p in pf.readlines()]
    video_files = [v.strip().split("/")[-1] for v in vf.readlines()]  # e.g., 1k64hSAAAd4_clip0.mp4

for i, video_name in enumerate(tqdm(video_files)):
    basename = video_name.replace(".mp4", "")
    gt_dir = os.path.join(gt_jpg_root, basename)
    # gen_dir = os.path.join(gen_jpg_root, f"generated_{i}")
    gen_dir = os.path.join(gen_jpg_root, basename)

    if not os.path.exists(gt_dir) or not os.path.exists(gen_dir):
        print(f"Skipping missing: {basename}")
        continue

    frame_names = sorted(os.listdir(gt_dir))
    ssim_vals, psnr_vals, lpips_vals = [], [], []
    # Track the last generated frame that was actually compared. Reusing the
    # inner loop variable after the loop could pick up a path from a previous
    # video, an undefined name, or a file that does not exist.
    last_gen_frame = None

    for f in frame_names:
        gt_frame = os.path.join(gt_dir, f)
        gen_frame = os.path.join(gen_dir, f)
        if not os.path.exists(gt_frame) or not os.path.exists(gen_frame):
            continue

        img1 = Image.open(gt_frame).convert("RGB")
        img2 = Image.open(gen_frame).convert("RGB")

        ssim_vals.append(compute_ssim(img1, img2))
        psnr_vals.append(compute_psnr(img1, img2))
        lpips_vals.append(compute_lpips(gt_frame, gen_frame))
        last_gen_frame = gen_frame

    if last_gen_frame is None:
        # No frame pair could be compared: averaging would yield NaN and there
        # is no generated frame to score with CLIP.
        print(f"Skipping (no comparable frames): {basename}")
        continue

    clip_score = compute_clip_similarity(last_gen_frame, prompts[i])  # last frame and prompt

    results.append({
        "sample": basename,
        "SSIM": np.mean(ssim_vals),
        "PSNR": np.mean(psnr_vals),
        "LPIPS": np.mean(lpips_vals),
        "CLIP": clip_score
    })

# === Display Results ===
import pandas as pd
df = pd.DataFrame(results)
print(df)

# Optionally save:
# NOTE(review): frames are read from `cogvideo_eval_epoch3_50`, but the CSV is
# written under `cogvideo_eval`; confirm this is the intended destination so
# another run's metrics are not overwritten.
df.to_csv("/content/drive/MyDrive/cogvideo_ft/cogvideo_eval/metrics_eval.csv", index=False)

100%|██████████| 300/300 [00:37<00:00,  7.99it/s]

Skipping missing: xHr8X2Wpmno_clip5
Skipping missing: V53XmPeyjIU_clip0
Skipping missing: V53XmPeyjIU_clip1
Skipping missing: V53XmPeyjIU_clip2
Skipping missing: V53XmPeyjIU_clip3
Skipping missing: V53XmPeyjIU_clip4
Skipping missing: V53XmPeyjIU_clip5
Skipping missing: V53XmPeyjIU_clip6
Skipping missing: V53XmPeyjIU_clip7
Skipping missing: mZwK0TBI1iY_clip0
Skipping missing: mZwK0TBI1iY_clip1
Skipping missing: mZwK0TBI1iY_clip2
Skipping missing: mZwK0TBI1iY_clip3
Skipping missing: mZwK0TBI1iY_clip4
Skipping missing: mZwK0TBI1iY_clip5
Skipping missing: mZwK0TBI1iY_clip6
Skipping missing: mZwK0TBI1iY_clip7
Skipping missing: H_9oM1Y60lU_clip0
Skipping missing: H_9oM1Y60lU_clip1
Skipping missing: H_9oM1Y60lU_clip2
Skipping missing: H_9oM1Y60lU_clip3
Skipping missing: H_9oM1Y60lU_clip4
Skipping missing: XOwypmUT5cc_clip0
Skipping missing: XOwypmUT5cc_clip1
Skipping missing: XOwypmUT5cc_clip2
Skipping missing: XOwypmUT5cc_clip3
Skipping missing: XOwypmUT5cc_clip4
Skipping missing: XOwypmUT5c




In [None]:
df

Unnamed: 0,sample,SSIM,PSNR,LPIPS,CLIP
0,GLd3aX16zBg_clip0,0.423538,8.063868,0.826841,0.345947
1,GLd3aX16zBg_clip1,0.439133,7.894355,0.817969,0.332275
2,GLd3aX16zBg_clip2,0.285385,5.997174,0.847467,0.341309
3,GLd3aX16zBg_clip3,0.487052,9.033134,0.817116,0.278809
4,GLd3aX16zBg_clip4,0.371357,8.322691,0.83986,0.279541
5,xHr8X2Wpmno_clip0,0.387925,8.092943,0.976756,0.255371
6,xHr8X2Wpmno_clip1,0.449516,8.223316,0.851917,0.281738
7,xHr8X2Wpmno_clip2,0.508822,12.754574,0.808669,0.333984
8,xHr8X2Wpmno_clip3,0.349934,9.989094,0.785435,0.318115
9,xHr8X2Wpmno_clip4,0.27861,8.975298,0.747874,0.278564
