In [2]:
import cv2
import torch
import numpy as np
import torch.nn.functional as F

In [6]:
chunk_path = "/data/ai_club/nes_2025/swag/idm/data/numpy/4814603/frames_1024_2048.npy"

chunk_array = np.load(chunk_path, mmap_mode="r+")
chunk_tensor = torch.from_numpy(chunk_array).float()
chunk_tensor = F.interpolate(
    chunk_tensor,
    size=(64, 60),
    mode="bilinear",
    align_corners=False,
)
frame = chunk_tensor[600].permute(1, 2, 0)
frame = (frame * 255).byte().numpy()
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
cv2.imwrite("/data/ai_club/nes_2025/swag/frame_2.png", frame)

True

In [28]:
chunk_path = "/data/ai_club/nes_2025/swag/idm/data/numpy/4814603/frames_1024_2048.npy"

chunk_array = np.load(chunk_path, mmap_mode="r+")
frame = chunk_array[600].transpose(1, 2, 0)  # CHW -> HWC
frame = cv2.resize(frame, (60, 64), interpolation=cv2.INTER_AREA)
cv2.imwrite("/data/ai_club/nes_2025/swag/frame_1.png", frame)

True

In [3]:
chunk_path = "/data/ai_club/nes_2025/swag/idm/data/numpy/4814603/frames_1024_2048.npy"

chunk_array = np.load(chunk_path, mmap_mode="r+")
frame = chunk_array[600]  # Keep as CHW
frame = F.interpolate(torch.from_numpy(frame).float().unsqueeze(0), size=(64, 60), mode='area')[0].numpy()
frame = frame.transpose(1, 2, 0)  # Convert to HWC at the end for cv2
cv2.imwrite("/data/ai_club/nes_2025/swag/frame_1.png", frame)

[ WARN:0@3.104] global loadsave.cpp:848 imwrite_ Unsupported depth image for selected encoder is fallbacked to CV_8U.


True

In [6]:
chunk_path = "/data/ai_club/nes_2025/swag/idm/data/numpy/4814603/frames_1024_2048.npy"

chunk_array = np.load(chunk_path, mmap_mode="r+")
frame = F.interpolate(torch.from_numpy(chunk_array).float(), size=(64, 60), mode='area')[600].numpy()
frame = frame.transpose(1, 2, 0)  # Convert to HWC at the end for cv2
cv2.imwrite("/data/ai_club/nes_2025/swag/frame_1.png", frame)

True

In [58]:
chunk_path = "/data/ai_club/nes_2025/swag/idm/data/numpy/4814603/frames_1024_2048.npy"

chunk_array = np.load(chunk_path, mmap_mode="r+")
chunk_tensor = torch.from_numpy(chunk_array).float()
frame = F.interpolate(chunk_tensor, size=(64, 60), mode='area')
print(frame.shape)
frame = frame.numpy()[600]
frame = frame.transpose(1, 2, 0)  # Single transpose at end for HWC
cv2.imwrite("/data/ai_club/nes_2025/swag/frame_0.png", frame)

torch.Size([1024, 3, 64, 60])


True

In [7]:
chunk_path = "/data/ai_club/nes_2025/swag/idm/data/numpy/4814603/frames_1024_2048.npy"

chunk_array = np.load(chunk_path, mmap_mode="r+")
frame = chunk_array[600].transpose(1, 2, 0)  # CHW -> HWC
frame = cv2.resize(frame, (60, 64), interpolation=cv2.INTER_LINEAR)
frame = (frame * 255).astype(np.uint8)
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
cv2.imwrite("/data/ai_club/nes_2025/swag/frame_3.png", frame)


True

In [19]:
import os
import cv2
import time


def main():
    args = {
        "input_dir": "/data/ai_club/nes_2025/swag/idm/data/raw",
        "output_dir": "/data/ai_club/nes_2025/swag/idm/data/formatted",
        "width": 256,
        "height": 240,
        "labels": True,
        "video_id": "4814603",
    }

    video_filepath = os.path.join(args['input_dir'], f"video_{args['video_id']}.mp4")
    if not os.path.exists(video_filepath):
        print(f"Video file {video_filepath} does not exist.")
        return

    cap = cv2.VideoCapture(video_filepath)
    if not cap.isOpened():
        print("Failed to open video.")
        return

    original_fps = cap.get(cv2.CAP_PROP_FPS)
    print(f"Original FPS: {original_fps}")
    frame_skip_interval = max(1, int(original_fps / 30))
    print(f"Frame skip interval: {frame_skip_interval}")

    output_video_path = os.path.join(args['output_dir'], f"video_{args['video_id']}.mp4")
    os.makedirs(args['output_dir'], exist_ok=True)

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    video_writer = cv2.VideoWriter(
        output_video_path, fourcc, 30, (args['width'], args['height'])
    )

    frame_idx = 0
    saved_frames = 0
    start_time = time.time()

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_idx % frame_skip_interval == 0:
            frame_resized = cv2.resize(frame, (args['width'], args['height']))
            video_writer.write(frame_resized)
            saved_frames += 1
        if frame_idx == 1624 * 2:
            frame_resized = torch.from_numpy(frame_resized).permute(2,0,1).unsqueeze(0).float()
            frame_resized = F.interpolate(frame_resized, size=(60, 64), mode='linear', align_corners=True)
            frame_resized = frame_resized.squeeze(0).permute(1,2,0).numpy()
            cv2.imwrite(os.path.join(args['output_dir'], f"frame_{frame_idx}.png"), frame_resized)
            cap.release()
            return
        frame_idx += 1

    video_writer.release()
    elapsed_time = time.time() - start_time

    if args['labels']:
        with open(os.path.join(args['input_dir'], f"labels_{args['video_id']}.txt"), "r") as f:
            labels = f.readlines()

        sampled_labels = labels[::frame_skip_interval]

        with open(os.path.join(args['output_dir'], f"labels_{args['video_id']}.txt"), "w") as f:
            f.writelines(sampled_labels)

    print(f"Saved {saved_frames} frames to output video: {output_video_path}")
    print(f"Processing completed in {elapsed_time:.2f} seconds.")


main()

Original FPS: 60.0
Frame skip interval: 2


NotImplementedError: Got 4D input, but linear mode needs 3D input

In [20]:
import numpy as np
import cv2

# Load the numpy file containing frame 1642
start_frame = 1024  # Based on sequence length from convert_to_numpy_2.py
end_frame = 2048
numpy_path = os.path.join("/data/ai_club/nes_2025/swag/idm/data/testing", f"{'4814603'}/frames_{start_frame}_{end_frame}.npy")

# Load the numpy array
video_data = np.load(numpy_path)

# Get frame 1642 (adjusting for sequence offset)
frame_idx = 1642 - start_frame
frame = video_data[frame_idx]

# Convert from TCHW to HWC format
frame = frame.transpose(1, 2, 0)

# Save the frame
output_path = os.path.join('/data/ai_club/nes_2025/swag/idm/data/testing', f"frame_1642_from_numpy.png") 
cv2.imwrite(output_path, frame)



True

In [46]:
chunk_path = "/data/ai_club/nes_2025/swag/idm/data/numpy/4814603/frames_1024_2048.npy"

chunk_array = np.load(chunk_path, mmap_mode="r+")
chunk_tensor = torch.from_numpy(chunk_array).float()
chunk_tensor = F.interpolate(
    chunk_tensor.permute(0,3,1,2),
    size=(64, 60),
    mode="area"
).permute(0,2,3,1)
cv2.imwrite("/data/ai_club/nes_2025/swag/frame_0.png", chunk_tensor[600].numpy())

error: OpenCV(4.11.0) /io/opencv/modules/imgcodecs/src/loadsave.cpp:843: error: (-215:Assertion failed) image.channels() == 1 || image.channels() == 3 || image.channels() == 4 in function 'imwrite_'
