In [1]:
import sys
import torch
import numpy as np
import cv2
import glob
import os
import torch
from torchinfo import summary
import torch.nn.functional as F

In [2]:
def read_video(path: str):
    frames = []
    cap = cv2.VideoCapture(path)
    ret = True
    while ret:
        ret, img = cap.read()
        if ret:
            frames.append(img)
    video = np.stack(frames, axis = 0) #(T,H,W,C) Total frames, Height, Width, Channels
    video_tensor = torch.tensor(np.transpose(video, (3,0,1,2))[np.newaxis, ...]).detach().to('cuda').float()
    return video_tensor

#TypeError: can't convert np.ndarray of type numpy.object_, indicates that the video array is being stored as a numpy.object_ type, which typically happens when the array contains elements of varying types or shapes. PyTorch’s torch.tensor() function expects a numpy array with a specific, uniform data type (such as float32, int32, etc.), and numpy.object_ isn't one of the supported types.

In [3]:

def read_video_from_frames(path: str, num_frames: int):
    frames = []
    for i in range(num_frames):
        full_path = f'{path}/frame_{i:04d}.jpg'
        im = cv2.imread(full_path)
        
        # Ensure the image is in RGB format
        if im is None:
            raise FileNotFoundError(f"Frame {full_path} not found.")
        if len(im.shape) == 2:  # If the image is grayscale
            im = cv2.cvtColor(im, cv2.COLOR_GRAY2RGB)  # Convert grayscale to RGB
        
        frames.append(im)
    
    # Stack frames into a numpy array with shape (T, H, W, C)
    video = np.stack(frames, axis=0)
    
    # Transpose to (C, T, H, W) and add batch dimension
    video_tensor = torch.tensor(np.transpose(video, (3, 0, 1, 2))[np.newaxis, ...]).detach().to('cuda').float()
    
    return video_tensor


In [4]:
# def read_video(path: str):
#   frames = []
#   cap = cv2.VideoCapture(path)
#   ret = True
#   while ret:
#       ret, img = cap.read() # read one frame from the 'capture' object; img is (H, W, C)
#       if ret:
#           frames.append(img)
#   video = np.stack(frames, axis=0) # dimensions (T, H, W, C)
#   return torch.tensor(np.transpose(video, (3,0,1,2))[np.newaxis, ...]).detach().to('cuda').float()

# def read_video_from_frames(path: str, num_frames: int):
#   frames = []
#   for i in range(num_frames):
#     full_path = f'{path}/{i:04d}.png'
#     im = cv2.imread(full_path)
#     frames.append(im)
#   video = np.stack(frames, axis=0) # dimensions (T, H, W, C)
#   # return video
#   return torch.tensor(np.transpose(video, (3,0,1,2))[np.newaxis, ...]).detach().to('cuda').float()

In [5]:
from hpvaegan_code.evaluate_sifid_our_samples import (
    calculate_activation_statistics, calculate_frechet_distance
)
from hpvaegan_code.C3D_model import C3D
import numpy as np

dims = 256
block_idx = C3D.BLOCK_INDEX_BY_DIM[dims]
model = C3D(block_idx)
model.load_state_dict(torch.load(os.path.join(os.getcwd(), "c3d.pickle") , weights_only = True))
model = model.cuda().eval() 
# model = model.eval()

In [6]:
print(model)

C3D(
  (conv1): Conv3d(3, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (pool1): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv3d(64, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (pool2): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv3a): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (conv3b): Conv3d(256, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (pool3): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv4a): Conv3d(256, 512, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (conv4b): Conv3d(512, 512, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (pool4): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv5a): Conv3d(512, 512, kernel_size=(3, 3, 3), stride=

In [11]:
summary(model, input_size = [64, 3, 16, 128, 128] , device = "cpu")

Layer (type:depth-idx)                   Output Shape              Param #
C3D                                      [64, 64, 16, 64, 64]      77,109,735
├─Conv3d: 1-1                            [64, 64, 16, 128, 128]    (5,248)
├─ReLU: 1-2                              [64, 64, 16, 128, 128]    --
├─MaxPool3d: 1-3                         [64, 64, 16, 64, 64]      --
├─Conv3d: 1-4                            [64, 128, 16, 64, 64]     (221,312)
├─ReLU: 1-5                              [64, 128, 16, 64, 64]     --
├─MaxPool3d: 1-6                         [64, 128, 8, 32, 32]      --
├─Conv3d: 1-7                            [64, 256, 8, 32, 32]      (884,992)
├─ReLU: 1-8                              [64, 256, 8, 32, 32]      --
├─Conv3d: 1-9                            [64, 256, 8, 32, 32]      (1,769,728)
├─ReLU: 1-10                             [64, 256, 8, 32, 32]      --
├─MaxPool3d: 1-11                        [64, 256, 4, 16, 16]      --
Total params: 79,991,015
Trainable params: 0
Non-

In [8]:
cd VidPanos/

[Errno 2] No such file or directory: 'VidPanos/'
/home/jaykumaran/Vision_Projects/VidPanos


  bkms = self.shell.db.get('bookmarks', {})


In [9]:
frames_path = "data/data/real/VID_20160326_121131_02/original"
num_frames = int(len(os.listdir(frames_path)))
print("Total frames in dir", num_frames)
video = read_video_from_frames(frames_path, num_frames)
print("Video rendered from frames", video.shape) #B,C,T,H,W

Total frames in dir 88
Video rendered from frames torch.Size([1, 3, 88, 1280, 720])


In [10]:
pred = model(video.to("cuda"))

#sample code 
#https://drive.google.com/drive/folders/1T2Gff-e7jbimxioPUDq6e7Y5kQs3jwDa

OutOfMemoryError: CUDA out of memory. Tried to allocate 19.34 GiB. GPU 0 has a total capacity of 5.76 GiB of which 3.50 GiB is free. Including non-PyTorch memory, this process has 2.21 GiB memory in use. Of the allocated memory 2.11 GiB is allocated by PyTorch, and 7.11 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)