In [1]:
import cv2
import numpy as np
from torchvision import transforms

def get_frames(filename, n_frames=3):
    """Sample evenly spaced RGB frames from a video file.

    Args:
        filename: Path of the video, handed to ``cv2.VideoCapture``.
        n_frames: Sampling divisor (not a frame count): ``v_len // n_frames``
            evenly spaced frame indices between the first and the last frame
            are kept.

    Returns:
        A tuple ``(frames, v_len)``. ``frames`` is the stack of sampled frames
        transposed with axes ``(0, 3, 2, 1)``; for OpenCV's
        height x width x channel frames that is
        frames x channel x width x height. ``v_len`` is the frame count
        reported by the video container.

    Raises:
        ValueError: If the video cannot be opened or no frame was sampled
            (for example when the video has fewer than ``n_frames`` frames).
    """
    frames = []
    v_cap = cv2.VideoCapture(filename)
    try:
        if not v_cap.isOpened():
            raise ValueError(f"could not open video: {filename}")

        v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
        n_samples = max(v_len // n_frames, 0)
        # int64 instead of int16: frame indices above 32767 must not wrap.
        # A set makes the per-frame membership test O(1).
        wanted = set(
            np.linspace(0, v_len - 1, n_samples, dtype=np.int64).tolist()
        )

        for fn in range(v_len):
            success, frame = v_cap.read()
            if not success:
                # Skip undecodable frames but keep the index in step with
                # the read position.
                continue
            if fn in wanted:
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture handle even when decoding fails.
        v_cap.release()

    if not frames:
        raise ValueError(f"no frames could be sampled from video: {filename}")

    # (frames, height, width, channel) -> (frames, channel, width, height).
    # Note the spatial axes are swapped: transpose a single frame with `.T`
    # to get back a height x width x channel image.
    np_asarray = np.transpose(np.asarray(frames), (0, 3, 2, 1))
    return np_asarray, v_len


In [2]:
# Smoke test: sample frames from one clip with the default n_frames, then
# print the shape of the returned array and its first sampled frame.
frames, length = get_frames("data/passes/pass260.mp4")
print(frames.shape)
print(frames[0])

(17, 3, 1280, 720)
[[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]]


In [3]:
# Scratch check of the index sampling used in get_frames: 18 evenly spaced
# values over 0..63, converted to integers by the int16 dtype.
np.linspace(0, 63, 18, dtype=np.int16)

array([ 0,  3,  7, 11, 14, 18, 22, 25, 29, 33, 37, 40, 44, 48, 51, 55, 59,
       63], dtype=int16)

In [4]:
from PIL import Image
# Preview pipeline: resize to 720x720, convert the PIL image to a tensor, then
# normalise each channel with the mean/std values below.
# NOTE(review): these look like the usual ImageNet statistics — confirm.
data_transform = transforms.Compose([
    transforms.Resize((720, 720)),
    transforms.ToTensor(),

    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
])

# frames[10] comes from get_frames, whose transpose (0, 3, 2, 1) reverses the
# per-frame axes; `.T` reverses them again before handing the array to PIL.
image =  Image.fromarray(frames[10].T, "RGB")
new_frames = data_transform(image)
# NOTE(review): squeeze_(0) only drops axis 0 when its size is 1; for a single
# RGB image the transformed tensor presumably has the 3 channels there, which
# would make this an in-place no-op — confirm and drop if so.
tensor_to_pil = transforms.ToPILImage()(new_frames.squeeze_(0))
# new_frames
# Open the original frame and the transformed frame for visual comparison.
image.show()
tensor_to_pil.show()

In [5]:
# Scratch check of list slicing: drop the first 3 and the last 2 of 10 items.
l = list(range(10))
l[3:len(l)-2]

[3, 4, 5, 6, 7]

In [14]:
def get_frames(filename, n_frames=3):
    """Sample evenly spaced RGB frames from a video file.

    Args:
        filename: Path of the video, handed to ``cv2.VideoCapture``.
        n_frames: Sampling divisor (not a frame count): ``v_len // n_frames``
            evenly spaced frame indices between the first and the last frame
            are kept, where ``v_len`` is the container's reported frame count.

    Returns:
        A tuple ``(frames, count)``. ``frames`` is the stack of sampled frames
        transposed with axes ``(0, 3, 2, 1)``; for OpenCV's
        height x width x channel frames that is
        frames x channel x width x height. ``count`` is ``len(frames)``, the
        number of frames actually sampled.

    Raises:
        ValueError: If the video cannot be opened or no frame was sampled
            (for example when the video has fewer than ``n_frames`` frames).
    """
    frames = []
    v_cap = cv2.VideoCapture(filename)
    try:
        if not v_cap.isOpened():
            raise ValueError(f"could not open video: {filename}")

        v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
        n_samples = max(v_len // n_frames, 0)
        # int64 instead of int16: frame indices above 32767 must not wrap.
        # A set makes the per-frame membership test O(1).
        wanted = set(
            np.linspace(0, v_len - 1, n_samples, dtype=np.int64).tolist()
        )

        for fn in range(v_len):
            success, frame = v_cap.read()
            if not success:
                # Skip undecodable frames but keep the index in step with
                # the read position.
                continue
            if fn in wanted:
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture handle even when decoding fails.
        v_cap.release()

    if not frames:
        raise ValueError(f"no frames could be sampled from video: {filename}")

    # (frames, height, width, channel) -> (frames, channel, width, height).
    # Note the spatial axes are swapped: transpose a single frame with `.T`
    # to get back a height x width x channel image.
    np_asarray = np.transpose(np.asarray(frames), (0, 3, 2, 1))
    return np_asarray, len(np_asarray)


In [38]:
# Sample frames from another clip and display the shape of the returned array.
frames, length = get_frames("data/passes/pass121.mp4")
frames.shape

(21, 3, 1280, 720)

In [16]:
def _cut_frames(frames, length, number_of_frames_wanted):
    difference = length - number_of_frames_wanted
    half_of_frames_to_delete = difference // 2

    return frames[half_of_frames_to_delete: length - half_of_frames_to_delete]

In [42]:
# get_frames returns each frame with its axes reversed (channel x width x
# height). Image.fromarray expects height x width x channel, so transpose the
# frame back first; otherwise PIL silently builds an image from the wrong bytes.
image = Image.fromarray(frames[1].T, "RGB")
image = data_transform(image)
image.shape

torch.Size([3, 256, 256])

In [21]:
import cv2
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torch
from torchvision import transforms
from PIL import Image

class VideoDataSet(Dataset):
    """Dataset of labelled videos, each served as a list of transformed frames.

    Args:
        all_video_file: CSV source readable by ``np.genfromtxt`` with two
            comma-separated columns per row: video file path, label.
        transformers: Callable applied to every frame (as a PIL image).
        how_many_frames: Number of frames to keep per video; the sampled
            frames are trimmed to a centred window by ``_cut_frames``.
    """

    def __init__(self, all_video_file, transformers, how_many_frames):
        # Map the CSV (file path, label) to a 2-D string array.
        # np.str_ replaces the np.unicode_ alias, which NumPy 2.0 removed.
        # atleast_2d keeps a one-row CSV as shape (1, 2); genfromtxt would
        # otherwise return a flat pair and break __len__ / __getitem__.
        self.videos = np.atleast_2d(
            np.genfromtxt(all_video_file, delimiter=",", dtype=np.str_)
        )
        self.transformers = transformers
        self.how_many_frames = how_many_frames

    def __len__(self):
        """Return the number of videos listed in the CSV."""
        return len(self.videos)

    def __getitem__(self, idx):
        """Return ``(frames, label)`` for the video at row ``idx``.

        ``frames`` is a list with one transformed frame per kept video frame;
        ``label`` is the label column as read from the CSV (a string).
        """
        movie, label = self.videos[idx]
        frames, length = get_frames(movie)
        frames = _cut_frames(frames, length, self.how_many_frames)
        frames_torch = []

        for frame in frames:
            # get_frames yields frames with reversed axes (channel x width x
            # height). Image.fromarray needs height x width x channel, so
            # transpose back; otherwise PIL builds a garbage image.
            image = Image.fromarray(np.ascontiguousarray(frame.T), "RGB")
            frames_torch.append(self.transformers(image))
        return frames_torch, label


# Per-frame preprocessing handed to VideoDataSet: resize to 256x256, convert
# the PIL image to a tensor, then normalise each channel with fixed mean/std.
data_transform = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [48]:
# End-to-end check: load one sample through the dataset and confirm that its
# frame list stacks into a single (frames, channel, height, width) tensor.
ds = VideoDataSet("data/videos.csv", data_transform, 15)
frames = ds[9][0]
torch.stack(frames).shape

torch.Size([15, 3, 256, 256])