In [8]:
import os
import numpy as np
import torch
import torch.nn as nn

## Testing speed difference between loading to cpu and gpu

In [46]:
from time import perf_counter_ns

# Load one pre-processed video from disk. Forward slashes are used so the
# relative path resolves on Windows, Linux and macOS alike.
processed_video = np.load("../processed_data_10_frames/Val/fake/1(smile).npy")

# Target device for this timing run; switch the two lines below to time the GPU copy.
# device = "cuda"
device = "cpu"

start = perf_counter_ns()
video = torch.from_numpy(processed_video).to(device)
# Only wait on the GPU when the tensor was actually sent there: the call is
# pointless for a CPU run and raises on machines without CUDA.
if torch.device(device).type == "cuda":
    torch.cuda.synchronize()
end = perf_counter_ns()

# perf_counter_ns() reports nanoseconds, hence the division by 1000.
print(f"total time in microseconds: {(end-start)/1000}")
print(video.shape)

total time in microseconds: 133.4
torch.Size([10, 64, 64, 3])


## Defining base dataset class for videos

In [2]:
# I want the dataset class to construct the batches, instead of the data loader object.


class VideoDataset(torch.utils.data.Dataset):
    """Dataset that serves whole, pre-built batches of pre-processed videos.

    Every ``.npy`` file under ``<processed_dir>/<subset>/real`` is labelled 0
    and every ``.npy`` file under ``<processed_dir>/<subset>/fake`` is
    labelled 1. The file list is shuffled once at construction time and then
    cut into consecutive batches, so one dataset item is one batch.

    Args:
        processed_dir: Directory containing one sub-directory per subset.
        subset: Name of the subset sub-directory (default "Train").
        batch_size: Number of videos per batch; must be a positive integer.
        device: Device the returned tensors are moved to.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """

    def __init__(self, processed_dir = "../Processed_data", subset = "Train", batch_size = 32, device = "cuda"):
        if batch_size <= 0:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        self.directory = processed_dir+"/"+subset
        self.batch_size = batch_size
        self.subset = subset
        self.device = device

        self.videos = self._get_video_paths()
        self._shuffle_data()

        # Ceiling division: a trailing partial batch counts as a batch, but an
        # exact multiple of batch_size must not produce an extra, empty batch.
        self.length = (len(self.videos) + self.batch_size - 1) // self.batch_size

    def _get_video_paths(self):
        """Return a list of ``(path, label)`` pairs for every ``.npy`` file found."""
        videos = []
        for folder, label in (("real", 0), ("fake", 1)):
            for filename in os.listdir(self.directory + "/" + folder):
                if filename.endswith(".npy"):
                    videos.append((self.directory + "/" + folder + "/" + filename, label))
        return videos

    def _shuffle_data(self):
        """Shuffle the ``(path, label)`` list in place using NumPy's global RNG."""
        np.random.shuffle(self.videos)

    def __len__(self):
        """Return the number of batches (not the number of videos)."""
        return self.length

    def __getitem__(self, index):
        """Load and return the batch at position ``index``.

        Returns:
            A ``(features, targets)`` pair of tensors on ``self.device``.
            ``features`` stacks the loaded videos along a new leading axis
            (expected shape: batch, n_frames, height, width, n_channels) and
            ``targets`` holds one label per video: 0 for real, 1 for fake.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
        """
        if index < 0:
            index += self.length
        if not 0 <= index < self.length:
            # Without this check an out-of-range index would reach
            # torch.stack with an empty list and fail with an opaque error.
            raise IndexError(f"batch index {index} out of range for {self.length} batches")

        first = index * self.batch_size
        batch = self.videos[first:first + self.batch_size]

        # Assemble the batch on the CPU and move it to the device in a single
        # transfer instead of one transfer per video.
        features = torch.stack([torch.from_numpy(np.load(video_path)) for video_path, _ in batch])
        targets = torch.tensor([label for _, label in batch])
        return features.to(self.device), targets.to(self.device)



## Testing the speed of the data loader

In [5]:
# Validation set served as ready-made batches of 128 videos placed on the GPU.
# The path uses forward slashes (like the VideoDataset default) so it resolves
# on every operating system, not only on Windows.
val_dataset = VideoDataset(processed_dir= "../Processed_Data", subset= "Val", batch_size= 128, device = "cuda")
# batch_size=None turns off the DataLoader's own batching because each dataset
# item is already a full batch; the identity collate_fn passes it through as is.
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size= None, collate_fn= lambda x: x)

In [7]:
from time import perf_counter

# Time how long it takes to iterate the validation dataloader up to and
# including the batch at index 23.
start = perf_counter()
for i,thing in enumerate(val_dataloader):
    # Earlier batches are loaded and discarded; only batch 23 is inspected.
    # NOTE(review): if the dataloader yields fewer than 24 batches nothing is printed.
    if i == 23:
        # thing[0] is the first element of the yielded item, i.e. the stacked
        # video features returned by VideoDataset.__getitem__.
        print(f"The type is {type(thing[0])}, the lenght is {len(thing[0])}")
        print(thing[0].shape)
        break
# Wait for any pending CUDA work before reading the clock so it is included
# in the measurement.
torch.cuda.synchronize()
end = perf_counter()
# perf_counter() returns fractional seconds.
print(f"Time elapsed was {end-start}")

The type is <class 'torch.Tensor'>, the lenght is 56
torch.Size([56, 20, 64, 64, 3])
Time elapsed was 3.7737520000664517


From my testing so far, this dataloader appears to run at roughly the same speed as the one we use with TensorFlow. I tested videos with 10, 20, and 40 frames, using batch sizes of 32 and 128 videos.

## Making the Recurrent Neural Network

In [None]:
# first I have to make the Time distributed class, that applies the same CNN to each temporal slice of the input

#this code is provided by chatGPT, I have to modify it and test it

class TimeDistributed(nn.Module):
    """Apply the same layer independently to every frame of a batch of videos.

    The input is channels-last, ``(batch_size, n_frames, height, width,
    n_channels)``. Frames are folded into the batch axis and rearranged to the
    channels-first layout ``(batch_size * n_frames, n_channels, height,
    width)`` before being passed to the wrapped layer. The layer's output is
    then unfolded back to ``(batch_size, n_frames, ...)``.

    Args:
        layer: Module applied to the stacked frames.
    """

    def __init__(self, layer):
        super().__init__()
        self.layer = layer

    def forward(self, x):
        """Run ``self.layer`` on every frame of ``x``.

        Args:
            x: Tensor of shape (batch_size, n_frames, height, width, n_channels).

        Returns:
            Tensor of shape ``(batch_size, n_frames, *per_frame_output_shape)``;
            for a layer producing feature maps this is
            (batch_size, n_frames, output_dim, new_height, new_width).
        """
        batch_size, n_frames, height, width, n_channels = x.size()

        # Merge the batch and time axes. reshape (unlike view) also accepts
        # non-contiguous input.
        frames = x.reshape(batch_size * n_frames, height, width, n_channels)

        # Move channels in front of the spatial axes. A plain view/reshape to
        # (N, C, H, W) would only reinterpret memory and scramble the pixels,
        # so the axes must be permuted explicitly. contiguous() keeps wrapped
        # layers that rely on .view() working.
        frames = frames.permute(0, 3, 1, 2).contiguous()

        # Apply the layer to all frames at once.
        y = self.layer(frames)

        # Split the leading axis back into (batch_size, n_frames) and keep
        # whatever per-frame shape the layer produced.
        return y.reshape(batch_size, n_frames, *y.shape[1:])