In [1]:
from pytorchvideo.data.encoded_video import EncodedVideo
from torchvision.transforms import (
    Compose,
    Lambda,
    RandomCrop,
    RandomHorizontalFlip
)

from pytorchvideo.transforms import (
    ApplyTransformToKey,
    Normalize,
    RandomShortSideScale,
    RemoveKey,
    ShortSideScale,
    UniformTemporalSubsample
)
import pytorch_lightning
import pytorchvideo.models.resnet
import torch.nn as nn
import pathlib

In [2]:
transform = Compose(
  [
    ApplyTransformToKey(
      key="video",
      transform=Compose(
        [
          UniformTemporalSubsample(8),
          Lambda(lambda x: x / 255.0),
          Normalize((0.45, 0.45, 0.45), (0.225, 0.225, 0.225)),
        ]
      ),
    ),
  ]
)

device = 'cpu'

In [3]:
def make_kinetics_resnet():
  return pytorchvideo.models.resnet.create_resnet(
      input_channel=3, # RGB input from Kinetics
      model_depth=50, # For the tutorial let's just use a 50 layer network
      model_num_class=2, # Kinetics has 400 classes so we need out final head to align
      norm=nn.BatchNorm3d,
      activation=nn.ReLU,
  )


class VideoClassificationLightningModule(pytorch_lightning.LightningModule):
  def __init__(self):
    super().__init__()
    self.model = make_kinetics_resnet()

  def forward(self, x):
    return self.model(x)

  def training_step(self, batch, batch_idx):
    # The model expects a video tensor of shape (B, C, T, H, W), which is the
    # format provided by the dataset
    y_hat = self.model(batch["video"])

    # Compute cross entropy loss, loss.backwards will be called behind the scenes
    # by PyTorchLightning after being returned from this method.
    loss = F.cross_entropy(y_hat, batch["label"])

    # Log the train loss to Tensorboard
    self.log("train_loss", loss.item())

    return loss

  def validation_step(self, batch, batch_idx):
    y_hat = self.model(batch["video"])
    loss = F.cross_entropy(y_hat, batch["label"])
    self.log("val_loss", loss)
    return loss

  def configure_optimizers(self):
    """
    Setup the Adam optimizer. Note, that this function also can return a lr scheduler, which is
    usually useful for training video models.
    """
    return torch.optim.Adam(self.parameters(), lr=1e-1)

In [4]:
classifier = VideoClassificationLightningModule.load_from_checkpoint('/home/scott/Documents/fh/project_coursework/lightning_logs/version_1/checkpoints/epoch=16-step=935.ckpt')

In [7]:

def predict(video_filename):
    video = EncodedVideo.from_path(video_filename)


    # Select the duration of the clip to load by specifying the start and end duration
    # The start_sec should correspond to where the action occurs in the video
    start_sec = 0
    clip_duration = int(video.duration)
    end_sec = start_sec + clip_duration    

    # Load the desired clip
    video_data = video.get_clip(start_sec=start_sec, end_sec=2)

    # Apply a transform to normalize the video input
    video_data = transform(video_data)

    # Move the inputs to the desired device
    inputs = video_data["video"]
    inputs = inputs.to(device)

    # Pass the input clip through the model
    preds_pre_act = classifier(inputs[None, ...])
    if preds_pre_act[0][0] > preds_pre_act[0][1]:
        return 'day'
    else:
        return 'night'
    

In [23]:
preds_pre_act

tensor([[ 1.5931, -1.5851]], grad_fn=<ViewBackward0>)

In [14]:
DIR = pathlib.Path('/mnt/usb/bdd/bdd100k/videos/test/')
MAX_FILES = 50

file_counter = 0
results = {'day': [], 'night': []}

for file in DIR.iterdir():
    file_name = file.resolve()
    print(f'Processing file {file}')
    result = predict(file)
    results[result].append(str(file_name))
    
    file_counter += 1
    if file_counter >= MAX_FILES:
        break
        
print('done')
    

Processing file /mnt/usb/bdd/bdd100k/videos/test/cabc30fc-e7726578.mov
Processing file /mnt/usb/bdd/bdd100k/videos/test/cabc30fc-eb673c5a.mov
Processing file /mnt/usb/bdd/bdd100k/videos/test/cabc30fc-fd79926f.mov
Processing file /mnt/usb/bdd/bdd100k/videos/test/cabc9045-1b8282ba.mov
Processing file /mnt/usb/bdd/bdd100k/videos/test/cabc9045-581f64de.mov
Processing file /mnt/usb/bdd/bdd100k/videos/test/cabc9045-5a50690f.mov
Processing file /mnt/usb/bdd/bdd100k/videos/test/cabc9045-b3349548.mov
Processing file /mnt/usb/bdd/bdd100k/videos/test/cabc9045-c6dc9529.mov
Processing file /mnt/usb/bdd/bdd100k/videos/test/cabc9045-cd422b81.mov
Processing file /mnt/usb/bdd/bdd100k/videos/test/cabc9045-d91ecb66.mov
Processing file /mnt/usb/bdd/bdd100k/videos/test/cabddb96-ca0ac856.mov
Processing file /mnt/usb/bdd/bdd100k/videos/test/cabe1040-5f02711e.mov
Processing file /mnt/usb/bdd/bdd100k/videos/test/cabe1040-c59cb390.mov
Processing file /mnt/usb/bdd/bdd100k/videos/test/cabea010-6882cf41.mov
Proces

In [16]:
TYPES = ['day', 'night']
for type in TYPES:
    for file in results[type]:
        print(f'cp {file} {type}')

cp /mnt/usb/bdd/bdd100k/videos/test/cabc30fc-eb673c5a.mov day
cp /mnt/usb/bdd/bdd100k/videos/test/cabc30fc-fd79926f.mov day
cp /mnt/usb/bdd/bdd100k/videos/test/cabe1040-5f02711e.mov day
cp /mnt/usb/bdd/bdd100k/videos/test/cabe1040-c59cb390.mov day
cp /mnt/usb/bdd/bdd100k/videos/test/cabf7be1-36a39a28.mov day
cp /mnt/usb/bdd/bdd100k/videos/test/cabf7be1-f1a7e00d.mov day
cp /mnt/usb/bdd/bdd100k/videos/test/cabf9f3c-d58a6760.mov day
cp /mnt/usb/bdd/bdd100k/videos/test/cac07407-0396e053.mov day
cp /mnt/usb/bdd/bdd100k/videos/test/cac07407-0eb1c8bf.mov day
cp /mnt/usb/bdd/bdd100k/videos/test/cac07407-15b814db.mov day
cp /mnt/usb/bdd/bdd100k/videos/test/cac07407-196cd6f8.mov day
cp /mnt/usb/bdd/bdd100k/videos/test/cac07407-76e4c968.mov day
cp /mnt/usb/bdd/bdd100k/videos/test/cac07407-ba37148a.mov day
cp /mnt/usb/bdd/bdd100k/videos/test/cac07407-bc0b048a.mov day
cp /mnt/usb/bdd/bdd100k/videos/test/cac07407-e969f06a.mov day
cp /mnt/usb/bdd/bdd100k/videos/test/cac07407-fe32e494.mov day
cp /mnt/