In [1]:
# Mount Google Drive into the Colab runtime; every path used below lives under /content/drive/MyDrive.
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import sys
# Put the project's module directory on the import path so that `model` and `eval`
# can be imported below (presumably model.py / eval.py live there; verify on Drive).
sys.path.append('/content/drive/MyDrive/hackathon_swf/module/')

In [3]:
# Re-import edited project modules automatically before each cell runs,
# so changes to the files on the import path are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
import cv2
import pandas as pd
import numpy as np

from pathlib import Path

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

import torchvision
from torchvision import transforms

from model import EventDetector
from eval import ToTensor, Normalize

In [5]:
# Pick the GPU when one is available, otherwise fall back to the CPU.
# NOTE(review): whether EventDetector itself runs on CPU depends on module/model.py — confirm.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the event detector; these hyper-parameters must match the ones the
# checkpoint was trained with, otherwise load_state_dict() below fails on a key/shape mismatch.
model = EventDetector(pretrain=True,
                      width_mult=1.,
                      lstm_layers=1,
                      lstm_hidden=256,
                      bidirectional=True,
                      dropout=False)

# map_location remaps tensors that were saved from a GPU, so the checkpoint
# also loads on a runtime without CUDA instead of raising inside torch.load.
save_dict = torch.load('/content/drive/MyDrive/hackathon_swf/weights/swingnet_1800.pth.tar',
                       map_location=device)
model.load_state_dict(save_dict['model_state_dict'])
model.to(device)
# Inference mode (disables dropout / uses running batch-norm statistics);
# kept as the last expression so the cell still displays the module structure.
model.eval()

EventDetector(
  (cnn): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
        (3): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
        (3

In [6]:
# Baseline: run the project's reference script on the same video so its printed
# event frames / confidences can be compared with the in-notebook run below.
# NOTE(review): `-s 256` is presumably the sequence length (the notebook uses 64) — confirm in test_video.py.
!python /content/drive/MyDrive/hackathon_swf/module/test_video.py -p /content/drive/MyDrive/hackathon_swf/videos/test_video.mp4 -s 256

Preparing video: /content/drive/MyDrive/hackathon_swf/videos/test_video.mp4
Using device: cuda
Loaded model weights
Testing...
Predicted event frames: [ 53  86  98 114 132 143 151 236]
Condifence: [0.177, 0.604, 0.8, 0.728, 0.926, 0.975, 0.788, 0.29]


In [6]:
class SampleVideo(Dataset):
    """Dataset that exposes one video file as a single sample.

    The sample is a dict with two entries:

    * ``'images'`` – every decoded frame, resized so its longer side equals
      ``input_size``, padded to a square ``input_size`` x ``input_size`` image
      and converted from BGR to RGB.
    * ``'labels'`` – an array of zeros, one per frame, present only so the
      sample has the layout the transforms expect.

    Args:
        path: Path of the video file, handed to ``cv2.VideoCapture``.
        input_size: Side length in pixels of the square output frames.
        transform: Optional callable applied to the sample dict before it is returned.
    """

    def __init__(self, path, input_size=160, transform=None):
        self.path = path
        self.input_size = input_size
        self.transform = transform

    def __len__(self):
        """Return 1: the whole video is the only sample."""
        return 1

    def __getitem__(self, idx):
        """Decode the complete video and return it as one sample dict.

        ``idx`` is not used; the dataset holds exactly one sample.

        Raises:
            IOError: If the video at ``self.path`` cannot be opened.
        """
        cap = cv2.VideoCapture(self.path)
        try:
            if not cap.isOpened():
                # Without this check an unreadable file reports a 0x0 frame size
                # and surfaces as an unrelated ZeroDivisionError below.
                raise IOError('Could not open video: {}'.format(self.path))

            frame_size = [cap.get(cv2.CAP_PROP_FRAME_HEIGHT), cap.get(cv2.CAP_PROP_FRAME_WIDTH)]
            # Scale so the longer side becomes input_size, keeping the aspect ratio.
            ratio = self.input_size / max(frame_size)
            new_size = tuple([int(x * ratio) for x in frame_size])
            # Split the remaining space evenly between the two opposite borders.
            delta_w = self.input_size - new_size[1]
            delta_h = self.input_size - new_size[0]
            top, bottom = delta_h // 2, delta_h - (delta_h // 2)
            left, right = delta_w // 2, delta_w - (delta_w // 2)

            # preprocess and return frames
            images = []
            for _ in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT))):
                ok, img = cap.read()
                if not ok or img is None:
                    # The reported frame count can exceed the number of frames that
                    # actually decode; stop at the first failed read instead of
                    # passing None on to cv2.resize.
                    break
                resized = cv2.resize(img, (new_size[1], new_size[0]))
                b_img = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT,
                                           value=[0.406 * 255, 0.456 * 255, 0.485 * 255])  # ImageNet means (BGR)
                b_img_rgb = cv2.cvtColor(b_img, cv2.COLOR_BGR2RGB)

                images.append(b_img_rgb)
        finally:
            # Release the capture handle even when decoding raises.
            cap.release()

        labels = np.zeros(len(images))  # only for compatibility with transforms
        sample = {'images': np.asarray(images), 'labels': np.asarray(labels)}
        if self.transform:
            sample = self.transform(sample)
        return sample

In [7]:
# Preprocessing applied to the decoded clip: tensor conversion followed by
# per-channel normalisation (the means are the RGB counterpart of the BGR padding
# colour used in SampleVideo; the std values are presumably the matching ImageNet ones).
preprocess = transforms.Compose([
    ToTensor(),
    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
ds = SampleVideo('/content/drive/MyDrive/hackathon_swf/videos/test_video.mp4', transform=preprocess)

# The dataset contains a single sample (the whole video), so it is served one at a time, in order.
dl = DataLoader(dataset=ds, batch_size=1, shuffle=False, drop_last=False)


In [8]:
# Number of frames passed to the model per forward pass; the inference loops below
# walk through the video in chunks of this many frames.
seq_length = 64

In [9]:
print('Testing...')
# Run on whatever device the model's parameters already live on.
device = next(model.parameters()).device

# Pure inference: disabling autograd avoids keeping activations for a backward
# pass that never happens, which matters because memory is the constraint here.
with torch.no_grad():
    for sample in dl:
        images = sample['images']
        chunk_probs = []
        # full samples do not fit into GPU memory so evaluate sample in 'seq_length' batches
        # (slicing past the end is clamped, so the last, shorter chunk needs no special case)
        for start in range(0, images.shape[1], seq_length):
            image_batch = images[:, start:start + seq_length, :, :, :]
            logits = model(image_batch.to(device))
            chunk_probs.append(F.softmax(logits, dim=1).cpu().numpy())
        # One row per frame, one column per class; joined once instead of
        # re-copying the growing array on every chunk.
        probs = np.concatenate(chunk_probs, axis=0)

# For each class pick the frame with the highest probability; the last class is dropped
# (presumably the "no event" class — verify against the model's label definition).
events = np.argmax(probs, axis=0)[:-1]
print('Predicted event frames: {}'.format(events))

# Probability the model assigned to each event at its predicted frame.
confidence = [probs[e, i] for i, e in enumerate(events)]
print('Condifence: {}'.format([np.round(c, 3) for c in confidence]))


Testing...
Predicted event frames: [ 74  86  98 114 132 143 151 236]
Condifence: [0.095, 0.593, 0.795, 0.718, 0.865, 0.975, 0.765, 0.159]


### ONNX export and test

In [48]:
import torch.onnx

# Dummy clip used only to trace the graph; same layout the inference loop feeds the
# model: 1 video x 64 frames x 3 channels x 160 x 160 pixels.
X = torch.randn(1, 64, 3, 160, 160).cuda()
# The number of frames per chunk varies (the last chunk of a video is shorter), so it
# is the dynamic dimension: axis 1 of the input and axis 0 of the output, which has
# one row per frame and one column per class. Marking output axis 1 as dynamic
# would wrongly declare the fixed class axis as variable and the frame axis as fixed.
dynamic_axes = {'input': {1: 'num_frames'}, 'output': {0: 'num_frames'}}
torch.onnx.export(model, X, "swing_stages.onnx", input_names=["input"], output_names=["output"], dynamic_axes=dynamic_axes)



verbose: False, log level: Level.ERROR



In [49]:
import onnx

# Read the exported file back from disk and run the ONNX checker on it,
# so a malformed export is caught here rather than inside the runtime session.
onnx_model = onnx.load("swing_stages.onnx")
onnx.checker.check_model(onnx_model)

In [50]:
import onnxruntime as ort

In [51]:
# Create the ONNX Runtime session for the exported model.
# NOTE(review): only the CUDA provider is requested; on a runtime without a usable GPU
# there is presumably no CPU fallback — confirm whether 'CPUExecutionProvider' should be appended.
ort_sess = ort.InferenceSession('swing_stages.onnx', providers=['CUDAExecutionProvider'])

In [45]:
from scipy.special import softmax

In [52]:
# Same evaluation as the PyTorch run, but with the exported graph executed by ONNX Runtime.
for sample in dl:
    images = sample['images']
    total_frames = images.shape[1]
    chunk_probs = []
    # full samples do not fit into GPU memory so evaluate sample in 'seq_length' batches
    for start in range(0, total_frames, seq_length):
        # slicing past the end is clamped, so the final chunk is simply shorter
        image_batch = images[:, start:start + seq_length, :, :, :]
        logits = ort_sess.run(["output"], {'input': image_batch.numpy()})[0]
        chunk_probs.append(softmax(logits, axis=1))
    # stack the per-chunk probabilities along the frame axis
    probs = np.concatenate(chunk_probs, axis=0)

# frame with the highest probability for every class except the last one
events = np.argmax(probs, axis=0)[:-1]
print('Predicted event frames: {}'.format(events))

# probability of each event at its predicted frame
confidence = [probs[frame, cls] for cls, frame in enumerate(events)]
print('Condifence: {}'.format([np.round(c, 3) for c in confidence]))

Predicted event frames: [ 74  86  98 114 132 143 151 236]
Condifence: [0.095, 0.593, 0.795, 0.718, 0.865, 0.975, 0.765, 0.159]


In [35]:
# Inspect the name of the session's first input; it must match the key used in ort_sess.run(...).
ort_sess.get_inputs()[0].name

'input'

In [43]:
# Run the session once on whatever `image_batch` is currently in the kernel namespace
# (it is left over from an inference loop) to inspect the raw output.
# NOTE(review): depends on leftover loop state and out-of-order execution counts — confirm it survives Run All.
logits = ort_sess.run(["output"], {'input': image_batch.numpy()})[0]

In [44]:
# Shape of the raw session output: rows are frames of the chunk, columns are class scores
# (the loops above apply softmax along axis 1 and argmax along axis 0).
logits.shape

(64, 9)

In [None]:
# NOTE(review): this cell has never been executed (no execution count) and looks like it was
# pasted from a generic ONNX classification example: `test_data` and `classes` are not
# defined anywhere in the visible notebook, so it would raise NameError under Run All.
# It also replaces `ort_sess` with a session created without an explicit providers list.
# TODO: adapt it to this notebook's data (e.g. compare ONNX and PyTorch outputs) or remove it.
x, y = test_data[0][0], test_data[0][1]
ort_sess = ort.InferenceSession('swing_stages.onnx')
outputs = ort_sess.run(None, {'input': image_batch.numpy()})

# Print Result
predicted, actual = classes[outputs[0][0].argmax(0)], classes[y]
print(f'Predicted: "{predicted}", Actual: "{actual}"')