In [1]:
import torch
from efficientnet_pytorch import EfficientNet
from PIL import Image, ImageDraw, ImageFont
import numpy as np
from pathlib import Path

In [2]:
# you can find a pretrained model at model/b3.pth
# NOTE(review): the path configured below is a b2 checkpoint (model/b2/best.pth),
# not model/b3.pth as the line above says — confirm which file is meant.
MODEL_F = 'model/b2/best.pth'
# directory with the numpy optical flow images you want to use for inference
OF_NPY_DIR = '../opical-flow-estimation-with-RAFT/output'
# alternative flow directory (disabled)
# OF_NPY_DIR = 'npy'
# path of the input video file (a file, not a directory, despite the name);
# it is opened with cv2.VideoCapture further down
Video_dir = 'images/train.mp4'
# path of the ground-truth text file (also a file); it is read line by line
# and each line is parsed as a float further down
GT_dir = 'train.txt'

In [3]:
# Pick the compute device: use the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

## Load Model

In [4]:
# Network hyper-parameters, in order:
#   V     - which EfficientNet variant was used (the N in "efficientnet-bN")
#   IN_C  - number of input channels
#   NUM_C - number of classes to predict
V, IN_C, NUM_C = 2, 2, 1

In [5]:
# Build the EfficientNet-b{V} architecture with IN_C input channels and NUM_C outputs,
# then overwrite its weights with the checkpoint stored at MODEL_F.
model = EfficientNet.from_pretrained(f'efficientnet-b{V}', in_channels=IN_C, num_classes=NUM_C)
# map_location lets a checkpoint that was saved from a GPU be loaded on a CPU-only machine.
state = torch.load(MODEL_F, map_location=device)
model.load_state_dict(state)
model.to(device)
# Switch to evaluation mode: without this, BatchNorm normalises with per-batch statistics
# (and updates its running stats) and Dropout stays active, which distorts predictions.
# The trailing ';' keeps the notebook from printing the module repr.
model.eval();

Loaded pretrained weights for efficientnet-b2


In [6]:
def inference(of_f):
    """Run the global ``model`` on one saved optical-flow array.

    Args:
        of_f: path to a ``.npy`` file; its contents are loaded with ``np.load``
            and passed to the model unchanged (no reshaping or dtype conversion
            happens here).

    Returns:
        The tensor returned by ``model(...)``, detached from autograd because
        the forward pass runs under ``torch.no_grad()``.

    Note:
        This function does not change the model's train/eval mode; the caller
        is expected to have configured it.
    """
    of = np.load(of_f)
    # Inference only: disabling autograd avoids building (and keeping alive) a
    # computation graph for every frame.
    with torch.no_grad():
        i = torch.from_numpy(of).to(device)
        pred = model(i)
    # Drop the input tensor and hand cached GPU blocks back to the driver.
    del i
    torch.cuda.empty_cache()
    return pred

In [7]:
import cv2
import os

# Overlay predicted speed, ground-truth speed and their difference on a slice of the
# input video and save the annotated frames to output.mp4.
FIRST_ANNOTATED_IDX = 3000  # flow indices below this are read but not written out
STOP_IDX = 3600             # exclusive upper bound on the flow index
OUTPUT_FPS = 20.0
TEXT_COLOR = (0, 255, 255)

# One ground-truth value per line; the context manager closes the file handle.
with open(GT_dir) as gt_file:
    GT = gt_file.readlines()

# Input
vidcap = cv2.VideoCapture(Video_dir)
success, frame = vidcap.read()
if not success:
    vidcap.release()
    raise RuntimeError('could not read the first frame of ' + Video_dir)

# Define the codec and create VideoWriter object.
# The writer is sized from the decoded frame rather than a fixed 640x480, so the
# frame size passed to the writer always matches the frames that are written.
frame_height, frame_width = frame.shape[:2]
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
out = cv2.VideoWriter('output.mp4', fourcc, OUTPUT_FPS, (frame_width, frame_height))

count = 0
try:
    out.write(frame)
    while count < STOP_IDX:
        success, frame = vidcap.read()
        if not success:
            # No more frames: stop before touching `frame`, which is None here.
            break

        flow_path = os.path.join(OF_NPY_DIR, str(count) + '.npy')
        if not os.path.isfile(flow_path):
            break

        if count >= FIRST_ANNOTATED_IDX:
            if count >= len(GT):
                # Ran out of ground-truth lines for the remaining frames.
                break
            gt_line = GT[count].strip()
            y_hat = inference(flow_path).item()

            # Ground truth is shown truncated (not rounded) to two decimals;
            # partition() also copes with a value that has no fractional part.
            whole, _sep, frac = gt_line.partition('.')
            overlay_lines = [
                'Pred: ' + str(round(y_hat, 2)) + 'mph',
                'GT: ' + whole + '.' + frac[:2] + 'mph',
                'diff: ' + str(round(float(gt_line) - y_hat, 2)),
            ]
            # Text rows are drawn at y = 50, 100, 150.
            for row, text in enumerate(overlay_lines, start=1):
                cv2.putText(frame, text, (0, 50 * row), cv2.FONT_HERSHEY_SIMPLEX,
                            1, TEXT_COLOR, 2, cv2.LINE_AA)
            out.write(frame)
        count += 1
finally:
    # Always release both handles so output.mp4 is finalised even after an error.
    vidcap.release()
    out.release()

In [8]:
# Release the VideoWriter again; the previous cell already calls out.release()
# once its loop has finished, so this only matters if that cell was interrupted.
out.release()

In [9]:
import os
# Sanity check: does flow file 20398 exist in the configured flow directory?
# The path is built from OF_NPY_DIR so the check follows the config cell instead
# of repeating the directory as a literal.
os.path.isfile(os.path.join(OF_NPY_DIR, '20398.npy'))

True

In [10]:
# Load a single saved flow array and report its dimensions.
# NOTE(review): this reads from '.../output1', whereas OF_NPY_DIR and the
# existence check above use '.../output' — confirm which directory is intended.
sample_flow_path = '../opical-flow-estimation-with-RAFT/output1/20398.npy'
of = np.load(sample_flow_path)
print(of.shape)

(1, 2, 320, 640)


In [11]:
# Print the raw values of the flow array loaded in the previous cell
# (NumPy abbreviates the output with '...').
print(of)

[[[[-0.1273618  -0.13245387 -0.13652012 ...  2.4380157   2.4340835
     2.4277139 ]
   [-0.12973347 -0.13490614 -0.1383553  ...  2.4388032   2.4351673
     2.4324136 ]
   [-0.13076115 -0.13579664 -0.13839947 ...  2.4393146   2.436145
     2.4348946 ]
   ...
   [-2.3053896  -2.3054514  -2.3026412  ...  1.4114022   1.6261985
     1.8427379 ]
   [-2.2984984  -2.3005958  -2.297174   ...  1.2677288   1.3977143
     1.5467987 ]
   [-2.2910452  -2.2942796  -2.292251   ...  1.1680415   1.2705206
     1.3680716 ]]

  [[-0.0727099  -0.06925774 -0.06701323 ... -1.0413845  -1.0378234
    -1.036514  ]
   [-0.07134464 -0.06826704 -0.06633865 ... -1.0406389  -1.0356193
    -1.0314789 ]
   [-0.07090738 -0.06831974 -0.06688016 ... -1.0368154  -1.0328224
    -1.0278083 ]
   ...
   [ 0.66085434  0.6659317   0.6702154  ...  0.4179353   0.5327205
     0.6494239 ]
   [ 0.6722083   0.6745251   0.67922896 ...  0.33245742  0.40263432
     0.48313284]
   [ 0.6828357   0.6826558   0.6857843  ...  0.26856136  0.3