In [None]:
# import modules
import skvideo.io
import numpy as np
import matplotlib.pyplot as plt
import skimage
import torch
from skimage.transform import rescale
from skimage.util import crop
%matplotlib inline

In [None]:
# location of the clip to analyse
file_name = '/media/atcold/VIDEODATA/video-dataset/data/car/20160706_180451.mp4'

# read the entire clip into one in-memory array
video_data = skvideo.io.vread(file_name)

# also keep an iterator over the frames of the same file
frame_iterator = skvideo.io.vreader(file_name)

# report the dimensions of the loaded array
print('The video_data has shape:', video_data.shape)

In [None]:
# display N frames: 1st, ..., ..., last
def showSamples(data, N=4, height=12):
    """Show N evenly spaced frames of `data` (first, ..., last) in a tight grid.

    Parameters
    ----------
    data : array-like indexed as ``data[frame]``; ``data.shape[0]`` is the
        number of frames, while ``data.shape[1]`` and ``data.shape[2]`` are
        used as frame height and width when sizing the figure.
    N : int
        Number of frames to display; must be at least 1.
    height : number
        Scale (in inches) used to size the figure.

    Raises
    ------
    ValueError
        If N is smaller than 1.
    """
    if N < 1:
        raise ValueError('N must be at least 1, got {}'.format(N))

    # plt.subplot needs integer grid dimensions; round the column count up so
    # that rows * columns >= N even when N is not a multiple of rows
    rows = int(np.floor(N ** 0.5))
    columns = int(np.ceil(N / rows))

    last_frame = data.shape[0] - 1
    for p in range(N):
        plt.subplot(rows, columns, p + 1)
        # spread the picked indices linearly between the first and last frame
        idx = p * last_frame // (N - 1) if N > 1 else 0
        plt.imshow(data[idx])
        plt.axis('off')

    # remove every margin and gap so that the frames touch each other
    plt.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)
    s = data.shape
    # width / height of the figure equals (frame aspect ratio) * columns / rows
    plt.gcf().set_size_inches((height*s[2]/s[1]/rows, height/columns))
    
# preview the raw video using the default arguments of showSamples
showSamples(video_data)

In [None]:
# some image processing exploration
IN_SIZE = 224  # side length (in pixels) of the square crop produced below
img = video_data[0]  # get first frame
print('The input image has size', img.shape)

# the larger of the two ratios belongs to the shorter side, so after rescaling
# the shorter side measures IN_SIZE and the longer one is at least IN_SIZE
# NOTE(review): rescale is called without a channel argument; confirm that the
# installed scikit-image leaves the last (colour) axis untouched
scaling_factor = max(IN_SIZE / img.shape[0], IN_SIZE / img.shape[1])
img_rescaled = rescale(img, scaling_factor)
print('The rescaled image has size', img_rescaled.shape)

# (before, after) crop widths per axis, centring an IN_SIZE x IN_SIZE window;
# an odd excess puts the extra pixel in the "after" part and the third axis is
# not cropped. Plain integers are used because the widths end up as slice
# bounds inside crop.
excess = (img_rescaled.shape[0] - IN_SIZE, img_rescaled.shape[1] - IN_SIZE, 0)
r = tuple((e // 2, e - e // 2) for e in excess)

# crop once with the default copy behaviour and once with an explicit copy,
# then compare the memory layout of the two results
img_cropped_1 = crop(img_rescaled, r)
img_cropped_2 = crop(img_rescaled, r, copy=True)
print('The cropped image has size', img_cropped_1.shape)
print('img_cropped_1 is contiguous:', img_cropped_1.flags['C_CONTIGUOUS'])
print('img_cropped_2 is contiguous:', img_cropped_2.flags['C_CONTIGUOUS'])
plt.imshow(img_cropped_2)
plt.axis('off')

In [None]:
# preprocess the whole video: rescale and crop each frame into a float32 buffer
n_frames, n_channels = video_data.shape[0], video_data.shape[3]
output_shape = (n_frames, IN_SIZE, IN_SIZE, n_channels)
processed_data = np.ndarray(output_shape, dtype=np.float32)
for frame, raw_frame in enumerate(video_data):
    rescaled_frame = rescale(raw_frame, scaling_factor)
    processed_data[frame] = crop(rescaled_frame, r)

In [None]:
# preview the rescaled and cropped frames
showSamples(processed_data)

In [None]:
# define some utility function
def to_torch(x):
    """Convert a channel-last image array into a channel-first float32 tensor.

    The array is passed through ``skimage.img_as_float``, cast to float32 and
    its axes are permuted before being handed to ``torch.from_numpy``:

    * 3 dimensions: axes (0, 1, 2) -> (2, 0, 1)
    * 4 dimensions: axes (0, 1, 2, 3) -> (0, 3, 1, 2)

    Parameters
    ----------
    x : numpy.ndarray with 3 or 4 dimensions.

    Returns
    -------
    torch tensor of float32 values with the permuted axes.

    Raises
    ------
    ValueError
        If ``x`` has neither 3 nor 4 dimensions.
    """
    axes_by_ndim = {3: (2, 0, 1), 4: (0, 3, 1, 2)}
    dim = x.ndim
    if dim not in axes_by_ndim:
        # a single formatted string, so the message reads as text, not a tuple
        raise ValueError('Bad input dimensionality, dim: {}'.format(dim))
    as_float = skimage.img_as_float(x).astype(np.float32)
    return torch.from_numpy(as_float.transpose(axes_by_ndim[dim]))

In [None]:
# convert data from numpy to torch
torch_data = to_torch(processed_data)
# report the tensor type and its size as a quick sanity check
print(torch_data.type())
print(torch_data.size())

In [None]:
# normalise the input data
def normalise(x):
    """Standardise the first three channels of `x` in place and return `x`.

    `x` is indexed as ``x[:, channel, :, :]``. For channels 0, 1 and 2 the
    matching mean is subtracted and the result is divided by the matching
    standard deviation. The tensor passed in is modified, so hand over a
    clone when the original values are still needed.
    """
    channel_stats = zip((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    for channel, (mu, sigma) in enumerate(channel_stats):
        plane = x[:, channel, :, :]
        plane.sub_(mu)
        plane.div_(sigma)
    return x
        
# normalise works in place, so pass a clone to keep torch_data unchanged
norm_data = normalise(torch_data.clone())

In [None]:
# print some stats: for each colour channel, first the un-normalised tensor,
# then the normalised one
# (the slice is called `channel` instead of `r`, so that the crop specification
# held in the global `r` is not overwritten by running this cell)
for i, c in enumerate(('red', 'green', 'blue')):
    print(c, 'stats:')
    for tensor in (torch_data, norm_data):
        channel = tensor[:, i, :, :]
        print('min: {:.3f}, max: {:.3f}, mean: {:.3f}, std: {:.3f}'.format(
            channel.min(), channel.max(), channel.mean(), channel.std()))

In [None]:
import os
import urllib.request

# get classes
file_name = 'synset_words.txt'
# fetch the list only when no readable local copy exists (the file is only
# ever read here, so read access is the permission that matters)
if not os.access(file_name, os.R_OK):
    synset_URL = 'https://github.com/szagoruyko/functional-zoo/raw/master/synset_words.txt'
    # standard-library download: no shell involved, and a failed transfer
    # raises instead of being silently ignored
    urllib.request.urlretrieve(synset_URL, file_name)

# for every non-blank line: drop the first space-separated token, then keep
# the text that precedes the first ', '
with open(file_name) as synset_file:
    classes = tuple(
        line.strip().split(' ', 1)[1].split(', ')[0]
        for line in synset_file
        if line.strip()
    )

In [None]:
# print the first 5 classes together with their indices

for index in range(5):
    label = classes[index]
    print(index, label)

In [None]:
import torchvision

# get network: ResNet-18 from torchvision, requested with pretrained=True

resnet_18 = torchvision.models.resnet18(pretrained=True)
# put the network in evaluation mode before any forward pass is run
resnet_18.eval()

In [None]:
from torch.autograd import Variable as V
from torch.nn import functional as f

# forward first frame
# narrow(0, 0, 1) selects index 0 along the first axis while keeping that axis,
# so the network receives a batch holding one normalised frame
x = V(norm_data.narrow(0, 0, 1), volatile=True)
logit = resnet_18.forward(x)
# softmax output with the singleton dimensions squeezed away
h_x = f.softmax(logit).data.squeeze()


In [None]:
from matplotlib import rc

# set black background: dark style sheet, facecolor 'none' for figure and axes

plt.style.use(['dark_background'])
rc('figure', figsize=(16, 8), facecolor='none')
rc('axes', facecolor='none')

In [None]:
# bar chart of h_x: one bar per entry, positioned by its index
plt.bar(np.arange(len(h_x)), h_x.numpy())

In [None]:
# sort h_x along dimension 0 and print its first five entries with class names
probs, idx = h_x.sort(0, True)
for rank in range(5):
    print('{:.3f} -> {}'.format(probs[rank], classes[idx[rank]]))

In [None]:
# NOTE(review): this cell repeats the top-5 printout of the preceding cell and
# produces the same output; it looks like a leftover that could be removed
probs, idx = h_x.sort(0, True)
for i in range(0, 5):
    print('{:.3f} -> {}'.format(probs[i], classes[idx[i]]))

In [None]:
import os
import urllib.request

import skimage.io

# verify net health

file_name = '26132.jpg'
# fetch the test picture only when no readable local copy exists
if not os.access(file_name, os.R_OK):
    file_URL = 'http://www.zooclub.ru/attach/26000/26132.jpg'
    # standard-library download: no shell involved, and a failed transfer
    # raises instead of being silently ignored
    urllib.request.urlretrieve(file_URL, file_name)
img = skimage.io.imread(file_name)

# rescale so that the shorter side measures IN_SIZE
scaling_factor = max(IN_SIZE / img.shape[0], IN_SIZE / img.shape[1])
img_rescaled = rescale(img, scaling_factor)

# (before, after) crop widths per axis, centring an IN_SIZE x IN_SIZE window;
# plain integers are used because the widths end up as slice bounds inside
# crop, and the third axis is not cropped
excess = (img_rescaled.shape[0] - IN_SIZE, img_rescaled.shape[1] - IN_SIZE, 0)
r = tuple((e // 2, e - e // 2) for e in excess)
img_cropped = crop(img_rescaled, r)
plt.imshow(img_cropped)
plt.axis('off')

# add a leading batch dimension, then normalise (in place on the new tensor)
img_torch = normalise(to_torch(img_cropped).unsqueeze(0))

In [None]:
# run the network on the test image and print the five leading
# (probability, class name) pairs
x = V(img_torch, volatile=True)
logit = resnet_18.forward(x)
h_x = f.softmax(logit).data.squeeze()
probs, idx = h_x.sort(0, True)
for rank in range(5):
    print(probs[rank], classes[idx[rank]])

In [None]:
# largest value found in the normalised test-image tensor
img_torch.max()

In [None]:
import torchvision.transforms as transforms
import torch.nn.functional as f


# transforms.Scale was renamed to transforms.Resize and the old name is gone
# from newer torchvision releases; use whichever the installed version offers
resize_transform = getattr(transforms, 'Resize', None) or transforms.Scale

# define image transformation
tr_center_crop = transforms.Compose([
        transforms.ToPILImage(),
        resize_transform(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# classify the test image again, this time preprocessed by the torchvision
# pipeline; unsqueeze(0) adds the leading batch dimension
batch = tr_center_crop(img).unsqueeze(0)
scores = resnet_18.forward(V(batch, volatile=True))
probs, idx = f.softmax(scores).data.view(-1).sort(0, True)

for i in range(0, 5):
    print(probs[i], classes[idx[i]])