In [None]:
import tensorflow as tf
import numpy as np
import cv2
import matplotlib.pyplot as plt
from datetime import datetime

In [None]:
# Each run logs into its own sub-directory of `root_logdir`, named after the
# UTC time (second resolution) at which this cell was executed.
root_logdir = "tf_logs"
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
logdir = root_logdir + "/run-" + now + "/"

In [None]:
# Architecture specifications consumed by the network builders defined below.
#
# `layers` describes a convolution/pooling stack; the position of an entry in
# the list is the position of that layer in the stack.
#   - a convolution layer is a tuple: (filter_height, filter_width, depth)
#   - a pooling layer is a tuple:     (pooling type, kernel_size, strides)
_MAX_POOL_2X2 = ('MAX', (1, 2, 2, 1), (1, 2, 2, 1))
layers = [
    (5, 5, 6), _MAX_POOL_2X2,
    (5, 5, 16), _MAX_POOL_2X2,
    (5, 5, 60), _MAX_POOL_2X2,
]

# Convolutions of one residual block, in order:
# (filter_height, filter_width, depth, activation), activation being 'relu' or None.
ResNet_block_layers = [
    (1, 1, 128, 'relu'),
    (3, 3, 128, 'relu'),
    (1, 1, 512, None),
]

# Depths used by the four Inception branches: a single depth for the first and
# last branch, a (first conv depth, second conv depth) pair for the middle two.
inception_depths = [64, (96, 128), (16, 32), 32]

def _spatial_pool_dims(dims):
    """Return only the spatial entries of a pooling window/stride spec.

    `tf.nn.pool` expects one entry per spatial dimension, whereas pooling specs
    in this file are written in the 4-element (1, h, w, 1) form. A 4-element
    spec has its first and last entries dropped; any other length is returned
    unchanged (as a tuple).

    Raises:
        ValueError: if a 4-element spec has a first or last entry other than 1,
            which `tf.nn.pool` cannot express.
    """
    dims = tuple(dims)
    if len(dims) != 4:
        return dims
    if dims[0] != 1 or dims[-1] != 1:
        raise ValueError(
            'Pooling over the first/last dimension is not supported: {}'.format(dims))
    return dims[1:-1]


def conv_pool(x, layers):
    """Apply a stack of convolution and pooling layers to `x`.

    Args:
        x: 4-D tensor; the size of its axis 3 is used as the input depth of the
            first convolution.
        layers: sequence of layer specs, applied in order.
            * (filter_height, filter_width, depth): stride-1 'SAME' convolution,
              bias add and ReLU. Variables live in scope 'conv_<k>' (k counts
              convolutions from 1) opened with AUTO_REUSE, so repeated calls
              under the same enclosing scope share their weights.
            * (pooling_type, window, strides): 'SAME' pooling via `tf.nn.pool`.
              `window` and `strides` may be given per spatial dimension,
              e.g. (2, 2), or in the 4-element (1, h, w, 1) form.

    Returns:
        The output tensor of the last layer, or `x` itself if `layers` is empty.
    """
    out = x
    n_conv, n_pool = 0, 0
    prev_depth = int(x.shape[3])
    for spec in layers:
        # Convolution specs start with an int (filter height); pooling specs
        # start with the pooling type string.
        if isinstance(spec[0], int):
            n_conv += 1
            f_height, f_width, depth = spec[0], spec[1], spec[2]
            with tf.variable_scope('conv_{}'.format(n_conv), reuse=tf.AUTO_REUSE):
                w = tf.get_variable(
                    'filter',
                    initializer=tf.truncated_normal((f_height, f_width, prev_depth, depth), 0, 0.1))
                b = tf.get_variable('bias', initializer=tf.zeros(depth))
            out = tf.nn.relu(tf.nn.conv2d(out, w, strides=(1, 1, 1, 1), padding='SAME') + b)
            prev_depth = depth
        else:
            n_pool += 1
            # tf.nn.pool rejects 4-element window/stride specs for a 4-D input,
            # so reduce them to their spatial part first.
            out = tf.nn.pool(out,
                             window_shape=_spatial_pool_dims(spec[1]),
                             pooling_type=spec[0],
                             padding='SAME',
                             strides=_spatial_pool_dims(spec[2]),
                             name='pool_{}'.format(n_pool))
    return out

def ResNet_block(x, layers, name):
    """Build a residual block: a chain of convolutions plus an identity shortcut.

    Args:
        x: 4-D input tensor; the size of its axis 3 must equal the depth of the
            last convolution so that the shortcut addition is well defined.
        layers: sequence of (filter_height, filter_width, depth, activation)
            specs applied in order as stride-1 'SAME' convolutions with bias.
            ReLU is applied after a convolution only when activation == 'relu'.
        name: variable scope enclosing the block; each convolution gets its own
            sub-scope 'conv_<k>' (k counted from 1), both opened with AUTO_REUSE.

    Returns:
        relu(conv_stack(x) + x).

    Raises:
        ValueError: if the input depth differs from the depth of the last
            convolution.
    """
    in_depth = int(x.shape[3])
    if in_depth != layers[-1][2]:
        raise ValueError('Input to ResNet block must have the same shape as output of convolution layers')

    out = x
    prev_depth = in_depth
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        for n_conv, spec in enumerate(layers, 1):
            depth = spec[2]
            # Every convolution needs a distinct scope: with AUTO_REUSE a shared
            # scope would make all layers resolve to the same 'filter'/'bias'.
            with tf.variable_scope('conv_{}'.format(n_conv), reuse=tf.AUTO_REUSE):
                w = tf.get_variable(
                    'filter',
                    initializer=tf.truncated_normal((spec[0], spec[1], prev_depth, depth), 0, 0.1))
                b = tf.get_variable('bias', initializer=tf.zeros(depth))
            out = tf.nn.conv2d(out, w, strides=(1, 1, 1, 1), padding='SAME') + b
            if spec[3] == 'relu':
                out = tf.nn.relu(out)
            prev_depth = depth
    # Shortcut connection followed by the block's final non-linearity.
    return tf.nn.relu(out + x)


def Inception_module(x, depths):
    """Build a four-branch Inception module and concatenate the branch outputs.

    Branches (each built with `conv_pool` inside scope 'component_<k>', k = 1..4):
        1. 1x1 convolution
        2. 1x1 convolution followed by a 3x3 convolution
        3. 1x1 convolution followed by a 5x5 convolution
        4. 3x3 stride-1 max pooling followed by a 1x1 convolution

    Args:
        x: input tensor handed unchanged to every branch.
        depths: [d1, (d2_reduce, d2), (d3_reduce, d3), d4] - the convolution
            depths of the four branches, in the order listed above.

    Returns:
        The branch outputs concatenated along the last axis.
    """
    branches = [
        [(1, 1, depths[0])],
        [(1, 1, depths[1][0]), (3, 3, depths[1][1])],
        [(1, 1, depths[2][0]), (5, 5, depths[2][1])],
        [('MAX', (1, 3, 3, 1), (1, 1, 1, 1)), (1, 1, depths[3])],
    ]
    outputs = []
    for i, branch in enumerate(branches, 1):
        # A scope per branch keeps the 'conv_<k>' variables of different
        # branches from colliding.
        with tf.variable_scope('component_{}'.format(i), reuse=tf.AUTO_REUSE):
            outputs.append(conv_pool(x, branch))
    return tf.concat(outputs, axis=-1)

In [None]:
def get_frames(path, n_frames, downscale_factor):
    """Read the leading frames of a video, downscaled by a constant factor.

    At most `n_frames` frames are read; reading stops early when the video has
    no more frames. Each frame's width and height are divided by
    `downscale_factor` (truncated to int) and resized with INTER_AREA. Frames
    that end up taller than wide have their first two axes swapped so that all
    returned frames are landscape.

    Args:
        path: location of the video, passed to `cv2.VideoCapture`.
        n_frames: maximum number of frames to read.
        downscale_factor: divisor applied to both frame dimensions.

    Returns:
        ndarray of shape (n_read, height, width, channels), n_read <= n_frames.

    Raises:
        ValueError: if no frame could be read (e.g. the video could not be
            opened, or `n_frames` is 0).
    """
    cap = cv2.VideoCapture(path)
    seq = []
    try:
        # Check the count before reading so no frame is decoded just to be dropped.
        while len(seq) < n_frames:
            success, frame = cap.read()
            if not success:
                break
            # downscale frame
            width = int(frame.shape[1] / downscale_factor)
            height = int(frame.shape[0] / downscale_factor)
            resized = cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)
            if resized.shape[0] > resized.shape[1]:
                resized = np.transpose(resized, (1, 0, 2))
            seq.append(resized)
    finally:
        # Always give the capture handle back, even if decoding/resizing fails.
        cap.release()
    if not seq:
        raise ValueError('No frames could be read from {!r}'.format(path))
    return np.stack(seq)

def next_batch(path, labels, n_batches, batch_size, n_frames, downscale_factor):
    """Generate shuffled mini-batches of videos together with their labels.

    Videos are addressed by 1-based number: label index k belongs to the video
    at `path.format(k + 1)`. One random permutation of all label indices is
    drawn per call, and consecutive slices of it form the batches, so no video
    is used twice within one pass.

    Args:
        path: format string with one placeholder for the video number.
        labels: sequence with one label per video (assumes exactly one video
            exists for every label - verify against the data directory).
        n_batches: number of batches to yield.
        batch_size: number of videos per batch.
        n_frames: forwarded to `get_frames`.
        downscale_factor: forwarded to `get_frames`.

    Yields:
        (x_batch, y_batch): x_batch is the stacked output of `get_frames` for
        the batch's videos, y_batch is the list of the corresponding labels.
        Stacking requires every video of a batch to produce the same shape.

    Raises:
        ValueError: if `n_batches * batch_size` exceeds the number of labels.
    """
    n_videos = len(labels)
    if n_batches * batch_size > n_videos:
        raise ValueError(
            'Requested {} videos ({} batches of {}) but only {} labels are available'.format(
                n_batches * batch_size, n_batches, batch_size, n_videos))
    perm = np.random.permutation(n_videos)
    for i in range(n_batches):
        batch_idx = perm[i * batch_size:(i + 1) * batch_size]
        x_batch = np.stack([
            get_frames(path.format(k + 1), n_frames, downscale_factor)
            for k in batch_idx
        ])
        y_batch = [labels[k] for k in batch_idx]
        yield x_batch, y_batch
            
def get_feature_maps(x):
    """Compute flattened convolutional features for every video of a mini-batch.

    Each video `x[i]` is run through `conv_pool` with the module-level `layers`
    specification, its frames acting as the leading axis of the 4-D input, and
    the result is flattened with `tf.contrib.layers.flatten`.

    Args:
        x: tensor of shape (batch_size, n_frames, height, width, channels); the
            batch size must be statically known because it drives a Python loop.

    Returns:
        The per-video feature tensors stacked along a new leading axis.
    """
    per_video = [
        tf.contrib.layers.flatten(conv_pool(x[i, :, :, :, :], layers))
        for i in range(x.shape[0])
    ]
    return tf.stack(per_video, axis=0)

def score_to_label(scores, thresh_1, thresh_2):
    """Quantise scores into the class labels 0, 1 and 2, in place.

    An element becomes 0 if it is below `thresh_1`, otherwise 1 if it is below
    `thresh_2`, otherwise 2.

    Args:
        scores: writable ndarray of scores; it is overwritten with the labels
            and keeps its dtype.
        thresh_1: upper bound (exclusive) of class 0.
        thresh_2: upper bound (exclusive) of class 1.

    Returns:
        The same `scores` array object, now holding the labels.
    """
    # Inner where handles the 1-vs-2 split; outer where gives class 0 priority,
    # matching an if / elif / else chain.
    upper_classes = np.where(scores < thresh_2, 1, 2)
    scores[...] = np.where(scores < thresh_1, 0, upper_classes)
    return scores

In [None]:
# --- Data location and hyper-parameters ---
path = '/home/mallesh/video-qoe-labeling_1/VQA-Deep-Learning/data/set1-4/trace_{}.mp4'
height, width, n_channels = 1080, 1920, 3
downscale_factor = 8
n_frames = 100
n_classes = 3
n_batches, batch_size = 30, 10
n_hidden = 100  # number of hidden cells in the LSTM

# --- Inputs: a batch of downscaled videos and one integer class per video ---
frame_shape = (int(height / downscale_factor), int(width / downscale_factor), n_channels)
X = tf.placeholder(tf.float32, shape=(batch_size, n_frames) + frame_shape)
y = tf.placeholder(tf.int32, shape=(batch_size,))

# Scores loaded from disk are quantised into classes 0/1/2 at thresholds 2 and 3.8.
labels = score_to_label(
    np.loadtxt('/home/mallesh/video-qoe-labeling_1/VQA-Deep-Learning/data/set1-4.txt'),
    2, 3.8)

# --- Per-frame convolutional features, then an LSTM over the frame axis ---
X_features = get_feature_maps(X)
print(X_features.shape)
cell = tf.contrib.rnn.BasicLSTMCell(n_hidden)
initial_state = cell.zero_state(batch_size, dtype=tf.float32)
output, _ = tf.nn.dynamic_rnn(cell, X_features, initial_state=initial_state)

# --- Classification head on the LSTM output of the last frame, loss, optimiser ---
with tf.variable_scope('out', reuse=tf.AUTO_REUSE):
    w = tf.get_variable('weight', shape=(n_hidden, n_classes))
    b = tf.get_variable('bias', initializer=tf.zeros(n_classes))
    last_output = output[:, -1, :]
    pred = tf.matmul(last_output, w) + b
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=y)
    loss = tf.reduce_mean(per_example_loss)
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)
    loss_summary = tf.summary.scalar('loss', loss)
    # The writer also records the graph built so far.
    file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [None]:
# Train for one pass over the shuffled mini-batches, logging the loss and
# checkpointing after every batch.
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch_num = 0
    for X_batch, y_batch in next_batch(path, labels, n_batches, batch_size, n_frames, downscale_factor):
        print(X_batch.shape)
        batch_num += 1
        feed = {X: X_batch, y: y_batch}
        # Fetch the loss summary in the same run as the training step: it is the
        # loss of this step's forward pass and saves a separate forward pass.
        _, summary_str = sess.run([training_op, loss_summary], feed_dict=feed)
        file_writer.add_summary(summary_str, batch_num)
        saver.save(sess, '/tmp/after_batch_{}.ckpt'.format(batch_num))
        # Predictions and loss on the same batch after the update, one run for both.
        batch_pred, batch_loss = sess.run([pred, loss], feed_dict=feed)
        print(batch_pred)
        print(batch_loss)
    # Final checkpoint and writer shutdown happen once, after the last batch;
    # closing the writer inside the loop would drop every later summary.
    saver.save(sess, '/tmp/final.ckpt')
    file_writer.close()