In [8]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
@author: DeRafael
In this part we write the hw4
"""
import numpy as np
import tensorflow as tf
# from plot import plot
from tqdm import tqdm
# from plot import visual, plot

training_data = np.load('videoframes_clips_train.npy') # 5964 x 16 x 224 x 224 x 3
training_data = training_data[0:2000, :, :, :, :]
testing_data = np.load('videoframes_clips_valid.npy') # 5964 x 16 x 224 x 224 x 3

training_label = np.load('joint_3d_clips_train.npy') # 1368 x 17 x 3
testing_label = np.load('joint_3d_clips_valid.npy') # 1368 x 17 x 3
training_label = training_label[0:2000, :, :]


In [9]:
# hyper-parameter
N_train = 500
N_test = testing_data.shape[0]
frames = training_data.shape[1]
batch_size = 2
num_units = 1024
step_size = 0.001
training_epochs = 10

In [10]:
# video length
train_dataset = (
    tf.data.Dataset.from_tensor_slices((
        training_data, training_label)).batch(batch_size).shuffle(buffer_size=N_train, seed=0)
)

train_dataset = (train_dataset.map(lambda x, y:
                                   (tf.divide(tf.cast(x, tf.float32), 255.0),tf.cast(y, tf.float32))))

test_dataset = (
    tf.data.Dataset.from_tensor_slices((testing_data, testing_label)).batch(batch_size)
)

test_dataset = (
    test_dataset.map(lambda x, y:
                      (tf.divide(tf.cast(x, tf.float32), 255.0),tf.cast(y, tf.float32))))

In [11]:
class input_block(tf.keras.Model):
    def __init__(self, seed=1):
        super(input_block, self).__init__()
        # use random seed to make the initialization repeat
        tf.random.set_seed(seed)
        # define convolutional layers
        self.c1 = tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding='VALID', activation = None, name='c1')
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.relu = tf.keras.activations.relu
        self.maxpool = tf.keras.layers.MaxPool2D(3, strides=2, padding='VALID')

    def forward(self, input):
        # batch_size x frame x 224 x 224 x 3
        x = self.c1(input)
        # batch_size x frame x 112 x 112 x 64
        x = self.bn1(x)
        x = self.relu(x)
        output = self.maxpool(x)
        # batch_size x frame x 56 x 56 x 64
        return output

In [14]:
class basic_block(tf.keras.Model):
    def __init__(self, planes, stride=1, seed=1):
        super(basic_block, self).__init__()
        # use random seed to make the initialization repeat
        tf.random.set_seed(seed)
        # define convolutional layers
        self.conv1 = tf.keras.layers.Conv2D(planes, kernel_size=3, strides=stride, padding='SAME', activation = None)
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(planes, kernel_size=3, padding='SAME', activation=None)
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.relu = tf.keras.activations.relu
        self.stride = stride
        if self.stride != 1:
            self.short_cut = tf.keras.layers.Conv2D(planes, kernel_size=1, strides=stride, padding='SAME',
                                                    activation = None)
            self.bn_short = tf.keras.layers.BatchNormalization()

    def forward(self, input):
        # batch_size x frame x H x W x planes
        x_1 = self.conv1(input)
        x_1 = self.bn1(x_1)
        x_1 = self.relu(x_1)
        x_1 = self.conv2(x_1)
        x_1 = self.bn2(x_1)
        if self.stride != 1:
            # shot cut
            x_2 = self.short_cut(input)
            x_2 = self.bn_short(x_2)
            # batch_size x frame x H/2 x W/2 x planes
            output = self.relu(x_1 + x_2)
        else:
            output = self.relu(x_1 + input)
            # batch_size x frame x H x W x planes
        return output

In [20]:
class ResNet18_LSTM(tf.keras.Model):
    def __init__(self):
        super(ResNet18_LSTM, self).__init__()
        # use random seed to make the initialization repeat
        # CNN 1-2
        self.input_part = input_block()
        # CNN 3-6
        self.block_11 = basic_block(planes=64, seed=1)
        self.block_12 = basic_block(planes=64, seed=2)
        # CNN 6-10
        self.block_21 = basic_block(planes=128, stride=2, seed=1)
        self.block_22 = basic_block(planes=128, seed=2)
        # CNN 10-14
        self.block_31 = basic_block(planes=256, stride=2, seed=1)
        self.block_32 = basic_block(planes=256, seed=2)
        # CNN 14-18
        self.block_41 = basic_block(planes=512, stride=2, seed=1)
        self.block_42 = basic_block(planes=512, seed=2)
        # Avg pooling
        self.Avg = tf.keras.layers.AvgPool2D(7, padding='VALID')
        # self.RNN = tf.keras.layers.LSTM(units=num_units, activation='tanh')
        self.LSTM = tf.keras.layers.LSTM(units=num_units, activation='tanh', return_sequences=True, dropout=0.3)
        # multilayer perception
        self.p1 = tf.keras.layers.Dense(1024, activation=None, name='p1')
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.p2 = tf.keras.layers.Dense(1024, activation=None, name='p2')
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.p3 = tf.keras.layers.Dense(51, name='p3')
        self.relu = tf.keras.activations.relu
        self.dropout = tf.keras.layers.Dropout(0.5)


    def forward(self, input):
        '''
        here we define the forward function
        :param input: the input data
        :return: output tensor
        '''
        # For each layer, a bias will also be initialized and add to the output after matrix multiply.
        # reshape

        x = tf.reshape(input, [-1, 224, 224, 3])
        # batch size x frames x 224 x 224 x 3
        x = self.input_part.forward(x)
        # batch size x frames x 56 x 56 x 64
        x = self.block_11.forward(x)
        x = self.block_12.forward(x)
        # batch size x frames x 56 x 56 x 64
        x = self.block_21.forward(x)
        x = self.block_22.forward(x)
        # batch size x frames x 28 x 28 x 128
        x = self.block_31.forward(x)
        x = self.block_32.forward(x)
        # batch size x frames x 14 x 14 x 256
        x = self.block_41.forward(x)
        x = self.block_42.forward(x)
        # batch size x frames x 7 x 7 x 512
        x = self.Avg(x)
        # batch size x frames x 1 x 1 x 512
        x = tf.reshape(x, [-1, frames, 512])
        # batch size x frames x 512
        middle_input = self.LSTM(x)
        # middle_input = self.RNN(x)
        # batch size x 1024
        # batch size x frames x 1024
        x = self.p1(middle_input)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout(x)
        # batch size x frames x 256
        x = self.p2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.dropout(x) + middle_input
        # batch size x frames x 64
        x = self.p3(x)
        # batch size x frames x 51
        output = tf.reshape(x, [-1, frames, 17, 3])
        return output

In [18]:
def train(train_data, test_data):
    epoch = 0
    model = ResNet18_LSTM()
    optimizer = tf.keras.optimizers.Adam(learning_rate=step_size)
    training_loss = []
    testing_loss = []
    training_MPJPE =[]
    testing_MPJPE = []
    while epoch < training_epochs:
        epoch += 1
        training_tmp_loss = []
        training_tmp_MPJPE = []
        for x, y in tqdm(train_data):
            with tf.GradientTape() as tape:
                predict = model.forward(x)
                # F norm(y-predict) ^ 2 / batch_size
                loss = tf.math.reduce_sum(tf.losses.mean_squared_error(y, predict))/batch_size
                # batch_size x frame x 17 x 1
                MPJPE = tf.math.reduce_mean(tf.math.reduce_euclidean_norm((y - predict), axis = 3)) * 1000
                training_tmp_loss.append(loss)
                training_tmp_MPJPE.append(MPJPE)
                print('Epoch', epoch, 'Loss:', loss.numpy(), 'MPJPE:', MPJPE.numpy())
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        training_loss.append(tf.reduce_mean(training_tmp_loss))
        training_MPJPE.append(tf.reduce_mean(training_tmp_MPJPE))
        # At the end of each epoch test on validation dataset
        testing_tmp_MPJPE = []
        testing_tmp_loss = []
        for x, y in tqdm(test_data):
            predict = model.forward(x)
            # F norm(y-predict) ^ 2 / batch_size
            loss = tf.math.reduce_sum(tf.losses.mean_squared_error(y, predict)) / batch_size
            # batch_size x frame x 17 x 1
            MPJPE = tf.math.reduce_mean(tf.math.reduce_euclidean_norm((y - predict), axis=3)) * 1000
            testing_tmp_MPJPE.append(MPJPE)
            testing_tmp_loss.append(loss)
        testing_loss.append(tf.reduce_mean(testing_tmp_loss))
        testing_MPJPE.append(tf.reduce_mean(testing_tmp_MPJPE))
    plot(training_loss, training_MPJPE, testing_loss, testing_MPJPE)
    model.save_weights('model_weights.h5')

In [None]:
if __name__ == '__main__':
    train(train_dataset, test_dataset)
    model = ResNet18_RNN()
    # initialize
    model.forward(tf.ones(shape=[1, 8, 224, 224, 3]))
    # load model
    model.load_weights('model_weights.h5')
    # testing_loss = []
    tmp_MPJPE = []
    tmp_loss = []
    # test_MPJPE = []
    for x, y in test_dataset:
        predict = model.forward(x)
        # F norm(y-predict) ^ 2 / batch_size
        loss = tf.math.reduce_sum(tf.losses.mean_squared_error(y, predict)) / batch_size
        # batch_size x frame x 17 x 1
        MPJPE = tf.math.reduce_mean(tf.math.reduce_euclidean_norm((y - predict), axis=3)) * 1000
        tmp_MPJPE.append(MPJPE)
        tmp_loss.append(loss)
    testing_los = tf.reduce_mean(tmp_loss)
    test_MPJPE = tf.reduce_mean(tmp_MPJPE)
    print('MPJPE:', test_MPJPE.numpy())

  0%|          | 0/1000 [00:00<?, ?it/s]

Epoch 1 Loss: 10.775 MPJPE: 427.9449


  0%|          | 1/1000 [00:07<1:56:37,  7.00s/it]

Epoch 1 Loss: 316.65753 MPJPE: 2244.5127


  0%|          | 2/1000 [00:09<1:33:42,  5.63s/it]

Epoch 1 Loss: 9.451082 MPJPE: 397.58835


  0%|          | 3/1000 [00:11<1:17:14,  4.65s/it]

Epoch 1 Loss: 9.9628 MPJPE: 404.04443


  0%|          | 4/1000 [00:14<1:09:00,  4.16s/it]

Epoch 1 Loss: 9.603626 MPJPE: 400.31525


  0%|          | 5/1000 [00:17<1:01:24,  3.70s/it]

Epoch 1 Loss: 9.71401 MPJPE: 404.45074


  1%|          | 6/1000 [00:20<57:43,  3.48s/it]  

Epoch 1 Loss: 7.62173 MPJPE: 360.157


  1%|          | 7/1000 [00:23<55:52,  3.38s/it]

Epoch 1 Loss: 7.7739115 MPJPE: 383.97745


  1%|          | 8/1000 [00:26<55:59,  3.39s/it]

Epoch 1 Loss: 5.1689234 MPJPE: 296.61844


  1%|          | 9/1000 [00:31<1:00:04,  3.64s/it]

Epoch 1 Loss: 3.3940787 MPJPE: 235.29631


  1%|          | 10/1000 [00:35<1:03:39,  3.86s/it]

Epoch 1 Loss: 4.380954 MPJPE: 269.11676


  1%|          | 11/1000 [00:40<1:09:21,  4.21s/it]

Epoch 1 Loss: 2.3498895 MPJPE: 191.81694


  1%|          | 12/1000 [00:46<1:17:51,  4.73s/it]

Epoch 1 Loss: 2.893571 MPJPE: 225.42056


  1%|▏         | 13/1000 [01:04<2:24:23,  8.78s/it]

Epoch 1 Loss: 3.941758 MPJPE: 266.0924


  1%|▏         | 14/1000 [01:29<3:41:40, 13.49s/it]

Epoch 1 Loss: 3.4117608 MPJPE: 242.9956


  2%|▏         | 15/1000 [01:44<3:51:39, 14.11s/it]

Epoch 1 Loss: 2.0794775 MPJPE: 190.20354


  2%|▏         | 16/1000 [01:50<3:10:53, 11.64s/it]

Epoch 1 Loss: 2.6799998 MPJPE: 207.78168


  2%|▏         | 17/1000 [01:55<2:36:03,  9.53s/it]

Epoch 1 Loss: 2.671225 MPJPE: 209.53691


  2%|▏         | 18/1000 [01:58<2:06:58,  7.76s/it]

Epoch 1 Loss: 3.1262245 MPJPE: 235.18875


  2%|▏         | 19/1000 [02:01<1:43:57,  6.36s/it]

Epoch 1 Loss: 2.3591743 MPJPE: 198.76404


  2%|▏         | 20/1000 [02:05<1:28:52,  5.44s/it]

Epoch 1 Loss: 2.8509505 MPJPE: 215.85239


  2%|▏         | 21/1000 [02:08<1:16:40,  4.70s/it]

Epoch 1 Loss: 3.3693624 MPJPE: 246.12758


  2%|▏         | 22/1000 [02:10<1:06:49,  4.10s/it]

Epoch 1 Loss: 1.7109208 MPJPE: 171.1846


  2%|▏         | 23/1000 [02:13<1:00:46,  3.73s/it]

Epoch 1 Loss: 3.1650271 MPJPE: 236.66034


  2%|▏         | 24/1000 [02:16<55:24,  3.41s/it]  

Epoch 1 Loss: 3.677926 MPJPE: 250.43774


  2%|▎         | 25/1000 [02:19<51:58,  3.20s/it]

Epoch 1 Loss: 1.5503982 MPJPE: 153.58496


  3%|▎         | 26/1000 [02:21<48:29,  2.99s/it]

Epoch 1 Loss: 2.8482537 MPJPE: 219.43744


  3%|▎         | 27/1000 [02:24<46:20,  2.86s/it]

Epoch 1 Loss: 3.0434642 MPJPE: 230.46744


  3%|▎         | 28/1000 [02:26<43:51,  2.71s/it]

Epoch 1 Loss: 1.7502508 MPJPE: 164.85783


  3%|▎         | 29/1000 [02:29<43:03,  2.66s/it]

Epoch 1 Loss: 2.301634 MPJPE: 195.32565


  3%|▎         | 30/1000 [02:31<41:23,  2.56s/it]

Epoch 1 Loss: 2.1816885 MPJPE: 184.93019


  3%|▎         | 31/1000 [02:34<41:48,  2.59s/it]

Epoch 1 Loss: 2.5034242 MPJPE: 203.92812


  3%|▎         | 32/1000 [02:36<41:25,  2.57s/it]

Epoch 1 Loss: 2.117117 MPJPE: 179.42043


  3%|▎         | 33/1000 [02:39<41:36,  2.58s/it]

Epoch 1 Loss: 2.499815 MPJPE: 205.32358


  3%|▎         | 34/1000 [02:41<41:11,  2.56s/it]

Epoch 1 Loss: 1.2517272 MPJPE: 139.03574


  4%|▎         | 35/1000 [02:44<43:07,  2.68s/it]

Epoch 1 Loss: 2.6301804 MPJPE: 205.03867


  4%|▎         | 36/1000 [02:47<44:12,  2.75s/it]

Epoch 1 Loss: 2.1123476 MPJPE: 186.73523


  4%|▎         | 37/1000 [02:50<46:21,  2.89s/it]

Epoch 1 Loss: 2.120538 MPJPE: 188.71355


  4%|▍         | 38/1000 [02:54<47:50,  2.98s/it]

Epoch 1 Loss: 1.2304196 MPJPE: 143.63028


  4%|▍         | 39/1000 [02:57<50:00,  3.12s/it]

Epoch 1 Loss: 1.4604802 MPJPE: 152.61662


  4%|▍         | 40/1000 [03:00<50:47,  3.17s/it]

Epoch 1 Loss: 1.6147171 MPJPE: 161.78653


  4%|▍         | 41/1000 [03:04<51:58,  3.25s/it]

Epoch 1 Loss: 2.0955124 MPJPE: 180.12448


  4%|▍         | 42/1000 [03:07<53:09,  3.33s/it]

Epoch 1 Loss: 2.005729 MPJPE: 173.23203


  4%|▍         | 43/1000 [03:11<57:01,  3.57s/it]

Epoch 1 Loss: 1.7459724 MPJPE: 165.9908


  4%|▍         | 44/1000 [03:16<1:02:27,  3.92s/it]

Epoch 1 Loss: 1.7833881 MPJPE: 164.0035


  4%|▍         | 45/1000 [03:21<1:05:59,  4.15s/it]

Epoch 1 Loss: 1.5257199 MPJPE: 158.62445


  5%|▍         | 46/1000 [03:25<1:07:57,  4.27s/it]

Epoch 1 Loss: 1.8489678 MPJPE: 180.46379


  5%|▍         | 47/1000 [03:30<1:10:49,  4.46s/it]

Epoch 1 Loss: 2.0877414 MPJPE: 185.3549


  5%|▍         | 48/1000 [03:35<1:12:19,  4.56s/it]

Epoch 1 Loss: 1.9664807 MPJPE: 179.97147


  5%|▍         | 49/1000 [03:40<1:14:31,  4.70s/it]

Epoch 1 Loss: 1.2522106 MPJPE: 143.53726


  5%|▌         | 50/1000 [03:45<1:15:50,  4.79s/it]

Epoch 1 Loss: 1.7660236 MPJPE: 173.63705


  5%|▌         | 51/1000 [03:50<1:17:06,  4.88s/it]

Epoch 1 Loss: 2.2320693 MPJPE: 182.21681


  5%|▌         | 52/1000 [03:55<1:17:17,  4.89s/it]

Epoch 1 Loss: 0.98329985 MPJPE: 128.51553


  5%|▌         | 53/1000 [03:59<1:13:56,  4.68s/it]

Epoch 1 Loss: 1.9683518 MPJPE: 177.83575


  5%|▌         | 54/1000 [04:03<1:09:40,  4.42s/it]

Epoch 1 Loss: 1.801974 MPJPE: 174.96503


  6%|▌         | 55/1000 [04:06<1:04:47,  4.11s/it]

Epoch 1 Loss: 1.7356324 MPJPE: 158.95682


  6%|▌         | 56/1000 [04:10<1:02:31,  3.97s/it]