In [1]:
import tensorflow as tf
%matplotlib notebook
%matplotlib inline
import os
import time
import numpy as np
import glob
import matplotlib.pyplot as plt
import PIL
import imageio
import cv2
import datetime
import random
from tensorflow.keras.layers import Dense, GRU, TimeDistributed, BatchNormalization, InputLayer

from IPython import display

from tqdm.notebook import tqdm, trange
import seaborn as sns
# sns.set()
tf.keras.backend.floatx()

latent_dim = 50
batch_size = 16
sequence_len = 30 * 3
image_shape = [128, 128, 3]

cvae_max_batch = 64

In [2]:
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.InteractiveSession(config=config)

In [3]:
from convolutional_vae import CVAE
cvae = CVAE()
cvae.load_weights('models/cvae_lat50.chkpt')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f705417bd10>

In [4]:
def video_generator(video_path):
    while True:
        vidcap = cv2.VideoCapture(video_path)
        success, img = vidcap.read()

        while success:
            yield cv2.resize(img, (128, 128)).astype(np.float32) / 255
            success, img = vidcap.read()

def sequences_generator(video_gen, sequence_len):
    sample = next(video_gen)
    while True:
        sequence = np.zeros((sequence_len, *sample.shape))
        for i in range(sequence_len):
            sequence[i] = next(video_gen)
        yield sequence
            
def batches_generator(batch_size, sequences_gen, buffer_size=64):
    buffer = []
    while True:
        while len(buffer) < buffer_size:
            buffer.append(next(sequences_gen))
        random.shuffle(buffer)
        batch, buffer = buffer[:batch_size], buffer[batch_size:]
        
        # Feed to cvae in small batches (can't fit all sequence in gpu)
        batch = np.array(batch).reshape([-1, *image_shape])
        splits = np.array_split(batch, np.ceil(len(batch) / cvae_max_batch))
        final_batch = []
        for subbatch in splits:
            subbatch = cvae.encode(subbatch)[0]
            final_batch.append(subbatch)
            
        final_batch = np.concatenate(final_batch)
        yield final_batch.reshape(-1, sequence_len, latent_dim)
#         yield np.random.random(size=[batch_size, sequence_len, latent_dim]).astype(np.float32)
        

train_gen = batches_generator(batch_size, 
                              sequences_generator(video_generator('data/train.mp4'),
                                                  sequence_len),
                              buffer_size=128)
test_gen = batches_generator(batch_size, 
                             sequences_generator(video_generator('data/test.mp4'),
                                                 sequence_len))

In [5]:
next(train_gen).shape



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



(16, 90, 50)

In [6]:
if False:
    %matplotlib notebook
    %matplotlib notebook

    import matplotlib.pyplot as plt
    from matplotlib.animation import FuncAnimation

    def roll_demo():
        batch = next(train_gen)
        for entry in batch:
            for frame in entry:
                yield cvae.decode(frame[np.newaxis], apply_sigmoid=True)[0]

    demo_gen = roll_demo()
    im = plt.imshow(next(demo_gen))

    def update(frame):
        plt.title(frame)
        im.set_array(next(demo_gen))
        return [im]

    anim = FuncAnimation(plt.gcf(), update, frames=10000, interval=10, blit=True)

In [7]:
class MDN_RNN(tf.keras.Model):
    def __init__(self, n_mixtures, out_dims):
        super(MDN_RNN, self).__init__()
        
        self.n_mixtures = n_mixtures
        self.out_dims = out_dims

        self.rnn = tf.keras.Sequential(
            [
                InputLayer(input_shape=(None, latent_dim), dtype=tf.float32),
                Dense(64, activation='relu'),
                BatchNormalization(),
                Dense(128, activation='relu'),
                BatchNormalization(),
                GRU(1024, return_sequences=True),
                BatchNormalization(),
                Dense(128, activation='relu'),
                BatchNormalization(),
                Dense(3 * n_mixtures * out_dims)
            ])
        
    def predict_distribution(self, x):
        ''' 
            x.shape = [batch_size, seq_length, latent_dim]
        '''
        x = self.rnn(x)
        
        mean, log_std, alpha = tf.split(x, 3, axis=2)
        mean = tf.reshape(mean, [-1, self.n_mixtures])
        log_std = tf.reshape(log_std, [-1, self.n_mixtures])
        alpha = tf.reshape(alpha, [-1, self.n_mixtures])
        alpha = tf.nn.softmax(alpha, axis=1)

        return mean, log_std, alpha
    
    def sample(self, x, temperature=1.0, verbose=False):
        # Predicting distribution
        mean, log_std, alpha = self.predict_distribution(x)
        std = tf.exp(log_std) * np.sqrt(temperature)
#         print(alpha)
#         alpha = alpha / temperature
#         alpha -= tf.reduce_max(alpha)
#         alpha = tf.nn.softmax(alpha, axis=-1)
#         print(alpha)
        
        seq_length = x.shape[1]
        
        # Picking component
        rnd = tf.random.uniform(shape=[x.shape[0] * self.out_dims * seq_length, 1], maxval=1)
        rnd = tf.repeat(rnd, self.n_mixtures, axis=-1)
        pdf = tf.cumsum(alpha, axis=-1)
        component_idx = np.argmax(pdf > rnd, axis=-1)
        
        # Gathering std and mean
        idx_flattened = tf.range(0, mean.shape[0]) * mean.shape[1] + component_idx
        component_std = tf.gather(tf.reshape(std, [-1]), idx_flattened)
        component_mean = tf.gather(tf.reshape(mean, [-1]), idx_flattened)

        # Sampling
        samples = tf.random.normal(shape=[x.shape[0] * self.out_dims * seq_length])
        samples = samples * component_std + component_mean
        samples = tf.reshape(samples, [-1, seq_length, self.out_dims])
        
        return samples
    
    def call(self, x):
        return self.sample(x)

In [8]:
def gaussian_pdf(x, mu, sigmasq):
    return (1 / tf.sqrt(2 * np.pi * sigmasq)) * tf.exp((-1 / (2 * sigmasq)) * (x - mu) ** 2)

def loss_fn(model, x, y_true):
    mean, log_std, alpha = model.predict_distribution(x)
    std = tf.exp(log_std) + 0.00001
    y_true = tf.reshape(y_true, [-1])

    prob_sum = 0
    for i in range(model.n_mixtures):
        prob = gaussian_pdf(y_true, mean[:, i], std[:, i])
        prob_sum += prob * alpha[:, i]

    log_prob_sum = tf.reduce_mean(-tf.math.log(prob_sum + 0.00001))
    return log_prob_sum

In [9]:
def train(model, train_gen, iterations):
#     @tf.function
    def gradient_step(model, x, y_true, optimizer):
        with tf.GradientTape() as tape:
            loss = loss_fn(model, x, y_true)

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        return loss

#     @tf.function
    def fit(model, x, y, optimizer):
        x, y = tf.identity(x), tf.identity(y)
        idx = tf.random.shuffle(tf.range(len(x)))
        x = tf.gather(x, idx)
        y = tf.gather(y, idx)
        for j in range(len(x) // batch_size):
            loss = gradient_step(model, 
                                 x[j * batch_size: (j + 1) * batch_size], 
                                 y[j * batch_size: (j + 1) * batch_size], 
                                 optimizer)
        return loss
    
    optimizer = tf.keras.optimizers.Adam(1e-3)
    t = trange(iterations)
    for i in t:
        batch = next(train_gen)
        x, y = batch[:, :-1], batch[:, 1:]
        loss = fit(model, x, y, optimizer)
        history.append(loss.numpy())
        t.set_description('%.2f' % loss.numpy())

In [10]:
n_mixtures = 10
history = []
model = MDN_RNN(n_mixtures=n_mixtures, out_dims=latent_dim)

In [11]:
train(model, train_gen, 10000)

HBox(children=(FloatProgress(value=0.0, max=10000.0), HTML(value='')))

KeyboardInterrupt: 

In [None]:
plt.plot(np.arange(len(history)), history)

In [None]:
# model_name = f'./models/mdn-rnn_lat{latent_dim}'
# print('Saved as ', model_name)
# model.save_weights(model_name)

In [None]:
# model_name = f'./models/mdn-rnn_lat{latent_dim}'
# print('Loaded', model_name)
# model.load_weights(model_name)

In [None]:
# sns.set(style='white')

In [None]:
# %matplotlib inline
sequence = next(test_gen)[0]
result = []
for i in trange(300):
    latent = model.sample(np.expand_dims(sequence, axis=0), temperature=1)[0, -1].numpy().reshape([-1, latent_dim])
    sequence = np.concatenate([sequence[1:], latent])
    result.append(latent)


In [None]:
animation = np.array(result)[:, 0]


In [None]:
# del plt
# del FuncAnimation

In [None]:
%matplotlib notebook
%matplotlib notebook

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

def get_frame(i):
    return cvae.decode(animation[i % len(animation), np.newaxis], apply_sigmoid=True)[0]

im = plt.imshow(get_frame(0))

def update(frame):
    plt.title(frame)
    im.set_array(get_frame(frame))
    return [im]

anim = FuncAnimation(plt.gcf(), update, frames=10000, interval=100, blit=True)

In [None]:
film = np.array([get_frame(i).numpy() for i in trange(300)])
film = np.clip(film * 255, 0, 255)
# film = np.rollaxis(film, 3, 1)  

In [None]:
from moviepy.editor import ImageSequenceClip
clip = ImageSequenceClip(list(film), fps=20).resize(2.4)
clip.write_gif('mdn-rnn.gif')

In [None]:
from IPython.display import Image
Image(filename="mdn-rnn.gif")