In [1]:
from models.networks import AE, LSTM_predictor
import tensorflow as tf
import numpy as np
from random import randint
from utils.generators import video2tensor, BatchGenerator
import pandas as pd
from glob import glob


In [2]:
AE = AE(latent_dim=16, size=(64, 64))
LSTM_predictor = LSTM_predictor(hidden_size=128)


AE_optimizer = tf.keras.optimizers.Adam(1e-4)
LSTM_optimizer = tf.keras.optimizers.Adam(1e-4)

bce = tf.keras.losses.BinaryCrossentropy()


In [3]:
def train_step(AE, AE_optimizer, LSTM_predictor, LSTM_optimizer, x, label):


    with tf.GradientTape(persistent=True) as tape:
        z = AE.encode(x)
        reconstructed_x = AE.decode(z)
        features = tf.expand_dims(z, axis=0)
        prediction = LSTM_predictor.predict(features)
        encoder_loss = bce(x, reconstructed_x)
        print(encoder_loss)
        predictor_loss = bce(label, prediction)

    AE_gradients = tape.gradient(encoder_loss, AE.trainable_variables)
    AE_optimizer.apply_gradients(zip(AE_gradients, AE.trainable_variables))
    
    LSTM_gradients = tape.gradient(predictor_loss, LSTM_predictor.trainable_variables)
    LSTM_optimizer.apply_gradients(zip(LSTM_gradients, LSTM_predictor.trainable_variables))
    
    return encoder_loss, predictor_loss


In [4]:
df = pd.read_csv("../res/data/train_metadata.csv")
df.set_index('filename', inplace=True)
print(df.head())
mp4files = glob("../res/data/micro/*.mp4")
#print(df.head())
#print(mp4files)
labels = [df.loc[f.split("/")[-1], 'crowd_score'] for f in mp4files]
labels = [int(round(l)) for l in labels]
gen = BatchGenerator(mp4files, labels, size=(64, 64))


url project_id  \
filename                                                                   
100000.mp4  s3://drivendata-competition-clog-loss/train/10...          M   
100001.mp4  s3://drivendata-competition-clog-loss/train/10...          F   
100002.mp4  s3://drivendata-competition-clog-loss/train/10...          H   
100003.mp4  s3://drivendata-competition-clog-loss/train/10...          E   
100004.mp4  s3://drivendata-competition-clog-loss/train/10...          C   

            num_frames  crowd_score  tier1  micro   nano  
filename                                                  
100000.mp4          54     0.000000   True  False  False  
100001.mp4          48     0.022769  False  False  False  
100002.mp4         122     0.000000   True  False  False  
100003.mp4          55     0.000000   True  False  False  
100004.mp4          56     0.000000   True  False  False  


In [5]:
for b in gen:
    x = b[0]
    print(type(x))
    label = b[1]
    print(type(label))
    enc_loss, pred_loss = train_step(AE, AE_optimizer, LSTM_predictor, LSTM_optimizer, x, label)

<class 'numpy.ndarray'>
<class 'int'>
tf.Tensor(1.1448157, shape=(), dtype=float32)
(<tf.Tensor: id=668, shape=(), dtype=float32, numpy=1.1448157>, <tf.Tensor: id=709, shape=(), dtype=float32, numpy=0.7011006>)


In [7]:
x.shape

(73, 256, 256, 1)