In [1]:
import os
import numpy as np


import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.losses import binary_crossentropy

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler



In [2]:
# Shared min-max scaler instance; create_dataset() refits it on every single
# feature vector, so it never carries statistics from one sample to the next.
scaler = MinMaxScaler(feature_range=(0, 1))

def create_dataset(main_path):
    """Load per-video visual and textual features from a class-per-folder tree.

    ``main_path`` is expected to contain one sub-directory per class, each
    holding one sub-directory per video with the files ``DenseNet_201.npz``
    (array key ``'DenseNet_201'``) and ``Elmo_Mean.npz`` (array key ``'Elmo'``).

    Each feature array is flattened into a single column and min-max scaled on
    its own with the module-level ``scaler`` (refit for every array), so the
    scaling statistics always come from that one sample only.

    Class indices are assigned from the *sorted* class folder names, so the
    same folder name maps to the same index for every split that contains the
    same set of class folders.

    Args:
        main_path: Root directory of one data split.

    Returns:
        A tuple ``(x_v, x_t, y_categorical)``:
        ``x_v`` and ``x_t`` are lists with one ``(n_values, 1)`` array per
        video (visual and textual features respectively), and
        ``y_categorical`` is the one-hot encoding of the class indices with
        one column per class folder.

    Raises:
        ValueError: If no video folders are found under ``main_path``.
    """
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # class-name -> index mapping stable between calls. Non-directory entries
    # (stray files) are skipped instead of crashing the walk.
    classes = sorted(
        entry for entry in os.listdir(main_path)
        if os.path.isdir(os.path.join(main_path, entry))
    )

    x_v = []
    x_t = []
    y = []

    for idx, name in enumerate(classes):
        class_path = os.path.join(main_path, name)
        class_videos = sorted(
            entry for entry in os.listdir(class_path)
            if os.path.isdir(os.path.join(class_path, entry))
        )

        for class_video in class_videos:
            class_video_path = os.path.join(class_path, class_video)

            # Author's note on other file / key pairs:
            #   Elmo_Mean.npz -> 'Elmo'
            #   InceptionResnetV2_MaxPooling.npz -> 'InceptionResnetV2'
            # np.load() on an .npz keeps the file open until it is closed, so
            # use context managers to avoid leaking one handle per video.
            with np.load(os.path.join(class_video_path, "DenseNet_201.npz")) as visual_archive:
                class_video_visual_feature = visual_archive['DenseNet_201']
            with np.load(os.path.join(class_video_path, "Elmo_Mean.npz")) as textual_archive:
                class_video_textual_feature = textual_archive['Elmo']

            # reshape(-1, 1) turns the whole vector into one "feature column",
            # so the scaler's min/max are taken over all values of this sample.
            x_v.append(scaler.fit_transform(class_video_visual_feature.reshape(-1, 1)))
            x_t.append(scaler.fit_transform(class_video_textual_feature.reshape(-1, 1)))
            y.append(idx)

    if not y:
        raise ValueError("no video feature folders found under {!r}".format(main_path))

    # Fix the one-hot width to the number of class folders so it does not
    # depend on which classes happen to contain videos in this split.
    y_categorical = to_categorical(y, num_classes=len(classes))

    return x_v, x_t, y_categorical

In [3]:
# Feature folders of the train / validation / test splits.
main_path = "C:/HolisticVideoUnderstanding/uniform_train"
val_path = "C:/HolisticVideoUnderstanding/uniform_val"
test_path = "C:/HolisticVideoUnderstanding/uniform_test"

# Load the splits in order (train, validation, test); each result is a
# (visual features, textual features, one-hot labels) triple.
train_split, val_split, test_split = [
    create_dataset(split_dir) for split_dir in (main_path, val_path, test_path)
]
x_v, x_t, y_categorical = train_split
x_v_val, x_t_val, y_categorical_val = val_split
x_v_test, x_t_test, y_categorical_test = test_split

In [4]:
# Number of samples in the validation, test and training splits (in that order).
tuple(map(len, (x_v_val, x_t_test, x_v)))

(1200, 2400, 6400)

In [5]:
# Sanity check of the per-sample scaling: min, max and mean of every test
# visual feature vector.
for visual_sample in x_v_test:
    stats = (np.min(visual_sample), np.max(visual_sample), np.mean(visual_sample))
    print(*stats)

0.0 1.0 0.056571446
0.0 0.99999994 0.041012824
0.0 1.0 0.07269867
0.0 0.99999994 0.062962174
0.0 1.0000001 0.05611865
0.0 1.0 0.03492614
0.0 1.0000001 0.056702446
0.0 1.0 0.049358606
0.0 1.0 0.052871935
0.0 0.99999994 0.07086514
0.0 0.99999994 0.049141202
0.0 1.0000001 0.06814185
0.0 1.0 0.044126496
0.0 0.99999994 0.07252855
0.0 1.0 0.04252123
0.0 1.0000001 0.051552895
0.0 0.99999994 0.04252598
0.0 1.0000001 0.082687624
0.0 0.99999994 0.06230986
0.0 0.99999994 0.06256585
0.0 0.99999994 0.07041583
0.0 0.99999994 0.06294951
0.0 0.99999994 0.057688866
0.0 0.99999994 0.09084181
0.0 1.0 0.0860213
0.0 1.0 0.042560454
0.0 0.9999999 0.046358608
0.0 1.0 0.055154
0.0 1.0000001 0.054443195
0.0 0.99999994 0.03969652
0.0 0.99999994 0.049592335
0.0 1.0000001 0.06543127
0.0 1.0 0.03817304
0.0 0.99999994 0.06635466
0.0 1.0 0.05264983
0.0 1.0 0.052789457
0.0 1.0000001 0.05808616
0.0 1.0 0.044312943
0.0 0.99999994 0.038735013
0.0 0.99999994 0.06308194
0.0 1.0 0.07137179
0.0 0.99999994 0.04753487
0.0 0.9

In [6]:
# Stack the per-sample lists of every split into NumPy arrays.
x_v, x_t, y_categorical = map(np.array, (x_v, x_t, y_categorical))
x_v_val, x_t_val, y_categorical_val = map(np.array, (x_v_val, x_t_val, y_categorical_val))
x_v_test, x_t_test, y_categorical_test = map(np.array, (x_v_test, x_t_test, y_categorical_test))

In [9]:
# Dense autoencoder for the visual feature vectors: 1920 -> 960 -> 512 -> 64 -> 512 -> 960 -> 1920.
# NOTE(review): create_dataset() produces (n, 1)-shaped samples via reshape(-1, 1), so the
# stacked inputs carry a trailing axis of size 1 that this rank-1 input does not declare;
# presumably Keras squeezes it implicitly -- confirm for the TensorFlow version in use.
inputs_v = keras.Input(shape=(1920,))
# Encoder: two ReLU layers followed by a 64-unit bottleneck with no activation (linear).
e_dense_v = layers.Dense(960, activation="relu")(inputs_v)
e_dense_v = layers.Dense(512, activation="relu")(e_dense_v)
z_v = layers.Dense(64)(e_dense_v)

# Decoder: mirrors the encoder; the sigmoid output keeps every reconstructed value in (0, 1).
d_dense_v = layers.Dense(512, activation="relu")(z_v)
d_dense_v = layers.Dense(960, activation="relu")(d_dense_v)
outputs_v = layers.Dense(1920, activation="sigmoid")(d_dense_v)
model = keras.Model(inputs=inputs_v, outputs=outputs_v, name="feature_v")

# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

Model: "feature_v"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 1920)]            0         
_________________________________________________________________
dense_6 (Dense)              (None, 960)               1844160   
_________________________________________________________________
dense_7 (Dense)              (None, 512)               492032    
_________________________________________________________________
dense_8 (Dense)              (None, 64)                32832     
_________________________________________________________________
dense_9 (Dense)              (None, 512)               33280     
_________________________________________________________________
dense_10 (Dense)             (None, 960)               492480    
_________________________________________________________________
dense_11 (Dense)             (None, 1920)              18

In [11]:
# Training configuration: binary cross-entropy reconstruction loss, RMSprop
# with its default settings, and mean absolute error as the reported metric.
compile_settings = dict(
    loss=keras.losses.BinaryCrossentropy(),
    optimizer=keras.optimizers.RMSprop(),
    metrics=["MeanAbsoluteError"],
)
model.compile(**compile_settings)

# Autoencoder training: the input doubles as the target, both for the
# training data and for the validation data.
fit_settings = dict(
    batch_size=32,
    epochs=10,
    validation_data=(x_v_val, x_v_val),
)
history = model.fit(x=x_v, y=x_v, **fit_settings)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
# Reconstruction loss and metric on the test split (input is also the target).
test_scores = model.evaluate(x=x_v_test, y=x_v_test, verbose=2)

75/75 - 0s - loss: 0.1591 - mean_absolute_error: 0.0278


In [13]:
# Reconstruct the first test sample; a leading batch axis of size 1 is added
# because predict() works on batches.
test_img = x_v_test[0]
x_test_predicted = model.predict(test_img[np.newaxis, ...])

In [14]:
# Print each original value next to its reconstruction.
for original_value, reconstructed_value in zip(test_img, x_test_predicted[0]):
    print("{} --- {}\n".format(original_value, reconstructed_value))

[3.4179193e-05] --- 8.549859558115713e-06

[0.00015691] --- 5.6421264162054285e-05

[0.00022658] --- 5.665082790073939e-05

[0.0003168] --- 0.00015693684690631926

[0.0001331] --- 3.956883301725611e-05

[3.9130085e-05] --- 1.1557238394743763e-05

[6.208317e-05] --- 1.944380892382469e-05

[8.562842e-05] --- 2.0805080566788092e-05

[1.0963103e-05] --- 7.743093192402739e-06

[0.00384446] --- 0.002076035365462303

[0.00010251] --- 2.93970024358714e-05

[0.0004157] --- 0.00015177794557530433

[2.8587436e-05] --- 1.0060097338282503e-05

[0.00038144] --- 0.00011169951176270843

[0.00028137] --- 0.0004690060450229794

[4.068649e-05] --- 1.1220707165193744e-05

[0.00021943] --- 6.802451389376074e-05

[2.2839013e-05] --- 1.110894027078757e-05

[3.373104e-05] --- 2.4253427909570746e-05

[2.220697e-05] --- 1.190304828924127e-05

[2.2502104e-06] --- 3.960412414016901e-06

[1.3822147e-05] --- 1.0405391549284104e-05

[6.0479855e-05] --- 1.9876877558999695e-05

[0.00235805] --- 0.0015629244735464454

