In [1]:
from keras.layers import Input, Dense, Lambda
from keras.models import Model
from keras import backend as K
from keras.datasets import mnist
from keras.losses import mse, binary_crossentropy
from keras.optimizers import RMSprop
from keras import initializers
from keras import regularizers
from keras import optimizers
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
pd.options.display.max_rows = 100
pd.options.display.min_rows = 100
pd.options.display.max_columns = 100
# pd.options.display.min_columns = 100

Using TensorFlow backend.


In [2]:
batch_size = 271
original_dim = 327
latent_dim = 25
intermediate_dim = 100
epochs = 500
epsilon_std = 1
z_log_sigma_prior = np.log(0.7)

In [3]:
# encoder #
x = Input(batch_shape=(batch_size, original_dim))
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
# z_log_sigma = Dense(latent_dim)(h)
## log_z_scale


z_log_sigma_input = Input(batch_shape = (batch_size, 1))
z_log_sigma = Dense(units = 1,  activation = "linear",
                    kernel_initializer=initializers.Ones(),
                    use_bias = False)(z_log_sigma_input)
# the weight of the prior #
# z_sigma_weight = Dense(units = 1, activation = "sigmoid",\
#                       kernel_regularizer = regularizers.l2(1e-4),\
#                       kernel_initializer = initializers.Zeros(),
#                       bias_regularizer = regularizers.l1_l2(l1=1e-5, l2=1e-4),
#                       bias_initializer = initializers.Zeros())(z_sigma_input)

In [4]:
# sampling from latent space #
def sampling(args):
    z_mean, z_log_sigma = args
    epsilon = K.random_normal(shape=(batch_size, latent_dim))
    return z_mean + K.exp(z_log_sigma) * epsilon

# note that "output_shape" isn't necessary with the TensorFlow backend
# so you could write `Lambda(sampling)([z_mean, z_log_sigma])`
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])
# z = Lambda(sampling)([z_mean, z_log_sigma])

In [5]:
# decoder #
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)

In [6]:
# end-to-end autoencoder
vae = Model([x, z_log_sigma_input], x_decoded_mean)

# encoder, from inputs to latent space
encoder_z_mean = Model([x, z_log_sigma_input], z_mean)
encoder_z_log_sigma = Model([x, z_log_sigma_input], z_log_sigma)

# generator, from latent space to reconstructed inputs
decoder_input = Input(shape=(latent_dim,))
_h_decoded = decoder_h(decoder_input)
_x_decoded_mean = decoder_mean(_h_decoded)
generator = Model(decoder_input, _x_decoded_mean)

In [7]:
def vae_loss(x, x_decoded_mean):
    xent_loss = original_dim * binary_crossentropy(x, x_decoded_mean, label_smoothing=0.1)
    kl_loss = - 0.5 * K.mean(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=-1)
    return xent_loss + kl_loss

In [8]:
def recon_metric(x, x_decoded_mean):
    xent_loss = original_dim * binary_crossentropy(x, x_decoded_mean, label_smoothing=0.1)
    return xent_loss

In [9]:
# vae.summary()

In [10]:
opt = optimizers.RMSprop(learning_rate=0.0005)
# opt = optimizers.Adam(learning_rate=0.001)
vae.compile(optimizer=opt, loss=vae_loss, metrics = [recon_metric])

In [11]:
movie = pd.read_csv('movie_analysis/movie.csv')
cast = movie.iloc[:,26:].to_numpy()
log_sigma_input = np.full((cast.shape[0], 1), z_log_sigma_prior)

In [12]:
import copy
test_manip = np.empty(shape=(327*2, 327))
idx = np.random.choice(a=list(range(2439)), size=327*2, replace=False)
for i in range(327):
    t1 = copy.deepcopy(cast[idx[2*i]])
    t1[i] = 1
    test_manip[(2*i)] = t1
    t0 = copy.deepcopy(cast[idx[2*i+1]])
    t0[i] = 0   
    test_manip[(2*i+1)] = t0
test_manip = test_manip[np.random.choice(a=list(range(327*2)), size=271*2, replace=False)]

In [13]:
from sklearn.model_selection import train_test_split
cast_train, test_obs = train_test_split(cast,train_size=1897,test_size = 542)
cast_test = np.concatenate((test_obs, test_manip), axis = 0)
log_sigma_input_train = np.full((cast_train.shape[0], 1), z_log_sigma_prior)
log_sigma_input_test = np.full((cast_test.shape[0], 1), z_log_sigma_prior)

In [14]:
# callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)
from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor='val_loss', patience=3)

In [15]:
# fitting VAE #
# vae.fit([cast,log_sigma_input], cast,
#         shuffle=True,
#         epochs=epochs,
#         batch_size=batch_size)

# fitting VAE #
history = vae.fit([cast_train,log_sigma_input_train], cast_train,
        shuffle=True,
        epochs=epochs,
        verbose = 0,
        batch_size=batch_size,
        validation_data=([cast_test, log_sigma_input_test], cast_test),
        callbacks = [callback])

Train on 1897 samples, validate on 1084 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500

Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 

Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 

Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 

In [16]:
# training #
print(min(history.history['loss']))
# print(min(history.history['recon_metric']))
# validation #
# print(min(history.history['val_loss']))
# print(min(history.history['val_recon_metric']))

70.13056400844029
69.52062
71.43329620361328
70.88510131835938
