In [61]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, Reshape, Conv2DTranspose, Activation
from tensorflow.keras import Model, backend as K
import numpy as np

In [28]:

INPUT_DIM = 9018


In [46]:
feature_description = {
    'rating': tf.io.VarLenFeature(tf.int64),
    'mask': tf.io.VarLenFeature(tf.int64)
}

def parse_example(example_proto):
    parsed = tf.io.parse_single_example(example_proto, feature_description)
    ratings = tf.sparse.to_indicator(parsed['rating'], INPUT_DIM)
    X = (ratings, tf.sparse.to_indicator(parsed['mask'], INPUT_DIM), ratings)
    return X, ratings

In [47]:
filenames = ["train.tfrecord"]
dataset = tf.data.TFRecordDataset(filenames).map(parse_example)
dataset = dataset.batch(32)
dataset = dataset.repeat()
dataset = dataset.shuffle(1000)


In [31]:

latent_dim = 64
inputs = Input(shape=(INPUT_DIM,))
inputs_noisy = Input(shape=(INPUT_DIM,))

inputs_mask = Input(shape=(INPUT_DIM,))
x = inputs_noisy
x = Dense(64, activation='relu')(x)
latent = Dense(latent_dim, name='latent_vector', activation='relu')(x)

encoder = Model(inputs_noisy, latent, name='encoder')
encoder.summary()

latent_inputs = Input(shape=(latent_dim,), name='decoder_input')
x = latent_inputs
x = Dense(64, activation='relu')(x)

outputs = Dense(INPUT_DIM, name='decoder_output', activation='sigmoid')(x)
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()


def custom_mse(x_noisy, x_estimated):
  num_ratings = tf.reduce_sum(inputs_mask, 1)
  num_ratings = tf.where(tf.math.equal(num_ratings, 0), tf.ones_like(num_ratings), num_ratings)
  x_noisy = tf.cast(x_noisy, dtype=tf.dtypes.float32)
  x_estimated = tf.cast(x_estimated, dtype=tf.dtypes.float32)
  
  losses = tf.math.square(x_estimated - inputs)
  return tf.reduce_sum(tf.multiply(losses, inputs_mask), 1) /  num_ratings

def augumented_loss(actual, estimated):
  noisy = inputs_noisy
  mask = inputs_mask
  alpha = 0.0
  beta = 0.5
  gamma = 0.5
  num_ratings = tf.reduce_sum(inputs_mask)
  error_constraint = alpha * tf.reduce_sum(estimated * actual)
  novelty_constraint = beta * tf.reduce_sum(estimated * (tf.reduce_sum(estimated, 0) / tf.cast(tf.shape(estimated)[0], tf.float32)))
  diversity_constraint = gamma * tf.reduce_sum(estimated * noisy)
  
  return tf.math.square(tf.norm((estimated -actual) * mask)) + (novelty_constraint + diversity_constraint)

mse_model = Model(inputs=[inputs,inputs_mask,inputs_noisy], outputs=decoder(encoder(inputs_noisy)), name='autoencoder')
mse_model.compile(optimizer='adam', loss=augumented_loss, metrics=['accuracy', 'mse'])
mse_model.summary()



Model: "encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         [(None, 9018)]            0         
_________________________________________________________________
dense_4 (Dense)              (None, 64)                577216    
_________________________________________________________________
latent_vector (Dense)        (None, 64)                4160      
Total params: 581,376
Trainable params: 581,376
Non-trainable params: 0
_________________________________________________________________
Model: "decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
decoder_input (InputLayer)   [(None, 64)]              0         
_________________________________________________________________
dense_5 (Dense)              (None, 64)                4160      
____________________________________

In [49]:

mse_model.fit(dataset, epochs=20, steps_per_epoch=19)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f6dbc13e668>

In [74]:
def calculateMSE(predictions, actual, mask):
  return tf.reduce_sum(tf.math.square(predictions - tf.cast(actual, tf.float32)) * tf.cast(mask, tf.float32)) / tf.reduce_sum(tf.cast(mask, tf.float32))
  
  
def calculatePrecision(predictions, actual, mask):
#   tp / (tp + fp)
  pred = predictions > 0.5
  tp = tf.math.reduce_sum(tf.cast(pred & actual & mask, tf.float32))
  fp = tf.math.reduce_sum(tf.cast(pred & (~actual) & mask, tf.float32))
  return tp / (tp + fp)

def calculateRecall(predictions, actual, mask):
#   tp / (tp + fn)
  pred = predictions > 0.5
  tp = tf.math.reduce_sum(tf.cast(pred & actual & mask, tf.float32))
  fn = tf.math.reduce_sum(tf.cast(~pred & actual & mask, tf.float32))
  return tp / (tp + fn)


for test in dataset.take(10):
    predictions = mse_model.predict(test[0])
    actual = test[1]
    mask = test[0][1]
    error = calculateMSE(predictions, actual, mask)
    precision = calculatePrecision(predictions, actual, mask)
    recall = calculateRecall(predictions, actual, mask)
    print("MSE", error.numpy(), "Precision:", precision.numpy(), "Recall:", recall.numpy())
    

MSE 0.5951485 Precision: 0.9883914 Recall: 0.12571187
MSE 0.5552211 Precision: 0.9814433 Recall: 0.16975749
MSE 0.5982419 Precision: 0.98630136 Recall: 0.1434263
MSE 0.5982419 Precision: 0.98630136 Recall: 0.1434263
MSE 0.5744253 Precision: 0.91780823 Recall: 0.10374729
MSE 0.5744253 Precision: 0.91780823 Recall: 0.10374729
MSE 0.58425844 Precision: 0.9760349 Recall: 0.12666102
MSE 0.5951485 Precision: 0.9883914 Recall: 0.12571187
MSE 0.5982419 Precision: 0.98630136 Recall: 0.1434263
MSE 0.5552211 Precision: 0.9814433 Recall: 0.16975749
