In [None]:
import import_ipynb

from keras.callbacks import ModelCheckpoint, Callback
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import plot_model
from IPython.display import display, Image
from keras.layers import Input, Dense
from keras.models import Model
import tensorflow as tf
import numpy as np
import datetime
import dataset
import tools
import users
import time
import ads
import os

In [None]:
# Dataset root folders; this list is what gets passed to load_user_and_ad_ftrs(),
# which forwards it to ads.load_ftrs() and users.load_raw_ftrs().
root_directories = ["ADS16_Benchmark_part1", "ADS16_Benchmark_part2"]

In [None]:
def load_user_and_ad_ftrs(root_directories, try_load_files=True, save_files=True):
    """Return ad features, user features and ratings, using an on-disk cache.

    Cached arrays live in the relative ``data`` directory:
    ``ad_ftrs.npy`` for the ad features and ``user_ftrs_and_ratings.npz``
    (keys ``user_ftrs`` and ``ratings``) for the user side.

    Args:
        root_directories: Passed unchanged to ``ads.load_ftrs`` and
            ``users.load_raw_ftrs`` when a cache file is missing or ignored.
        try_load_files: When true, an existing cache file is loaded instead of
            recomputing the corresponding arrays.
        save_files: When true, freshly computed arrays are written to the
            cache files.

    Returns:
        Tuple ``(ad_ftrs, user_ftrs, ratings)``.
    """
    cache_dir = "data"
    ad_ftrs_path = os.path.join(cache_dir, "ad_ftrs.npy")
    user_ftrs_and_ratings_path = os.path.join(cache_dir, "user_ftrs_and_ratings.npz")

    if try_load_files and os.path.exists(ad_ftrs_path):
        ad_ftrs = np.load(ad_ftrs_path)
    else:
        ad_ftrs = ads.load_ftrs(root_directories)
        if save_files:
            # np.save does not create parent directories; without this the
            # first run on a clean checkout fails with FileNotFoundError.
            os.makedirs(cache_dir, exist_ok=True)
            np.save(ad_ftrs_path, ad_ftrs)

    if try_load_files and os.path.exists(user_ftrs_and_ratings_path):
        # Index the arrays while the archive is open; the context manager
        # closes the underlying file afterwards.
        with np.load(user_ftrs_and_ratings_path) as user_ftrs_and_ratings:
            user_ftrs = user_ftrs_and_ratings["user_ftrs"]
            ratings = user_ftrs_and_ratings["ratings"]
    else:
        raw_user_ftrs, ratings = users.load_raw_ftrs(root_directories)
        # Only the last value returned by calculate_pca_ftrs is used here.
        *_, user_ftrs = users.calculate_pca_ftrs(raw_user_ftrs)
        if save_files:
            os.makedirs(cache_dir, exist_ok=True)
            np.savez(user_ftrs_and_ratings_path, user_ftrs=user_ftrs, ratings=ratings)

    return ad_ftrs, user_ftrs, ratings

# Populate the notebook-level feature arrays. The __main__ guard keeps this
# from running when the notebook is imported as a module (import_ipynb is
# imported at the top of the file, presumably for that purpose).
if __name__ == "__main__":
    ad_ftrs, user_ftrs, ratings = load_user_and_ad_ftrs(root_directories)

In [None]:
def std_dev_pct_error(y_true, y_pred):
    """Signed relative gap between the spread of predictions and of targets.

    Computes ``(std(y_pred) - std(y_true)) / std(y_pred)`` over all elements.
    The value is a fraction (it is not multiplied by 100). When the standard
    deviation of the predictions is not strictly positive, the result is NaN.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of predicted values.

    Returns:
        A scalar tensor with the relative gap, or NaN as described above.
    """
    std_true = tf.math.reduce_std(y_true)
    std_pred = tf.math.reduce_std(y_pred)
    relative_gap = (std_pred - std_true) / std_pred
    # Select with tf.where rather than a Python conditional: a tensor cannot
    # be used as a Python bool in graph mode unless AutoGraph rewrites it.
    # The NaN fallback takes the ratio's dtype so both branches always match.
    undefined = tf.constant(np.nan, dtype=relative_gap.dtype)
    return tf.where(std_pred > 0, relative_gap, undefined)

def create_model():
    """Build the uncompiled user/ad interaction regression network.

    The input width is the number of user feature columns plus the number of
    ad feature columns, as returned by
    ``load_user_and_ad_ftrs(root_directories)``. The network is two
    ``Dense(5, relu)`` layers followed by a linear ``Dense(1)`` output, each
    with an ``"l2"`` kernel regularizer.

    Returns:
        A Keras ``Model`` with one input and one output.
    """
    ad_ftrs, user_ftrs, _ = load_user_and_ad_ftrs(root_directories)

    input_dim = user_ftrs.shape[1] + ad_ftrs.shape[1]
    # `shape` has to be a tuple. `(a + b)` without a trailing comma is just a
    # parenthesised int, which not every Keras version accepts as a shape.
    inputs = Input(shape=(input_dim,))

    hidden = Dense(5, activation="relu", kernel_regularizer="l2")(inputs)
    hidden = Dense(5, activation="relu", kernel_regularizer="l2")(hidden)
    output = Dense(1, kernel_regularizer="l2")(hidden)

    return Model(inputs=[inputs], outputs=[output])

if __name__ == "__main__":
    # Build and compile the network: Adam optimizer, MSE loss, and MAE plus
    # the custom std-dev metric for monitoring.
    model = create_model()
    model.compile(
        optimizer="adam",
        loss="mse",
        metrics=["mae", std_dev_pct_error],
    )
    model.summary()

    # Render the architecture to a PNG and show it inline.
    model_plot_filename = "user_ad_interaction_model.png"
    plot_model(
        model,
        to_file=model_plot_filename,
        show_shapes=True,
        show_layer_activations=True,
    )
    display(Image(model_plot_filename))
    

In [None]:
if __name__ == "__main__":
    # Model inputs: combination of user and ad features built by the tools helper.
    X = tools.ftr_cartesian_product(user_ftrs, ad_ftrs)

    # Targets: all ratings as one column, standardized, then back to 1-D.
    # The fitted scaler is kept so predictions can be mapped back later.
    ratings_normalizer = StandardScaler()
    Y = ratings_normalizer.fit_transform(ratings.reshape(-1, 1)).ravel()

    print(X.shape)
    print(Y.shape)

In [None]:
class Printer(Callback):
    """Callback that reports training progress on a single, rewritten line.

    After each epoch the epoch number and the logged values are printed
    followed by a carriage return, so the next report overwrites the previous
    one. ``last_l`` holds the length of the last printed line so that a
    shorter line can be padded with spaces to erase leftovers.
    """

    def __init__(self):
        super().__init__()
        self.last_l = 0

    def on_epoch_end(self, epoch, logs=None):
        """Print ``Epoch <n>: key=value, ...`` and return the cursor to column 0."""
        line = f"Epoch {epoch}"
        if logs is not None:
            metrics_text = ", ".join(
                f"{name}={value:.5f}" for name, value in logs.items()
            )
            line = f"{line}: {metrics_text}"

        width = len(line)
        # Blank out whatever the previous, longer line left behind.
        filler = " " * max(0, self.last_l - width)
        self.last_l = width
        print(line, end=filler + "\r")

    def on_train_end(self, logs=None):
        """Emit a newline so later output starts below the progress line."""
        print("")


if __name__ == "__main__":
    # Weights are written after every epoch (save_best_only=False); the file
    # name embeds the epoch number and the validation loss.
    checkpoints_dir = "user_ad_interaction_model_checkpoints"
    filepath = os.path.join(checkpoints_dir, "model-{epoch:06d}-{val_loss:06f}.hdf5")
    checkpoint = ModelCheckpoint(
        filepath,
        # `monitor` is the name of a single logged metric, i.e. a string,
        # not a list of names.
        monitor="val_loss",
        save_weights_only=True,
        verbose=0,
        save_best_only=False,
        mode="min",
    )

    # One TensorBoard run directory per launch, named by the start timestamp.
    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

    # Single-line console progress reporter.
    printer = Printer()

In [None]:
if __name__ == "__main__":
    batch_size = 32
    epochs = 1000

    # The checkpoint callback writes into this folder, so it must exist
    # before training starts.
    os.makedirs(checkpoints_dir, exist_ok=True)

    # Keras' own progress output is disabled (verbose=0); the Printer
    # callback reports progress instead. The last 20% of the data is held
    # out for validation.
    fit_callbacks = [tensorboard_callback, checkpoint, printer]
    try:
        model.fit(
            X,
            Y,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.2,
            shuffle=True,
            verbose=0,
            callbacks=fit_callbacks,
        )
    except KeyboardInterrupt:
        # Interrupting the run is the intended way to stop early; end the
        # cell quietly instead of showing a traceback.
        pass