In [1]:
import argparse
import keras
import numpy as np

# Reference RMSE values. They are not used by any cell visible here;
# presumably the best published score and a naive-baseline score that the
# MLP results are compared against -- TODO confirm their origin.
rmse_best, rmse_naive = 0.85, 1.13

def TrainMLP(train_x, train_y, test_x, test_y, fresh_x, fresh_y, emb_dim,
             epochs, batch_size, learning_rate, first_layer_mult):
    """Trains an MLP regressor and computes its RMSE on the given datasets.

    The network flattens each (2, emb_dim) input, applies three ReLU hidden
    layers of width first_layer_mult * emb_dim * {2, 1, 1/2} (truncated to
    int) and ends with a single linear output unit. It is trained with Adam
    on mean squared error. The model summary is printed as a side effect.

    Args:
      train_x, train_y: Inputs and targets the model is fitted on.
      test_x, test_y: Passed to `fit` as validation data and evaluated after
        training.
      fresh_x, fresh_y: Only evaluated after training.
      emb_dim: Size of the second input axis; each example has shape
        (2, emb_dim). Presumably a pair of embeddings -- confirm against the
        code that generates the datasets.
      epochs: Number of training epochs.
      batch_size: Mini-batch size used by `fit`.
      learning_rate: Learning rate of the Adam optimizer.
      first_layer_mult: Multiplier applied to all hidden-layer widths.

    Returns:
      A tuple (rmse_train, rmse_test, rmse_fresh): the square root of the
      mean-squared-error loss reported by `model.evaluate` on each dataset.
    """
    # Cast every width to int so that a fractional multiplier still produces
    # valid unit counts for Dense (previously only the last width was cast).
    layer_num_hidden = [int(first_layer_mult * emb_dim * 2),
                        int(first_layer_mult * emb_dim),
                        int(first_layer_mult * emb_dim / 2)]

    model = keras.models.Sequential()
    model.add(keras.layers.Flatten(input_shape=(2, emb_dim)))
    for hidden in layer_num_hidden:
        # as suggested in the paper
        model.add(keras.layers.Dense(hidden, activation='relu'))
    model.add(keras.layers.Dense(1, activation='linear'))
    model.summary()
    model.compile(
        loss='mean_squared_error',
        # `learning_rate` is the supported name; `lr` is a deprecated alias.
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate))  # as suggested

    # `workers` / `use_multiprocessing` are intentionally not passed: Keras
    # documents them as applying to generator / Sequence input only (targets
    # are passed separately here, so the inputs are in-memory arrays), and
    # newer Keras releases reject them.
    model.fit(
        train_x, train_y,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0,
        validation_data=(test_x, test_y))

    # Only a loss is compiled (no metrics), so evaluate() returns the scalar
    # MSE; its square root is the RMSE.
    rmse_train = np.sqrt(model.evaluate(train_x, train_y, verbose=2))
    rmse_test = np.sqrt(model.evaluate(test_x, test_y, verbose=2))
    rmse_fresh = np.sqrt(model.evaluate(fresh_x, fresh_y, verbose=2))

    return rmse_train, rmse_test, rmse_fresh

In [2]:
# Accumulates one RMSE value per result key; re-running this cell
# discards everything collected so far.
results = {}

In [3]:
# --- Location of the saved .npy datasets ---
dirr = 'datasets'

# --- Sweep axes: every combination of the three lists is trained ---
embedding_dims = [16, 32, 64, 128]        # passed to TrainMLP as emb_dim
num_users_ = [4000, 8000, 16000, 32000]   # also used as the item count
first_layer_mults = [1, 2, 4]             # scales the hidden-layer widths

# --- Training hyperparameters shared by all runs ---
learning_rate = 1e-3
batch_size = 256
epochs = 32

In [4]:
# Sweep every (embedding_dim, num_users, first_layer_mult) combination,
# train one MLP per combination and store its train/test/fresh RMSE in
# `results`. `dirr` comes from the configuration cell; it is deliberately
# not re-assigned here so that a change made there is not silently undone.
for embedding_dim in embedding_dims:
    for num_users in num_users_:
        # The dataset files are keyed with an item count equal to the user count.
        num_items = num_users
        load_spec = f'{embedding_dim}_{num_users}_{num_items}'

        # The arrays depend only on (embedding_dim, num_users), so they are
        # loaded once here instead of once per first_layer_mult.
        train_x, train_y, test_x, test_y, fresh_x, fresh_y = (
            np.load(f'{dirr}/{split}_{part}_{load_spec}.npy')
            for split in ('train', 'test', 'fresh')
            for part in ('x', 'y'))

        for first_layer_mult in first_layer_mults:
            print('#' * 100)
            print(f'\t{embedding_dim}\t{num_users}\t{first_layer_mult}')
            print()

            train_rmse, test_rmse, fresh_rmse = TrainMLP(
                train_x, train_y, test_x, test_y, fresh_x, fresh_y,
                emb_dim=embedding_dim, epochs=epochs,
                batch_size=batch_size, learning_rate=learning_rate,
                first_layer_mult=first_layer_mult)

            # Shared prefix of the three result keys for this configuration.
            key_prefix = (f'emb{embedding_dim}_num_u{num_users}'
                          f'_layer_m{first_layer_mult}_mlp')
            results[f'{key_prefix}_train_rmse'] = train_rmse
            results[f'{key_prefix}_test_rmse'] = test_rmse
            results[f'{key_prefix}_fresh_rmse'] = fresh_rmse

####################################################################################################
	16	4000	1

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 32)                1056      
_________________________________________________________________
dense_1 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 136       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 9         
Total params: 1,729
Trainable params: 1,729
Non-trainable params: 0
_________________________________________________________________
11250/1

In [5]:
# Display the collected RMSE values, keyed by configuration and dataset.
results

{'emb16_num_u4000_layer_m1_mlp_train_rmse': 0.889709604549457,
 'emb16_num_u4000_layer_m1_mlp_test_rmse': 0.9009705039498126,
 'emb16_num_u4000_layer_m1_mlp_fresh_rmse': 0.8993258852401651,
 'emb16_num_u4000_layer_m2_mlp_train_rmse': 0.8547316558980413,
 'emb16_num_u4000_layer_m2_mlp_test_rmse': 0.8762250226823906,
 'emb16_num_u4000_layer_m2_mlp_fresh_rmse': 0.8701945226619235,
 'emb16_num_u4000_layer_m4_mlp_train_rmse': 0.8329065223280918,
 'emb16_num_u4000_layer_m4_mlp_test_rmse': 0.8767599390063179,
 'emb16_num_u4000_layer_m4_mlp_fresh_rmse': 0.8747042428581561,
 'emb16_num_u8000_layer_m1_mlp_train_rmse': 0.8866623175724706,
 'emb16_num_u8000_layer_m1_mlp_test_rmse': 0.9013681132765424,
 'emb16_num_u8000_layer_m1_mlp_fresh_rmse': 0.8968451943578792,
 'emb16_num_u8000_layer_m2_mlp_train_rmse': 0.8557154678545356,
 'emb16_num_u8000_layer_m2_mlp_test_rmse': 0.8733845853387373,
 'emb16_num_u8000_layer_m2_mlp_fresh_rmse': 0.8692676237095447,
 'emb16_num_u8000_layer_m4_mlp_train_rmse': 0.

In [6]:
import pandas as pd

# One row per result key (in insertion order) with a single 'rmse' column.
df = pd.DataFrame(
    list(results.values()),
    index=list(results.keys()),
    columns=['rmse'],
)

In [7]:
# Persist the results table; the path is relative to the working directory.
df.to_parquet('mlp_results.parquet')

In [8]:
# Display the results table.
df

Unnamed: 0,rmse
emb16_num_u4000_layer_m1_mlp_train_rmse,0.889710
emb16_num_u4000_layer_m1_mlp_test_rmse,0.900971
emb16_num_u4000_layer_m1_mlp_fresh_rmse,0.899326
emb16_num_u4000_layer_m2_mlp_train_rmse,0.854732
emb16_num_u4000_layer_m2_mlp_test_rmse,0.876225
...,...
emb128_num_u32000_layer_m2_mlp_test_rmse,0.915735
emb128_num_u32000_layer_m2_mlp_fresh_rmse,0.923478
emb128_num_u32000_layer_m4_mlp_train_rmse,0.721137
emb128_num_u32000_layer_m4_mlp_test_rmse,0.946188
