In [1]:
from models.afn import AFN
from models.deepfm import DeepFM 
from models.fm import FM
from models.lr import LR

In [2]:
import pandas as pd
import gc
import tensorflow as tf
import numpy as np

In [3]:
# Mini-batch size passed as `batch_size` to every model's fit() and evaluate() call below.
BATCH_SIZE = 4096

In [None]:
"""
Possible data strings:

criteo_data
avazu_data
frappe_data
movielens_data
"""

# Active dataset directory; change this to one of the data strings listed above.
data_string = "avazu_data"

# Only the column names are needed here, so read just the header row
# (nrows=0) instead of loading the entire first training shard into memory.
header = pd.read_csv(f"{data_string}/train_0.csv", nrows=0)

# Every column except the "click" label is used as an input feature.
feats = [c for c in header.columns if c != "click"]
del header

print(f"Current data directory is: {data_string}")

In [None]:
# Collect garbage and reset Keras/TensorFlow global state before building a
# fresh set of models.
gc.collect()
tf.keras.backend.clear_session()
tf.compat.v1.reset_default_graph()

models = [
    # BASELINES
    LR(num_feats=len(feats), num_bins=5500),
    FM(num_feats=len(feats), num_bins=5500, num_factors=5),
    DeepFM(num_feats=len(feats), num_bins=5500, num_factors=5, dnn_depth=2, dnn_width=5),
    # NOVEL PROPOSAL
    AFN(num_feats=len(feats), num_bins=5500, num_factors=5, hidden_size=110, hidden_size_2=110, nlayers=2),
]

# Stream the training data one shard (train_0.csv .. train_18.csv) at a time;
# every model is fit for a single epoch on each shard before moving on.
for i in range(19):
    cur = f"{data_string}/train_{i}.csv"
    print(f"Reading {cur}")
    cur_df = pd.read_csv(cur)

    # Build the feature/label arrays once per shard rather than once per
    # model: the column selection and array conversion do not depend on `m`.
    shard_x = cur_df[feats].values
    shard_y = cur_df["click"].values

    for m in models:
        m.fit(shard_x, shard_y, epochs=1, batch_size=BATCH_SIZE)

    # Drop the shard before reading the next one to keep peak memory down.
    del cur_df, shard_x, shard_y
    gc.collect()

In [26]:
# Read the five test shards (test_0.csv .. test_4.csv).
test_dfs = []

for i in range(5):
    test_dfs.append(pd.read_csv(f"{data_string}/test_{i}.csv"))

# Concatenate once, after all shards are read. Doing this inside the loop
# re-copied the growing list on every iteration and rebuilt test_x/test_y
# each time, although only the final pass was ever used.
test_df = pd.concat(test_dfs, ignore_index=True)
test_x = test_df[feats].values
test_y = test_df["click"].values

# NOTE(review): `test_dfs` still references the per-shard frames, so this only
# frees the concatenated copy; delete `test_dfs` too if nothing else needs it.
del test_df
gc.collect();  # trailing ';' keeps the notebook from echoing the return value

In [None]:
# Evaluate every model in `models` on the held-out test arrays and print
# whatever its evaluate() call returns, in the order the models were added.
for m in models:
    cur_result = m.evaluate(test_x, test_y, batch_size=BATCH_SIZE)
    print(cur_result)

In [None]:
# Print the summary of each model in `models`.
# NOTE(review): if these are tf.keras models, summary() prints the table itself
# and returns None, so the print() would also emit a stray "None" per model —
# confirm against the model classes and call m.summary() directly if so.
for m in models:
    print(m.summary())