
Commit fa0ec47: add kernel_type in pnn2
Atomu2014 committed Jun 19, 2018
1 parent 1c2f36f commit fa0ec47
Showing 2 changed files with 34 additions and 21 deletions.
7 changes: 5 additions & 2 deletions README.md
@@ -52,6 +52,9 @@ Since there are $n(n-1)/2$ feature interactions, we propose some tricks to reduc
However, we find these tricks restrict model capacity and are unnecessary.
In a recent update of the code, we removed the tricks for better performance.

+In our implementation, we add the parameter ``kernel_type: {mat, vec, num}`` for the outer product.
+The default type is ``mat``; you can switch to the other types to save time and memory.

A potential risk arises in training the first hidden layer. Feature embeddings and interactions are concatenated and fed to the first hidden layer, but the embeddings and the interactions follow different distributions. A simple remedy is to add a linear transformation to the embeddings to balance the two distributions. ``Layer norm`` is also worth trying.
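The balancing idea in this paragraph can be sketched in isolation. The following is a numpy illustration of layer normalization over the concatenated input, not the repository's implementation; all sizes and scales are illustrative:

```python
import numpy as np

def layer_norm(x, eps=1e-6):
    # Normalize each row to zero mean and unit variance (no learned gain/bias).
    mean = x.mean(-1, keepdims=True)
    std = x.std(-1, keepdims=True)
    return (x - mean) / (std + eps)

rng = np.random.default_rng(0)
embeds = rng.normal(0.0, 1.0, size=(4, 30))    # embeddings: roughly unit scale
inters = rng.normal(0.0, 5.0, size=(4, 10))    # interactions: much wider spread
node_in = np.concatenate([embeds, inters], axis=-1)  # input to the first hidden layer

normed = layer_norm(node_in)
print(normed.mean(-1))  # ~0 for every row
```

After normalization, every row of the concatenated vector has comparable scale, so the embedding and interaction parts no longer pull the first hidden layer in different directions.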

## How to Use
@@ -73,8 +76,8 @@ As for dataset, we build a repository on github serving as a benchmark in our La
This repository contains detailed data processing, feature engineering,
data storage/buffering/access and other implementations.
For better I/O performance, this benchmark provides hdf5 APIs.
-Currently we provide download links of two large scale ad-click datasets (already processed),
-iPinYou and Criteo. Movielens, Netflix, and Yahoo Music will be updated later.
+Currently we provide download links for 4 large-scale ad-click datasets (already processed):
+Criteo-8day, Avazu, iPinYou-all, and Criteo Challenge. More datasets will be added later.

This code was originally written in Python 2.7; numpy, scipy, and tensorflow are required.
In a recent update, we made it compatible with Python 3.x.
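The $n(n-1)/2$ interaction count quoted in the README hunk above grows quadratically with the number of fields, which is what makes a cheaper ``kernel_type`` worthwhile. A quick sketch (the field counts below are illustrative):

```python
from itertools import combinations

def num_pairs(num_inputs):
    # Distinct field pairs: n * (n - 1) / 2, as computed in PNN2.__init__.
    return num_inputs * (num_inputs - 1) // 2

# Cross-check the closed form against explicit pair enumeration.
for n in (2, 10, 39):
    assert num_pairs(n) == len(list(combinations(range(n), 2)))

print(num_pairs(39))  # 741 pairs for a 39-field dataset
```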
48 changes: 29 additions & 19 deletions python/models.py
@@ -402,15 +402,20 @@ def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts
class PNN2(Model):
def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts=None, drop_out=None,
embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd', learning_rate=1e-2, random_seed=None,
-                 layer_norm=True):
+                 layer_norm=True, kernel_type='mat'):
Model.__init__(self)
init_vars = []
num_inputs = len(field_sizes)
for i in range(num_inputs):
init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
num_pairs = int(num_inputs * (num_inputs - 1) / 2)
node_in = num_inputs * embed_size + num_pairs
-        init_vars.append(('kernel', [embed_size, num_pairs, embed_size], 'xavier', dtype))
+        if kernel_type == 'mat':
+            init_vars.append(('kernel', [embed_size, num_pairs, embed_size], 'xavier', dtype))
+        elif kernel_type == 'vec':
+            init_vars.append(('kernel', [num_pairs, embed_size], 'xavier', dtype))
+        elif kernel_type == 'num':
+            init_vars.append(('kernel', [num_pairs, 1], 'xavier', dtype))
for i in range(len(layer_sizes)):
init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype))
init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype))
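The three branches above register kernels of sharply different sizes, which is where the time and memory savings come from. A minimal numpy sketch of just the shapes (the sizes below are illustrative; only the shape tuples mirror the ``init_vars`` entries):

```python
import numpy as np

num_inputs, embed_size = 5, 10
num_pairs = num_inputs * (num_inputs - 1) // 2   # 10 pairs

# One zero-filled placeholder per kernel_type, shaped as in init_vars.
kernels = {
    'mat': np.zeros((embed_size, num_pairs, embed_size)),  # k * p * k
    'vec': np.zeros((num_pairs, embed_size)),              # p * k
    'num': np.zeros((num_pairs, 1)),                       # p * 1
}

for name, kernel in kernels.items():
    print(name, kernel.shape, kernel.size)
# 'mat' holds k times more parameters than 'vec', and 'vec' k times more than 'num'.
```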
@@ -455,26 +460,31 @@ def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts
        p = tf.reshape(p, [-1, num_pairs, embed_size])
        # b * p * k
        q = tf.reshape(q, [-1, num_pairs, embed_size])
        # k * p * k
        k = self.vars['kernel']

-        # batch * 1 * pair * k
-        p = tf.expand_dims(p, 1)
-        # batch * pair
-        kp = tf.reduce_sum(
-            # batch * pair * k
-            tf.multiply(
-                tf.transpose(
-                    # batch * k * pair
-                    tf.reduce_sum(
-                        # batch * k * pair * k
-                        tf.multiply(
-                            p, k),
-                        -1),
-                    [0, 2, 1]),
-                q),
-            -1)
+        if kernel_type == 'mat':
+            # batch * 1 * pair * k
+            p = tf.expand_dims(p, 1)
+            # batch * pair
+            kp = tf.reduce_sum(
+                # batch * pair * k
+                tf.multiply(
+                    # batch * pair * k
+                    tf.transpose(
+                        # batch * k * pair
+                        tf.reduce_sum(
+                            # batch * k * pair * k
+                            tf.multiply(
+                                p, k),
+                            -1),
+                        [0, 2, 1]),
+                    q),
+                -1)
+        else:
+            # 1 * pair * (k or 1)
+            k = tf.expand_dims(k, 0)
+            # batch * pair
+            kp = tf.reduce_sum(p * q * k, -1)

#
# if layer_norm:
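The nested `transpose`/`reduce_sum` chain in the `mat` branch computes, for each feature pair $i$, a bilinear form $q_i^\top W_i p_i$ where $W_i$ is a slice of the kernel. The following is a numpy re-implementation (not the repository's code; sizes are illustrative) showing that the chain agrees with a direct einsum, and that the `vec`/`num` branches collapse to a weighted inner product:

```python
import numpy as np

rng = np.random.default_rng(0)
batch, num_pairs, embed_size = 4, 6, 5
p = rng.normal(size=(batch, num_pairs, embed_size))            # b * p * k
q = rng.normal(size=(batch, num_pairs, embed_size))            # b * p * k
kernel = rng.normal(size=(embed_size, num_pairs, embed_size))  # k * p * k

# Step-by-step numpy translation of the 'mat' branch.
pe = p[:, None, :, :]                                 # batch * 1 * pair * k
s = (pe * kernel).sum(-1)                             # batch * k * pair
kp_nested = (np.transpose(s, (0, 2, 1)) * q).sum(-1)  # batch * pair

# Same result as one einsum: kp[b, i] = sum_{a, j} q[b,i,a] * K[a,i,j] * p[b,i,j]
kp_einsum = np.einsum('bia,aij,bij->bi', q, kernel, p)
assert np.allclose(kp_nested, kp_einsum)

# The 'vec' and 'num' branches reduce to a weighted inner product per pair.
k_vec = rng.normal(size=(num_pairs, embed_size))      # p * k  (vec case)
kp_vec = (p * q * k_vec[None]).sum(-1)                # batch * pair
```

Seen this way, `mat` learns a full matrix per pair, while `vec` and `num` restrict it to a diagonal (per-dimension or scalar) weighting, which is the capacity/cost trade-off behind `kernel_type`.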
