[ModelZoo] Support Co_Action Network #344

Open · wants to merge 8 commits into base: main

Changes from 2 commits
25 changes: 25 additions & 0 deletions modelzoo/CAN/README.md
@@ -0,0 +1,25 @@
# Co-Action Network

Implementation of the paper "CAN: Revisiting Feature Co-Action for Click Through Rate Prediction".

paper: [arxiv (to be released)]()

## Installation
Dependencies:

- TensorFlow 1.4.1
- Python 2.7

Higher versions of TensorFlow and Python 3 will be supported soon!

## Getting Started
Training:

    CUDA_VISIBLE_DEVICES=0 python train.py train {model}

`{model}`: CAN, Cartesion, PNN, etc. (see train.py)

## Citation
## Contact
## License
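For readers setting this up, a minimal environment check matching the stated dependencies, as a sketch; the version pins come from the README above, and note the reviewer comment further down asks for DeepRec with Python 3.6 instead:

```python
# Sketch: verify the pinned environment (TensorFlow 1.4.1 on Python 2.7).
import sys
import tensorflow as tf

assert sys.version_info[:2] == (2, 7), "README pins Python 2.7"
assert tf.__version__.startswith("1.4"), "README pins TensorFlow 1.4.1"
print("Environment OK: Python %s, TF %s" % (sys.version.split()[0], tf.__version__))
```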
25 changes: 25 additions & 0 deletions modelzoo/CAN/data/README.md
@@ -0,0 +1,25 @@
# Co-Action Network

Implementation of the paper "CAN: Revisiting Feature Co-Action for Click Through Rate Prediction".

paper: [arxiv (to be released)]()

## Installation
Dependencies:

- TensorFlow 1.4.1

Member commented: Write this with DeepRec and Python 3.6 as the standard.

- Python 2.7

Higher versions of TensorFlow and Python 3 will be supported soon!

## Getting Started
Training:

    CUDA_VISIBLE_DEVICES=0 python train.py train {model}

`{model}`: CAN, Cartesion, PNN, etc. (see train.py)

## Citation
## Contact
## License
9 changes: 9 additions & 0 deletions modelzoo/CAN/data/prepare_data.sh
@@ -0,0 +1,9 @@
export PATH="~/anaconda4/bin:$PATH"
Contributor commented: Remove local development variables.

wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Books.json.gz
wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Books.json.gz
gunzip reviews_Books.json.gz
gunzip meta_Books.json.gz
python script/process_data.py meta_Books.json reviews_Books.json
python script/local_aggretor.py
python script/split_by_user.py
python script/generate_voc.py
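The pipeline's vocabulary pickles feed `data_iterator.py` below. A quick sanity check, assuming `generate_voc.py` emits pickled `{token: index}` dicts with names like `uid_voc.pkl` (the exact filenames are not shown in this diff):

```python
# Sketch: inspect a generated vocabulary (filename assumed, not in this diff).
import _pickle as cPickle

with open("uid_voc.pkl", "rb") as f:
    voc = cPickle.load(f)
print("vocabulary size:", len(voc))
print("sample entries:", list(voc.items())[:3])
```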
35 changes: 35 additions & 0 deletions modelzoo/CAN/data/script/Dice.py
@@ -0,0 +1,35 @@
import tensorflow as tf

def dice(_x, axis=-1, epsilon=0.000000001, name=''):
    # Dice activation: a sigmoid gate on batch-normalized inputs, blended
    # with a learnable per-channel alpha for the negative branch.
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        alphas = tf.get_variable('alpha'+name, _x.get_shape()[-1],
                                 initializer=tf.constant_initializer(0.0),
                                 dtype=tf.float32)
        input_shape = list(_x.get_shape())

        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[axis] = input_shape[axis]

        # case: train mode (uses stats of the current batch)
        mean = tf.reduce_mean(_x, axis=reduction_axes)
        broadcast_mean = tf.reshape(mean, broadcast_shape)
        std = tf.reduce_mean(tf.square(_x - broadcast_mean) + epsilon, axis=reduction_axes)
        std = tf.sqrt(std)
        broadcast_std = tf.reshape(std, broadcast_shape)
        x_normed = (_x - broadcast_mean) / (broadcast_std + epsilon)
        # x_normed = tf.layers.batch_normalization(_x, center=False, scale=False)
        x_p = tf.sigmoid(x_normed)

    return alphas * (1.0 - x_p) * _x + x_p * _x

def parametric_relu(_x):
    # PReLU: ReLU with a learnable per-channel slope for negative inputs.
    alphas = tf.get_variable('alpha', _x.get_shape()[-1],
                             initializer=tf.constant_initializer(0.0),
                             dtype=tf.float32)
    pos = tf.nn.relu(_x)
    neg = alphas * (_x - abs(_x)) * 0.5

    return pos + neg
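For context, a sketch of how `dice` could be wired into a TF 1.x layer; the layer name and sizes here are illustrative, not taken from this PR's train.py:

```python
# Illustrative only: dice as the activation of a hidden layer.
import tensorflow as tf
from Dice import dice

x = tf.placeholder(tf.float32, [None, 64])
hidden = tf.layers.dense(x, 32, activation=None, name='f1')
hidden = dice(hidden, name='dice_f1')  # per-channel learnable alpha gate
```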
12 changes: 12 additions & 0 deletions modelzoo/CAN/data/script/calc_ckpt.py
@@ -0,0 +1,12 @@

import tensorflow as tf

# Count trainable parameters of a saved checkpoint.
ckpt = tf.train.get_checkpoint_state("./ckpt_path/").model_checkpoint_path
saver = tf.train.import_meta_graph(ckpt + '.meta')
variables = tf.trainable_variables()
total_parameters = 0
for variable in variables:
    shape = variable.get_shape()
    variable_parameters = 1
    for dim in shape:
        variable_parameters *= dim.value
    total_parameters += variable_parameters
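As committed, the script computes `total_parameters` but never reports it; a one-line follow-up (my addition, not in the PR):

```python
# My addition: print the result so the script produces output.
print("total parameters:", total_parameters)
```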

224 changes: 224 additions & 0 deletions modelzoo/CAN/data/script/data_iterator.py
@@ -0,0 +1,224 @@
import numpy
import json
import _pickle as cPickle
import random

import gzip

from data.script import shuffle

def unicode_to_utf8(d):
    return dict((key.encode("UTF-8"), value) for (key, value) in d.items())

def dict_unicode_to_utf8(d):
    return dict(((key[0].encode("UTF-8"), key[1].encode("UTF-8")), value) for (key, value) in d.items())

def load_dict(filename):
    # Vocabularies may be stored as JSON, a pickled dict, or a pickled
    # dict keyed by (token, token) pairs; try each format in turn.
    try:
        with open(filename, 'rb') as f:
            return unicode_to_utf8(json.load(f))
    except Exception:
        try:
            with open(filename, 'rb') as f:
                return unicode_to_utf8(cPickle.load(f))
        except Exception:
            with open(filename, 'rb') as f:
                return dict_unicode_to_utf8(cPickle.load(f))


def fopen(filename, mode='r'):
    if filename.endswith('.gz'):
        return gzip.open(filename, mode)
    return open(filename, mode)


class DataIterator:

    def __init__(self, source,
                 uid_voc,
                 mid_voc,
                 cat_voc,
                 batch_size=128,
                 maxlen=100,
                 skip_empty=False,
                 shuffle_each_epoch=False,
                 sort_by_length=True,
                 max_batch_size=20,
                 minlen=None,
                 label_type=1):
        if shuffle_each_epoch:
            self.source_orig = source
            self.source = shuffle.main(self.source_orig, temporary=True)
        else:
            self.source = fopen(source, 'r')
        self.source_dicts = []
        for source_dict in [uid_voc, mid_voc, cat_voc, '../CAN/data/item_carte_voc.pkl', '../CAN/data/cate_carte_voc.pkl']:
            self.source_dicts.append(load_dict(source_dict))

        # Map each item id to its category id via the item-info metadata.
        f_meta = open("../CAN/data/item-info", "r")
        meta_map = {}
        for line in f_meta:
            arr = line.strip().split("\t")
            if arr[0] not in meta_map:
                meta_map[arr[0]] = arr[1]
        self.meta_id_map = {}
        for key in meta_map:
            val = meta_map[key]
            if key in self.source_dicts[1]:
                mid_idx = self.source_dicts[1][key]
            else:
                mid_idx = 0
            if val in self.source_dicts[2]:
                cat_idx = self.source_dicts[2][val]
            else:
                cat_idx = 0
            self.meta_id_map[mid_idx] = cat_idx

        # Pool of item ids used to sample negative ("no-click") behaviors.
        f_review = open("../CAN/data/reviews-info", "r")
        self.mid_list_for_random = []
        for line in f_review:
            arr = line.strip().split("\t")
            tmp_idx = 0
            if arr[1] in self.source_dicts[1]:
                tmp_idx = self.source_dicts[1][arr[1]]
            self.mid_list_for_random.append(tmp_idx)

        self.batch_size = batch_size
        self.maxlen = maxlen
        self.minlen = minlen
        self.skip_empty = skip_empty

        self.n_uid = len(self.source_dicts[0])
        self.n_mid = len(self.source_dicts[1])
        self.n_cat = len(self.source_dicts[2])
        self.n_carte = [len(self.source_dicts[3]), len(self.source_dicts[4])]

        self.shuffle = shuffle_each_epoch
        self.sort_by_length = sort_by_length

        self.source_buffer = []
        self.k = batch_size * max_batch_size

        self.end_of_data = False
        self.label_type = label_type

    def get_n(self):
        return self.n_uid, self.n_mid, self.n_cat, self.n_carte

    def __iter__(self):
        return self

    def reset(self):
        if self.shuffle:
            self.source = shuffle.main(self.source_orig, temporary=True)
        else:
            self.source.seek(0)

    def __next__(self):
        if self.end_of_data:
            self.end_of_data = False
            self.reset()
            raise StopIteration

        source = []
        target = []

        if len(self.source_buffer) == 0:
            for k_ in range(self.k):
                ss = self.source.readline()
                if ss == "":
                    break
                self.source_buffer.append(ss.strip("\n").split("\t"))

            # sort by history behavior length
            if self.sort_by_length:
                # NOTE: behavior fields are "\x02"-separated; the control
                # character does not render in the diff view.
                his_length = numpy.array([len(s[4].split("\x02")) for s in self.source_buffer])
                tidx = his_length.argsort()

                _sbuf = [self.source_buffer[i] for i in tidx]
                self.source_buffer = _sbuf
            else:
                self.source_buffer.reverse()

        if len(self.source_buffer) == 0:
            self.end_of_data = False
            self.reset()
            raise StopIteration

        try:

            # actual work here
            while True:

                # read from source file and map to word index
                try:
                    ss = self.source_buffer.pop()
                except IndexError:
                    break

                uid = self.source_dicts[0][ss[1]] if ss[1] in self.source_dicts[0] else 0
                mid = self.source_dicts[1][ss[2]] if ss[2] in self.source_dicts[1] else 0
                cat = self.source_dicts[2][ss[3]] if ss[3] in self.source_dicts[2] else 0

                # Behavior item sequence plus (target item, behavior item)
                # cartesian-product ("carte") features.
                tmp = []
                item_carte = []
                for fea in ss[4].split("\x02"):
                    m = self.source_dicts[1][fea] if fea in self.source_dicts[1] else 0
                    tmp.append(m)
                    i_c = self.source_dicts[3][(ss[2], fea)] if (ss[2], fea) in self.source_dicts[3] else 0
                    item_carte.append(i_c)
                mid_list = tmp

                tmp1 = []
                cate_carte = []
                for fea in ss[5].split("\x02"):
                    c = self.source_dicts[2][fea] if fea in self.source_dicts[2] else 0
                    tmp1.append(c)
                    c_c = self.source_dicts[4][(ss[3], fea)] if (ss[3], fea) in self.source_dicts[4] else 0
                    cate_carte.append(c_c)
                cat_list = tmp1

                if self.minlen != None:
                    if len(mid_list) <= self.minlen:
                        continue
                if self.skip_empty and (not mid_list):
                    continue

                # Sample 5 negative (item, category) pairs per positive
                # behavior for the auxiliary loss.
                noclk_mid_list = []
                noclk_cat_list = []
                for pos_mid in mid_list:
                    noclk_tmp_mid = []
                    noclk_tmp_cat = []
                    noclk_index = 0
                    while True:
                        noclk_mid_indx = random.randint(0, len(self.mid_list_for_random)-1)
                        noclk_mid = self.mid_list_for_random[noclk_mid_indx]
                        if noclk_mid == pos_mid:
                            continue
                        noclk_tmp_mid.append(noclk_mid)
                        noclk_tmp_cat.append(self.meta_id_map[noclk_mid])
                        noclk_index += 1
                        if noclk_index >= 5:
                            break
                    noclk_mid_list.append(noclk_tmp_mid)
                    noclk_cat_list.append(noclk_tmp_cat)
                carte_list = [item_carte, cate_carte]
                source.append([uid, mid, cat, mid_list, cat_list, noclk_mid_list, noclk_cat_list, carte_list])
                if self.label_type == 1:
                    target.append([float(ss[0])])
                else:
                    target.append([float(ss[0]), 1-float(ss[0])])

                if len(source) >= self.batch_size or len(target) >= self.batch_size:
                    break
        except IOError:
            self.end_of_data = True

        # all sentence pairs in maxibatch filtered out because of length
        if len(source) == 0 or len(target) == 0:
            source, target = self.__next__()

        return source, target
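A usage sketch for the iterator; the training-file and vocabulary names below are assumed outputs of `prepare_data.sh` (the diff hard-codes the carte vocabularies under `../CAN/data/`), not names confirmed by this PR:

```python
# Illustrative driver: pull one batch from the iterator.
from data.script.data_iterator import DataIterator

train_data = DataIterator(
    "local_train_splitByUser",          # assumed output of split_by_user.py
    "uid_voc.pkl", "mid_voc.pkl", "cat_voc.pkl",  # assumed generate_voc.py outputs
    batch_size=128, maxlen=100)

n_uid, n_mid, n_cat, n_carte = train_data.get_n()
source, target = next(train_data)   # one batch of [uid, mid, cat, ...] rows
print("batch size:", len(source), "first label:", target[0])
```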

