In [1]:
import tensorflow as tf
import numpy as np
from scipy import sparse
import os
import bottleneck as bn


In [2]:
import os
# Enumerate GPUs in PCI bus order (see issue #152) and expose only device 0
# to CUDA-based libraries in this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

In [20]:
class bigan:
    """BiGAN-style recommender built with the TensorFlow 1.x graph API.

    Three fully connected networks share one graph:

    * ``q`` (encoder): maps an input row ``x`` to a latent code ``z_hat``.
    * ``p`` (generator/decoder): maps a latent code to item scores ``x_hat``.
    * ``d`` (critic): scores a concatenated ``(x, z)`` pair with a single logit.

    The critic minimises ``mean(d(p(z), z)) - mean(d(x, q(x)))`` with RMSProp
    and has its parameters clipped to ``[-0.02, 0.02]`` after every update;
    the encoder/generator minimise the negated objective.
    """

    def __init__(self, p_dims, d_dims, q_dims=None, random_seed=98765, g_weight=0.5, lr=1e-3):
        """Create all variables and placeholders in the current default graph.

        Args:
            p_dims: layer sizes of the generator, latent size first and
                output (item) size last.
            d_dims: layer sizes of the critic. ``d_dims[0]`` must equal
                ``p_dims[0] + q_dims[0]`` (latent size + input size); a final
                1-unit output layer is appended automatically.
            q_dims: layer sizes of the encoder. Defaults to ``p_dims`` reversed.
            random_seed: seed passed to every variable initializer.
            g_weight: only used to name the summary directory in ``train``.
            lr: learning rate for both RMSProp optimizers.

        Raises:
            AssertionError: if the layer sizes are inconsistent.
        """
        self.p_dims = p_dims
        self.d_dims = d_dims
        self.random_seed = random_seed
        self.lr = lr
        self.g_weight = g_weight
        if q_dims is None:
            self.q_dims = p_dims[::-1]
        else:
            assert q_dims[0] == p_dims[-1], "Input and output dimension must equal each other for autoencoders."
            assert q_dims[-1] == p_dims[0], "Latent dimension for p- and q-network mismatches."
            # Fix: a user-supplied q_dims was validated but never stored, so
            # the d_dims check below failed with AttributeError.
            self.q_dims = q_dims

        assert d_dims is not None, "d_dims can't be None"
        assert d_dims[0] == self.p_dims[0] + self.q_dims[0], "Shape mismatch: discriminate network\
        should be equal to the sum of input shape and output shape of p."

        self._construct_weights()
        self.construct_placeholders()

    def construct_placeholders(self):
        """Create the input placeholders and the dropout / mode switches."""
        self.input_X = tf.placeholder(
            dtype=tf.float32, shape=[None, self.q_dims[0]])
        self.input_z = tf.placeholder(
            dtype=tf.float32, shape=[None, self.p_dims[0]])
        self.keep_prob_ph = tf.placeholder_with_default(1.0, shape=None)
        # 1.0 -> generator is fed random noise; 0.0 -> generator is fed q(x).
        self.is_training_ph = tf.placeholder_with_default(0.0, shape=None)

    def _make_layer_vars(self, weight_key, bias_key, d_in, d_out):
        """Create one (weight, bias) pair with the shared initializers."""
        weight = tf.get_variable(
            name=weight_key, shape=[d_in, d_out],
            initializer=tf.contrib.layers.xavier_initializer(
                seed=self.random_seed))
        bias = tf.get_variable(
            name=bias_key, shape=[d_out],
            initializer=tf.truncated_normal_initializer(
                stddev=0.001, seed=self.random_seed))
        return weight, bias

    def _build_stack(self, prefix, dims):
        """Create the variables of a dense stack whose layer sizes are ``dims``."""
        weights, biases = [], []
        for i, (d_in, d_out) in enumerate(zip(dims[:-1], dims[1:])):
            weight, bias = self._make_layer_vars(
                "weight_{}_{}to{}".format(prefix, i, i + 1),
                "bias_{}_{}".format(prefix, i + 1),
                d_in, d_out)
            weights.append(weight)
            biases.append(bias)
        return weights, biases

    def _construct_weights(self):
        """Create the encoder (q), generator (p) and critic (d) variables."""
        self.weights_q, self.biases_q = self._build_stack("q", self.q_dims)
        self.weights_p, self.biases_p = self._build_stack("p", self.p_dims)
        self.weights_d, self.biases_d = self._build_stack("d", self.d_dims)

        # The critic ends in an extra layer producing a single logit.
        weight, bias = self._make_layer_vars(
            "weight_d_out", "bias_d_out", self.d_dims[-1], 1)
        self.weights_d.append(weight)
        self.biases_d.append(bias)

    def p_graph(self, z):
        """Generator: tanh hidden layers followed by a linear output layer."""
        h = z
        last = len(self.weights_p) - 1
        for i, (w, b) in enumerate(zip(self.weights_p, self.biases_p)):
            h = tf.matmul(h, w) + b
            if i != last:
                h = tf.nn.tanh(h)
        return h

    def q_graph(self, x):
        """Encoder: L2-normalise rows, apply dropout, then tanh on every layer."""
        h = tf.nn.l2_normalize(x, 1)
        h = tf.nn.dropout(h, self.keep_prob_ph)
        for w, b in zip(self.weights_q, self.biases_q):
            h = tf.nn.tanh(tf.matmul(h, w) + b)
        return h

    def forward(self, x):
        """Reconstruct ``x`` by encoding it and decoding the resulting code."""
        z = self.q_graph(x)
        x_hat = self.p_graph(z)
        return x_hat

    def d_graph(self, x, z):
        """Critic: score the concatenated ``(x, z)`` pair with one logit per row."""
        h = tf.concat([x, z], axis=1)
        # Fix: the original compared against len(self.weights_d), which never
        # matches an index, so tanh was also applied to the output logit.
        last = len(self.weights_d) - 1
        for i, (w, b) in enumerate(zip(self.weights_d, self.biases_d)):
            h = tf.matmul(h, w) + b
            if i != last:
                h = tf.nn.tanh(h)
        return h

    def cost(self, x, z_hat, z, x_hat):
        """Build losses, optimizers, the critic clipping op and summaries.

        Args:
            x: real input rows.
            z_hat: encoder output for ``x``.
            z: latent noise.
            x_hat: generator output.

        Returns:
            ``(loss_eg, loss_d, EG_solver, D_solver, clip_D, merged)``.
        """
        pred_q = self.d_graph(x, z_hat)   # critic on (x, E(x))
        pred_p = self.d_graph(x_hat, z)   # critic on (G(z), z)

        real = tf.reduce_mean(pred_q)
        fake = tf.reduce_mean(pred_p)

        # The critic pushes real scores up and fake scores down; the
        # encoder/generator optimise the opposite sign.
        loss_d = fake - real
        loss_eg = real - fake

        para_eg = self.weights_q + self.weights_p + self.biases_q + self.biases_p
        para_d = self.weights_d + self.biases_d

        EG_solver = (tf.train.RMSPropOptimizer(learning_rate=self.lr)
                     .minimize(loss_eg, var_list=para_eg))
        D_solver = (tf.train.RMSPropOptimizer(learning_rate=self.lr)
                    .minimize(loss_d, var_list=para_d))

        clip_D = [p.assign(tf.clip_by_value(p, -0.02, 0.02)) for p in para_d]

        tf.summary.scalar("loss_eg", loss_eg)
        tf.summary.scalar("loss_d", loss_d)
        tf.summary.scalar("fake_mean", fake)
        tf.summary.scalar("real_mean", real)
        merged = tf.summary.merge_all()
        return loss_eg, loss_d, EG_solver, D_solver, clip_D, merged

    def build_graph(self):
        """Wire encoder, generator and critic together.

        Returns:
            ``(saver, x_hat, loss_eg, loss_d, EG_solver, clip_D, D_solver, merged)``.
        """
        z_hat = self.q_graph(self.input_X)
        z = tf.random_normal(tf.shape(z_hat))
        # is_training_ph blends between random noise (1) and the encoded input (0).
        x_hat = self.p_graph(self.is_training_ph * z + (1 - self.is_training_ph) * z_hat)
        loss_eg, loss_d, EG_solver, D_solver, clip_D, merged = self.cost(self.input_X, z_hat, z, x_hat)
        saver = tf.train.Saver()
        return saver, x_hat, loss_eg, loss_d, EG_solver, clip_D, D_solver, merged

    @staticmethod
    def _dense_float32(rows):
        """Return ``rows`` as a dense float32 array (densifying scipy sparse input)."""
        if sparse.isspmatrix(rows):
            rows = rows.toarray()
        return rows.astype("float32")

    def train(self, train, val_tr=None, val_te=None, n_epochs=200, batch_size=500):
        """Train the model and periodically evaluate NDCG@100 on validation data.

        Args:
            train: user-by-item training matrix (dense or scipy sparse).
            val_tr: validation inputs fed to ``predict`` (optional).
            val_te: held-out validation items scored against (optional).
            n_epochs: number of passes over ``train``.
            batch_size: number of rows per mini-batch.

        Returns:
            List of mean validation NDCG values, one per evaluation step
            (empty when no validation data is given).
        """
        ndcgs_vad = []
        N = train.shape[0]
        idxlist = list(range(N))
        _, x_hat, _, _, EG_solver, clip_D, D_solver, merged = self.build_graph()

        # Batch boundaries do not change between epochs; only idxlist is reshuffled.
        start_idxs = list(range(0, N, batch_size))
        end_idxs = start_idxs[1:] + [N]

        # Fix: use identity checks; `!= None` on a sparse matrix goes through
        # the matrix's own comparison operator.
        run_validation = val_tr is not None and val_te is not None

        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            training_writer = tf.summary.FileWriter('./train_summary/g_{}/'.format(self.g_weight),
                                                    sess.graph)
            try:
                sess.run(init)
                count = 0
                for epoch in range(n_epochs):
                    np.random.shuffle(idxlist)
                    for batch_num in range(len(start_idxs)):
                        batch_rows = idxlist[start_idxs[batch_num]:end_idxs[batch_num]]
                        feed_dict = {self.input_X: self._dense_float32(train[batch_rows]),
                                     self.keep_prob_ph: 1,
                                     self.is_training_ph: 1}

                        # Five critic updates (each followed by weight clipping)
                        # on randomly chosen batches per encoder/generator update.
                        for _ in range(5):
                            d_batch = np.random.randint(len(start_idxs))
                            d_rows = idxlist[start_idxs[d_batch]:end_idxs[d_batch]]
                            feed_D = {self.input_X: self._dense_float32(train[d_rows]),
                                      self.keep_prob_ph: 1,
                                      self.is_training_ph: 1}
                            sess.run(D_solver, feed_dict=feed_D)
                            sess.run(clip_D, feed_dict=feed_D)
                        sess.run(EG_solver, feed_dict=feed_dict)

                        if batch_num % 100 == 0:
                            merged_summary = sess.run(merged, feed_dict=feed_dict)
                            # Fix: the running batch counter gives a unique,
                            # monotonically increasing step; the previous formula
                            # produced colliding steps across epochs.
                            training_writer.add_summary(merged_summary, global_step=count)
                            if run_validation:
                                val_predict = self.predict(val_tr, sess, x_hat)
                                # Never recommend items already present in the input.
                                val_predict[val_tr.nonzero()] = -np.inf
                                ndcg_dist = np.array(NDCG_binary_at_k_batch(val_predict, val_te))
                                ndcg_val = ndcg_dist.mean()
                                print("Validation of NDCG @ 100 at epoch {} is {}".format(epoch, ndcg_val))
                                ndcgs_vad.append(ndcg_val)
                        count += 1
            finally:
                # Fix: flush and release the event file even if training fails.
                training_writer.close()
        return ndcgs_vad

    def predict(self, test, sess, x_hat, batch_size=500):
        """Run ``x_hat`` in inference mode over ``test`` in batches.

        Args:
            test: input matrix (dense or scipy sparse).
            sess: an open session holding the trained variables.
            x_hat: generator output tensor returned by ``build_graph``.
            batch_size: number of rows per forward pass.

        Returns:
            Dense array with the per-batch outputs stacked row-wise.
        """
        N = test.shape[0]
        res = []
        for start_idx in range(0, N, batch_size):
            end_idx = min(start_idx + batch_size, N)
            feed_dict = {self.input_X: self._dense_float32(test[start_idx:end_idx]),
                         self.is_training_ph: 0}
            res.append(sess.run(x_hat, feed_dict=feed_dict))
        return np.vstack(res)
        
        
                    

# Calculating the NDCG@K

In [21]:
def NDCG_binary_at_k_batch(X_pred, heldout_batch, k=100):
    '''
    Normalized discounted cumulative gain@k for binary relevance.

    X_pred is a dense score matrix with one row per user; heldout_batch is a
    sparse matrix of the same shape holding the held-out interactions.
    ASSUMPTIONS: all the 0's in heldout_data indicate 0 relevance.
    Returns one NDCG value per row.
    '''
    n_users = X_pred.shape[0]
    row_ix = np.arange(n_users)[:, np.newaxis]

    # Unordered set of the k best-scoring columns per row.
    candidates = bn.argpartition(-X_pred, k, axis=1)[:, :k]
    candidate_scores = X_pred[row_ix, candidates]

    # Order those k candidates by descending score.
    order = np.argsort(-candidate_scores, axis=1)
    ranked_cols = candidates[row_ix, order]

    # Discount template: 1 / log2(rank + 1) for ranks 1..k.
    discounts = 1. / np.log2(np.arange(2, k + 2))

    hits = heldout_batch[row_ix, ranked_cols].toarray()
    dcg = (hits * discounts).sum(axis=1)

    # Ideal DCG: every held-out item (at most k of them) ranked at the top.
    ideal = np.array([discounts[:min(n_held, k)].sum()
                      for n_held in heldout_batch.getnnz(axis=1)])
    return dcg / ideal
def Recall_at_k_batch(X_pred, heldout_batch, k=100):
    '''
    Recall@k per row: the number of held-out items found among the k
    best-scoring columns, divided by min(k, number of held-out items).
    '''
    n_users = X_pred.shape[0]

    # Mark the k best-scoring columns of every row.
    top_cols = bn.argpartition(-X_pred, k, axis=1)[:, :k]
    predicted = np.zeros_like(X_pred, dtype=bool)
    predicted[np.arange(n_users)[:, np.newaxis], top_cols] = True

    relevant = (heldout_batch > 0).toarray()
    n_hits = np.logical_and(relevant, predicted).sum(axis=1).astype(np.float32)
    denominator = np.minimum(k, relevant.sum(axis=1))
    return n_hits / denominator

# Loading the training, validation, and test data

In [22]:
import os
import pandas as pd
from scipy import sparse
import numpy as np
# Dataset location. The original author's absolute path is used when it
# exists; otherwise fall back to the repository-relative default so the
# notebook also runs on other machines. (Previously the relative default was
# assigned and then unconditionally overwritten.)
DATA_DIR = "/home/jz2884/ADV/data/ml-20m"
if not os.path.isdir(DATA_DIR):
    DATA_DIR = './data/ml-20m/'
pro_dir = os.path.join(DATA_DIR, 'pro_sg')

# Each stripped line of unique_sid.txt becomes one entry; the number of
# entries is used as the item dimension of every interaction matrix.
with open(os.path.join(pro_dir, 'unique_sid.txt'), 'r') as f:
    unique_sid = [line.strip() for line in f]

n_items = len(unique_sid)

In [23]:
def load_train_data(csv_file):
    """Read a CSV with ``uid`` and ``sid`` columns into a sparse matrix.

    The result is a float64 CSR matrix with ``max(uid) + 1`` rows and
    ``n_items`` columns (``n_items`` is read from module scope); each
    ``(uid, sid)`` row of the file contributes a 1 at that position.
    """
    interactions = pd.read_csv(csv_file)
    user_idx = interactions['uid']
    item_idx = interactions['sid']
    n_users = interactions['uid'].max() + 1

    values = np.ones_like(user_idx)
    return sparse.csr_matrix((values, (user_idx, item_idx)),
                             dtype='float64',
                             shape=(n_users, n_items))

In [24]:
def load_tr_te_data(csv_file_tr, csv_file_te):
    """Read a pair of ``uid``/``sid`` CSV files into two aligned sparse matrices.

    User ids are shifted so that the smallest uid across both files maps to
    row 0. Both results are float64 CSR matrices with one row per uid in the
    combined ``[min, max]`` range and ``n_items`` columns (``n_items`` is
    read from module scope).

    Returns:
        ``(data_tr, data_te)``.
    """
    frames = [pd.read_csv(path) for path in (csv_file_tr, csv_file_te)]

    uid_lo = min(frame['uid'].min() for frame in frames)
    uid_hi = max(frame['uid'].max() for frame in frames)
    n_rows = uid_hi - uid_lo + 1

    matrices = []
    for frame in frames:
        user_rows = frame['uid'] - uid_lo
        item_cols = frame['sid']
        matrices.append(sparse.csr_matrix((np.ones_like(user_rows),
                                           (user_rows, item_cols)),
                                          dtype='float64', shape=(n_rows, n_items)))

    data_tr, data_te = matrices
    return data_tr, data_te

In [25]:
# Training interactions, plus the validation split: the "tr" part is fed to
# the model and the "te" part holds the held-out items it is scored against.
train_data = load_train_data(os.path.join(pro_dir, 'train.csv'))
vad_data_tr, vad_data_te = load_tr_te_data(
    *(os.path.join(pro_dir, file_name)
      for file_name in ('validation_tr.csv', 'validation_te.csv')))

# Training the model

In [28]:
class bigan:
    """BiGAN-style recommender built with the TensorFlow 1.x graph API.

    Three fully connected networks share one graph:

    * ``q`` (encoder): maps an input row ``x`` to a latent code ``z_hat``.
    * ``p`` (generator/decoder): maps a latent code to item scores ``x_hat``.
    * ``d`` (critic): scores a concatenated ``(x, z)`` pair with a single logit.

    The critic minimises ``mean(d(p(z), z) - d(x, q(x)))`` with RMSProp and has
    its parameters clipped to ``[-0.02, 0.02]`` after every update; the
    encoder/generator minimise the negated objective. An earlier
    sigmoid/log-loss objective trained with Adam, previously kept in the class
    body as a dead string literal, has been removed.
    """

    def __init__(self, p_dims, d_dims, q_dims=None, random_seed=98765, g_weight=0.5, lr=1e-3):
        """Create all variables and placeholders in the current default graph.

        Args:
            p_dims: layer sizes of the generator, latent size first and
                output (item) size last.
            d_dims: layer sizes of the critic. ``d_dims[0]`` must equal
                ``p_dims[0] + q_dims[0]`` (latent size + input size); a final
                1-unit output layer is appended automatically.
            q_dims: layer sizes of the encoder. Defaults to ``p_dims`` reversed.
            random_seed: seed passed to every variable initializer.
            g_weight: only used to name the summary directory in ``train``.
            lr: learning rate for both RMSProp optimizers.

        Raises:
            AssertionError: if the layer sizes are inconsistent.
        """
        self.p_dims = p_dims
        self.d_dims = d_dims
        self.random_seed = random_seed
        self.lr = lr
        self.g_weight = g_weight
        if q_dims is None:
            self.q_dims = p_dims[::-1]
        else:
            assert q_dims[0] == p_dims[-1], "Input and output dimension must equal each other for autoencoders."
            assert q_dims[-1] == p_dims[0], "Latent dimension for p- and q-network mismatches."
            # Fix: a user-supplied q_dims was validated but never stored, so
            # the d_dims check below failed with AttributeError.
            self.q_dims = q_dims

        assert d_dims is not None, "d_dims can't be None"
        assert d_dims[0] == self.p_dims[0] + self.q_dims[0], "Shape mismatch: discriminate network\
        should be equal to the sum of input shape and output shape of p."

        self._construct_weights()
        self.construct_placeholders()

    def construct_placeholders(self):
        """Create the input placeholders and the dropout / mode switches."""
        self.input_X = tf.placeholder(
            dtype=tf.float32, shape=[None, self.q_dims[0]])
        self.input_z = tf.placeholder(
            dtype=tf.float32, shape=[None, self.p_dims[0]])
        self.keep_prob_ph = tf.placeholder_with_default(1.0, shape=None)
        # 1.0 -> generator is fed random noise; 0.0 -> generator is fed q(x).
        self.is_training_ph = tf.placeholder_with_default(0.0, shape=None)

    def _make_layer_vars(self, weight_key, bias_key, d_in, d_out):
        """Create one (weight, bias) pair with the shared initializers."""
        weight = tf.get_variable(
            name=weight_key, shape=[d_in, d_out],
            initializer=tf.contrib.layers.xavier_initializer(
                seed=self.random_seed))
        bias = tf.get_variable(
            name=bias_key, shape=[d_out],
            initializer=tf.truncated_normal_initializer(
                stddev=0.001, seed=self.random_seed))
        return weight, bias

    def _build_stack(self, prefix, dims):
        """Create the variables of a dense stack whose layer sizes are ``dims``."""
        weights, biases = [], []
        for i, (d_in, d_out) in enumerate(zip(dims[:-1], dims[1:])):
            weight, bias = self._make_layer_vars(
                "weight_{}_{}to{}".format(prefix, i, i + 1),
                "bias_{}_{}".format(prefix, i + 1),
                d_in, d_out)
            weights.append(weight)
            biases.append(bias)
        return weights, biases

    def _construct_weights(self):
        """Create the encoder (q), generator (p) and critic (d) variables."""
        self.weights_q, self.biases_q = self._build_stack("q", self.q_dims)
        self.weights_p, self.biases_p = self._build_stack("p", self.p_dims)
        self.weights_d, self.biases_d = self._build_stack("d", self.d_dims)

        # The critic ends in an extra layer producing a single logit.
        weight, bias = self._make_layer_vars(
            "weight_d_out", "bias_d_out", self.d_dims[-1], 1)
        self.weights_d.append(weight)
        self.biases_d.append(bias)

    def p_graph(self, z):
        """Generator: tanh hidden layers followed by a linear output layer."""
        h = z
        last = len(self.weights_p) - 1
        for i, (w, b) in enumerate(zip(self.weights_p, self.biases_p)):
            h = tf.matmul(h, w) + b
            if i != last:
                h = tf.nn.tanh(h)
        return h

    def q_graph(self, x):
        """Encoder: L2-normalise rows, apply dropout, then tanh on every layer."""
        h = tf.nn.l2_normalize(x, 1)
        h = tf.nn.dropout(h, self.keep_prob_ph)
        for w, b in zip(self.weights_q, self.biases_q):
            h = tf.nn.tanh(tf.matmul(h, w) + b)
        return h

    def forward(self, x):
        """Reconstruct ``x`` by encoding it and decoding the resulting code."""
        z = self.q_graph(x)
        x_hat = self.p_graph(z)
        return x_hat

    def d_graph(self, x, z):
        """Critic: score the concatenated ``(x, z)`` pair with one logit per row."""
        h = tf.concat([x, z], axis=1)
        # Fix: the original compared against len(self.weights_d), which never
        # matches an index, so tanh was also applied to the output logit.
        last = len(self.weights_d) - 1
        for i, (w, b) in enumerate(zip(self.weights_d, self.biases_d)):
            h = tf.matmul(h, w) + b
            if i != last:
                h = tf.nn.tanh(h)
        return h

    def cost(self, x, z_hat, z, x_hat):
        """Build losses, optimizers, the critic clipping op and summaries.

        Args:
            x: real input rows.
            z_hat: encoder output for ``x``.
            z: latent noise.
            x_hat: generator output.

        Returns:
            ``(loss_eg, loss_d, EG_solver, D_solver, clip_D, merged)``.
        """
        pred_q = self.d_graph(x, z_hat)   # critic on (x, E(x))
        pred_p = self.d_graph(x_hat, z)   # critic on (G(z), z)

        # Sigmoid of the critic scores; only used for the accuracy summaries.
        sig_q = tf.nn.sigmoid(pred_q)
        sig_p = tf.nn.sigmoid(pred_p)

        # The critic pushes real scores up and fake scores down; the
        # encoder/generator optimise the opposite sign.
        loss_d = tf.reduce_mean(pred_p - pred_q)
        loss_eg = tf.reduce_mean(pred_q - pred_p)

        para_eg = self.weights_q + self.weights_p + self.biases_q + self.biases_p
        para_d = self.weights_d + self.biases_d

        EG_solver = (tf.train.RMSPropOptimizer(learning_rate=self.lr)
                     .minimize(loss_eg, var_list=para_eg))
        D_solver = (tf.train.RMSPropOptimizer(learning_rate=self.lr)
                    .minimize(loss_d, var_list=para_d))

        clip_D = [p.assign(tf.clip_by_value(p, -0.02, 0.02)) for p in para_d]

        tf.summary.scalar("loss_eg", loss_eg)
        tf.summary.scalar("loss_d", loss_d)
        tf.summary.scalar("D_E_accuracy", tf.reduce_mean(tf.cast(sig_q > 0.5, tf.float32)))
        tf.summary.scalar("D_G_accuracy", tf.reduce_mean(tf.cast(sig_p < 0.5, tf.float32)))
        merged = tf.summary.merge_all()
        return loss_eg, loss_d, EG_solver, D_solver, clip_D, merged

    def build_graph(self):
        """Wire encoder, generator and critic together.

        Returns:
            ``(saver, x_hat, loss_eg, loss_d, EG_solver, clip_D, D_solver, merged)``.
        """
        z_hat = self.q_graph(self.input_X)
        z = tf.random_normal(tf.shape(z_hat))
        # is_training_ph blends between random noise (1) and the encoded input (0).
        x_hat = self.p_graph(self.is_training_ph * z + (1 - self.is_training_ph) * z_hat)
        loss_eg, loss_d, EG_solver, D_solver, clip_D, merged = self.cost(self.input_X, z_hat, z, x_hat)
        saver = tf.train.Saver()
        return saver, x_hat, loss_eg, loss_d, EG_solver, clip_D, D_solver, merged

    @staticmethod
    def _dense_float32(rows):
        """Return ``rows`` as a dense float32 array (densifying scipy sparse input)."""
        if sparse.isspmatrix(rows):
            rows = rows.toarray()
        return rows.astype("float32")

    def train(self, train, val_tr=None, val_te=None, n_epochs=200, batch_size=500):
        """Train the model and periodically evaluate NDCG@100 on validation data.

        Args:
            train: user-by-item training matrix (dense or scipy sparse).
            val_tr: validation inputs fed to ``predict`` (optional).
            val_te: held-out validation items scored against (optional).
            n_epochs: number of passes over ``train``.
            batch_size: number of rows per mini-batch.

        Returns:
            List of mean validation NDCG values, one per evaluation step
            (empty when no validation data is given).
        """
        ndcgs_vad = []
        N = train.shape[0]
        idxlist = list(range(N))
        _, x_hat, _, _, EG_solver, clip_D, D_solver, merged = self.build_graph()

        # Batch boundaries do not change between epochs; only idxlist is reshuffled.
        start_idxs = list(range(0, N, batch_size))
        end_idxs = start_idxs[1:] + [N]

        # Fix: use identity checks; `!= None` on a sparse matrix goes through
        # the matrix's own comparison operator.
        run_validation = val_tr is not None and val_te is not None

        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            training_writer = tf.summary.FileWriter('./train_summary/g_{}/'.format(self.g_weight),
                                                    sess.graph)
            try:
                sess.run(init)
                count = 0
                for epoch in range(n_epochs):
                    np.random.shuffle(idxlist)
                    for batch_num in range(len(start_idxs)):
                        batch_rows = idxlist[start_idxs[batch_num]:end_idxs[batch_num]]
                        feed_dict = {self.input_X: self._dense_float32(train[batch_rows]),
                                     self.keep_prob_ph: 0.5,
                                     self.is_training_ph: 1}

                        # Five critic updates (each followed by weight clipping)
                        # on randomly chosen batches per encoder/generator update.
                        for _ in range(5):
                            d_batch = np.random.randint(len(start_idxs))
                            d_rows = idxlist[start_idxs[d_batch]:end_idxs[d_batch]]
                            feed_D = {self.input_X: self._dense_float32(train[d_rows]),
                                      self.keep_prob_ph: 0.5,
                                      self.is_training_ph: 1}
                            sess.run(D_solver, feed_dict=feed_D)
                            sess.run(clip_D, feed_dict=feed_D)
                        sess.run(EG_solver, feed_dict=feed_dict)

                        if batch_num % 100 == 0:
                            merged_summary = sess.run(merged, feed_dict=feed_dict)
                            # Fix: the running batch counter gives a unique,
                            # monotonically increasing step; the previous formula
                            # produced colliding steps across epochs.
                            training_writer.add_summary(merged_summary, global_step=count)
                            if run_validation:
                                val_predict = self.predict(val_tr, sess, x_hat)
                                # Never recommend items already present in the input.
                                val_predict[val_tr.nonzero()] = -np.inf
                                ndcg_dist = np.array(NDCG_binary_at_k_batch(val_predict, val_te))
                                ndcg_val = ndcg_dist.mean()
                                ndcgs_vad.append(ndcg_val)
                        count += 1
            finally:
                # Fix: flush and release the event file even if training fails.
                training_writer.close()
        return ndcgs_vad

    def predict(self, test, sess, x_hat, batch_size=500):
        """Run ``x_hat`` in inference mode over ``test`` in batches.

        Args:
            test: input matrix (dense or scipy sparse).
            sess: an open session holding the trained variables.
            x_hat: generator output tensor returned by ``build_graph``.
            batch_size: number of rows per forward pass.

        Returns:
            Dense array with the per-batch outputs stacked row-wise.
        """
        N = test.shape[0]
        res = []
        for start_idx in range(0, N, batch_size):
            end_idx = min(start_idx + batch_size, N)
            feed_dict = {self.input_X: self._dense_float32(test[start_idx:end_idx]),
                         self.is_training_ph: 0}
            res.append(sess.run(x_hat, feed_dict=feed_dict))
        return np.vstack(res)

In [3]:
# Start from an empty default graph so that re-running this cell does not
# collide with variables created by a previous model instance.
predict_vad = []
tf.reset_default_graph()

# Generator: 200 -> 600 -> n_items; critic input is (n_items + 200) wide
# with one 100-unit hidden layer.
bg = bigan(
    p_dims=[200, 600, n_items],
    d_dims=[n_items + 200, 100],
    random_seed=98765,
    g_weight=.5,
    lr=2e-4,
)
ndcgs_vad = bg.train(
    train=train_data,
    val_tr=vad_data_tr,
    val_te=vad_data_te,
    batch_size=100,
)


