In [1]:

from keras.layers import Input, Embedding, Dense, concatenate, Flatten, Subtract
from keras.regularizers import l2
from keras.models import Model
from keras.optimizers import Adam
from utils2 import *
from time import time
import csv
import numpy as np
import pandas as pd
import random
import timeit

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# --- Network architecture -------------------------------------------------
# Layer widths of the ranker (generator) and discriminator MLPs; every layer
# gets an L2 regularisation weight of 0 (i.e. regularisation is disabled).
ranker_layers = [128, 64, 32, 8]
ranker_reg_layers = [0] * len(ranker_layers)
discriminator_layers = [32, 8]
discriminator_reg_layers = [0] * len(discriminator_layers)

# --- Training / evaluation settings ---------------------------------------
K = 2          # passed to evaluate_model as the cut-off; also part of the log file name
epochs = 20
d_lr = 0.01    # discriminator learning rate
r_lr = 0.05    # ranker learning rate

# --- Output checkpoints (stamped with the current time) -------------------
out = True     # when True, the best weights are written to the files below
dataset = 'FoodData'  # alternative: "CarData"
now = time()
ranker_out_file = 'checkpoints/%d_%s_Ranker_%s.h5' % (now, dataset, ranker_layers)
dis_out_file = 'checkpoints/%d_%s_Dis_%s.h5' % (now, dataset, discriminator_layers)
log_file = 'checkpoints/%d_%s_%d.txt' % (now, dataset, K)

# --- Optional warm start from an earlier run ------------------------------
pre_train = False
in_time = 1549967959   # timestamp embedded in the checkpoint names to load
in_path = 'checkpoints'
ranker_in_file = '%s/%s_%s_Ranker_%s.h5' % (in_path, in_time, dataset, ranker_layers)
dis_in_file = '%s/%s_%s_Dis_%s.h5' % (in_path, in_time, dataset, discriminator_layers)

In [3]:
# Load the pairwise training data: parallel sequences of user, item-i and
# item-j indices, plus the user/item counts reported by the loader.
num_users, num_items, train_u_input, train_i_input, train_j_input = get_pairwise_train_dataset(
    path='data/%s_train.dat' % dataset)

# Enlarge both vocabularies by one before sizing the embedding tables.
# NOTE(review): presumably the loader returns the largest index rather than a
# count -- confirm against utils2.get_pairwise_train_dataset.
num_users += 1
num_items += 1

# Every observed training pair is labelled 1.
train_labels = [1] * len(train_u_input)

testItems, testRatings = get_test_data(path='data/%s_test_ratings.lsvm' % dataset)
AllItems, AllRatings = get_test_data(path='data/%s_all_ratings.lsvm' % dataset)

# Width of each of the three latent vectors fed to the discriminator: its
# first layer width minus the single score input, split three ways.
latent_dim = int((discriminator_layers[0] - 1) / 3)

# Shared index input for the two lookup models below. Deliberately NOT named
# `input`, so the Python builtin stays usable for the rest of the session.
index_input = Input(shape=(1,), dtype='float32', name='input')

# Randomly initialised user embedding. It is only used through predict(), so
# the resulting latents are fixed features rather than trained ones.
embedding_u = Embedding(input_dim=num_users, output_dim=latent_dim,
                        name='rank_embedding_user',
                        embeddings_initializer='random_normal',
                        activity_regularizer=l2(discriminator_reg_layers[0]), input_length=1)
latent_u = Flatten()(embedding_u(index_input))
embedding_model_u = Model(inputs=index_input, outputs=latent_u)
train_u_latent = embedding_model_u.predict(train_u_input)

# Randomly initialised item embedding, shared by the i and j items of a pair.
embedding_i = Embedding(input_dim=num_items, output_dim=latent_dim,
                        name='rank_embedding_item',
                        embeddings_initializer='random_normal',
                        activity_regularizer=l2(discriminator_reg_layers[0]), input_length=1)
latent_i = Flatten()(embedding_i(index_input))
embedding_model_i = Model(inputs=index_input, outputs=latent_i)
train_i_latent = embedding_model_i.predict(train_i_input)
train_j_latent = embedding_model_i.predict(train_j_input)


loading pair-wise data from flie data/FoodData_train.dat...
loading test data from file data/FoodData_test_ratings.lsvm...
loading test data from file data/FoodData_all_ratings.lsvm...


In [4]:
class CRGAN:
    """Adversarial pairwise ranker: a ranker (generator) plus a discriminator.

    The ranker scores (user, item) pairs. The discriminator receives three
    latent vectors (user, item i, item j) together with a scalar preference
    signal and outputs a probability. `combined` chains ranker ->
    discriminator so the ranker can be trained through a frozen discriminator.

    All sizes and hyper-parameters are read from the notebook-level
    configuration (`ranker_layers`, `discriminator_layers`, `num_users`,
    `num_items`, ...).

    NOTE(review): during training the discriminator is fed the module-level
    `train_u_latent` / `train_i_latent` / `train_j_latent`, whereas inside
    `combined` it is fed the outputs of the separate `dis_embedding_*` layers
    created in `__init__`. Confirm that this mismatch is intended.
    """

    def build_ranker(self):
        """Build the ranker: user/item embeddings -> ReLU MLP -> sigmoid score.

        Returns:
            An uncompiled Keras Model mapping [user index, item index] to a
            single score in (0, 1).
        """
        r_u_input = Input(shape=(1,), dtype='int32', name='user_input')
        r_i_input = Input(shape=(1,), dtype='int32', name='item_input')

        # Each embedding takes half of the first layer width so that their
        # concatenation is ranker_layers[0] wide.
        Rank_Embedding_User = Embedding(input_dim=num_users, output_dim=int(ranker_layers[0] / 2),
                                        name='rank_embedding_user',
                                        embeddings_initializer='random_normal',
                                        embeddings_regularizer=l2(ranker_reg_layers[0]),
                                        input_length=1)
        Rank_Embedding_Item = Embedding(input_dim=num_items, output_dim=int(ranker_layers[0] / 2),
                                        name='rank_embedding_item',
                                        embeddings_initializer='random_normal',
                                        embeddings_regularizer=l2(ranker_reg_layers[0]),
                                        input_length=1)

        r_u_latent = Flatten()(Rank_Embedding_User(r_u_input))
        r_i_latent = Flatten()(Rank_Embedding_Item(r_i_input))
        vector = concatenate([r_u_latent, r_i_latent], axis=-1)
        # ranker_layers[0] is consumed by the embeddings; the remaining
        # entries are the hidden Dense layers.
        for idx in range(1, len(ranker_layers)):
            layer = Dense(ranker_layers[idx], kernel_regularizer=l2(ranker_reg_layers[idx]),
                          activation='relu', name='r_layer%d' % idx)
            vector = layer(vector)
        prediction = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform',
                           name='r_prediction')(vector)
        ranker = Model(inputs=[r_u_input, r_i_input],
                       outputs=prediction)
        return ranker

    def build_discriminator(self):
        """Build the discriminator: [u, i, j latents, scalar r] -> probability.

        Returns:
            An uncompiled Keras Model taking three latent vectors of width
            int((discriminator_layers[0] - 1) / 3) and one scalar input.
        """
        latent_dim = int((discriminator_layers[0] - 1) / 3)
        d_u_input = Input((latent_dim,), dtype='float32', name='d_u_input')
        d_i_input = Input((latent_dim,), dtype='float32', name='d_i_input')
        d_j_input = Input((latent_dim,), dtype='float32', name='d_j_input')
        d_r_input = Input((1,), dtype='float32', name='d_r_input')
        d_input = concatenate([d_u_input, d_i_input, d_j_input, d_r_input], axis=-1)
        vector = Dense(discriminator_layers[0], kernel_regularizer=l2(discriminator_reg_layers[0]),
                       activation='relu', name='d_layer0')(d_input)
        for idx in range(1, len(discriminator_layers)):
            vector = Dense(discriminator_layers[idx], kernel_regularizer=l2(discriminator_reg_layers[idx]),
                           activation='relu', name='d_layer%d' % idx)(vector)
        prediction = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform',
                           name='d_prediction')(vector)
        discriminator = Model(inputs=[d_u_input, d_i_input, d_j_input, d_r_input], outputs=prediction)
        return discriminator

    def __init__(self, d_lr=d_lr, r_lr=r_lr):
        """Build and compile the discriminator and the combined model.

        Args:
            d_lr: Adam learning rate for the discriminator.
            r_lr: Adam learning rate for the combined (ranker) model.

        Prints the Keras summary of both models as a side effect.
        """
        # Build and compile the discriminator
        self.discriminator = self.build_discriminator()
        self.discriminator.summary()
        self.discriminator.compile(loss=['binary_crossentropy'],
                                   optimizer=Adam(d_lr),
                                   metrics=['accuracy'])

        # Build the generator
        latent_dim = int((discriminator_layers[0] - 1) / 3)
        u_input = Input(shape=(1,), dtype='float32', name='u_input')
        i_input = Input(shape=(1,), dtype='float32', name='i_input')
        j_input = Input(shape=(1,), dtype='float32', name='j_input')
        Dis_Embedding_U = Embedding(input_dim=num_users, output_dim=latent_dim,
                                    name='dis_embedding_u',
                                    embeddings_initializer='random_normal',
                                    embeddings_regularizer=l2(discriminator_reg_layers[0]),
                                    input_length=1)
        Dis_Embedding_I = Embedding(input_dim=num_items, output_dim=latent_dim,
                                    name='dis_embedding_i',
                                    embeddings_initializer='random_normal',
                                    embeddings_regularizer=l2(discriminator_reg_layers[0]),
                                    input_length=1)
        Dis_Embedding_J = Embedding(input_dim=num_items, output_dim=latent_dim,
                                    name='dis_embedding_j',
                                    embeddings_initializer='random_normal',
                                    embeddings_regularizer=l2(discriminator_reg_layers[0]),
                                    input_length=1)

        d_u_latent = Flatten()(Dis_Embedding_U(u_input))
        d_i_latent = Flatten()(Dis_Embedding_I(i_input))
        d_j_latent = Flatten()(Dis_Embedding_J(j_input))
        self.ranker = self.build_ranker()
        # The same ranker scores both items; the discriminator sees the
        # score difference r = r_i - r_j.
        r_i = self.ranker([u_input, i_input])
        r_j = self.ranker([u_input, j_input])
        r = Subtract()([r_i, r_j])
        # For the combined model we will only train the generator
        self.discriminator.trainable = False
        y_pred = self.discriminator([d_u_latent, d_i_latent, d_j_latent, r])
        self.combined = Model([u_input, i_input, j_input], y_pred)
        self.combined.summary()
        self.combined.compile(loss=['binary_crossentropy'],
                              optimizer=Adam(r_lr),
                              metrics=['accuracy'])

    def train(self):
        """Run `epochs` rounds of alternating discriminator / ranker updates.

        Each epoch performs one full-batch discriminator update on the
        "real" signal (`train_labels`) and one on the ranker's current score
        differences, followed by one full-batch update of the ranker through
        `combined`. The ranker is evaluated after every epoch; metrics are
        printed and appended to `log_file`. When `out` is True, the weights
        of the best epoch so far (by metric index 1) are saved.
        """
        if pre_train is True:
            self.ranker.load_weights(ranker_in_file)
            # self.discriminator.load_weights(dis_in_file)
        metrics = evaluate_model(self.ranker, testItems, testRatings, K)

        print('init: ', metrics)
        with open(log_file, 'w') as log:
            print('-1', ' '.join('%.4f' % i for i in metrics), file=log)
        best_metrics, best_epoch = metrics, -1

        # Index of the metric that decides which epoch counts as "best".
        important_index = 1

        # The training inputs never change, so convert them once rather than
        # on every epoch.
        u_idx = np.array(train_u_input)
        i_idx = np.array(train_i_input)
        j_idx = np.array(train_j_input)
        real_r = np.array(train_labels)

        for epoch in range(epochs):
            # ---------------------
            #  Train Discriminator
            # ---------------------
            gen_r_i = self.ranker.predict([u_idx, i_idx])
            gen_r_j = self.ranker.predict([u_idx, j_idx])
            gen_r = gen_r_i - gen_r_j

            # Real samples carry the constant label signal; generated samples
            # carry the ranker's current score difference.
            valid = np.ones_like(gen_r)
            d_loss_real = self.discriminator.train_on_batch(
                [train_u_latent, train_i_latent, train_j_latent, real_r],
                valid)
            fake = np.zeros_like(gen_r)
            d_loss_gen = self.discriminator.train_on_batch(
                [train_u_latent, train_i_latent, train_j_latent, gen_r], fake)
            # Mean discriminator loss; kept for debugging, not logged.
            d_loss = (d_loss_real[0] + d_loss_gen[0]) / 2  # 0: loss, 1: acc

            # ---------------------
            #  Train Generator
            # ---------------------
            # The ranker is rewarded when the discriminator labels its
            # output as "valid".
            g_loss = self.combined.train_on_batch([train_u_input, train_i_input, train_j_input], valid)
            metrics = evaluate_model(self.ranker, testItems, testRatings, K)

            is_best = metrics[important_index] > best_metrics[important_index]
            # An empty tuple unpacks to nothing, so non-best lines are
            # printed exactly as before.
            tag = ('[best]',) if is_best else ()
            with open(log_file, 'a') as log:
                print('epoch %d: ' % epoch, metrics, *tag)
                print('%d' % epoch, ' '.join('%.4f' % i for i in metrics), g_loss[0], *tag, file=log)
                if is_best:
                    best_metrics, best_epoch = metrics, epoch
                    if out is True:
                        self.ranker.save_weights(ranker_out_file, overwrite=True)
                        self.discriminator.save_weights(dis_out_file, overwrite=True)
        
    
# Build both models (the constructor prints their Keras summaries), then run
# the adversarial training loop, which prints per-epoch metrics.
cr_gan = CRGAN()
cr_gan.train()






Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
d_u_input (InputLayer)          (None, 10)           0                                            
__________________________________________________________________________________________________
d_i_input (InputLayer)          (None, 10)           0                                            
__________________________________________________________________________________________________
d_j_input (InputLayer)          (None, 10)           0                                            
__________________________________________________________________________________________________
d_r_input (InputLayer)          (None, 1)            0                                            
____________________________________________________________________________________________

  if __name__ == '__main__':
  if sys.path[0] == '':


Model: "model_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
u_input (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
i_input (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
j_input (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
dis_embedding_u (Embedding)     (None, 1, 10)        210         u_input[0][0]                    
____________________________________________________________________________________________

  'Discrepancy between trainable weights and collected trainable'


epoch 0:  [0.95       0.875      0.88065736 0.85       0.68333333 0.57708333]
epoch 1:  [0.95       0.875      0.88065736 0.85       0.68333333 0.57708333]
epoch 2:  [0.95       0.9        0.91131472 0.9        0.70416667 0.59375   ]
epoch 3:  [0.95       0.9        0.9        0.875      0.69166667 0.58125   ]
epoch 4:  [0.95       0.875      0.88065736 0.85       0.68333333 0.57708333]
epoch 5:  [0.95       0.9        0.9        0.875      0.69166667 0.58125   ]
epoch 6:  [0.95       0.9        0.9        0.875      0.69166667 0.58125   ]
epoch 7:  [0.95       0.9        0.9        0.875      0.69166667 0.58125   ]


  'Discrepancy between trainable weights and collected trainable'


epoch 8:  [0.95       0.9        0.9        0.875      0.69166667 0.58125   ]
epoch 9:  [1.         0.925      0.91934264 0.8875     0.69583333 0.58541667] [best]
epoch 10:  [0.95       0.875      0.88065736 0.85       0.68333333 0.57708333]
epoch 11:  [0.95       0.875      0.88065736 0.85       0.68333333 0.57708333]
epoch 12:  [0.95       0.875      0.88065736 0.85       0.68333333 0.57708333]
epoch 13:  [0.95       0.875      0.88065736 0.85       0.68333333 0.57708333]
epoch 14:  [0.95       0.875      0.88065736 0.85       0.68333333 0.57708333]
epoch 15:  [0.95       0.875      0.88065736 0.85       0.68333333 0.57708333]
epoch 16:  [0.95       0.9        0.9        0.875      0.69166667 0.58125   ]
epoch 17:  [0.95       0.875      0.88065736 0.85       0.68333333 0.57708333]
epoch 18:  [0.95       0.875      0.88065736 0.85       0.68333333 0.57708333]
epoch 19:  [0.95       0.875      0.88065736 0.85       0.68333333 0.57708333]


After training the model, we obtain every user's predicted ratings on all items and pass them to the aggregation function to obtain each group's predicted ratings. We then evaluate how closely the predicted group ratings match the actual group ratings.

In [5]:

# Debug output: the full rating table and one sample entry.
print(AllRatings)
print(AllRatings[0][3])

# Build {user index: flat list of predicted ratings, rounded to 2 decimals}
# for every user index in range(num_users).
# (Earlier experiments overrode num_users = 20 and num_items = 6 here.)
PredictedRatings = {}
for user in range(num_users):
    # predict_user_ratings returns a nested sequence of scores for this user
    # over the item list AllItems[0].
    ratings = predict_user_ratings(user, AllItems[0])
    flattened_ratings = []
    for sublist in ratings:
        for item in sublist:
            flattened_ratings.append(round(item, 2))
    PredictedRatings[user] = flattened_ratings

{0: [2, 2, 3, 2, 4, 5], 1: [3, 3, 3, 3, 4, 3], 2: [3, 3, 3, 3, 3, 3], 3: [4, 4, 3, 3, 2, 2], 4: [2, 4, 2, 3, 3, 3], 5: [2, 3, 3, 3, 4, 4], 6: [1, 3, 3, 3, 5, 3], 7: [3, 3, 3, 4, 3, 2], 8: [2, 2, 3, 3, 4, 4], 9: [2, 4, 2, 4, 3, 3], 10: [2, 2, 3, 3, 4, 4], 11: [3, 3, 3, 2, 4, 3], 12: [3, 3, 3, 3, 3, 3], 13: [4, 4, 3, 3, 2, 2], 14: [2, 4, 3, 3, 3, 3], 15: [2, 3, 3, 3, 4, 4], 16: [2, 3, 2, 3, 5, 3], 17: [2, 3, 3, 4, 3, 3], 18: [2, 2, 3, 3, 4, 4], 19: [2, 3, 3, 4, 4, 3]}
2


In [6]:
# Load the group membership IDs produced by SimGNN.
file_path = "./data/group_Mambers_IDs_%s.csv"% dataset

# One inner list per group. IDs in the file are 1-based and are shifted to
# 0-based here. Blank rows and empty cells (e.g. from a trailing comma) are
# skipped so they cannot create empty groups or crash int().
Clustered_groups = []
with open(file_path, 'r', newline='') as csvfile:
    for row in csv.reader(csvfile):
        row_int = [int(cell) - 1 for cell in row if cell.strip()]
        if row_int:
            Clustered_groups.append(row_int)

print("Clustered_groups IDs list:")
print(Clustered_groups)

# Aggregate the individual ratings into one rating vector per group.
Groups_ActualRatings = Aggregation(Clustered_groups, AllItems, AllRatings)
# NOTE(review): the factor 5 presumably rescales predictions to the 1-5
# rating scale used by AllRatings -- confirm against Aggregation's output.
Groups_PredictedRatings = Aggregation(Clustered_groups, AllItems, PredictedRatings) * 5

start = timeit.default_timer()
items = list(range(1, num_items))

# One row of metrics per group. The row count follows the loaded CSV rather
# than a hard-coded 3; the 11 columns match the names printed below.
num_groups = len(Clustered_groups)
METRICS = np.zeros([num_groups, 11])
for groupID in range(num_groups):
    METRICS[groupID] = eval_groups(groupID, Clustered_groups, Groups_ActualRatings, Groups_PredictedRatings, items, AllRatings)
Average_Metrics = np.round(np.mean(METRICS, axis=0),2)
print("Average_Metrics:\n", Average_Metrics)
print("hr, p, ndcg_bin, auc, map, mrr, accuracy, precision, recall, f1, fairness")
stop = timeit.default_timer()
execution_time = stop - start
print("\n Program Executed in "+str(execution_time)) # It returns time in seconds

Clustered_groups IDs list:
[[1, 3, 4, 9, 10, 11, 12, 13, 15, 16], [2, 5, 6, 18, 19], [0, 14]]
Average_Metrics:
 [1.   0.83 0.81 0.75 0.87 0.48 0.78 0.89 0.75 0.81 0.47]
hr, p, ndcg_bin, auc, map, mrr, accuracy, precision, recall, f1, fairness

 Program Executed in 0.014035300999999833


In [7]:
# Baseline: evaluate randomly formed groups of random size.
# The experiment is repeated num_repeat times and the metrics are averaged.
# NOTE(review): no random seed is set in this cell, so the numbers will
# presumably differ between runs -- confirm how create_random_groups draws.

num_repeat = 20
num_Mertics = 11
TotallMetrics = []

# The candidate item list does not depend on the repetition; build it once.
items = list(range(1, num_items))

for i in range(num_repeat):
    Random_groups = create_random_groups(Clustered_groups, num_users)

    Groups_ActualRatings = Aggregation(Random_groups, AllItems, AllRatings)
    Groups_PredictedRatings = Aggregation(Random_groups, AllItems, PredictedRatings) * 5

    start = timeit.default_timer()
    # One row of metrics per random group; the row count follows the groups
    # actually returned instead of a hard-coded 3.
    num_groups = len(Random_groups)
    METRICS = np.zeros([num_groups, num_Mertics])
    for groupID in range(num_groups):
        METRICS[groupID] = eval_groups(groupID, Random_groups, Groups_ActualRatings, Groups_PredictedRatings, items, AllRatings)
    Average_Metrics = np.round(np.mean(METRICS, axis=0), 2)
    stop = timeit.default_timer()
    execution_time = stop - start
    print("Average_Metrics:\n", Average_Metrics, execution_time)
    # The array holds 11 metric columns (the last one is fairness); the
    # execution time is printed after the array.
    print("hr, p, ndcg_bin, auc, map, mrr, accuracy, precision, recall, f1, fairness, execution_time")
    TotallMetrics.append(list(Average_Metrics))

# Mean over all repetitions of the per-repetition (already rounded) averages.
Final_Metrics = np.round(np.mean(TotallMetrics, axis=0), 2)
print("****** Final_Metrics:", Final_Metrics)

Average_Metrics:
 [1.   0.83 0.88 0.88 0.95 0.54 0.78 0.89 0.75 0.81 0.54] 0.010978895000000932
hr, p, ndcg_bin, auc, map, mrr, accuracy, precision, recall, f1, execution_time
Average_Metrics:
 [1.   0.81 0.84 0.85 0.9  0.49 0.72 0.89 0.67 0.76 0.4 ] 0.010344864000000342
hr, p, ndcg_bin, auc, map, mrr, accuracy, precision, recall, f1, execution_time
Average_Metrics:
 [1.   0.81 0.83 0.81 0.88 0.51 0.67 0.78 0.64 0.7  0.52] 0.011662291999998686
hr, p, ndcg_bin, auc, map, mrr, accuracy, precision, recall, f1, execution_time
Average_Metrics:
 [1.   0.78 0.74 0.74 0.78 0.49 0.78 0.78 0.78 0.78 0.75] 0.01079312500000107
hr, p, ndcg_bin, auc, map, mrr, accuracy, precision, recall, f1, execution_time
Average_Metrics:
 [1.   0.67 0.72 0.78 0.83 0.54 0.72 0.7  0.78 0.73 0.52] 0.011608089999999294
hr, p, ndcg_bin, auc, map, mrr, accuracy, precision, recall, f1, execution_time
Average_Metrics:
 [1.   0.67 0.75 0.78 0.86 0.52 0.61 0.67 0.58 0.62 0.49] 0.01117822600000018
hr, p, ndcg_bin, auc, map,