# Importing necessary libraries

In [None]:
pip install tensorflow 

In [None]:
pip install numpy 

In [None]:
pip install panda

In [None]:
pip install scikit-learn

In [None]:
import numpy as np  # NumPy for numerical operations
import pandas as pd  # Pandas for data manipulation and analysis
import tensorflow as tf # TensorFlow for machine learning
from sklearn.preprocessing import StandardScaler  # Class for feature scaling
import math  # Python math module for mathematical functions

# Step 1: Load the Dataset

In [None]:
# Column layouts of the two headerless, space-delimited input files
rating_columns = ['user_id', 'item_id', 'rating']
trust_columns = ['trustor_id', 'trustee_id', 'trust_label']

# Locations of the rating and trust files, relative to the working directory
rating_path = 'Data/ratings.txt'
trust_path = 'Data/trust.txt'

# Load the ratings: one (user_id, item_id, rating) record per line
ratings_df = pd.read_csv(
    rating_path,
    names=rating_columns,
    header=None,
    sep=' ',
)

# Load the trust relations: one (trustor_id, trustee_id, trust_label) record per line
trust_df = pd.read_csv(
    trust_path,
    names=trust_columns,
    header=None,
    sep=' ',
)


In [None]:
# Show the loaded ratings table as this cell's output
ratings_df

In [None]:
# Show the loaded trust table as this cell's output
trust_df

## Insights into the uniqueness of IDs in the datasets

In [None]:
# Distinct users and items that appear in the ratings data
unique_user_count = ratings_df['user_id'].nunique()
unique_item_count = ratings_df['item_id'].nunique()

# Distinct users on each side of the trust relation
unique_trustor_count = trust_df['trustor_id'].nunique()
unique_trustee_count = trust_df['trustee_id'].nunique()

# Report the four counts: users, items, trustors, trustees
print("Unique number of user IDs:", unique_user_count)
print("Unique number of item IDs:", unique_item_count)
print("Unique number of trustor IDs:", unique_trustor_count)
print("Unique number of trustee IDs:", unique_trustee_count)


## Creating rating matrix

In [None]:
# Flag every repeated (user_id, item_id) pair after its first occurrence
duplicates = ratings_df.duplicated(subset=['user_id', 'item_id'])

# Show the rows that are about to be dropped
print("Duplicate entries:\n", ratings_df.loc[duplicates])

# Keep only the first rating per (user, item) so each pivot cell has a single value
ratings_df = ratings_df.loc[~duplicates]

# Users as rows, items as columns; pairs without a rating become 0
ratings_matrix = (
    ratings_df
    .pivot(index='user_id', columns='item_id', values='rating')
    .fillna(0)
)

# Display the resulting ratings matrix
ratings_matrix


## Creating trust matrix

In [None]:
# Flag every repeated (trustor_id, trustee_id) pair after its first occurrence
duplicates = trust_df.duplicated(subset=['trustor_id', 'trustee_id'])

# Show the rows that are about to be dropped
print("Duplicate entries:\n", trust_df.loc[duplicates])

# Keep only the first record per (trustor, trustee) so each pivot cell has a single value
trust_df = trust_df.loc[~duplicates]

# Trustors as rows, trustees as columns; pairs without a trust record become 0
trust_matrix = (
    trust_df
    .pivot(index='trustor_id', columns='trustee_id', values='trust_label')
    .fillna(0)
)

# Display the resulting trust matrix
trust_matrix


### Creating a full trust matrix with a specified size and updating its values based on an existing trust matrix.

In [None]:
# Square trust matrix over the ID range 1..unique_user_count on both axes.
# NOTE(review): this assumes user IDs are the contiguous integers 1..N — confirm against the data.
user_ids = pd.RangeIndex(1, unique_user_count + 1)

# Reindexing keeps every trust value whose trustor and trustee both fall inside that
# range (the last ID, unique_user_count, included), drops IDs outside it, and fills
# pairs with no recorded trust with 0. The result keeps trust_matrix's numeric dtype
# instead of the object dtype produced by filling an empty frame cell by cell.
full_trust_matrix = (
    trust_matrix
    .reindex(index=user_ids, columns=user_ids, fill_value=0)
    .fillna(0)
)

# Display the resulting full trust matrix
full_trust_matrix


# Step 2: Information Aggregation

### Calculating Jaccard similarity based on users' rating matrix

In [None]:
# Indicator array of rated items: 1 where the stored rating is greater than 0, otherwise 0
binary_ratings = ratings_matrix.gt(0).astype(int).to_numpy()

# the Jaccard similarity function
def jaccard_similarity(matrix):
    """Compute the pairwise Jaccard similarity between the rows of a 2-D matrix.

    Every non-zero entry counts as set membership, so each row is read as the
    set of column indices it contains.

    Args:
        matrix: 2-D array-like of shape (num_users, num_items).

    Returns:
        A (num_users, num_users) float array where entry [i, j] is
        |row_i AND row_j| / |row_i OR row_j| for i != j, or 0 when both rows
        are empty. The diagonal is left at 0: self-similarity is not computed.
    """
    membership = (np.asarray(matrix) != 0).astype(np.int64)

    # |A AND B| for every row pair at once; entry [i, j] counts the shared columns.
    intersection = membership @ membership.T

    # Inclusion-exclusion: |A OR B| = |A| + |B| - |A AND B|.
    row_sizes = membership.sum(axis=1)
    union = row_sizes[:, None] + row_sizes[None, :] - intersection

    # Divide only where the union is non-empty; the remaining entries stay 0.
    jaccard_matrix = np.zeros(intersection.shape, dtype=float)
    np.divide(intersection, union, out=jaccard_matrix, where=union > 0)

    # Only pairs of distinct rows are scored.
    np.fill_diagonal(jaccard_matrix, 0.0)
    return jaccard_matrix

# Pairwise rating-overlap similarity between all users
jaccard_matrix = jaccard_similarity(binary_ratings)

# Print the header line followed by the matrix itself
print("Jaccard Similarity Matrix:", jaccard_matrix, sep="\n")


### Calculating the Adamic-Adar similarity measure for trustors based on a binary trust matrix.

In [None]:
def adamic_adar_for_trustors(matrix, trustor1, trustor2):
    """Adamic-Adar similarity between two rows (trustors) of a trust matrix.

    A column counts as a neighbor of a row when the entry equals 1. Each
    neighbor shared by both rows contributes 1 / log(column sum); a shared
    neighbor whose column sum is not greater than 1 contributes 0.

    Args:
        matrix: 2-D NumPy array of trust values (rows: trustors, columns: trustees).
        trustor1: Row index of the first trustor.
        trustor2: Row index of the second trustor.

    Returns:
        The summed contributions of all shared neighbors (0 if there are none).
    """
    trusted_by_first = set(np.flatnonzero(matrix[trustor1] == 1))
    trusted_by_second = set(np.flatnonzero(matrix[trustor2] == 1))
    shared_trustees = trusted_by_first & trusted_by_second

    def contribution(trustee):
        # Column sum of the shared trustee, used as its degree
        degree = np.sum(matrix[:, trustee])
        if degree > 1:
            return 1 / math.log(degree)
        return 0

    return sum(contribution(trustee) for trustee in shared_trustees)

def calculate_adamic_adar_for_all_users(matrix):
    """Compute the Adamic-Adar similarity for every pair of rows of a trust matrix.

    The measure matches the pairwise definition used by
    `adamic_adar_for_trustors`: a column is a neighbor of a row when the entry
    equals 1, and each neighbor shared by two rows contributes
    1 / log(column sum) when that column sum is greater than 1 (otherwise 0).
    All pairs are evaluated with a single weighted matrix product instead of a
    Python loop over pairs.

    Args:
        matrix: 2-D array-like of trust values (rows: trustors, columns:
            trustees). Object-dtype input is converted to float.

    Returns:
        A symmetric (num_users, num_users) float array with a zero diagonal.
    """
    trust = np.asarray(matrix, dtype=float)
    links = (trust == 1).astype(float)

    # Per-column weight 1 / log(degree); columns with degree <= 1 get weight 0,
    # which also keeps log(0) and log(1) out of the computation.
    degree = trust.sum(axis=0)
    weights = np.zeros_like(degree)
    weighted_columns = degree > 1
    weights[weighted_columns] = 1.0 / np.log(degree[weighted_columns])

    # Entry [i, j] sums the weights of the columns where both row i and row j are 1.
    pairwise = (links * weights) @ links.T

    # Keep the strict upper triangle and mirror it, so the result is exactly
    # symmetric and the diagonal (self-pairs) stays 0.
    upper = np.triu(pairwise, k=1)
    adamic_adar_matrix = upper + upper.T
    return adamic_adar_matrix

# NumPy copy of the full trust matrix, so rows and columns are addressed by position
trust_matrix_np = np.array(full_trust_matrix)

# Adamic-Adar similarity for every pair of users
adamic_adar_matrix = calculate_adamic_adar_for_all_users(trust_matrix_np)

# Print the header line followed by the matrix itself
print("Adamic-Adar Similarity Matrix for Users:", adamic_adar_matrix, sep="\n")


## Aggregating the Adamic-Adar Similarity and the Jaccard Similarity

In [None]:
# Seeded generator, so the random weights (and every number derived from them)
# are the same on each fresh run of the notebook.
rng = np.random.default_rng(42)

# One weight in [0, 1) for every user pair
random_weight_matrix = rng.random((unique_user_count, unique_user_count))

# Sum the two similarity matrices, then scale each entry by its random weight
H_information = (jaccard_matrix + adamic_adar_matrix) * random_weight_matrix

# Print the resulting matrix
print(H_information)

# Step 3: Building the Graph Neural Network

### Define the Mirror model

In [None]:
from tensorflow.keras import layers

# Define the Mirror model
class MirrorModel(tf.keras.Model):
    """Two-layer fully connected network: Dense(128, relu) -> Dense(output_dim, sigmoid).

    Args:
        input_dim: Number of input features. Accepted so existing callers keep
            working and stored for reference only; it is not forwarded to the
            layers, which size their weights from the first batch they see.
        output_dim: Number of sigmoid output units.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # `input_dim` is deliberately not passed to Dense: inside a subclassed
        # model that keyword does not shape anything and is deprecated.
        self._declared_input_dim = input_dim
        self.dense1 = layers.Dense(128, activation='relu')
        self.dense2 = layers.Dense(output_dim, activation='sigmoid')

    def call(self, inputs):
        """Run the forward pass and return the sigmoid outputs for `inputs`."""
        hidden = self.dense1(inputs)
        return self.dense2(hidden)

### Split the data

In [None]:
from sklearn.model_selection import train_test_split  # Function for splitting dataset

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    H_information,
    full_trust_matrix,
    test_size=0.2,
    random_state=42,
)


### Instantiating and Compiling the model

In [None]:
# Width of each input row in X_train (one feature per column)
num_features = X_train.shape[1]

# Width of each target row in y_train, i.e. how many output units the model needs
num_users = y_train.shape[1]

# Build the Mirror model for those input and output sizes
mirror_model = MirrorModel(num_features, num_users)

# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the tracked metric
mirror_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


### Training the Mirror model and predicting

In [None]:
# Hand Keras a plain float array as the target; y_train may still be a DataFrame
# (possibly with a non-numeric dtype), so cast it explicitly.
y_train_np = np.asarray(y_train, dtype="float32")

# Train the Mirror model, holding out 30% of the training rows for validation
mirror_model.fit(X_train, y_train_np, epochs=20, batch_size=64, validation_split=0.3)

# Predicted scores from the sigmoid output layer, one row per test user
y_pred = mirror_model.predict(X_test)

# Scores at or above this threshold are treated as a predicted trust link
PREDICTION_THRESHOLD = 0.3
y_pred_binary = (y_pred >= PREDICTION_THRESHOLD).astype(int)


# Step 4: Evaluation 

### Measuring the Accuracy

In [None]:
from sklearn.metrics import accuracy_score  # Function for evaluating model performance by accuracy

# NOTE(review): with 2-D 0/1 targets scikit-learn appears to treat this as multilabel and
# count a row as correct only when every entry matches (subset accuracy) — confirm that
# this is the intended metric.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_binary)

# Report the accuracy as a percentage
print("Accuracy:", accuracy * 100)

### Calculating the Rank-Score (RS)

In [None]:

def rank(edge, y_pred_row):
    """Return the 1-based position of `edge` when the row is sorted by descending score.

    Args:
        edge: Index of the entry whose rank is requested.
        y_pred_row: 1-D sequence of predicted scores for one row.

    Returns:
        1 for the highest-scored index, 2 for the next one, and so on.

    Raises:
        ValueError: If `edge` is not a valid index of `y_pred_row`.
    """
    ascending_indices = np.argsort(y_pred_row).tolist()
    descending_indices = list(reversed(ascending_indices))
    return descending_indices.index(edge) + 1

def calculate_RS(y_test_np, y_pred_binary):
    """Compute the mean Rank-Score (RS) over all rows.

    For each row, every positive entry (label == 1) is ranked among all
    entries by descending predicted value (rank 1 = highest) and that rank is
    divided by the number of non-positive entries in the row. The row score is
    the mean of those ratios, and the result is the mean of the row scores.

    Args:
        y_test_np: 2-D array-like of ground-truth labels, shape (rows, edges).
        y_pred_binary: 2-D array-like of predicted values with the same shape.
            Any sortable values are accepted; continuous scores give a more
            informative ranking than thresholded 0/1 values, which are mostly ties.

    Returns:
        The mean row score. A row contributes 0.0 when it has no positive
        entries or when every entry is positive (nothing to rank against).
        Returns 0.0 for input with no rows.
    """
    labels = np.asarray(y_test_np)
    scores = np.asarray(y_pred_binary)
    num_rows, num_edges = labels.shape

    if num_rows == 0:
        return 0.0

    rank_values = np.arange(1, num_edges + 1)
    RS_list = []

    for row_idx in range(num_rows):
        positives = np.flatnonzero(labels[row_idx] == 1)
        num_non_positive = num_edges - positives.size

        # No positives, or no non-positives to normalise by: score the row as 0
        # instead of dividing by zero.
        if positives.size == 0 or num_non_positive == 0:
            RS_list.append(0.0)
            continue

        # Sort once per row; rank_of[k] is the 1-based descending rank of entry k.
        descending_indices = np.argsort(scores[row_idx])[::-1]
        rank_of = np.empty(num_edges, dtype=np.int64)
        rank_of[descending_indices] = rank_values

        RS_list.append(np.mean(rank_of[positives] / num_non_positive))

    return np.mean(RS_list)


# Converting "y_test" into a numpy array
y_test_np = np.array(y_test)

# Rank on the model's continuous scores (y_pred). Thresholded 0/1 predictions are
# almost all ties, so sorting them would reflect column position rather than how
# confident the model is in each link.
RS_score = calculate_RS(y_test_np, y_pred)

# Print RS Score
print("RS Score:", RS_score * 100)

### Calculating the Area Under the Curve (AUC)

In [None]:
from sklearn.metrics import roc_auc_score

# Cast the ground truth to integer class labels before calculating the ROC AUC score
y_test_np = y_test_np.astype(int)

# Flatten labels and predicted scores so every (user, user) pair is one sample.
# The continuous scores in y_pred are used rather than the thresholded predictions:
# ROC AUC is computed from how the scores order the samples, and 0/1 predictions
# reduce the curve to a single operating point.
flat_y_test_np = y_test_np.flatten()
flat_y_score = np.asarray(y_pred).flatten()

# Check if both classes are present in flat_y_test_np
unique_classes_flat_y_test = np.unique(flat_y_test_np)
if len(unique_classes_flat_y_test) < 2:
    print("Error: Both positive and negative classes are required for ROC AUC score calculation.")
else:
    # Calculate AUC
    auc_score = roc_auc_score(flat_y_test_np, flat_y_score)

    # Print AUC Score
    print("AUC Score:", auc_score * 100)
