In [None]:
import numpy as np  # NumPy for numerical operations
import pandas as pd  # Pandas for data manipulation and analysis
from scipy.io import loadmat
# Importing PyTorch and PyG modules, and the necessary libraries for this end
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import ChebConv
from torch_geometric.nn import RGCNConv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, precision_score, recall_score
import time

In [None]:

# Path to the Enron dynamic e-mail CSV, relative to the working directory.
enron_path = 'Useable data/ia-enron-email-dynamic/enron_dynamic.csv'

# Load the CSV into a DataFrame and show it as the cell output.
df = pd.read_csv(filepath_or_buffer=enron_path)
df

# Proposed Confidence metric

In [None]:
# Combined confidence score: the harmonic mean 2ab / (a + b) of the Jaccard
# and Adamic similarities of each row.
jaccard = df['jaccard_similarity']
adamic = df['adamic_similarity']
numerator = 2 * jaccard * adamic       # * df['common_neighbor']
denominator = jaccard + adamic         # + df['common_neighbor']

# Rows whose denominator is zero get a confidence of 0 instead of NaN/inf.
df.loc[:, 'confidence'] = np.where(denominator != 0, (numerator / denominator), 0)

# Scale by the largest score so the maximum becomes 1. When every score is 0
# (or the maximum is NaN) the division would turn the whole column into NaN,
# so the column is left untouched in that case.
max_confidence = df['confidence'].max()
if max_confidence > 0:
    df.loc[:, 'confidence'] = df['confidence'] / max_confidence

# Show the inputs next to the new column.
df[['source_id', 'target_id', 'jaccard_similarity', 'adamic_similarity', 'confidence']]

# Preparing data for test and train

In [None]:
# Give every node id that appears as a source or a target a dense integer index.
source_users = df['source_id'].unique()
target_users = df['target_id'].unique()

# NOTE(review): indices follow set iteration order, which is not guaranteed to be
# stable across interpreter runs for string ids — confirm the ids are integers.
all_nodes = set(source_users) | set(target_users)
node_to_index = {}
for position, node_id in enumerate(all_nodes):
    node_to_index[node_id] = position

# Attach the index of each endpoint to the dataframe.
for id_column, index_column in (('source_id', 'source_index'), ('target_id', 'target_index')):
    df.loc[:, index_column] = df[id_column].map(node_to_index)


In [None]:
# Parse 'timestamp' and store it as seconds in 'timesdate'
# (int64 nanoseconds floor-divided by 1e9, which yields floats).
# NOTE(review): 'timesdate' is not read again in the cells shown here, while the later
# cells read a 'normalized_time' column — confirm that column comes from the CSV.
df['timesdate'] = pd.to_datetime(df['timestamp']).astype('int64') // 1e9

# Order the rows by the raw 'timestamp' column.
df = df.sort_values('timestamp')

# 80/20 train/test split with a fixed seed.
# NOTE(review): train_test_split shuffles by default, so the split is random,
# not chronological, despite the sort above — confirm this is intended.
split_frames = train_test_split(df, test_size=0.2, random_state=42)
train_df, test_df = split_frames


In [None]:
# Input features of the proposed model, one row per dataframe row.
# (Other available column: 'adamic_similarity'.)
feature_columns = ['jaccard_similarity', 'confidence']
train_features = torch.tensor(train_df[feature_columns].values, dtype=torch.float)
test_features = torch.tensor(test_df[feature_columns].values, dtype=torch.float)

# Edge lists as 2 x E arrays: row 0 holds source indices, row 1 target indices.
edge_index_train_np = np.array([train_df[name].values for name in ('source_index', 'target_index')])
edge_index_test_np = np.array([test_df[name].values for name in ('source_index', 'target_index')])
edge_index_train = torch.tensor(edge_index_train_np, dtype=torch.long)
edge_index_test = torch.tensor(edge_index_test_np, dtype=torch.long)

# One 'normalized_time' value per row, shaped as an E x 1 column.
edge_attr_train = torch.tensor(train_df['normalized_time'].values, dtype=torch.float).reshape(-1, 1)
edge_attr_test = torch.tensor(test_df['normalized_time'].values, dtype=torch.float).reshape(-1, 1)

# Integer relation labels, used as the prediction target.
train_labels = torch.tensor(train_df['relation_label'].values, dtype=torch.long)
test_labels = torch.tensor(test_df['relation_label'].values, dtype=torch.long)

# Bundle everything into PyTorch Geometric Data objects.
train_data = Data(x=train_features, edge_index=edge_index_train, edge_attr=edge_attr_train, y=train_labels)
test_data = Data(x=test_features, edge_index=edge_index_test, edge_attr=edge_attr_test, y=test_labels)


In [None]:
# Hyper-parameters shared by the models trained in the following cells.
lr, N_Epo = 0.001, 200                      # optimiser learning rate, number of training epochs
hidden_dim_ae, hidden_dim_rgcn = 64, 128    # autoencoder code width, graph-layer hidden width

# Proposed Model

In [None]:
# Define the autoencoder model
class Autoencoder(nn.Module):
    """Single-layer autoencoder.

    Args:
        input_dim: Size of each input (and reconstructed) vector.
        hidden_dim: Size of the encoded representation.
        activation_fn: Module applied after the encoder's linear layer.
            ``None`` (the default) creates a fresh ``nn.ReLU()`` per instance,
            so separate autoencoders never share one activation module.

    ``forward`` returns ``(encoded, decoded)``.
    """

    def __init__(self, input_dim, hidden_dim, activation_fn=None):
        super().__init__()

        # A module instance used as a default argument is created once and
        # shared by every model; build it here instead.
        if activation_fn is None:
            activation_fn = nn.ReLU()

        # Encoder: linear projection followed by the activation.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            activation_fn
        )

        # Decoder: linear projection back to the input size; the sigmoid
        # keeps the reconstruction in (0, 1).
        self.decoder = nn.Sequential(
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Encode ``x`` and reconstruct it; returns ``(encoded, decoded)``."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded

In [None]:
class TGCNConv(ChebConv):
    """ChebConv variant that L2-normalises each input row before the convolution.

    Constructor arguments are forwarded unchanged to ``ChebConv``.
    """

    def __init__(self, in_channels, out_channels, K=2, normalization='sym', bias=True, **kwargs):
        super().__init__(in_channels, out_channels, K, normalization, bias, **kwargs)
        # Extra dense layer with He-uniform weights and a zero bias.
        # NOTE(review): forward() below never calls self.lin — confirm whether the
        # ChebConv parent uses it; otherwise these parameters are never trained.
        self.lin = nn.Linear(in_channels, out_channels)
        nn.init.kaiming_uniform_(self.lin.weight, mode='fan_in', nonlinearity='relu')
        nn.init.zeros_(self.lin.bias)

    def forward(self, x, edge_index, edge_attr):
        """Normalise ``x`` row-wise (p=2) and delegate to ``ChebConv.forward``."""
        unit_rows = F.normalize(x, p=2, dim=1)
        # Shape the edge attribute as a column; it is passed as the third positional
        # argument of ChebConv.forward (presumably the edge weight — verify).
        attr_column = edge_attr.view(-1, 1)
        return super().forward(unit_rows, edge_index, attr_column)

class RealationPrediction_Model(nn.Module):
    """Proposed relation predictor.

    Pipeline: polynomial feature expansion -> autoencoder code -> two
    ``TGCNConv`` layers (ReLU + dropout) -> ``RGCNConv`` -> linear head.
    ``forward`` returns raw logits (no sigmoid is applied).

    Args:
        input_dim: Number of raw feature columns in ``data.x``.
        hidden_dim_ae: Width of the autoencoder code.
        hidden_dim_rgcn: Width of the graph-convolution layers.
        output_dim: Number of outputs of the final linear layer.
        num_relations: Number of relation types given to ``RGCNConv``.
        num_bases: Number of bases given to ``RGCNConv``.
        num_time_embeddings: Number of rows of the time-embedding table.
        poly_degree: Highest power used in the polynomial expansion.
        dropout_rate: Dropout probability after each ``TGCNConv`` layer.
    """

    def __init__(self, input_dim, hidden_dim_ae, hidden_dim_rgcn, output_dim, num_relations, num_bases, num_time_embeddings, poly_degree, dropout_rate=0.5):
        super().__init__()
        self.hidden_dim_ae = hidden_dim_ae
        self.hidden_dim_rgcn = hidden_dim_rgcn
        # The autoencoder sees every feature raised to the powers 1..poly_degree.
        self.autoencoder = Autoencoder(input_dim * poly_degree, hidden_dim_ae)
        # Registered for state_dict compatibility; forward() does not use it because
        # the first graph layer is built for hidden_dim_ae inputs only.
        self.time_embedding = nn.Embedding(num_time_embeddings, hidden_dim_ae)

        self.rgcn = TGCNConv(hidden_dim_ae, hidden_dim_rgcn, K=2, normalization='sym')
        self.dropout1 = nn.Dropout(dropout_rate)
        self.rgcn1 = TGCNConv(hidden_dim_rgcn, hidden_dim_rgcn, K=2, normalization='sym')
        self.dropout2 = nn.Dropout(dropout_rate)

        # Honour the num_bases argument (it used to be ignored in favour of a literal 2).
        self.rgcn2 = RGCNConv(hidden_dim_rgcn, hidden_dim_rgcn, num_relations, num_bases=num_bases)
        self.linear = nn.Linear(hidden_dim_rgcn, output_dim)

        self.poly_degree = poly_degree

    def generate_polynomial_features(self, x):
        """Return ``[x**1, x**2, ..., x**poly_degree]`` concatenated column-wise.

        For a 2-D ``x`` with ``C`` columns the result has ``C * poly_degree``
        columns, grouped by power (all first powers, then all squares, ...).
        """
        return torch.cat([x ** degree for degree in range(1, self.poly_degree + 1)], dim=1)

    def forward(self, data):
        """Compute one row of logits per row of ``data.x``."""
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        # NOTE(review): the relation type given to RGCNConv is data.y, the same tensor
        # the training loop uses as the target — confirm this is not label leakage.
        edge_type = data.y

        encoded, _ = self.autoencoder(self.generate_polynomial_features(x))

        x = self.dropout1(F.relu(self.rgcn(encoded, edge_index, edge_attr)))
        x = self.dropout2(F.relu(self.rgcn1(x, edge_index, edge_attr)))
        x = self.rgcn2(x, edge_index, edge_type)
        return self.linear(x)


# Instantiate, train and evaluate the proposed model.
torch.manual_seed(42)  # reproducible weight initialisation and dropout masks

input_dim_ae = train_data.x.shape[1]             # number of raw feature columns
num_time_embeddings = 10                          # size of the time-embedding table (example value)
num_relations = df['relation_label'].nunique()   # distinct relation labels in the data
output_dim = 1                                    # one logit per sample (binary classification)

proposed_model = RealationPrediction_Model(
    input_dim_ae,
    hidden_dim_ae,
    hidden_dim_rgcn,
    output_dim,
    num_relations,
    num_bases=2,
    num_time_embeddings=num_time_embeddings,
    poly_degree=2
)

# BCEWithLogitsLoss applies the sigmoid itself, so it is fed raw model outputs.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(proposed_model.parameters(), lr, weight_decay=1e-4)

# The target is the same in every epoch, so build it once.
target = train_data.y.float().view(-1, 1)

start_time = time.time()
# Full-batch training with dropout and weight decay
proposed_model.train()
for epoch in range(N_Epo):
    optimizer.zero_grad()
    out = proposed_model(train_data)

    loss = criterion(out, target)
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

end_time = time.time()

# Evaluation on test data
proposed_model.eval()
with torch.no_grad():
    proposed_model_pred = proposed_model(test_data)

runtime = end_time - start_time   # training time only
print("Runtime:", runtime, "seconds")
ourmodel_time = runtime

proposed_model_probabilities = torch.sigmoid(proposed_model_pred)

# AUC on flattened scores (the model output has shape E x 1).
scores = proposed_model_probabilities.view(-1)
ourmodel_auc = roc_auc_score(test_labels.numpy(), scores.numpy())

# Rank every test link by predicted probability: rank 1 = highest score.
# torch.sort returns (values, indices); argsort gives the ordering directly.
order = torch.argsort(scores, descending=True)
ranks = torch.empty_like(scores)
ranks[order] = torch.arange(1, scores.numel() + 1, dtype=scores.dtype)

# Ranking score: mean over positive links of rank / (number of ranked links).
# Lower is better.
num_positive_links = torch.sum(test_labels == 1)
num_negative_links = torch.sum(test_labels == 0)
denominator = num_positive_links + num_negative_links

positive_indices = test_labels == 1
positive_ranks = ranks[positive_indices]
RSe_values = positive_ranks / denominator
ourmodel_RS = torch.mean(RSe_values).item()   # NaN when the test set has no positive link


print(f'AUC: {ourmodel_auc}, RS: {ourmodel_RS}')

In [None]:
# Classification metrics for the proposed model (scikit-learn).
from sklearn.metrics import classification_report, confusion_matrix

# Ground truth, and predictions thresholded at the mean predicted probability.
actual = test_labels.numpy()
threshold = torch.mean(proposed_model_probabilities)
predicted = (proposed_model_probabilities >= threshold).int()

# Confusion matrix with label order [1, 0], i.e. [[tp, fn], [fp, tn]].
ourmodel_matrix = confusion_matrix(actual, predicted, labels=[1, 0])
print('Confusion matrix : \n',ourmodel_matrix)

tp, fn, fp, tn = ourmodel_matrix.reshape(-1)
print('Outcome values : \n', tp, fn, fp, tn)

# Per-class precision / recall / F1 report; the name is rebound to the report text.
ourmodel_matrix = classification_report(actual, predicted, labels=[1, 0])
print('Classification report : \n',ourmodel_matrix)

# Scalar metrics kept for the later model comparison.
predicted_np = predicted.numpy()
ourmodel_accuracy = accuracy_score(actual, predicted_np)
ourmodel_precision = precision_score(actual, predicted_np)
ourmodel_recall = recall_score(actual, predicted_np)
ourmodel_f1 = f1_score(actual, predicted_np)
print(f'Accuracy: {ourmodel_accuracy}, Precision: {ourmodel_precision}, Recall: {ourmodel_recall}, F1: {ourmodel_f1}')

# Compared Models

## Other variants of our proposed model

### Without PFs (polynomial features)

In [None]:
# Inputs for the variant without polynomial features: all three similarity columns.
feature_columns = ['jaccard_similarity', 'adamic_similarity', 'confidence']
train_features = torch.tensor(train_df[feature_columns].values, dtype=torch.float)
test_features = torch.tensor(test_df[feature_columns].values, dtype=torch.float)

# 2 x E edge lists (row 0 = source index, row 1 = target index).
edge_index_train_np = np.array([train_df[name].values for name in ('source_index', 'target_index')])
edge_index_test_np = np.array([test_df[name].values for name in ('source_index', 'target_index')])
edge_index_train = torch.tensor(edge_index_train_np, dtype=torch.long)
edge_index_test = torch.tensor(edge_index_test_np, dtype=torch.long)

# 'normalized_time' as an E x 1 column.
edge_attr_train = torch.tensor(train_df['normalized_time'].values, dtype=torch.float).reshape(-1, 1)
edge_attr_test = torch.tensor(test_df['normalized_time'].values, dtype=torch.float).reshape(-1, 1)

# Relation labels (prediction target).
train_labels = torch.tensor(train_df['relation_label'].values, dtype=torch.long)
test_labels = torch.tensor(test_df['relation_label'].values, dtype=torch.long)

train_data = Data(x=train_features, edge_index=edge_index_train, edge_attr=edge_attr_train, y=train_labels)
test_data = Data(x=test_features, edge_index=edge_index_test, edge_attr=edge_attr_test, y=test_labels)

# Define the autoencoder model
class Autoencoder2(nn.Module):
    """Single-layer autoencoder used by the variant without polynomial features.

    Args:
        input_dim: Size of each input (and reconstructed) vector.
        hidden_dim: Size of the encoded representation.
        activation_fn: Module applied after the encoder's linear layer.
            ``None`` (the default) creates a fresh ``nn.ReLU()`` per instance,
            so separate autoencoders never share one activation module.

    ``forward`` returns ``(encoded, decoded)``.
    """

    def __init__(self, input_dim, hidden_dim, activation_fn=None):
        super().__init__()

        # Avoid a module instance as default argument: it would be created
        # once at definition time and shared by every model.
        if activation_fn is None:
            activation_fn = nn.ReLU()

        # Encoder: linear projection followed by the activation.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            activation_fn
        )

        # Decoder: linear projection back to the input size, squashed to (0, 1).
        self.decoder = nn.Sequential(
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Encode ``x`` and reconstruct it; returns ``(encoded, decoded)``."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded
        
class TGCNConv2(ChebConv):
    """ChebConv variant that L2-normalises each input row before the convolution.

    Constructor arguments are forwarded unchanged to ``ChebConv``.
    """

    def __init__(self, in_channels, out_channels, K=2, normalization='sym', bias=True, **kwargs):
        super().__init__(in_channels, out_channels, K, normalization, bias, **kwargs)
        # Extra dense layer with He-uniform weights and a zero bias.
        # NOTE(review): forward() below never calls self.lin — confirm whether the
        # ChebConv parent uses it; otherwise these parameters are never trained.
        self.lin = nn.Linear(in_channels, out_channels)
        nn.init.kaiming_uniform_(self.lin.weight, mode='fan_in', nonlinearity='relu')
        nn.init.zeros_(self.lin.bias)

    def forward(self, x, edge_index, edge_attr):
        """Normalise ``x`` row-wise (p=2) and delegate to ``ChebConv.forward``."""
        unit_rows = F.normalize(x, p=2, dim=1)
        # Column-shaped edge attribute, passed as ChebConv.forward's third positional
        # argument (presumably the edge weight — verify).
        attr_column = edge_attr.view(-1, 1)
        return super().forward(unit_rows, edge_index, attr_column)
        
class no_PFs_Model(nn.Module):
    """Ablation of the proposed model without polynomial feature expansion.

    Pipeline: autoencoder code concatenated with a time embedding -> two
    ``TGCNConv2`` layers (ReLU + dropout) -> two linear layers.
    ``forward`` returns raw logits, which is what ``nn.BCEWithLogitsLoss``
    and a downstream ``torch.sigmoid`` expect.

    Args:
        input_dim: Number of feature columns in ``data.x``.
        hidden_dim_ae: Width of the autoencoder code and of the time embedding.
        hidden_dim_rgcn: Width of the graph-convolution layers.
        output_dim: Number of outputs of the final linear layer.
        num_relations: Accepted but not used by this variant.
        num_bases: Accepted but not used by this variant.
        num_time_embeddings: Number of rows of the time-embedding table.
        dropout_rate: Dropout probability after each graph layer.
    """

    def __init__(self, input_dim, hidden_dim_ae, hidden_dim_rgcn, output_dim, num_relations, num_bases, num_time_embeddings, dropout_rate=0.5):
        super().__init__()
        self.hidden_dim_ae = hidden_dim_ae
        self.hidden_dim_rgcn = hidden_dim_rgcn
        self.autoencoder = Autoencoder2(input_dim, hidden_dim_ae)
        self.time_embedding = nn.Embedding(num_time_embeddings, hidden_dim_ae)
        # First graph layer consumes [autoencoder code | time embedding].
        self.rgcn1 = TGCNConv2(hidden_dim_ae + hidden_dim_ae, hidden_dim_rgcn, K=2, normalization='sym')
        self.dropout1 = nn.Dropout(dropout_rate)
        self.rgcn2 = TGCNConv2(hidden_dim_rgcn, hidden_dim_rgcn, K=2, normalization='sym')
        self.dropout2 = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(hidden_dim_rgcn, hidden_dim_rgcn)
        self.linear2 = nn.Linear(hidden_dim_rgcn, output_dim)

    def forward(self, data):
        """Compute one row of logits per row of ``data.x``."""
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr

        encoded, _ = self.autoencoder(x)
        # edge_attr is truncated to integers and used as an index into the embedding table.
        time_embedding = self.time_embedding(edge_attr.long())

        # An (E, 1) index tensor yields an (E, 1, H) embedding; drop the middle axis.
        if time_embedding.dim() == 3:
            time_embedding = time_embedding.squeeze(1)

        x_with_temporal = torch.cat([encoded, time_embedding], dim=1)
        x_with_temporal = x_with_temporal.view(-1, self.hidden_dim_ae * 2)

        x = F.relu(self.rgcn1(x_with_temporal, edge_index, edge_attr))
        x = self.dropout1(x)
        x = F.relu(self.rgcn2(x, edge_index, edge_attr))
        x = self.dropout2(x)
        x = self.linear(x)
        # Return logits: applying a sigmoid here as well would squash the values twice
        # once the loss / evaluation code applies its own sigmoid.
        return self.linear2(x)


# Instantiate, train and evaluate the variant without polynomial features.
torch.manual_seed(42)  # reproducible weight initialisation and dropout masks

input_dim_ae = train_data.x.shape[1]             # number of feature columns
num_time_embeddings = 10                          # size of the time-embedding table (example value)
num_relations = df['relation_label'].nunique()   # distinct relation labels in the data
output_dim = 1                                    # one logit per sample (binary classification)

no_PFs_model = no_PFs_Model(
    input_dim_ae,
    hidden_dim_ae,
    hidden_dim_rgcn,
    output_dim,
    num_relations,
    num_bases=2,
    num_time_embeddings=num_time_embeddings
)

# NOTE(review): BCEWithLogitsLoss and the torch.sigmoid below both assume the model
# returns raw logits — confirm no_PFs_Model.forward does not apply its own sigmoid.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(no_PFs_model.parameters(), lr, weight_decay=1e-4)

# The target is the same in every epoch, so build it once.
target = train_data.y.float().view(-1, 1)

start_time = time.time()
# Full-batch training with dropout and weight decay
no_PFs_model.train()
for epoch in range(N_Epo):
    optimizer.zero_grad()
    out = no_PFs_model(train_data)

    loss = criterion(out, target)
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

end_time = time.time()

# Evaluation on test data
no_PFs_model.eval()
with torch.no_grad():
    no_PFs_model_pred = no_PFs_model(test_data)

no_PFs_pred_probabilities = torch.sigmoid(no_PFs_model_pred)

runtime = end_time - start_time   # training time only
print("Runtime:", runtime, "seconds")
no_PFs_time = runtime

# AUC on flattened scores (the model output has shape E x 1).
scores = no_PFs_pred_probabilities.view(-1)
no_PFs_auc = roc_auc_score(test_labels.numpy(), scores.numpy())

# Rank every test link by predicted score: rank 1 = highest score.
# torch.sort returns (values, indices); argsort gives the ordering directly.
order = torch.argsort(scores, descending=True)
ranks = torch.empty_like(scores)
ranks[order] = torch.arange(1, scores.numel() + 1, dtype=scores.dtype)

# Ranking score: mean over positive links of rank / (number of ranked links).
# Lower is better.
num_positive_links = torch.sum(test_labels == 1)
num_negative_links = torch.sum(test_labels == 0)
denominator = num_positive_links + num_negative_links

positive_indices = test_labels == 1
positive_ranks = ranks[positive_indices]
RSe_values = positive_ranks / denominator
no_PFs_RS = torch.mean(RSe_values).item()   # NaN when the test set has no positive link

print('Evaluation results for proposed without polynomial features:')
print(f'AUC: {no_PFs_auc}, RS: {no_PFs_RS}')

In [None]:
# Classification metrics for the variant without polynomial features.

# Ground truth, and predictions thresholded at the mean predicted probability.
# '>=' matches the comparison used for the other model variants, so a score exactly
# at the mean is classified the same way in every comparison.
actual = test_labels.numpy()
predicted = (no_PFs_pred_probabilities >= torch.mean(no_PFs_pred_probabilities)).int()

# Confusion matrix with label order [1, 0], i.e. [[tp, fn], [fp, tn]].
no_PFs_matrix = confusion_matrix(actual, predicted, labels=[1, 0])
print('Confusion matrix : \n',no_PFs_matrix)

tp, fn, fp, tn = no_PFs_matrix.reshape(-1)
print('Outcome values : \n', tp, fn, fp, tn)

# Per-class precision / recall / F1 report; the name is rebound to the report text.
no_PFs_matrix = classification_report(actual, predicted, labels=[1, 0])
print('Classification report : \n',no_PFs_matrix)

# Scalar metrics kept for the later model comparison.
predicted_np = predicted.numpy()
no_PFs_accuracy = accuracy_score(actual, predicted_np)
no_PFs_precision = precision_score(actual, predicted_np)
no_PFs_recall = recall_score(actual, predicted_np)
no_PFs_f1 = f1_score(actual, predicted_np)
print(f'Accuracy: {no_PFs_accuracy}, Precision: {no_PFs_precision}, Recall: {no_PFs_recall}, F1: {no_PFs_f1}')

### Without confidence

In [None]:
# Inputs for the variant without the confidence score: the two raw similarities only.
feature_columns = ['jaccard_similarity', 'adamic_similarity']
train_features = torch.tensor(train_df[feature_columns].values, dtype=torch.float)
test_features = torch.tensor(test_df[feature_columns].values, dtype=torch.float)

# 2 x E edge lists (row 0 = source index, row 1 = target index).
edge_index_train_np = np.array([train_df[name].values for name in ('source_index', 'target_index')])
edge_index_test_np = np.array([test_df[name].values for name in ('source_index', 'target_index')])
edge_index_train = torch.tensor(edge_index_train_np, dtype=torch.long)
edge_index_test = torch.tensor(edge_index_test_np, dtype=torch.long)

# 'normalized_time' as an E x 1 column.
edge_attr_train = torch.tensor(train_df['normalized_time'].values, dtype=torch.float).reshape(-1, 1)
edge_attr_test = torch.tensor(test_df['normalized_time'].values, dtype=torch.float).reshape(-1, 1)

# Relation labels (prediction target).
train_labels = torch.tensor(train_df['relation_label'].values, dtype=torch.long)
test_labels = torch.tensor(test_df['relation_label'].values, dtype=torch.long)

train_data = Data(x=train_features, edge_index=edge_index_train, edge_attr=edge_attr_train, y=train_labels)
test_data = Data(x=test_features, edge_index=edge_index_test, edge_attr=edge_attr_test, y=test_labels)
# Define the autoencoder model
class Autoencoder3(nn.Module):
    """Single-layer autoencoder used by the variant without the confidence feature.

    Args:
        input_dim: Size of each input (and reconstructed) vector.
        hidden_dim: Size of the encoded representation.
        activation_fn: Module applied after the encoder's linear layer.
            ``None`` (the default) creates a fresh ``nn.ReLU()`` per instance,
            so separate autoencoders never share one activation module.

    ``forward`` returns ``(encoded, decoded)``.
    """

    def __init__(self, input_dim, hidden_dim, activation_fn=None):
        super().__init__()

        # Avoid a module instance as default argument: it would be created
        # once at definition time and shared by every model.
        if activation_fn is None:
            activation_fn = nn.ReLU()

        # Encoder: linear projection followed by the activation.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            activation_fn
        )

        # Decoder: linear projection back to the input size, squashed to (0, 1).
        self.decoder = nn.Sequential(
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Encode ``x`` and reconstruct it; returns ``(encoded, decoded)``."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded
        
class TGCNConv3(ChebConv):
    """ChebConv variant that L2-normalises each input row before the convolution.

    Constructor arguments are forwarded unchanged to ``ChebConv``.
    """

    def __init__(self, in_channels, out_channels, K=2, normalization='sym', bias=True, **kwargs):
        super().__init__(in_channels, out_channels, K, normalization, bias, **kwargs)
        # Extra dense layer with He-uniform weights and a zero bias.
        # NOTE(review): forward() below never calls self.lin — confirm whether the
        # ChebConv parent uses it; otherwise these parameters are never trained.
        self.lin = nn.Linear(in_channels, out_channels)
        nn.init.kaiming_uniform_(self.lin.weight, mode='fan_in', nonlinearity='relu')
        nn.init.zeros_(self.lin.bias)

    def forward(self, x, edge_index, edge_attr):
        """Normalise ``x`` row-wise (p=2) and delegate to ``ChebConv.forward``."""
        unit_rows = F.normalize(x, p=2, dim=1)
        # Column-shaped edge attribute, passed as ChebConv.forward's third positional
        # argument (presumably the edge weight — verify).
        attr_column = edge_attr.view(-1, 1)
        return super().forward(unit_rows, edge_index, attr_column)


class RealationPrediction_Model_2(nn.Module):
    """Ablation of the proposed model trained without the confidence feature.

    Pipeline: polynomial feature expansion -> autoencoder code concatenated
    with a time embedding -> ``TGCNConv3`` layers (ReLU + dropout) -> linear
    head. ``forward`` returns raw logits, which is what
    ``nn.BCEWithLogitsLoss`` and a downstream ``torch.sigmoid`` expect.

    Args:
        input_dim: Number of raw feature columns in ``data.x``.
        hidden_dim_ae: Width of the autoencoder code and of the time embedding.
        hidden_dim_rgcn: Width of the graph-convolution layers.
        output_dim: Number of outputs of the final linear layer.
        num_relations: Accepted but not used by this variant.
        num_bases: Accepted but not used by this variant.
        num_time_embeddings: Number of rows of the time-embedding table.
        poly_degree: Highest power used in the polynomial expansion.
        dropout_rate: Dropout probability after each graph layer.
    """

    def __init__(self, input_dim, hidden_dim_ae, hidden_dim_rgcn, output_dim, num_relations, num_bases, num_time_embeddings, poly_degree, dropout_rate=0.5):
        super().__init__()
        self.hidden_dim_ae = hidden_dim_ae
        self.hidden_dim_rgcn = hidden_dim_rgcn
        # The autoencoder sees every feature raised to the powers 1..poly_degree.
        self.autoencoder = Autoencoder3(input_dim * poly_degree, hidden_dim_ae)
        self.time_embedding = nn.Embedding(num_time_embeddings, hidden_dim_ae)
        # First graph layer consumes [autoencoder code | time embedding].
        self.rgcn1 = TGCNConv3(hidden_dim_ae + hidden_dim_ae, hidden_dim_rgcn, K=2, normalization='sym')
        self.dropout1 = nn.Dropout(dropout_rate)
        self.rgcn2 = TGCNConv3(hidden_dim_rgcn, hidden_dim_rgcn, K=2, normalization='sym')
        self.dropout2 = nn.Dropout(dropout_rate)
        # NOTE(review): self.linear is created and initialised but forward() never
        # calls it — confirm whether it was meant to precede linear2.
        self.linear = nn.Linear(hidden_dim_rgcn, hidden_dim_rgcn)
        self.linear2 = nn.Linear(hidden_dim_rgcn, output_dim)
        self.poly_degree = poly_degree

        # He-uniform weights and zero bias for self.linear.
        nn.init.kaiming_uniform_(self.linear.weight, mode='fan_in', nonlinearity='relu')
        nn.init.zeros_(self.linear.bias)

    def generate_polynomial_features(self, x):
        """Return ``[x**1, x**2, ..., x**poly_degree]`` concatenated column-wise.

        For a 2-D ``x`` with ``C`` columns the result has ``C * poly_degree``
        columns, grouped by power (all first powers, then all squares, ...).
        """
        return torch.cat([x ** degree for degree in range(1, self.poly_degree + 1)], dim=1)

    def forward(self, data):
        """Compute one row of logits per row of ``data.x``."""
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr

        poly_features = self.generate_polynomial_features(x)
        encoded, _ = self.autoencoder(poly_features)
        # edge_attr is truncated to integers and used as an index into the embedding table.
        time_embedding = self.time_embedding(edge_attr.long())

        # An (E, 1) index tensor yields an (E, 1, H) embedding; drop the middle axis.
        if time_embedding.dim() == 3:
            time_embedding = time_embedding.squeeze(1)

        x_with_temporal = torch.cat([encoded, time_embedding], dim=1)
        x_with_temporal = x_with_temporal.view(-1, self.hidden_dim_ae * 2)

        x = F.relu(self.rgcn1(x_with_temporal, edge_index, edge_attr))
        x = self.dropout1(x)
        x = F.relu(self.rgcn2(x, edge_index, edge_attr))
        x = self.dropout2(x)
        # NOTE(review): rgcn2 and dropout2 are applied a second time (shared weights) —
        # confirm this repetition is intended.
        x = F.relu(self.rgcn2(x, edge_index, edge_attr))
        x = self.dropout2(x)
        # Return logits: applying a sigmoid here as well would squash the values twice
        # once the loss / evaluation code applies its own sigmoid.
        return self.linear2(x)


# Instantiate, train and evaluate the variant without the confidence feature.
torch.manual_seed(42)  # reproducible weight initialisation and dropout masks

input_dim_ae = train_data.x.shape[1]             # number of raw feature columns
num_time_embeddings = 10                          # size of the time-embedding table (example value)
num_relations = df['relation_label'].nunique()   # distinct relation labels in the data
output_dim = 1                                    # one logit per sample (binary classification)

no_confidence_model = RealationPrediction_Model_2(
    input_dim_ae,
    hidden_dim_ae,
    hidden_dim_rgcn,
    output_dim,
    num_relations,
    num_bases=2,
    num_time_embeddings=num_time_embeddings,
    poly_degree=2
)

# NOTE(review): BCEWithLogitsLoss and the torch.sigmoid below both assume the model
# returns raw logits — confirm RealationPrediction_Model_2.forward does not apply
# its own sigmoid.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(no_confidence_model.parameters(), lr, weight_decay=1e-4)

# The target is the same in every epoch, so build it once.
target = train_data.y.float().view(-1, 1)

start_time = time.time()
# Full-batch training with dropout and weight decay
no_confidence_model.train()
for epoch in range(N_Epo):
    optimizer.zero_grad()
    out = no_confidence_model(train_data)

    loss = criterion(out, target)
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

end_time = time.time()

# Evaluation on test data
no_confidence_model.eval()
with torch.no_grad():
    no_confidence_model_pred = no_confidence_model(test_data)

no_confidence_probabilities = torch.sigmoid(no_confidence_model_pred)

runtime = end_time - start_time   # training time only
print("Runtime:", runtime, "seconds")
no_confidence_time = runtime

# AUC on flattened scores (the model output has shape E x 1).
scores = no_confidence_probabilities.view(-1)
no_confidence_auc = roc_auc_score(test_labels.numpy(), scores.numpy())

# Rank every test link by predicted probability: rank 1 = highest score.
# torch.sort returns (values, indices); argsort gives the ordering directly.
order = torch.argsort(scores, descending=True)
ranks = torch.empty_like(scores)
ranks[order] = torch.arange(1, scores.numel() + 1, dtype=scores.dtype)

# Ranking score: mean over positive links of rank / (number of ranked links).
# Lower is better.
num_positive_links = torch.sum(test_labels == 1)
num_negative_links = torch.sum(test_labels == 0)
denominator = num_positive_links + num_negative_links

positive_indices = test_labels == 1
positive_ranks = ranks[positive_indices]
RSe_values = positive_ranks / denominator
no_confidence_RS = torch.mean(RSe_values).item()   # NaN when the test set has no positive link

print('Evaluation results for proposed without confidence:')
print(f'AUC: {no_confidence_auc}, RS: {no_confidence_RS}')

In [None]:
# Classification metrics for the variant without the confidence feature.

# Ground truth, and predictions thresholded at the mean predicted probability.
actual = test_labels.numpy()
threshold = torch.mean(no_confidence_probabilities)
predicted = (no_confidence_probabilities >= threshold).int()

# Confusion matrix with label order [1, 0], i.e. [[tp, fn], [fp, tn]].
no_confidence_matrix = confusion_matrix(actual, predicted, labels=[1, 0])
print('Confusion matrix : \n',no_confidence_matrix)

tp, fn, fp, tn = no_confidence_matrix.reshape(-1)
print('Outcome values : \n', tp, fn, fp, tn)

# Per-class precision / recall / F1 report; the name is rebound to the report text.
no_confidence_matrix = classification_report(actual, predicted, labels=[1, 0])
print('Classification report : \n',no_confidence_matrix)

# Scalar metrics kept for the later model comparison.
predicted_np = predicted.numpy()
no_confidence_accuracy = accuracy_score(actual, predicted_np)
no_confidence_precision = precision_score(actual, predicted_np)
no_confidence_recall = recall_score(actual, predicted_np)
no_confidence_f1 = f1_score(actual, predicted_np)
print(f'Accuracy: {no_confidence_accuracy}, Precision: {no_confidence_precision}, Recall: {no_confidence_recall}, F1: {no_confidence_f1}')

### Without both PFs and confidence

In [None]:
# Inputs for the variant without polynomial features and without confidence:
# the two raw similarity columns only.
feature_columns = ['jaccard_similarity', 'adamic_similarity']
train_features = torch.tensor(train_df[feature_columns].values, dtype=torch.float)
test_features = torch.tensor(test_df[feature_columns].values, dtype=torch.float)

# 2 x E edge lists (row 0 = source index, row 1 = target index).
edge_index_train_np = np.array([train_df[name].values for name in ('source_index', 'target_index')])
edge_index_test_np = np.array([test_df[name].values for name in ('source_index', 'target_index')])
edge_index_train = torch.tensor(edge_index_train_np, dtype=torch.long)
edge_index_test = torch.tensor(edge_index_test_np, dtype=torch.long)

# 'normalized_time' as an E x 1 column.
edge_attr_train = torch.tensor(train_df['normalized_time'].values, dtype=torch.float).reshape(-1, 1)
edge_attr_test = torch.tensor(test_df['normalized_time'].values, dtype=torch.float).reshape(-1, 1)

# Relation labels (prediction target).
train_labels = torch.tensor(train_df['relation_label'].values, dtype=torch.long)
test_labels = torch.tensor(test_df['relation_label'].values, dtype=torch.long)

train_data = Data(x=train_features, edge_index=edge_index_train, edge_attr=edge_attr_train, y=train_labels)
test_data = Data(x=test_features, edge_index=edge_index_test, edge_attr=edge_attr_test, y=test_labels)

# Define the autoencoder model
class Autoencoder4(nn.Module):
    """Single-layer autoencoder used by the variant without PFs and confidence.

    Args:
        input_dim: Size of each input (and reconstructed) vector.
        hidden_dim: Size of the encoded representation.
        activation_fn: Module applied after the encoder's linear layer.
            ``None`` (the default) creates a fresh ``nn.ReLU()`` per instance,
            so separate autoencoders never share one activation module.

    ``forward`` returns ``(encoded, decoded)``.
    """

    def __init__(self, input_dim, hidden_dim, activation_fn=None):
        super().__init__()

        # Avoid a module instance as default argument: it would be created
        # once at definition time and shared by every model.
        if activation_fn is None:
            activation_fn = nn.ReLU()

        # Encoder: linear projection followed by the activation.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            activation_fn
        )

        # Decoder: linear projection back to the input size, squashed to (0, 1).
        self.decoder = nn.Sequential(
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Encode ``x`` and reconstruct it; returns ``(encoded, decoded)``."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded
        
class TGCNConv4(ChebConv):
    """ChebConv variant that L1-normalises each input row before the convolution.

    Constructor arguments are forwarded unchanged to ``ChebConv``.
    """

    def __init__(self, in_channels, out_channels, K=2, normalization='sym', bias=True, **kwargs):
        super().__init__(in_channels, out_channels, K, normalization, bias, **kwargs)
        # Extra dense layer with He-uniform weights and a zero bias.
        # NOTE(review): forward() below never calls self.lin — confirm whether the
        # ChebConv parent uses it; otherwise these parameters are never trained.
        self.lin = nn.Linear(in_channels, out_channels)
        nn.init.kaiming_uniform_(self.lin.weight, mode='fan_in', nonlinearity='relu')
        nn.init.zeros_(self.lin.bias)

    def forward(self, x, edge_index, edge_attr):
        """Normalise ``x`` row-wise (p=1) and delegate to ``ChebConv.forward``."""
        # NOTE(review): this uses p=1 (L1 norm) although the sibling TGCNConv classes
        # in this file use p=2 — confirm the difference is intentional.
        unit_rows = F.normalize(x, p=1, dim=1)
        # Column-shaped edge attribute, passed as ChebConv.forward's third positional
        # argument (presumably the edge weight — verify).
        attr_column = edge_attr.view(-1, 1)
        return super().forward(unit_rows, edge_index, attr_column)
        
class no_PFs_Model2(nn.Module):
    """Autoencoder + time embedding + two TGCNConv4 layers + two linear layers.

    Args:
        input_dim: Width of ``data.x``.
        hidden_dim_ae: Width of the autoencoder code and of each time embedding.
        hidden_dim_rgcn: Width of both graph-convolution layers and of ``linear``.
        output_dim: Width of the final output.
        num_relations: Accepted but not used by this class.
        num_bases: Accepted but not used by this class.
        num_time_embeddings: Number of rows in the time-embedding table.
        dropout_rate: Dropout probability after each graph convolution.

    ``forward`` returns the raw output of ``linear2`` (no final activation).
    """

    def __init__(self, input_dim, hidden_dim_ae, hidden_dim_rgcn, output_dim, num_relations, num_bases, num_time_embeddings, dropout_rate=0.5):
        super().__init__()
        self.hidden_dim_ae = hidden_dim_ae
        self.hidden_dim_rgcn = hidden_dim_rgcn

        # Layers are created in a fixed order so parameter initialisation draws
        # from the RNG in the same sequence on every run.
        self.autoencoder = Autoencoder4(input_dim, hidden_dim_ae)
        self.time_embedding = nn.Embedding(num_time_embeddings, hidden_dim_ae)
        # The first convolution sees the encoded features concatenated with the time embedding.
        self.rgcn1 = TGCNConv4(hidden_dim_ae + hidden_dim_ae, hidden_dim_rgcn, K=2, normalization='sym')
        self.dropout1 = nn.Dropout(dropout_rate)
        self.rgcn2 = TGCNConv4(hidden_dim_rgcn, hidden_dim_rgcn, K=2, normalization='sym')
        self.dropout2 = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(hidden_dim_rgcn, hidden_dim_rgcn)
        self.linear2 = nn.Linear(hidden_dim_rgcn, output_dim)

        # Only `linear` is re-initialised here; `linear2` keeps nn.Linear's default init.
        nn.init.kaiming_uniform_(self.linear.weight, mode='fan_in', nonlinearity='relu')
        nn.init.zeros_(self.linear.bias)

    def forward(self, data):
        """Run the model on a Data object exposing ``x``, ``edge_index`` and ``edge_attr``."""
        edge_index = data.edge_index
        edge_attr = data.edge_attr

        latent, _reconstruction = self.autoencoder(data.x)

        # NOTE(review): .long() truncates toward zero. If edge_attr holds times normalised
        # to [0, 1], almost every edge would select embedding row 0 -- confirm the intended bucketing.
        temporal = self.time_embedding(edge_attr.long())
        if temporal.dim() == 3:
            # An (E, 1) index tensor yields (E, 1, hidden); drop the middle axis.
            temporal = temporal.squeeze(1)

        # Concatenating along dim=1 requires `latent` and `temporal` to have the same number of rows.
        fused = torch.cat([latent, temporal], dim=1).view(-1, self.hidden_dim_ae * 2)

        hidden = self.dropout1(F.relu(self.rgcn1(fused, edge_index, edge_attr)))
        hidden = self.dropout2(F.relu(self.rgcn2(hidden, edge_index, edge_attr)))
        # Two linear layers are applied back to back with no activation in between.
        return self.linear2(self.linear(hidden))


# Build, train and evaluate the ablation variant without polynomial features and confidence.
# hidden_dim_ae, hidden_dim_rgcn, lr and N_Epo are read from earlier cells.
input_dim_ae = train_data.x.shape[1]  # feature width of the training inputs
num_time_embeddings = 10  # size of the time-embedding table (hand-picked value)
num_relations = df['relation_label'].nunique()  # number of distinct relation labels in the full dataset
output_dim = 1  # Binary classification (trust/distrust)

no_PFsConf_model = no_PFs_Model2(
    input_dim_ae,
    hidden_dim_ae,
    hidden_dim_rgcn,
    output_dim,
    num_relations,
    num_bases=2,
    num_time_embeddings=num_time_embeddings
)

# BCEWithLogitsLoss applies the sigmoid internally, so the model output is treated as raw logits.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(no_PFsConf_model.parameters(), lr, weight_decay=1e-4)

start_time = time.time()
# Full-batch training: every epoch is a single forward/backward pass over train_data.
no_PFsConf_model.train()
for epoch in range(N_Epo):
    optimizer.zero_grad()
    out = no_PFsConf_model(train_data)

    # Reshape the labels to a float column vector so they match the model output.
    target = train_data.y.float().view(-1, 1)

    loss = criterion(out, target)
    loss.backward()
    optimizer.step()

    # Log the loss every 10th epoch.
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

end_time = time.time()
# Evaluation on test data (eval() switches the dropout layers off).
no_PFsConf_model.eval()
with torch.no_grad():
    no_PFsConf_model_pred = no_PFsConf_model(test_data)



# Convert logits to probabilities; these are consumed by the confusion-matrix cell below.
no_PFsConf_pred_probabilities = torch.sigmoid(no_PFsConf_model_pred)


# Wall-clock training time only (start_time/end_time bracket the training loop, not the evaluation).
runtime = end_time - start_time
print("Runtime:", runtime, "seconds")
no_PFsConf_time = runtime


# Hard labels: 1 where the raw logit is strictly above the mean logit of the test set.
no_PFsConf_pred_labels = ((no_PFsConf_model_pred) > torch.mean(no_PFsConf_model_pred)).int()
# NOTE(review): roc_auc_score is given the thresholded 0/1 labels rather than continuous scores,
# so this value reflects a single operating point, not the area under the full ROC curve.
no_PFsConf_auc = roc_auc_score(test_labels.numpy(), no_PFsConf_pred_labels.numpy())


# NOTE(review): torch.sort returns (values, indices), so the name `indices` below is bound to the
# sorted *values*. The sort runs over the last dimension; if the prediction is a column vector
# (as the view(-1, 1) target suggests) that dimension has size 1 and nothing is reordered.
indices, _ = torch.sort(no_PFsConf_model_pred, descending=True)
ranks = torch.zeros_like(indices, dtype=torch.float)  # Initialize ranks

# Element-wise copy: ranks[i] becomes (indices[i] + 1). Given the note above this is score + 1,
# not a rank position.
for i, idx in enumerate(indices):
    ranks[i] = idx + 1  # Adjust rank starting from 1

# Counts of positive and negative test labels.
num_positive_links = torch.sum(test_labels == 1)
num_negative_links = torch.sum(test_labels == 0)
# NOTE(review): P / (P - N) is a division by zero when the classes are balanced and is negative
# when negatives outnumber positives -- confirm the intended ranking-score normaliser.
denominator = num_positive_links / (num_positive_links - num_negative_links)

# Average the normalised `ranks` entries over the positive test edges.
positive_indices = test_labels == 1
positive_ranks = ranks[positive_indices]
RSe_values = positive_ranks / denominator
no_PFsConf_RS = torch.mean(RSe_values).item()

print('Evaluation results for proposed without polynomial features and confidence:')
print(f'AUC: {no_PFsConf_auc}, RS: {no_PFsConf_RS}')

In [None]:
# Ground-truth labels as a NumPy array.
actual = test_labels.numpy()
# Hard predictions: 1 where the probability is at least the mean test probability.
mean_probability = torch.mean(no_PFsConf_pred_probabilities)
predicted = (no_PFsConf_pred_probabilities >= mean_probability).int()

# Confusion matrix with the positive class listed first (labels=[1, 0]).
no_PFsConf_matrix = confusion_matrix(actual, predicted, labels=[1, 0])
print('Confusion matrix : \n', no_PFsConf_matrix)

# With labels=[1, 0] the flattened matrix reads tp, fn, fp, tn.
tp, fn, fp, tn = no_PFsConf_matrix.reshape(-1)
print('Outcome values : \n', tp, fn, fp, tn)

# From here on no_PFsConf_matrix holds the text classification report, not the matrix.
no_PFsConf_matrix = classification_report(actual, predicted, labels=[1, 0])
print('Classification report : \n', no_PFsConf_matrix)

# Scalar metrics stored for the results table.
predicted_np = predicted.numpy()
no_PFsConf_accuracy = accuracy_score(actual, predicted_np)
no_PFsConf_precision = precision_score(actual, predicted_np)
no_PFsConf_recall = recall_score(actual, predicted_np)
no_PFsConf_f1 = f1_score(actual, predicted_np)
print(f'Accuracy: {no_PFsConf_accuracy}, Precision: {no_PFsConf_precision}, Recall: {no_PFsConf_recall}, F1: {no_PFsConf_f1}')

In [None]:
### Collect the metrics of the proposed model and its three ablation variants.
# Each list follows the order given under 'Metric'; nothing is written to disk in this cell.
results = dict([
    ('Metric', ['AUC', 'RS', 'Accuracy', 'Precision', 'Recall', 'F1']),
    ('Without PFs and Confidence', [
        no_PFsConf_auc, no_PFsConf_RS, no_PFsConf_accuracy,
        no_PFsConf_precision, no_PFsConf_recall, no_PFsConf_f1,
    ]),
    ('Without Confidence', [
        no_confidence_auc, no_confidence_RS, no_confidence_accuracy,
        no_confidence_precision, no_confidence_recall, no_confidence_f1,
    ]),
    ('Without PFs', [
        no_PFs_auc, no_PFs_RS, no_PFs_accuracy,
        no_PFs_precision, no_PFs_recall, no_PFs_f1,
    ]),
    ('TGCN', [
        ourmodel_auc, ourmodel_RS, ourmodel_accuracy,
        ourmodel_precision, ourmodel_recall, ourmodel_f1,
    ]),
])
results

In [None]:
### Tabulate the collected metrics and write them to an Excel workbook.
# One row per metric, one column per model variant.
results_df = pd.DataFrame(data=results)

# The target directory must already exist; the row index is not written.
results_excel_path = 'results/withConfusionMatrix/results_Enron_dataset_200epoch.xlsx'
results_df.to_excel(results_excel_path, index=False)
results_df

### Plotting the results of the proposed approach and its variants on a bar chart for comparison

In [None]:
import matplotlib.pyplot as plt

# Ablation chart 1: keep only the AUC and RS rows of results_df.
rows_to_drop = ["Accuracy", "Precision", "Recall", "F1"]
metrics_filter1 = results_df[~results_df['Metric'].isin(rows_to_drop)]

# Tick labels and one Series of bar heights per model variant.
metrics = metrics_filter1['Metric']
no_PFsConf_values = metrics_filter1['Without PFs and Confidence']
no_Conf_values = metrics_filter1['Without Confidence']
no_PFs_values = metrics_filter1['Without PFs']
TGCN_values = metrics_filter1['TGCN']

# Four bars per metric, centred on the integer tick position.
bar_width = 0.15
index = range(len(metrics))
outer_offset = 1.5 * bar_width
inner_offset = 0.5 * bar_width
positions1 = [i - outer_offset for i in index]
positions2 = [i - inner_offset for i in index]
positions3 = [i + inner_offset for i in index]
positions4 = [i + outer_offset for i in index]

# One colour per variant, in drawing order.
colors = ['blue', 'green', 'orange', 'purple']

# Draw the four bar groups left to right.
fig, ax = plt.subplots(figsize=(12, 6))
bar1, bar2, bar3, bar4 = [
    ax.bar(bar_positions, bar_heights, bar_width, label=legend_label, color=bar_color)
    for bar_positions, bar_heights, legend_label, bar_color in zip(
        (positions1, positions2, positions3, positions4),
        (no_PFsConf_values, no_Conf_values, no_PFs_values, TGCN_values),
        ('Model 1', 'Model 2', 'Model 3', 'R-CTGCN'),
        colors,
    )
]

# Print the value, rounded to three decimals, just above each bar.
for bars, values in (
    (bar1, no_PFsConf_values),
    (bar2, no_Conf_values),
    (bar3, no_PFs_values),
    (bar4, TGCN_values),
):
    for bar, value in zip(bars, values):
        yval = bar.get_height()
        label_x = bar.get_x() + bar.get_width()/2
        ax.text(label_x, yval + 0.005, round(value, 3), ha='center', va='bottom')

# Axis labels, title, ticks and a legend placed above the axes.
ax.set_xlabel('Metrics')
ax.set_ylabel('Values')
ax.set_title('Comparison of Different Model Configurations')
ax.set_xticks(list(index))
ax.set_xticklabels(metrics)
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), ncol=4)

# Write the figure to disk before showing it.
plt.savefig('results/withConfusionMatrix/chart_Enron_dataset_200epoch.png', bbox_inches='tight')

plt.show()


In [None]:
import matplotlib.pyplot as plt

# Ablation chart 2: keep the Accuracy, Precision, Recall and F1 rows (AUC and RS are dropped).
rows_to_drop = ["AUC", "RS"]
metrics_filter2 = results_df[~results_df['Metric'].isin(rows_to_drop)]

# Tick labels and one Series of bar heights per model variant.
metrics = metrics_filter2['Metric']
no_PFsConf_values = metrics_filter2['Without PFs and Confidence']
no_Conf_values = metrics_filter2['Without Confidence']
no_PFs_values = metrics_filter2['Without PFs']
TGCN_values = metrics_filter2['TGCN']

# Four bars per metric, centred on the integer tick position.
bar_width = 0.2
index = range(len(metrics))
outer_offset = 1.5 * bar_width
inner_offset = 0.5 * bar_width
positions1 = [i - outer_offset for i in index]
positions2 = [i - inner_offset for i in index]
positions3 = [i + inner_offset for i in index]
positions4 = [i + outer_offset for i in index]

# One colour per variant, in drawing order.
colors = ['blue', 'green', 'orange', 'purple']

# Draw the four bar groups left to right.
fig, ax = plt.subplots(figsize=(12, 6))
bar1, bar2, bar3, bar4 = [
    ax.bar(bar_positions, bar_heights, bar_width, label=legend_label, color=bar_color)
    for bar_positions, bar_heights, legend_label, bar_color in zip(
        (positions1, positions2, positions3, positions4),
        (no_PFsConf_values, no_Conf_values, no_PFs_values, TGCN_values),
        ('Model 1', 'Model 2', 'Model 3', 'R-CTGCN'),
        colors,
    )
]

# Print the value, rounded to three decimals, just above each bar.
for bars, values in (
    (bar1, no_PFsConf_values),
    (bar2, no_Conf_values),
    (bar3, no_PFs_values),
    (bar4, TGCN_values),
):
    for bar, value in zip(bars, values):
        yval = bar.get_height()
        label_x = bar.get_x() + bar.get_width()/2
        ax.text(label_x, yval + 0.005, round(value, 3), ha='center', va='bottom')

# Axis labels, title, rotated tick labels and a legend placed above the axes.
ax.set_xlabel('Metrics')
ax.set_ylabel('Values')
ax.set_title('Comparison of Different Model Configurations')
ax.set_xticks(list(index))
ax.set_xticklabels(metrics, rotation=45, ha='right')
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), ncol=4)


# Write the figure to disk before showing it.
plt.savefig('results/withConfusionMatrix/Confusionmatrix_chart_Enron_dataset_200epoch.png', bbox_inches='tight')

plt.show()


# Comparison with Mirror and RGCN

In [None]:
# Draw one uniform random value in [0, 1) per row of df.
# NOTE(review): no seed is set in this cell, so unless the NumPy RNG was seeded in an earlier cell
# these values (and everything trained on them) change on every run.
random_weight_matrix = np.random.rand(df.shape[0], 1)
# Rescale both similarity columns by their maximum. This overwrites df in place, so any later cell
# reading these columns sees the rescaled values.
df['jaccard_similarity'] = df['jaccard_similarity'] / df['jaccard_similarity'].max()
df['adamic_similarity'] = df['adamic_similarity'] / df['adamic_similarity'].max()
# Stack the three column vectors side by side: [jaccard, adamic, random].
aggregated_matrix = np.hstack([df['jaccard_similarity'].values.reshape(-1, 1), 
                               df['adamic_similarity'].values.reshape(-1, 1), 
                               random_weight_matrix])

# Column 2 of the stacked matrix is the random column, so 'aggregated_similarity' contains only the
# random weights; the two similarity columns do not contribute to it.
# NOTE(review): confirm that a purely random feature is the intended input for the Mirror baseline.
df['aggregated_similarity'] = aggregated_matrix[:, 2]
#df['aggregated_similarity'] = df['aggregated_similarity'] * 0.5


### Mirror

In [None]:

# 80/20 train/test split of the rows of df, with a fixed seed.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Mirror's input features: Jaccard similarity plus the 'aggregated_similarity' column.
mirror_feature_columns = ['jaccard_similarity', 'aggregated_similarity']
train_features = torch.tensor(train_df[mirror_feature_columns].values, dtype=torch.float)
test_features = torch.tensor(test_df[mirror_feature_columns].values, dtype=torch.float)

# Edge lists as 2 x E arrays (row 0 = source index, row 1 = target index), then as LongTensors.
endpoint_columns = ('source_index', 'target_index')
edge_index_train_np = np.array([train_df[column].values for column in endpoint_columns])
edge_index_test_np = np.array([test_df[column].values for column in endpoint_columns])
edge_index_train = torch.tensor(edge_index_train_np, dtype=torch.long)
edge_index_test = torch.tensor(edge_index_test_np, dtype=torch.long)

# Targets come from 'relation_label'.
train_labels = torch.tensor(train_df['relation_label'].values, dtype=torch.long)
test_labels = torch.tensor(test_df['relation_label'].values, dtype=torch.long)

# NOTE(review): the same label tensor is passed as both `y` and `edge_type`, so the relation type fed
# to the relational layers is the value being predicted (also for the test split) -- confirm intended.
train_data = Data(x=train_features, edge_index=edge_index_train, y=train_labels, edge_type=train_labels)
test_data = Data(x=test_features, edge_index=edge_index_test, y=test_labels, edge_type=test_labels)
from torch_geometric.nn import RGCNConv
# Define the autoencoder model
class MirrorAutoencoder(nn.Module):
    """Two-layer autoencoder: a ReLU-activated linear encoder and a sigmoid-activated linear decoder.

    ``forward`` returns ``(encoded, decoded)``.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.encoder = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.decoder = nn.Linear(in_features=hidden_dim, out_features=input_dim)

    def forward(self, x):
        """Encode ``x`` and reconstruct it from the code."""
        hidden = torch.relu(self.encoder(x))
        reconstruction = self.decoder(hidden).sigmoid()
        return hidden, reconstruction

# Define the Mirror baseline: an autoencoder followed by alternating ChebConv and RGCNConv layers
class Mirror(nn.Module):
    """Mirror baseline.

    Pipeline: ``MirrorAutoencoder`` encoder -> ChebConv -> RGCNConv -> ChebConv -> RGCNConv -> sigmoid.

    Args:
        input_dim: Width of ``data.x``.
        hidden_dim_ae: Width of the autoencoder code.
        hidden_dim_rgcn: Width of the hidden graph-convolution layers.
        output_dim: Width of the output of the last relational layer.
        num_relations: Number of relation types passed to both RGCNConv layers.
        num_bases: Number of bases passed to both RGCNConv layers. Previously this
            argument was accepted but ignored in favour of a hard-coded 2.

    ``forward`` returns sigmoid-activated outputs, i.e. values in (0, 1); callers
    must not apply another sigmoid.
    """

    def __init__(self, input_dim, hidden_dim_ae, hidden_dim_rgcn, output_dim, num_relations, num_bases):
        super(Mirror, self).__init__()
        self.autoencoder = MirrorAutoencoder(input_dim, hidden_dim_ae)
        self.rgcn1 = ChebConv(hidden_dim_ae, hidden_dim_rgcn, K=2, normalization='sym')
        self.rgcn2 = RGCNConv(hidden_dim_rgcn, hidden_dim_rgcn, num_relations, num_bases=num_bases)
        self.rgcn3 = ChebConv(hidden_dim_rgcn, hidden_dim_rgcn, K=2, normalization='sym')
        self.rgcn4 = RGCNConv(hidden_dim_rgcn, output_dim, num_relations, num_bases=num_bases)

    def forward(self, data):
        """Run the model on a Data object exposing ``x``, ``edge_index`` and ``edge_type``."""
        x, edge_index, edge_type = data.x, data.edge_index, data.edge_type
        # Only the encoded representation is used; the reconstruction is discarded.
        encoded, _ = self.autoencoder(x)
        x = F.relu(self.rgcn1(encoded, edge_index))
        x = F.dropout(x, training=self.training)
        x = F.relu(self.rgcn2(x, edge_index, edge_type))
        x = F.relu(self.rgcn3(x, edge_index))
        x = F.dropout(x, training=self.training)
        x = self.rgcn4(x, edge_index, edge_type)
        return torch.sigmoid(x)


# Build and train the Mirror baseline.
# hidden_dim_ae, hidden_dim_rgcn, lr and N_Epo are read from earlier cells.
input_dim_ae = train_data.x.shape[1]  # feature width of the training inputs
num_relations = df['relation_label'].nunique()  # number of distinct relation labels in the full dataset
output_dim = 1  # Binary classification

Mirror_model = Mirror(input_dim_ae, hidden_dim_ae, hidden_dim_rgcn, output_dim, num_relations, num_bases=2)

# BCELoss expects probabilities; Mirror.forward ends in a sigmoid, so the two are paired correctly.
criterion = nn.BCELoss() # BCELoss  # BCEWithLogitsLoss
optimizer = torch.optim.Adam(Mirror_model.parameters(), lr)
# .optim.SGD
# Full-batch training: every epoch is a single forward/backward pass over train_data.
Mirror_model.train()
for epoch in range(N_Epo):
    optimizer.zero_grad()
    out = Mirror_model(train_data)
    
    # Reshape the labels to a float column vector so they match the model output.
    target = train_data.y.float().view(-1, 1)
    
    loss = criterion(out, target)
    #loss2 = loss * 0.1
    loss.backward()
    optimizer.step()

    # Log the loss every 10th epoch.
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')


# Evaluation on test data (eval() switches dropout off).
Mirror_model.eval()
with torch.no_grad():
    pred = Mirror_model(test_data)


# Mirror.forward already ends in torch.sigmoid, so `pred` holds probabilities.
# Passing them through sigmoid again (twice, previously) squeezed every score into a
# narrow band around 0.65 and shifted the mean-based threshold used below.
Mirrorpred_probabilities = pred

# Hard labels: 1 where the probability is at least the mean test probability.
# NOTE(review): roc_auc_score is given these thresholded 0/1 labels rather than the continuous
# scores, so the value reflects a single operating point, not the area under the full ROC curve.
Mirror_auc = roc_auc_score(test_labels.numpy(), (Mirrorpred_probabilities >= torch.mean(Mirrorpred_probabilities)).int().numpy())

# NOTE(review): torch.sort returns (values, indices), so the name `indices` is bound to the sorted
# *values*. The sort runs over the last dimension; for a column-vector prediction that dimension
# has size 1 and nothing is reordered.
indices, _ = torch.sort(Mirrorpred_probabilities, descending=True)
# Vectorised form of the former per-element loop `ranks[i] = indices[i] + 1`.
ranks = indices.to(torch.float) + 1

# Counts of positive and negative test labels.
num_positive_links = torch.sum(test_labels == 1)
num_negative_links = torch.sum(test_labels == 0)
# NOTE(review): P / (P - N) is a division by zero when the classes are balanced and is negative
# when negatives outnumber positives -- confirm the intended ranking-score normaliser.
denominator = (num_positive_links / (num_positive_links - num_negative_links))

# Average the normalised `ranks` entries over the positive test edges.
positive_indices = test_labels == 1
positive_ranks = ranks[positive_indices]
RSe_values = positive_ranks / denominator

Mirror_RS = torch.mean(RSe_values).item()

print('Evaluation results for Mirror:')
print(f'AUC: {Mirror_auc}, RS: {Mirror_RS}')

In [None]:
# Ground-truth labels as a NumPy array.
actual = test_labels.numpy()
# Hard predictions: 1 where the score is at least the mean test score.
mean_probability = torch.mean(Mirrorpred_probabilities)
predicted = (Mirrorpred_probabilities >= mean_probability).int()


# Confusion matrix with the positive class listed first (labels=[1, 0]).
Mirror_matrix = confusion_matrix(actual, predicted, labels=[1, 0])
print('Confusion matrix : \n', Mirror_matrix)

# With labels=[1, 0] the flattened matrix reads tp, fn, fp, tn.
tp, fn, fp, tn = Mirror_matrix.reshape(-1)
print('Outcome values : \n', tp, fn, fp, tn)

# From here on Mirror_matrix holds the text classification report, not the matrix.
Mirror_matrix = classification_report(actual, predicted, labels=[1, 0])
print('Classification report : \n', Mirror_matrix)

# Scalar metrics stored for the results table.
predicted_np = predicted.numpy()
Mirror_accuracy = accuracy_score(actual, predicted_np)
Mirror_precision = precision_score(actual, predicted_np)
Mirror_recall = recall_score(actual, predicted_np)
Mirror_f1 = f1_score(actual, predicted_np)
print(f'Accuracy: {Mirror_accuracy}, Precision: {Mirror_precision}, Recall: {Mirror_recall}, F1: {Mirror_f1}')

### RGCN

In [None]:
# R-GCN's input features: the Jaccard and Adamic similarity columns of the existing split.
rgcn_feature_columns = ['jaccard_similarity', 'adamic_similarity']
train_features = torch.tensor(train_df[rgcn_feature_columns].values, dtype=torch.float)
test_features = torch.tensor(test_df[rgcn_feature_columns].values, dtype=torch.float)

# Edge lists as 2 x E arrays (row 0 = source index, row 1 = target index), then as LongTensors.
endpoint_columns = ('source_index', 'target_index')
edge_index_train_np = np.array([train_df[column].values for column in endpoint_columns])
edge_index_test_np = np.array([test_df[column].values for column in endpoint_columns])
edge_index_train = torch.tensor(edge_index_train_np, dtype=torch.long)
edge_index_test = torch.tensor(edge_index_test_np, dtype=torch.long)

# Targets come from 'relation_label'.
train_labels = torch.tensor(train_df['relation_label'].values, dtype=torch.long)
test_labels = torch.tensor(test_df['relation_label'].values, dtype=torch.long)

# NOTE(review): the same label tensor is passed as both `y` and `edge_type`, so the relation type fed
# to the relational layers is the value being predicted (also for the test split) -- confirm intended.
train_data = Data(x=train_features, edge_index=edge_index_train, y=train_labels, edge_type=train_labels)
test_data = Data(x=test_features, edge_index=edge_index_test, y=test_labels, edge_type=test_labels)

from torch_geometric.nn import RGCNConv

# Define the R-GCN baseline: three stacked RGCNConv layers (no autoencoder)
class RGCN(nn.Module):
    """R-GCN baseline made of three relational graph-convolution layers.

    Args:
        input_dim: Width of ``data.x``.
        hidden_dim_rgcn: Width of the two hidden layers.
        output_dim: Width of the output of the last layer.
        num_relations: Number of relation types passed to every RGCNConv layer.
        num_bases: Number of bases passed to every RGCNConv layer. Defaults to 2,
            the value that used to be hard-coded.

    ``forward`` returns sigmoid-activated outputs, i.e. values in (0, 1); callers
    must not apply another sigmoid.
    """

    def __init__(self, input_dim, hidden_dim_rgcn, output_dim, num_relations, num_bases=2):
        super(RGCN, self).__init__()
        self.rgcn1 = RGCNConv(input_dim, hidden_dim_rgcn, num_relations, num_bases=num_bases)
        self.rgcn2 = RGCNConv(hidden_dim_rgcn, hidden_dim_rgcn, num_relations, num_bases=num_bases)
        self.rgcn3 = RGCNConv(hidden_dim_rgcn, output_dim, num_relations, num_bases=num_bases)

    def forward(self, data):
        """Run the model on a Data object exposing ``x``, ``edge_index`` and ``edge_type``."""
        x, edge_index, edge_type = data.x, data.edge_index, data.edge_type
        # Row-wise L2 normalisation of the input features.
        x = F.normalize(x, p=2, dim=1)
        x = F.relu(self.rgcn1(x, edge_index, edge_type))
        # Dropout is applied after the first layer only.
        x = F.dropout(x, training=self.training)
        x = F.relu(self.rgcn2(x, edge_index, edge_type))
        x = self.rgcn3(x, edge_index, edge_type)
        return torch.sigmoid(x)




# Build and train the R-GCN baseline (this model has no autoencoder stage).
# hidden_dim_rgcn, lr and N_Epo are read from earlier cells.
input_dim_ae = train_data.x.shape[1]  # feature width of the training inputs
num_relations = df['relation_label'].nunique()  # number of distinct relation labels in the full dataset
output_dim = 1  # Binary classification (trust/distrust)

RGCNmodel = RGCN(input_dim_ae, hidden_dim_rgcn, output_dim, num_relations)

# BCELoss expects probabilities; RGCN.forward ends in a sigmoid, so the two are paired correctly.
criterion = nn.BCELoss()
# NOTE(review): this baseline is optimised with Adadelta, whereas the other models in this part of
# the notebook use Adam with the same `lr` -- confirm the difference is deliberate.
optimizer = torch.optim.Adadelta(RGCNmodel.parameters(), lr)


# Full-batch training: every epoch is a single forward/backward pass over train_data.
RGCNmodel.train()
for epoch in range(N_Epo):  # the epoch count comes from N_Epo
    optimizer.zero_grad()
    out = RGCNmodel(train_data)
    # Reshape the labels to a float column vector so they match the model output.
    target = train_data.y.float().view(-1, 1)
    loss = criterion(out, target)
    loss.backward()
    optimizer.step()

    # Log the loss every 10th epoch.
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')


# Evaluate the model on the test set (eval() switches dropout off).
RGCNmodel.eval()
with torch.no_grad():
    pred_rgcn = RGCNmodel(test_data)

# RGCN.forward already ends in torch.sigmoid, so `pred_rgcn` holds probabilities.
# Applying sigmoid a second time compressed every score into roughly (0.5, 0.73) and
# shifted the mean-based threshold used below.
pred_rgcn_probabilities = pred_rgcn

# Hard labels: 1 where the probability is at least the mean test probability.
# NOTE(review): roc_auc_score is given these thresholded 0/1 labels rather than the continuous
# scores, so the value reflects a single operating point, not the area under the full ROC curve.
RGCN_auc = roc_auc_score(test_labels.numpy(), (pred_rgcn_probabilities >= torch.mean(pred_rgcn_probabilities)).int().numpy())


# NOTE(review): torch.sort returns (values, indices), so the name `indices` is bound to the sorted
# *values*. The sort runs over the last dimension; for a column-vector prediction that dimension
# has size 1 and nothing is reordered.
indices, _ = torch.sort(pred_rgcn_probabilities, descending=True)
# Vectorised form of the former per-element loop `ranks[i] = indices[i] + 1`.
ranks = indices.to(torch.float) + 1

# Counts of positive and negative test labels.
num_positive_links = torch.sum(test_labels == 1)
num_negative_links = torch.sum(test_labels == 0)
# NOTE(review): P / (P - N) is a division by zero when the classes are balanced and is negative
# when negatives outnumber positives -- confirm the intended ranking-score normaliser.
denominator = (num_positive_links / (num_positive_links - num_negative_links))

# Average the normalised `ranks` entries over the positive test edges.
positive_indices = test_labels == 1
positive_ranks = ranks[positive_indices]
RSe_values = positive_ranks / denominator
RGCN_RS = torch.mean(RSe_values).item()

print('Evaluation results for R-GCN:')
print(f'AUC: {RGCN_auc}, RS: {RGCN_RS}')

In [None]:
# Ground-truth labels as a NumPy array.
actual = test_labels.numpy()
# Hard predictions: 1 where the score is at least the mean test score.
mean_probability = torch.mean(pred_rgcn_probabilities)
predicted = (pred_rgcn_probabilities >= mean_probability).int()


# Confusion matrix with the positive class listed first (labels=[1, 0]).
RGCN_matrix = confusion_matrix(actual, predicted, labels=[1, 0])
print('Confusion matrix : \n', RGCN_matrix)

# With labels=[1, 0] the flattened matrix reads tp, fn, fp, tn.
tp, fn, fp, tn = RGCN_matrix.reshape(-1)
print('Outcome values : \n', tp, fn, fp, tn)

# From here on RGCN_matrix holds the text classification report, not the matrix.
RGCN_matrix = classification_report(actual, predicted, labels=[1, 0])
print('Classification report : \n', RGCN_matrix)

# Scalar metrics stored for the results table.
predicted_np = predicted.numpy()
RGCN_accuracy = accuracy_score(actual, predicted_np)
RGCN_precision = precision_score(actual, predicted_np)
RGCN_recall = recall_score(actual, predicted_np)
RGCN_f1 = f1_score(actual, predicted_np)
print(f'Accuracy: {RGCN_accuracy}, Precision: {RGCN_precision}, Recall: {RGCN_recall}, F1: {RGCN_f1}')

### Add results of Mirror and RGCN to the results df and re-save

In [None]:
# Append one column per baseline; each list follows the row order of the 'Metric' column.
baseline_columns = {
    'Mirror': [Mirror_auc, Mirror_RS, Mirror_accuracy, Mirror_precision, Mirror_recall, Mirror_f1],
    'R-GCN': [RGCN_auc, RGCN_RS, RGCN_accuracy, RGCN_precision, RGCN_recall, RGCN_f1],
}
for baseline_name, baseline_metrics in baseline_columns.items():
    results_df[baseline_name] = baseline_metrics

# Overwrite the workbook written earlier so it also contains the baseline columns.
results_excel_path = 'results/withConfusionMatrix/results_Enron_dataset_200epoch.xlsx'
results_df.to_excel(results_excel_path, index=False)
results_df

In [None]:
# Keep only the AUC and RS rows (the four classification metrics are dropped).
rows_to_drop = ["Accuracy", "Precision", "Recall", "F1"]

is_dropped = results_df['Metric'].isin(rows_to_drop)
metrics_filter1 = results_df[~is_dropped]
metrics_filter1

In [None]:
# Keep only the Accuracy, Precision, Recall and F1 rows (AUC and RS are dropped).
rows_to_drop = ["AUC", "RS"]
is_dropped = results_df['Metric'].isin(rows_to_drop)
metrics_filter2 = results_df[~is_dropped]
metrics_filter2

In [None]:
import matplotlib.pyplot as plt

# Baseline chart 1: AUC and RS of the proposed model against R-GCN and Mirror.
metrics = metrics_filter1['Metric']
RCTGCN_values = metrics_filter1['TGCN']
RGCN_values = metrics_filter1['R-GCN']
Mirror_values = metrics_filter1['Mirror']

# Three bars per metric: one on the tick, one on each side with a small gap.
bar_width = 0.2
index = range(len(metrics))
side_offset = 1.1 * bar_width
positions1 = [i - side_offset for i in index]
positions2 = list(index)
positions3 = [i + side_offset for i in index]

# Only the last three colours of this palette are used below.
colors = ['blue', 'green', 'orange', 'purple', 'maroon', 'cyan']

# Draw the three bar groups left to right.
fig, ax = plt.subplots(figsize=(12, 6))
bar1, bar2, bar3 = [
    ax.bar(bar_positions, bar_heights, bar_width, label=legend_label, color=bar_color)
    for bar_positions, bar_heights, legend_label, bar_color in zip(
        (positions1, positions2, positions3),
        (RCTGCN_values, RGCN_values, Mirror_values),
        ('R-CTGCN', 'R-GCN', 'Mirror'),
        colors[3:],
    )
]

# Print the value, rounded to three decimals, just above each bar.
all_bars = [bar1, bar2, bar3]
all_values = [RCTGCN_values, RGCN_values, Mirror_values]
for bars, values in zip(all_bars, all_values):
    for bar, value in zip(bars, values):
        yval = bar.get_height()
        label_x = bar.get_x() + bar.get_width()/2
        ax.text(label_x, yval + 0.005, round(value, 3), ha='center', va='bottom')

# Axis labels, title, ticks and a legend placed above the axes.
ax.set_xlabel('Metrics')
ax.set_ylabel('Values')
ax.set_title('Comparison of Different Models')
ax.set_xticks(list(index))
ax.set_xticklabels(metrics)
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), ncol=6)

# Write the figure to disk before showing it.
plt.savefig('results/withConfusionMatrix/02_chart_Enron_data_200epoch_2.png', bbox_inches='tight')

plt.show()


In [None]:
import matplotlib.pyplot as plt

# Baseline chart 2: Accuracy, Precision, Recall and F1 of the proposed model against R-GCN and Mirror.
metrics = metrics_filter2['Metric']
RCTGCN_values = metrics_filter2['TGCN']
RGCN_values = metrics_filter2['R-GCN']
Mirror_values = metrics_filter2['Mirror']

# Three bars per metric: one on the tick, one on each side with a small gap.
bar_width = 0.2
index = range(len(metrics))
side_offset = 1.1 * bar_width
positions1 = [i - side_offset for i in index]
positions2 = list(index)
positions3 = [i + side_offset for i in index]

# One colour per model, in drawing order.
colors = ['purple', 'maroon', 'cyan']

# Draw the three bar groups left to right.
fig, ax = plt.subplots(figsize=(12, 6))
bar1, bar2, bar3 = [
    ax.bar(bar_positions, bar_heights, bar_width, label=legend_label, color=bar_color)
    for bar_positions, bar_heights, legend_label, bar_color in zip(
        (positions1, positions2, positions3),
        (RCTGCN_values, RGCN_values, Mirror_values),
        ('R-CTGCN', 'R-GCN', 'Mirror'),
        colors,
    )
]

# Print the value, rounded to three decimals, just above each bar.
all_bars = [bar1, bar2, bar3]
all_values = [RCTGCN_values, RGCN_values, Mirror_values]
for bars, values in zip(all_bars, all_values):
    for bar, value in zip(bars, values):
        yval = bar.get_height()
        label_x = bar.get_x() + bar.get_width()/2
        ax.text(label_x, yval + 0.005, round(value, 3), ha='center', va='bottom')

# Axis labels, title, ticks and a legend placed above the axes.
ax.set_xlabel('Metrics')
ax.set_ylabel('Values')
ax.set_title('Comparison of Different Models')
ax.set_xticks(list(index))
ax.set_xticklabels(metrics)
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), ncol=6)

# Write the figure to disk before showing it.
plt.savefig('results/withConfusionMatrix/02_Confusionmatrix_chart_Enron_data_200epoch_2.png', bbox_inches='tight')

plt.show()
