In [2]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from metrics import *
from data_prepare import testloader
from models import GCNN, AttGNN
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix

(97316, 7)
Size is : 
97316
Length
19463
4866
GCNN Loaded
GCNN(
  (pro1_conv1): GCNConv(1024, 1024)
  (pro1_fc1): Linear(in_features=1024, out_features=128, bias=True)
  (pro2_conv1): GCNConv(1024, 1024)
  (pro2_fc1): Linear(in_features=1024, out_features=128, bias=True)
  (relu): LeakyReLU(negative_slope=0.01)
  (dropout): Dropout(p=0.2, inplace=False)
  (sigmoid): Sigmoid()
  (fc1): Linear(in_features=256, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=64, bias=True)
  (out): Linear(in_features=64, out_features=1, bias=True)
)
AttGNN Loaded
AttGNN(
  (pro1_conv1): GATConv(1024, 128, heads=1)
  (pro1_fc1): Linear(in_features=128, out_features=128, bias=True)
  (pro2_conv1): GATConv(1024, 128, heads=1)
  (pro2_fc1): Linear(in_features=128, out_features=128, bias=True)
  (relu): LeakyReLU(negative_slope=0.01)
  (sigmoid): Sigmoid()
  (dropout): Dropout(p=0.2, inplace=False)
  (fc1): Linear(in_features=256, out_features=256, bias=True)
  (fc2): Linear(in_featu



In [3]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.cuda("cpu")
model = GCNN()
model.load_state_dict(torch.load("../human_features/GCN_50.pth")) #path to load the model
model.to(device)
model.eval()
predictions = torch.Tensor()
labels = torch.Tensor()
with torch.no_grad():
    for prot_1, prot_2, label in testloader:
      prot_1 = prot_1.to(device)
      prot_2 = prot_2.to(device)
      #print("H")
      #print(torch.Tensor.size(prot_1.x), torch.Tensor.size(prot_2.x))
      output = model(prot_1, prot_2)
      predictions = torch.cat((predictions, output.cpu()), 0)
      labels = torch.cat((labels, label.view(-1,1).cpu()), 0)
labels = labels.numpy().flatten()
predictions = predictions.numpy().flatten()

GCNN Loaded


In [4]:

def choose_optimal_threshold(actual, predicted):
    """
    Choose the optimal threshold that maximizes the F1 score
    
    Args:
        actual (np.array): True labels
        predicted (np.array): Predicted probabilities
    
    Returns:
        float: Optimal threshold
    """
    thresholds = np.linspace(0, 1, 100)
    f1_scores = []
    
    for threshold in thresholds:
        binary_actual = (actual >= np.median(actual)).astype(int)
        binary_pred = (predicted >= threshold).astype(int)
        
        # Compute confusion matrix
        tn, fp, fn, tp = confusion_matrix(binary_actual, binary_pred).ravel()
        
        # Compute F1 score
        if tp + fp == 0 or tp + fn == 0:
            f1 = 0
        else:
            precision = tp / (tp + fp)
            recall = tp / (tp + fn)
            f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
        
        f1_scores.append(f1)
    
    # Return threshold with max F1 score
    return thresholds[np.argmax(f1_scores)]

def get_binary_metrics(actual, predicted, threshold=0.5):
    """
    Compute binary classification metrics for continuous labels
    
    Args:
        actual (np.array): True labels
        predicted (np.array): Predicted probabilities
        threshold (float, optional): Classification threshold. If None, optimal threshold is computed.
    
    Returns:
        dict: Metrics including accuracy, precision, recall, F1, etc.
    """
    # If no threshold provided, find optimal threshold
    if threshold is None:
        threshold = choose_optimal_threshold(actual, predicted)
    
    # Convert to binary classification
    binary_actual = (actual >= np.median(actual)).astype(int)
    binary_pred = (predicted >= threshold).astype(int)
    
    # Compute confusion matrix
    tn, fp, fn, tp = confusion_matrix(binary_actual, binary_pred).ravel()
    
    # Compute metrics
    metrics = {
        'threshold': threshold,
        'accuracy': (tp + tn) / (tp + tn + fp + fn),
        'precision': tp / (tp + fp) if tp + fp > 0 else 0,
        'recall': tp / (tp + fn) if tp + fn > 0 else 0,
        'specificity': tn / (tn + fp) if tn + fp > 0 else 0,
        'f1_score': 2 * tp / (2 * tp + fp + fn) if tp > 0 else 0,
        'auroc': roc_auc_score(binary_actual, predicted),
        'auprc': average_precision_score(binary_actual, predicted)
    }
    
    return metrics

# Modify existing metrics to use the new approach
def get_mse(actual, predicted):
    return ((actual - predicted) ** 2).mean()

def get_accuracy(actual, predicted, threshold=None):
    metrics = get_binary_metrics(actual, predicted, threshold)
    return metrics['accuracy'] * 100.0

def precision(actual, predicted, threshold=None):
    metrics = get_binary_metrics(actual, predicted, threshold)
    return metrics['precision']

def sensitivity(actual, predicted, threshold=None):
    metrics = get_binary_metrics(actual, predicted, threshold)
    return metrics['recall']

def specificity(actual, predicted, threshold=None):
    metrics = get_binary_metrics(actual, predicted, threshold)
    return metrics['specificity']

def f_score(actual, predicted, threshold=None):
    metrics = get_binary_metrics(actual, predicted, threshold)
    return metrics['f1_score']

def mcc(actual, predicted, threshold=None):
    metrics = get_binary_metrics(actual, predicted, threshold)
    
    # Matthews Correlation Coefficient calculation
    tp = metrics['precision'] * len(actual)
    tn = metrics['specificity'] * len(actual)
    fp = tp / metrics['precision'] - tp
    fn = tp / metrics['recall'] - tp
    
    numerator = (tp * tn) - (fp * fn)
    denominator = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    
    return numerator / denominator if denominator != 0 else 0

def auroc(actual, predicted):
    return get_binary_metrics(actual, predicted)['auroc']

def auprc(actual, predicted):
    return get_binary_metrics(actual, predicted)['auprc']

In [5]:
loss = get_mse(labels, predictions)
acc = get_accuracy(labels, predictions, 0.5)
prec = precision(labels, predictions, 0.5)
sensitivity = sensitivity(labels, predictions,  0.5)
specificity = specificity(labels, predictions, 0.5)
f1 = f_score(labels, predictions, 0.5)
mcc = mcc(labels, predictions,  0.5)
auroc = auroc(labels, predictions)
auprc = auprc(labels, predictions)


print(f'loss : {loss}')
print(f'Accuracy : {acc}')
print(f'precision: {prec}')
print(f'Sensititvity :{sensitivity}')
print(f'specificity : {specificity}')
print(f'f-score : {f1}')
print(f'MCC : {mcc}')
print(f'AUROC: {auroc}')
print(f'AUPRC: {auprc}')

loss : 0.01565999181152305
Accuracy : 30.707973695026713
precision: 0.9301470588235294
Sensititvity :0.14602706342589625
specificity : 0.9558253681219323
f-score : 0.25242503187184745
MCC : 0.0787584535952667
AUROC: 0.7492833848650995
AUPRC: 0.9020276008243605


In [None]:
## model with 5 epochs

In [6]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.cuda("cpu")
model = GCNN()
model.load_state_dict(torch.load("../human_features/GCN_5epochs.pth")) #path to load the model
model.to(device)
model.eval()
predictions = torch.Tensor()
labels = torch.Tensor()
with torch.no_grad():
    for prot_1, prot_2, label in testloader:
      prot_1 = prot_1.to(device)
      prot_2 = prot_2.to(device)
      #print("H")
      #print(torch.Tensor.size(prot_1.x), torch.Tensor.size(prot_2.x))
      output = model(prot_1, prot_2)
      predictions = torch.cat((predictions, output.cpu()), 0)
      labels = torch.cat((labels, label.view(-1,1).cpu()), 0)
labels = labels.numpy().flatten()
predictions = predictions.numpy().flatten()

GCNN Loaded


In [8]:
## model 5 epochs 

def choose_optimal_threshold(actual, predicted):
    """
    Choose the optimal threshold that maximizes the F1 score
    
    Args:
        actual (np.array): True labels
        predicted (np.array): Predicted probabilities
    
    Returns:
        float: Optimal threshold
    """
    thresholds = np.linspace(0, 1, 100)
    f1_scores = []
    
    for threshold in thresholds:
        binary_actual = (actual >= np.median(actual)).astype(int)
        binary_pred = (predicted >= threshold).astype(int)
        
        # Compute confusion matrix
        tn, fp, fn, tp = confusion_matrix(binary_actual, binary_pred).ravel()
        
        # Compute F1 score
        if tp + fp == 0 or tp + fn == 0:
            f1 = 0
        else:
            precision = tp / (tp + fp)
            recall = tp / (tp + fn)
            f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
        
        f1_scores.append(f1)
    
    # Return threshold with max F1 score
    return thresholds[np.argmax(f1_scores)]

def get_binary_metrics(actual, predicted, threshold=0.5):
    """
    Compute binary classification metrics for continuous labels
    
    Args:
        actual (np.array): True labels
        predicted (np.array): Predicted probabilities
        threshold (float, optional): Classification threshold. If None, optimal threshold is computed.
    
    Returns:
        dict: Metrics including accuracy, precision, recall, F1, etc.
    """
    # If no threshold provided, find optimal threshold
    if threshold is None:
        threshold = choose_optimal_threshold(actual, predicted)
    
    # Convert to binary classification
    binary_actual = (actual >= np.median(actual)).astype(int)
    binary_pred = (predicted >= threshold).astype(int)
    
    # Compute confusion matrix
    tn, fp, fn, tp = confusion_matrix(binary_actual, binary_pred).ravel()
    
    # Compute metrics
    metrics = {
        'threshold': threshold,
        'accuracy': (tp + tn) / (tp + tn + fp + fn),
        'precision': tp / (tp + fp) if tp + fp > 0 else 0,
        'recall': tp / (tp + fn) if tp + fn > 0 else 0,
        'specificity': tn / (tn + fp) if tn + fp > 0 else 0,
        'f1_score': 2 * tp / (2 * tp + fp + fn) if tp > 0 else 0,
        'auroc': roc_auc_score(binary_actual, predicted),
        'auprc': average_precision_score(binary_actual, predicted)
    }
    
    return metrics

# Modify existing metrics to use the new approach
def get_mse(actual, predicted):
    return ((actual - predicted) ** 2).mean()

def get_accuracy(actual, predicted, threshold=None):
    metrics = get_binary_metrics(actual, predicted, threshold)
    return metrics['accuracy'] * 100.0

def precision(actual, predicted, threshold=None):
    metrics = get_binary_metrics(actual, predicted, threshold)
    return metrics['precision']

def sensitivity(actual, predicted, threshold=None):
    metrics = get_binary_metrics(actual, predicted, threshold)
    return metrics['recall']

def specificity(actual, predicted, threshold=None):
    metrics = get_binary_metrics(actual, predicted, threshold)
    return metrics['specificity']

def f_score(actual, predicted, threshold=None):
    metrics = get_binary_metrics(actual, predicted, threshold)
    return metrics['f1_score']

def mcc(actual, predicted, threshold=None):
    metrics = get_binary_metrics(actual, predicted, threshold)
    
    # Matthews Correlation Coefficient calculation
    tp = metrics['precision'] * len(actual)
    tn = metrics['specificity'] * len(actual)
    fp = tp / metrics['precision'] - tp
    fn = tp / metrics['recall'] - tp
    
    numerator = (tp * tn) - (fp * fn)
    denominator = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    
    return numerator / denominator if denominator != 0 else 0

def auroc(actual, predicted):
    return get_binary_metrics(actual, predicted)['auroc']

def auprc(actual, predicted):
    return get_binary_metrics(actual, predicted)['auprc']

In [9]:
## model 5 epochs 
loss = get_mse(labels, predictions)
acc = get_accuracy(labels, predictions, 0.5)
prec = precision(labels, predictions, 0.5)
sensitivity = sensitivity(labels, predictions,  0.5)
specificity = specificity(labels, predictions, 0.5)
f1 = f_score(labels, predictions, 0.5)
mcc = mcc(labels, predictions,  0.5)
auroc = auroc(labels, predictions)
auprc = auprc(labels, predictions)


print(f'loss : {loss}')
print(f'Accuracy : {acc}')
print(f'precision: {prec}')
print(f'Sensititvity :{sensitivity}')
print(f'specificity : {specificity}')
print(f'f-score : {f1}')
print(f'MCC : {mcc}')
print(f'AUROC: {auroc}')
print(f'AUPRC: {auprc}')

loss : 0.015582451499309272
Accuracy : 30.38943690916564
precision: 0.93268416596105
Sensititvity :0.14128134419290708
specificity : 0.9589253422888142
f-score : 0.24539125591757172
MCC : 0.07652147866671151
AUROC: 0.7501692704582001
AUPRC: 0.9024788435946378


In [11]:
## continue-valued metrics 
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def get_rmse(actual, predicted):
    """
    Root Mean Squared Error
    
    Args:
        actual (np.array): True values
        predicted (np.array): Predicted values
    
    Returns:
        float: Root Mean Squared Error
    """
    return np.sqrt(mean_squared_error(actual, predicted))

def get_mae(actual, predicted):
    """
    Mean Absolute Error
    
    Args:
        actual (np.array): True values
        predicted (np.array): Predicted values
    
    Returns:
        float: Mean Absolute Error
    """
    return mean_absolute_error(actual, predicted)

def get_r2_score(actual, predicted):
    """
    R-squared (Coefficient of Determination)
    
    Args:
        actual (np.array): True values
        predicted (np.array): Predicted values
    
    Returns:
        float: R-squared score
    """
    return r2_score(actual, predicted)

def pearson_correlation(actual, predicted):
    """
    Pearson Correlation Coefficient
    
    Args:
        actual (np.array): True values
        predicted (np.array): Predicted values
    
    Returns:
        float: Pearson correlation coefficient
    """
    return np.corrcoef(actual, predicted)[0, 1]

def spearman_correlation(actual, predicted):
    """
    Spearman Rank Correlation Coefficient
    
    Args:
        actual (np.array): True values
        predicted (np.array): Predicted values
    
    Returns:
        float: Spearman correlation coefficient
    """
    from scipy import stats
    return stats.spearmanr(actual, predicted)[0]

# Optional: Plotting function to visualize predictions vs actual values
def plot_prediction_vs_actual(actual, predicted, title='Predictions vs Actual 5 epochs'):
    """
    Create a scatter plot of predictions vs actual values
    
    Args:
        actual (np.array): True values
        predicted (np.array): Predicted values
        title (str, optional): Plot title
    
    Returns:
        matplotlib.figure.Figure: Matplotlib figure
    """
    import matplotlib.pyplot as plt
    
    plt.figure(figsize=(10, 6))
    plt.scatter(actual, predicted, alpha=0.5)
    plt.plot([actual.min(), actual.max()], [actual.min(), actual.max()], 'r--', lw=2)
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title(title)
    plt.tight_layout()
    
    return plt.gcf()

In [12]:
# Existing code remains the same
rmse = get_rmse(labels, predictions)
mae = get_mae(labels, predictions)
r2 = get_r2_score(labels, predictions)
pearson = pearson_correlation(labels, predictions)
spearman = spearman_correlation(labels, predictions)

print(f'RMSE: {rmse}')
print(f'MAE: {mae}')
print(f'R-squared: {r2}')
print(f'Pearson Correlation: {pearson}')
print(f'Spearman Correlation: {spearman}')

# Optional: Create a visualization
fig = plot_prediction_vs_actual(labels, predictions)
fig.savefig('predictions_vs_actual_5epochs.png')
plt.close(fig)

RMSE: 0.12482968997521893
MAE: 0.07864372484931197
R-squared: 0.46821213455548116
Pearson Correlation: 0.6854294136296294
Spearman Correlation: 0.5431617919895632
