In [10]:
import torch
import torch.nn as nn
import glob
import numpy as np
import csv
import os

# Define the 2D CNN model in PyTorch
class CNN2D(nn.Module):
    """Three-stage 2D CNN that ends in a single raw output value per sample.

    Each stage is Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU followed by a
    2x2 max-pool. The flattened features pass through a 128-unit linear layer
    with ReLU, dropout (p=0.5), and a final linear layer with one output.
    No sigmoid is applied inside the network.

    Args:
        input_channels: Number of channels of the input tensor.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one Conv2d -> BatchNorm2d -> ReLU stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    def __init__(self, input_channels):
        super().__init__()

        # Attribute names and creation order are significant: they define the
        # state_dict keys (conv1.0.weight, ...) used by saved checkpoints.
        self.conv1 = self._conv_stage(input_channels, 32)
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv2 = self._conv_stage(32, 64)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv3 = self._conv_stage(64, 128)
        self.pool3 = nn.MaxPool2d(2, 2)

        self.flatten = nn.Flatten()
        # The flattened size is hard-coded to 128 * 4 * 18 features
        # (presumably a 4x18 map after the three pools); adjust it if the
        # input size changes.
        self.fc1 = nn.Linear(128 * 4 * 18, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        """Run the three conv/pool stages and the classifier head; returns raw outputs."""
        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
        )
        for conv, pool in stages:
            x = pool(conv(x))

        hidden = torch.relu(self.fc1(self.flatten(x)))
        return self.fc2(self.dropout(hidden))
    
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Number of saved models (model_bin_1 .. model_bin_k) per experiment.
k = 50


def load_ensemble(exp_id, n_models=k):
    """Load the saved CNN2D models of one experiment, ready for inference.

    Each model is wrapped in nn.DataParallel before its state dict is loaded
    (presumably because the checkpoints were saved from DataParallel-wrapped
    models — confirm against the training code), then switched to eval mode.
    Every model is loaded exactly once, from its own experiment's checkpoint.

    Args:
        exp_id: Experiment number used in the ``GNN-5A_Exp{exp_id}`` folder name.
        n_models: How many checkpoints (``model_bin_1`` .. ``model_bin_n``) to load.

    Returns:
        List of ``n_models`` models in eval mode, ordered by bin index.
    """
    ensemble = []
    for bin_index in range(1, n_models + 1):
        model = nn.DataParallel(CNN2D(input_channels=1).to(device))
        model_path = f"../../../Models/Cholesterol/GNN/GNN-5A_Exp{exp_id}/Models/model_bin_{bin_index}.pth"
        model.load_state_dict(torch.load(model_path, map_location=device))
        model.eval()
        ensemble.append(model)
    return ensemble


models_exp1 = load_ensemble(1)
models_exp2 = load_ensemble(2)
models_exp3 = load_ensemble(3)
models_exp4 = load_ensemble(4)
models_exp5 = load_ensemble(5)


In [11]:
def evaluate_file(model, file_path, threshold=0.5):
    """Score one ``.npy`` grid with a single model.

    Args:
        model: Torch module called on a (1, 1, rows, cols) float32 tensor.
        file_path: Path to a ``.npy`` file containing a 2-D array.
        threshold: Probability cut-off; ``prob >= threshold`` yields class 1.

    Returns:
        Tuple ``(predicted_class, prob, non_padded_rows)`` where ``prob`` is the
        sigmoid of the model output and ``non_padded_rows`` is the number of
        rows of the grid that contain at least one non-zero value.

    Raises:
        ValueError: If the loaded array is not 2-D.
    """
    grid = np.load(file_path)
    if grid.ndim != 2:
        raise ValueError(f"Unexpected grid shape: {grid.shape}")

    # A row counts as "non-padded" when any of its entries is non-zero.
    non_padded_rows = np.sum(np.any(grid != 0, axis=1))

    # Prepend batch and channel dimensions, then move to the global device.
    batch = torch.tensor(grid, dtype=torch.float32)[None, None].to(device)

    model.eval()
    with torch.no_grad():
        logit = model(batch).squeeze(1)
    prob = torch.sigmoid(logit).item()

    return int(prob >= threshold), prob, non_padded_rows


In [12]:
def evaluate_directory(dir, csv_output, models):
    """Score every ``.npy`` file in a directory with an ensemble of models.

    For each file (in sorted order) every model is evaluated with
    ``evaluate_file``; the mean and standard deviation of the per-model
    probabilities are written to ``csv_output`` together with the file's
    non-padded row count. A per-model summary of positive/negative
    predictions is printed at the end.

    Args:
        dir: Directory searched (non-recursively) for ``*.npy`` files.
        csv_output: Path of the CSV to write; parent directories are created.
        models: Non-empty sequence of models forming the ensemble.

    Returns:
        The mean of the per-file average scores, or ``nan`` when the directory
        contains no ``.npy`` files (the header-only CSV is still written).

    Raises:
        ValueError: If ``models`` is empty.
    """
    if not models:
        raise ValueError("models must contain at least one model")

    files = sorted(glob.glob(f"{dir}/*.npy"))

    model_positive_counts = [0] * len(models)
    model_negative_counts = [0] * len(models)
    score_sum = 0.0

    # dirname is '' for a bare filename, and os.makedirs('') raises.
    output_dir = os.path.dirname(csv_output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(csv_output, "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["filename", "average_score", "score_std", "number_atoms"])

        for file in files:
            model_probs = []
            for model_index, model in enumerate(models):
                predicted_class, prob, non_padded_rows = evaluate_file(model, file)
                model_probs.append(prob)

                if predicted_class == 1:
                    model_positive_counts[model_index] += 1
                else:
                    model_negative_counts[model_index] += 1

            prediction_mean = np.mean(model_probs)
            prediction_std = np.std(model_probs)

            score_sum += prediction_mean
            # non_padded_rows depends only on the file, so the value from the
            # last model is the same as from any other.
            writer.writerow([file, prediction_mean, prediction_std, non_padded_rows])

    if not files:
        # Avoid a ZeroDivisionError; the rate is undefined without any input.
        print(f"No .npy files found in {dir}; capture rate is undefined.")
        return float("nan")

    capture_rate = score_sum / len(files)
    # No size filter is applied, so the "overlapping" subset is every file
    # and its rate equals the overall capture rate.
    overlapping_capture_rate = capture_rate

    print("\nModel Predictions Summary:")
    for i, (pos, neg) in enumerate(zip(model_positive_counts, model_negative_counts), start=1):
        print(f"Model {i}: Positives = {pos}, Negatives = {neg}")

    print("Overlapping Capture Rate is", overlapping_capture_rate)

    return capture_rate

In [13]:
# Score each experiment's Spies directory with that experiment's ensemble and
# write the per-file scores to SpyCaptureRates.csv.
spies_exp1 = "../../../Data/SplitData/Cholesterol/cholesterol-graph-5A/Spies"
spies_exp2 = "../../../Data/SplitData/Cholesterol/cholesterol-graph-5A_exp2/Spies"
spies_exp3 = "../../../Data/SplitData/Cholesterol/cholesterol-graph-5A_exp3/Spies"
spies_exp4 = "../../../Data/SplitData/Cholesterol/cholesterol-graph-5A_exp4/Spies"
spies_exp5 = "../../../Data/SplitData/Cholesterol/cholesterol-graph-5A_exp5/Spies"

csv_output_exp1 = "../../../Models/Cholesterol/GNN/GNN-5A_Exp1/NewResults/SpyCaptureRates.csv"
csv_output_exp2 = "../../../Models/Cholesterol/GNN/GNN-5A_Exp2/NewResults/SpyCaptureRates.csv"
csv_output_exp3 = "../../../Models/Cholesterol/GNN/GNN-5A_Exp3/NewResults/SpyCaptureRates.csv"
csv_output_exp4 = "../../../Models/Cholesterol/GNN/GNN-5A_Exp4/NewResults/SpyCaptureRates.csv"
csv_output_exp5 = "../../../Models/Cholesterol/GNN/GNN-5A_Exp5/NewResults/SpyCaptureRates.csv"

spy_runs = [
    (spies_exp1, csv_output_exp1, models_exp1),
    (spies_exp2, csv_output_exp2, models_exp2),
    (spies_exp3, csv_output_exp3, models_exp3),
    (spies_exp4, csv_output_exp4, models_exp4),
    (spies_exp5, csv_output_exp5, models_exp5),
]
for spy_dir, spy_csv, spy_models in spy_runs:
    spy_capture_rate = evaluate_directory(spy_dir, spy_csv, spy_models)


Model Predictions Summary:
Model 1: Positives = 121, Negatives = 33
Model 2: Positives = 110, Negatives = 44
Model 3: Positives = 115, Negatives = 39
Model 4: Positives = 111, Negatives = 43
Model 5: Positives = 115, Negatives = 39
Model 6: Positives = 108, Negatives = 46
Model 7: Positives = 109, Negatives = 45
Model 8: Positives = 108, Negatives = 46
Model 9: Positives = 122, Negatives = 32
Model 10: Positives = 106, Negatives = 48
Model 11: Positives = 116, Negatives = 38
Model 12: Positives = 124, Negatives = 30
Model 13: Positives = 121, Negatives = 33
Model 14: Positives = 121, Negatives = 33
Model 15: Positives = 117, Negatives = 37
Model 16: Positives = 103, Negatives = 51
Model 17: Positives = 121, Negatives = 33
Model 18: Positives = 99, Negatives = 55
Model 19: Positives = 104, Negatives = 50
Model 20: Positives = 111, Negatives = 43
Model 21: Positives = 118, Negatives = 36
Model 22: Positives = 105, Negatives = 49
Model 23: Positives = 115, Negatives = 39
Model 24: Positi

In [14]:
import pandas as pd

def get_spy_info(csv):
    """Print and return summary statistics of the ``average_score`` column.

    Args:
        csv: Path to a CSV file that has an ``average_score`` column.

    Returns:
        Tuple ``(mean, median, 25th percentile, 75th percentile, min, max)``
        of the ``average_score`` values, in that order.
    """
    spy_scores = pd.read_csv(csv)["average_score"]

    mean_score = spy_scores.mean()
    percentile_25, percentile_50, percentile_75 = (
        spy_scores.quantile(q) for q in (0.25, 0.5, 0.75)
    )
    min_score, max_score = spy_scores.min(), spy_scores.max()

    # One print call; the trailing "\n" on the last line leaves a blank line
    # after the report.
    report = [
        f"Mean: {mean_score:.6f}",
        f"50th Percentile (Median): {percentile_50:.6f}",
        f"Bottom 25 Percentile: {percentile_25:.6f}",
        f"Top 25 Percentile: {percentile_75:.6f}",
        f"Min: {min_score:.6f}",
        f"Max: {max_score:.6f}\n",
    ]
    print("\n".join(report))

    return mean_score, percentile_50, percentile_25, percentile_75, min_score, max_score

# Summarise the score CSV currently referenced by csv_output_expN for each
# experiment; the returned statistics are kept per experiment for later cells.
print("Experiment 1 Spy Results:")
(
    mean_score_exp1,
    percentile_50_exp1,
    percentile_25_exp1,
    percentile_75_exp1,
    min_score_exp1,
    max_score_exp1,
) = get_spy_info(csv_output_exp1)

print("Experiment 2 Spy Results:")
(
    mean_score_exp2,
    percentile_50_exp2,
    percentile_25_exp2,
    percentile_75_exp2,
    min_score_exp2,
    max_score_exp2,
) = get_spy_info(csv_output_exp2)

print("Experiment 3 Spy Results:")
(
    mean_score_exp3,
    percentile_50_exp3,
    percentile_25_exp3,
    percentile_75_exp3,
    min_score_exp3,
    max_score_exp3,
) = get_spy_info(csv_output_exp3)

print("Experiment 4 Spy Results:")
(
    mean_score_exp4,
    percentile_50_exp4,
    percentile_25_exp4,
    percentile_75_exp4,
    min_score_exp4,
    max_score_exp4,
) = get_spy_info(csv_output_exp4)

print("Experiment 5 Spy Results:")
(
    mean_score_exp5,
    percentile_50_exp5,
    percentile_25_exp5,
    percentile_75_exp5,
    min_score_exp5,
    max_score_exp5,
) = get_spy_info(csv_output_exp5)

Experiment 1 Spy Results:
Mean: 0.715358
50th Percentile (Median): 0.766796
Bottom 25 Percentile: 0.672870
Top 25 Percentile: 0.827340
Min: 0.012814
Max: 0.952532

Experiment 2 Spy Results:
Mean: 0.749904
50th Percentile (Median): 0.794622
Bottom 25 Percentile: 0.732618
Top 25 Percentile: 0.850029
Min: 0.091547
Max: 0.964338

Experiment 3 Spy Results:
Mean: 0.744754
50th Percentile (Median): 0.794971
Bottom 25 Percentile: 0.705047
Top 25 Percentile: 0.844019
Min: 0.018880
Max: 0.944226

Experiment 4 Spy Results:
Mean: 0.709973
50th Percentile (Median): 0.762311
Bottom 25 Percentile: 0.658557
Top 25 Percentile: 0.825139
Min: 0.000157
Max: 0.949129

Experiment 5 Spy Results:
Mean: 0.712068
50th Percentile (Median): 0.770435
Bottom 25 Percentile: 0.669200
Top 25 Percentile: 0.828231
Min: 0.000156
Max: 0.946053



In [15]:
# Score each experiment's Test/Positive directory with that experiment's
# ensemble and write the per-file scores to TestPositiveCaptureRates.csv.
# Note: csv_output_expN is re-pointed here, so later cells that read
# csv_output_expN operate on these files.
test_positives_exp1 = "../../../Data/SplitData/Cholesterol/cholesterol-graph-5A/Test/Positive"
test_positives_exp2 = "../../../Data/SplitData/Cholesterol/cholesterol-graph-5A_exp2/Test/Positive"
test_positives_exp3 = "../../../Data/SplitData/Cholesterol/cholesterol-graph-5A_exp3/Test/Positive"
test_positives_exp4 = "../../../Data/SplitData/Cholesterol/cholesterol-graph-5A_exp4/Test/Positive"
test_positives_exp5 = "../../../Data/SplitData/Cholesterol/cholesterol-graph-5A_exp5/Test/Positive"

csv_output_exp1 = "../../../Models/Cholesterol/GNN/GNN-5A_Exp1/NewResults/TestPositiveCaptureRates.csv"
csv_output_exp2 = "../../../Models/Cholesterol/GNN/GNN-5A_Exp2/NewResults/TestPositiveCaptureRates.csv"
csv_output_exp3 = "../../../Models/Cholesterol/GNN/GNN-5A_Exp3/NewResults/TestPositiveCaptureRates.csv"
csv_output_exp4 = "../../../Models/Cholesterol/GNN/GNN-5A_Exp4/NewResults/TestPositiveCaptureRates.csv"
csv_output_exp5 = "../../../Models/Cholesterol/GNN/GNN-5A_Exp5/NewResults/TestPositiveCaptureRates.csv"

positive_runs = [
    (test_positives_exp1, csv_output_exp1, models_exp1),
    (test_positives_exp2, csv_output_exp2, models_exp2),
    (test_positives_exp3, csv_output_exp3, models_exp3),
    (test_positives_exp4, csv_output_exp4, models_exp4),
    (test_positives_exp5, csv_output_exp5, models_exp5),
]
for positive_dir, positive_csv, positive_models in positive_runs:
    spy_capture_rate = evaluate_directory(positive_dir, positive_csv, positive_models)


Model Predictions Summary:
Model 1: Positives = 146, Negatives = 8
Model 2: Positives = 144, Negatives = 10
Model 3: Positives = 144, Negatives = 10
Model 4: Positives = 146, Negatives = 8
Model 5: Positives = 144, Negatives = 10
Model 6: Positives = 146, Negatives = 8
Model 7: Positives = 144, Negatives = 10
Model 8: Positives = 144, Negatives = 10
Model 9: Positives = 146, Negatives = 8
Model 10: Positives = 144, Negatives = 10
Model 11: Positives = 147, Negatives = 7
Model 12: Positives = 147, Negatives = 7
Model 13: Positives = 147, Negatives = 7
Model 14: Positives = 146, Negatives = 8
Model 15: Positives = 145, Negatives = 9
Model 16: Positives = 144, Negatives = 10
Model 17: Positives = 147, Negatives = 7
Model 18: Positives = 146, Negatives = 8
Model 19: Positives = 146, Negatives = 8
Model 20: Positives = 147, Negatives = 7
Model 21: Positives = 145, Negatives = 9
Model 22: Positives = 144, Negatives = 10
Model 23: Positives = 143, Negatives = 11
Model 24: Positives = 144, Ne

In [16]:
def apply_labeling_from_spies(csv, mean_score, percentile_50, percentile_25, percentile_75, min_score, max_score):
    """Label every row of a score CSV using thresholds and count the labels.

    Three label columns are added to the CSV (which is rewritten in place):

    * ``min_max_label``: four-way label with cut points
      ``min_score`` / ``mean_score`` / ``max_score``.
    * ``percentile_label``: four-way label with cut points
      ``percentile_25`` / ``percentile_50`` / ``percentile_75``.
    * ``threshold_label``: ``"Negative"`` below 0.5, otherwise ``"Positive"``.

    Every cut point is inclusive on its lower side (``low <= score < high``).
    The returned counts are taken from the label columns themselves, so a
    score that falls exactly on a cut point is counted in the same bucket it
    is labelled with and each set of counts sums to the number of rows.

    Args:
        csv: Path to a CSV with an ``average_score`` column; overwritten with
            the added label columns.
        mean_score, percentile_50, percentile_25, percentile_75, min_score,
        max_score: Cut points used for labelling.

    Returns:
        Tuple ``(min_max_results, percentile_results, threshold_results)``.
        The first two are counts ordered StrongNegative, PseudoNegative,
        PseudoPositive, StrongPositive; the third is ordered Negative,
        Positive.
    """
    df = pd.read_csv(csv)
    scores = df["average_score"]

    labels_4 = ["StrongNegative", "PseudoNegative", "PseudoPositive", "StrongPositive"]
    labels_2 = ["Negative", "Positive"]

    def four_way_label(score, low, mid, high):
        # Buckets: (-inf, low), [low, mid), [mid, high), everything else.
        if score < low:
            return "StrongNegative"
        elif low <= score < mid:
            return "PseudoNegative"
        elif mid <= score < high:
            return "PseudoPositive"
        else:
            return "StrongPositive"

    df["min_max_label"] = scores.apply(four_way_label, args=(min_score, mean_score, max_score))
    df["percentile_label"] = scores.apply(four_way_label, args=(percentile_25, percentile_50, percentile_75))
    df["threshold_label"] = scores.apply(lambda x: "Negative" if x < 0.5 else "Positive")

    def count_labels(column, labels):
        # Counting the assigned labels keeps counts and labels in agreement.
        counts = df[column].value_counts()
        return [int(counts.get(label, 0)) for label in labels]

    min_max_results = count_labels("min_max_label", labels_4)
    percentile_results = count_labels("percentile_label", labels_4)
    threshold_results = count_labels("threshold_label", labels_2)

    # Table 1: Min/Max and Percentile
    minmax_percentile_df = pd.DataFrame({
        "Label": labels_4,
        "Min_Max (Using Mean)": min_max_results,
        "Percentile (75-50-25)": percentile_results
    })

    # Table 2: Threshold 0.5
    threshold_df = pd.DataFrame({
        "Label": labels_2,
        "Threshold (0.5)": threshold_results
    })

    print(minmax_percentile_df,"\n", threshold_df)

    df.to_csv(csv, index=False)

    return min_max_results, percentile_results, threshold_results

def show_evaluation_results(min_max_all, percentile_all, threshold_all):
    """Print the per-label mean and standard deviation across experiments.

    Each argument is a 2-D array-like with one row per experiment and one
    column per label; statistics are taken along axis 0.

    Args:
        min_max_all: Counts for the four min/max labels.
        percentile_all: Counts for the four percentile labels.
        threshold_all: Counts for the two threshold labels.
    """
    four_way = ["StrongNegative", "PseudoNegative", "PseudoPositive", "StrongPositive"]
    two_way = ["Negative", "Positive"]

    def summarise(labels, counts, mean_column, std_column):
        # One row per label: label name, mean count, std of the count.
        return pd.DataFrame({
            "Label": labels,
            mean_column: np.mean(counts, axis=0),
            std_column: np.std(counts, axis=0),
        })

    min_max_df = summarise(four_way, min_max_all, "Min-Max Mean", "Min-Max Std")
    percentile_df = summarise(four_way, percentile_all, "Percentile Mean", "Percentile Std")
    threshold_df = summarise(two_way, threshold_all, "Threshold Mean", "Threshold Std")

    sections = (
        ("=== Min-Max Results ===", min_max_df),
        ("\n=== Percentile Results ===", percentile_df),
        ("\n=== Threshold Results ===", threshold_df),
    )
    for heading, table in sections:
        print(heading)
        print(table)

In [17]:
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 500)

# Label the CSV currently referenced by each csv_output_expN using that
# experiment's spy statistics, then aggregate the counts across experiments.
min_max_results_exp1, percentile_results_exp1, threshold_results_exp1 = apply_labeling_from_spies(
    csv=csv_output_exp1,
    mean_score=mean_score_exp1,
    percentile_50=percentile_50_exp1,
    percentile_25=percentile_25_exp1,
    percentile_75=percentile_75_exp1,
    min_score=min_score_exp1,
    max_score=max_score_exp1,
)
min_max_results_exp2, percentile_results_exp2, threshold_results_exp2 = apply_labeling_from_spies(
    csv=csv_output_exp2,
    mean_score=mean_score_exp2,
    percentile_50=percentile_50_exp2,
    percentile_25=percentile_25_exp2,
    percentile_75=percentile_75_exp2,
    min_score=min_score_exp2,
    max_score=max_score_exp2,
)
min_max_results_exp3, percentile_results_exp3, threshold_results_exp3 = apply_labeling_from_spies(
    csv=csv_output_exp3,
    mean_score=mean_score_exp3,
    percentile_50=percentile_50_exp3,
    percentile_25=percentile_25_exp3,
    percentile_75=percentile_75_exp3,
    min_score=min_score_exp3,
    max_score=max_score_exp3,
)
min_max_results_exp4, percentile_results_exp4, threshold_results_exp4 = apply_labeling_from_spies(
    csv=csv_output_exp4,
    mean_score=mean_score_exp4,
    percentile_50=percentile_50_exp4,
    percentile_25=percentile_25_exp4,
    percentile_75=percentile_75_exp4,
    min_score=min_score_exp4,
    max_score=max_score_exp4,
)
min_max_results_exp5, percentile_results_exp5, threshold_results_exp5 = apply_labeling_from_spies(
    csv=csv_output_exp5,
    mean_score=mean_score_exp5,
    percentile_50=percentile_50_exp5,
    percentile_25=percentile_25_exp5,
    percentile_75=percentile_75_exp5,
    min_score=min_score_exp5,
    max_score=max_score_exp5,
)

# One row per experiment, one column per label.
min_max_all = np.array([min_max_results_exp1, min_max_results_exp2, min_max_results_exp3, min_max_results_exp4, min_max_results_exp5])
percentile_all = np.array([percentile_results_exp1, percentile_results_exp2, percentile_results_exp3, percentile_results_exp4, percentile_results_exp5])
threshold_all = np.array([threshold_results_exp1, threshold_results_exp2, threshold_results_exp3, threshold_results_exp4, threshold_results_exp5])

show_evaluation_results(min_max_all, percentile_all, threshold_all)

            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     0                     13
1  PseudoNegative                    16                      5
2  PseudoPositive                    94                      7
3  StrongPositive                    44                    129 
       Label  Threshold (0.5)
0  Negative                8
1  Positive              146
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     3                     14
1  PseudoNegative                    13                      8
2  PseudoPositive                   113                     11
3  StrongPositive                    25                    121 
       Label  Threshold (0.5)
0  Negative                7
1  Positive              147
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     1                     26
1  PseudoNegative                    29                      7
2  

In [18]:
# Score each experiment's Test/CombinedUnlabeled directory with that
# experiment's ensemble and write the per-file scores to
# TestUnlabeledCaptureRates.csv.
# Note: csv_output_expN is re-pointed here, so later cells that read
# csv_output_expN operate on these files.
test_unlabeled_exp1 = "../../../Data/SplitData/Cholesterol/cholesterol-graph-5A/Test/CombinedUnlabeled"
test_unlabeled_exp2 = "../../../Data/SplitData/Cholesterol/cholesterol-graph-5A_exp2/Test/CombinedUnlabeled"
test_unlabeled_exp3 = "../../../Data/SplitData/Cholesterol/cholesterol-graph-5A_exp3/Test/CombinedUnlabeled"
test_unlabeled_exp4 = "../../../Data/SplitData/Cholesterol/cholesterol-graph-5A_exp4/Test/CombinedUnlabeled"
test_unlabeled_exp5 = "../../../Data/SplitData/Cholesterol/cholesterol-graph-5A_exp5/Test/CombinedUnlabeled"

csv_output_exp1 = "../../../Models/Cholesterol/GNN/GNN-5A_Exp1/NewResults/TestUnlabeledCaptureRates.csv"
csv_output_exp2 = "../../../Models/Cholesterol/GNN/GNN-5A_Exp2/NewResults/TestUnlabeledCaptureRates.csv"
csv_output_exp3 = "../../../Models/Cholesterol/GNN/GNN-5A_Exp3/NewResults/TestUnlabeledCaptureRates.csv"
csv_output_exp4 = "../../../Models/Cholesterol/GNN/GNN-5A_Exp4/NewResults/TestUnlabeledCaptureRates.csv"
csv_output_exp5 = "../../../Models/Cholesterol/GNN/GNN-5A_Exp5/NewResults/TestUnlabeledCaptureRates.csv"

unlabeled_runs = [
    (test_unlabeled_exp1, csv_output_exp1, models_exp1),
    (test_unlabeled_exp2, csv_output_exp2, models_exp2),
    (test_unlabeled_exp3, csv_output_exp3, models_exp3),
    (test_unlabeled_exp4, csv_output_exp4, models_exp4),
    (test_unlabeled_exp5, csv_output_exp5, models_exp5),
]
for unlabeled_dir, unlabeled_csv, unlabeled_models in unlabeled_runs:
    spy_capture_rate = evaluate_directory(unlabeled_dir, unlabeled_csv, unlabeled_models)


Model Predictions Summary:
Model 1: Positives = 291, Negatives = 632
Model 2: Positives = 279, Negatives = 644
Model 3: Positives = 306, Negatives = 617
Model 4: Positives = 275, Negatives = 648
Model 5: Positives = 303, Negatives = 620
Model 6: Positives = 312, Negatives = 611
Model 7: Positives = 291, Negatives = 632
Model 8: Positives = 278, Negatives = 645
Model 9: Positives = 296, Negatives = 627
Model 10: Positives = 327, Negatives = 596
Model 11: Positives = 316, Negatives = 607
Model 12: Positives = 326, Negatives = 597
Model 13: Positives = 297, Negatives = 626
Model 14: Positives = 275, Negatives = 648
Model 15: Positives = 285, Negatives = 638
Model 16: Positives = 319, Negatives = 604
Model 17: Positives = 292, Negatives = 631
Model 18: Positives = 299, Negatives = 624
Model 19: Positives = 306, Negatives = 617
Model 20: Positives = 299, Negatives = 624
Model 21: Positives = 310, Negatives = 613
Model 22: Positives = 274, Negatives = 649
Model 23: Positives = 310, Negative

In [19]:
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 500)

# Label the CSV currently referenced by each csv_output_expN using that
# experiment's spy statistics, then aggregate the counts across experiments.
min_max_results_exp1, percentile_results_exp1, threshold_results_exp1 = apply_labeling_from_spies(
    csv=csv_output_exp1,
    mean_score=mean_score_exp1,
    percentile_50=percentile_50_exp1,
    percentile_25=percentile_25_exp1,
    percentile_75=percentile_75_exp1,
    min_score=min_score_exp1,
    max_score=max_score_exp1,
)
min_max_results_exp2, percentile_results_exp2, threshold_results_exp2 = apply_labeling_from_spies(
    csv=csv_output_exp2,
    mean_score=mean_score_exp2,
    percentile_50=percentile_50_exp2,
    percentile_25=percentile_25_exp2,
    percentile_75=percentile_75_exp2,
    min_score=min_score_exp2,
    max_score=max_score_exp2,
)
min_max_results_exp3, percentile_results_exp3, threshold_results_exp3 = apply_labeling_from_spies(
    csv=csv_output_exp3,
    mean_score=mean_score_exp3,
    percentile_50=percentile_50_exp3,
    percentile_25=percentile_25_exp3,
    percentile_75=percentile_75_exp3,
    min_score=min_score_exp3,
    max_score=max_score_exp3,
)
min_max_results_exp4, percentile_results_exp4, threshold_results_exp4 = apply_labeling_from_spies(
    csv=csv_output_exp4,
    mean_score=mean_score_exp4,
    percentile_50=percentile_50_exp4,
    percentile_25=percentile_25_exp4,
    percentile_75=percentile_75_exp4,
    min_score=min_score_exp4,
    max_score=max_score_exp4,
)
min_max_results_exp5, percentile_results_exp5, threshold_results_exp5 = apply_labeling_from_spies(
    csv=csv_output_exp5,
    mean_score=mean_score_exp5,
    percentile_50=percentile_50_exp5,
    percentile_25=percentile_25_exp5,
    percentile_75=percentile_75_exp5,
    min_score=min_score_exp5,
    max_score=max_score_exp5,
)

# One row per experiment, one column per label.
min_max_all = np.array([min_max_results_exp1, min_max_results_exp2, min_max_results_exp3, min_max_results_exp4, min_max_results_exp5])
percentile_all = np.array([percentile_results_exp1, percentile_results_exp2, percentile_results_exp3, percentile_results_exp4, percentile_results_exp5])
threshold_all = np.array([threshold_results_exp1, threshold_results_exp2, threshold_results_exp3, threshold_results_exp4, threshold_results_exp5])

show_evaluation_results(min_max_all, percentile_all, threshold_all)

            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                   314                    691
1  PseudoNegative                   398                     44
2  PseudoPositive                   188                     46
3  StrongPositive                    23                    142 
       Label  Threshold (0.5)
0  Negative              626
1  Positive              297
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                   417                    720
1  PseudoNegative                   314                     37
2  PseudoPositive                   182                     43
3  StrongPositive                    10                    123 
       Label  Threshold (0.5)
0  Negative              621
1  Positive              302
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                   341                    705
1  PseudoNegative                   389                     59
2  

In [20]:
# Score the same Ivan test-set directory with each experiment's ensemble and
# write the per-file scores to TestIvanCaptureRates.csv.
# NOTE(review): these output paths are relative to the working directory and
# use "Results", unlike the "../../../Models/Cholesterol/GNN/.../NewResults"
# paths used for the other evaluations — confirm this is intended.
test_ivan_exp1 = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-graph-5A/positive"
test_ivan_exp2 = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-graph-5A/positive"
test_ivan_exp3 = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-graph-5A/positive"
test_ivan_exp4 = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-graph-5A/positive"
test_ivan_exp5 = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-graph-5A/positive"

csv_output_exp1 = "GNN-5A_Exp1/Results/TestIvanCaptureRates.csv"
csv_output_exp2 = "GNN-5A_Exp2/Results/TestIvanCaptureRates.csv"
csv_output_exp3 = "GNN-5A_Exp3/Results/TestIvanCaptureRates.csv"
csv_output_exp4 = "GNN-5A_Exp4/Results/TestIvanCaptureRates.csv"
csv_output_exp5 = "GNN-5A_Exp5/Results/TestIvanCaptureRates.csv"

ivan_runs = [
    (test_ivan_exp1, csv_output_exp1, models_exp1),
    (test_ivan_exp2, csv_output_exp2, models_exp2),
    (test_ivan_exp3, csv_output_exp3, models_exp3),
    (test_ivan_exp4, csv_output_exp4, models_exp4),
    (test_ivan_exp5, csv_output_exp5, models_exp5),
]
for ivan_dir, ivan_csv, ivan_models in ivan_runs:
    spy_capture_rate = evaluate_directory(ivan_dir, ivan_csv, ivan_models)


Model Predictions Summary:
Model 1: Positives = 53, Negatives = 4
Model 2: Positives = 53, Negatives = 4
Model 3: Positives = 51, Negatives = 6
Model 4: Positives = 53, Negatives = 4
Model 5: Positives = 53, Negatives = 4
Model 6: Positives = 55, Negatives = 2
Model 7: Positives = 55, Negatives = 2
Model 8: Positives = 54, Negatives = 3
Model 9: Positives = 54, Negatives = 3
Model 10: Positives = 55, Negatives = 2
Model 11: Positives = 52, Negatives = 5
Model 12: Positives = 53, Negatives = 4
Model 13: Positives = 53, Negatives = 4
Model 14: Positives = 53, Negatives = 4
Model 15: Positives = 53, Negatives = 4
Model 16: Positives = 53, Negatives = 4
Model 17: Positives = 54, Negatives = 3
Model 18: Positives = 51, Negatives = 6
Model 19: Positives = 55, Negatives = 2
Model 20: Positives = 53, Negatives = 4
Model 21: Positives = 52, Negatives = 5
Model 22: Positives = 52, Negatives = 5
Model 23: Positives = 51, Negatives = 6
Model 24: Positives = 55, Negatives = 2
Model 25: Positives =

In [21]:
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 500)

# Label the CSV currently referenced by each csv_output_expN using that
# experiment's spy statistics, then aggregate the counts across experiments.
min_max_results_exp1, percentile_results_exp1, threshold_results_exp1 = apply_labeling_from_spies(
    csv=csv_output_exp1,
    mean_score=mean_score_exp1,
    percentile_50=percentile_50_exp1,
    percentile_25=percentile_25_exp1,
    percentile_75=percentile_75_exp1,
    min_score=min_score_exp1,
    max_score=max_score_exp1,
)
min_max_results_exp2, percentile_results_exp2, threshold_results_exp2 = apply_labeling_from_spies(
    csv=csv_output_exp2,
    mean_score=mean_score_exp2,
    percentile_50=percentile_50_exp2,
    percentile_25=percentile_25_exp2,
    percentile_75=percentile_75_exp2,
    min_score=min_score_exp2,
    max_score=max_score_exp2,
)
min_max_results_exp3, percentile_results_exp3, threshold_results_exp3 = apply_labeling_from_spies(
    csv=csv_output_exp3,
    mean_score=mean_score_exp3,
    percentile_50=percentile_50_exp3,
    percentile_25=percentile_25_exp3,
    percentile_75=percentile_75_exp3,
    min_score=min_score_exp3,
    max_score=max_score_exp3,
)
min_max_results_exp4, percentile_results_exp4, threshold_results_exp4 = apply_labeling_from_spies(
    csv=csv_output_exp4,
    mean_score=mean_score_exp4,
    percentile_50=percentile_50_exp4,
    percentile_25=percentile_25_exp4,
    percentile_75=percentile_75_exp4,
    min_score=min_score_exp4,
    max_score=max_score_exp4,
)
min_max_results_exp5, percentile_results_exp5, threshold_results_exp5 = apply_labeling_from_spies(
    csv=csv_output_exp5,
    mean_score=mean_score_exp5,
    percentile_50=percentile_50_exp5,
    percentile_25=percentile_25_exp5,
    percentile_75=percentile_75_exp5,
    min_score=min_score_exp5,
    max_score=max_score_exp5,
)

# One row per experiment, one column per label.
min_max_all = np.array([min_max_results_exp1, min_max_results_exp2, min_max_results_exp3, min_max_results_exp4, min_max_results_exp5])
percentile_all = np.array([percentile_results_exp1, percentile_results_exp2, percentile_results_exp3, percentile_results_exp4, percentile_results_exp5])
threshold_all = np.array([threshold_results_exp1, threshold_results_exp2, threshold_results_exp3, threshold_results_exp4, threshold_results_exp5])

show_evaluation_results(min_max_all, percentile_all, threshold_all)

            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     0                      4
1  PseudoNegative                     7                      7
2  PseudoPositive                    44                      6
3  StrongPositive                     6                     40 
       Label  Threshold (0.5)
0  Negative                3
1  Positive               54
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     2                      7
1  PseudoNegative                     7                      7
2  PseudoPositive                    38                      5
3  StrongPositive                    10                     38 
       Label  Threshold (0.5)
0  Negative                2
1  Positive               55
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     2                      7
1  PseudoNegative                     5                      5
2  