In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
import glob
import numpy as np
import csv
from torch_geometric.data import Data
import os

class GCN(nn.Module):
    """Graph-level scorer: three GCNConv stages, mean pooling, and a small MLP head.

    Each convolution stage is GCNConv -> BatchNorm1d -> ReLU -> Dropout(0.2),
    widening the node features 32 -> 64 -> 128. Node features are then
    averaged per graph and passed through Linear(128, 64) -> ReLU ->
    Dropout(0.6) -> Linear(64, 1).

    Attribute names are the keys of the saved state dicts, so they must not
    be renamed.

    Args:
        input_dim: width of the node feature vectors in ``data.x``.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Convolution stages, each paired with its own batch norm.
        self.conv1 = GCNConv(input_dim, 32)
        self.bn1 = nn.BatchNorm1d(32)

        self.conv2 = GCNConv(32, 64)
        self.bn2 = nn.BatchNorm1d(64)

        self.conv3 = GCNConv(64, 128)
        self.bn3 = nn.BatchNorm1d(128)

        # Light dropout between graph convolutions, heavier dropout in the head.
        self.dropout_gcn = nn.Dropout(0.2)
        self.dropout = nn.Dropout(0.6)

        # Classification head on the pooled graph embedding.
        self.fc1 = nn.Linear(128, 64)
        self.out = nn.Linear(64, 1)

    def forward(self, data):
        """Score the graph(s) in ``data``.

        Reads ``data.x``, ``data.edge_index``, ``data.edge_attr`` (passed to
        every convolution as the edge weight) and ``data.batch``.

        Returns:
            The output of the final linear layer with no sigmoid applied;
            one row per graph as grouped by ``data.batch``.
        """
        h = data.x
        edge_index = data.edge_index
        edge_weight = data.edge_attr
        batch = data.batch

        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in stages:
            h = conv(h, edge_index, edge_weight)
            h = self.dropout_gcn(F.relu(norm(h)))

        # Collapse node embeddings into one vector per graph.
        pooled = global_mean_pool(h, batch)

        hidden = self.dropout(F.relu(self.fc1(pooled)))
        return self.out(hidden)
    
def organize_graph_and_add_weight(file_path, label):
    """Build a PyG ``Data`` graph from one saved ``.npy`` dictionary.

    The file must hold a pickled dict with the keys ``'inverse_distance'``
    and ``'encoded_matrix'``.
    NOTE(review): presumably ``encoded_matrix`` is (num_nodes, num_features)
    and ``inverse_distance`` is (num_nodes, num_nodes) -- confirm against the
    code that writes these files.

    Args:
        file_path: path to the ``.npy`` file.
        label: value stored (as a one-element float32 tensor) in ``y``.

    Returns:
        ``Data`` with ``x`` from the encoded matrix, one edge for every
        strictly positive entry of the inverse-distance matrix, and that
        entry as the edge's ``edge_attr``.
    """
    # SECURITY: allow_pickle=True executes arbitrary pickled code on load;
    # only use this on files you produced yourself.
    payload = np.load(file_path, allow_pickle=True).item()
    raw_weights = payload['inverse_distance']
    raw_features = payload['encoded_matrix']

    node_features = torch.tensor(raw_features, dtype=torch.float32)
    weights = torch.tensor(raw_weights, dtype=torch.float32)

    # Edges are the strictly positive entries; the weights are used raw
    # (no row normalisation of the adjacency is applied).
    has_edge = weights > 0
    edge_index = has_edge.nonzero(as_tuple=False).t()
    edge_weight = weights[has_edge]

    target = torch.tensor([label], dtype=torch.float32)

    return Data(x=node_features, edge_index=edge_index, edge_attr=edge_weight, y=target)
    
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

k = 50  # checkpoints per experiment: model_bin_1.pth .. model_bin_k.pth
INPUT_DIM = 37  # node-feature width passed to every GCN built below


def _load_ensemble(experiment, n_models, input_dim=INPUT_DIM):
    """Load the ``n_models`` checkpoints of one experiment as eval-mode GCNs.

    Args:
        experiment: experiment number used in ``GCN-5A_Exp{experiment}/Models``.
        n_models: number of checkpoints to load, numbered from 1.
        input_dim: node-feature width handed to ``GCN``.

    Returns:
        List of models on ``device``, in checkpoint order, each switched to
        ``eval()`` so dropout is off and batch norm uses its running stats.
    """
    ensemble = []
    for i in range(1, n_models + 1):
        model = GCN(input_dim=input_dim).to(device)
        model_path = f"GCN-5A_Exp{experiment}/Models/model_bin_{i}.pth"
        model.load_state_dict(torch.load(model_path, map_location=device))
        model.eval()
        ensemble.append(model)
    return ensemble


models_exp1 = _load_ensemble(1, k)
models_exp2 = _load_ensemble(2, k)
models_exp3 = _load_ensemble(3, k)
models_exp4 = _load_ensemble(4, k)
models_exp5 = _load_ensemble(5, k)

# The trailing, mis-indented load_state_dict calls that used to follow the
# loop were removed: they reloaded the Exp5 checkpoint path into the last
# model of every experiment.

# Historical per-experiment names: the last member loaded for each experiment.
model_exp1, model_exp2, model_exp3, model_exp4, model_exp5 = (
    ensemble[-1]
    for ensemble in (models_exp1, models_exp2, models_exp3, models_exp4, models_exp5)
)


In [18]:
def get_capture_rate(dir, csv_output, models):
    """Score every ``.npy`` graph in a directory with an ensemble and log the results.

    For each file (in sorted order) the graph is built once, every model in
    ``models`` produces a sigmoid probability, and one CSV row is written:
    ``filename, average_score, score_std, number_atoms`` where
    ``number_atoms`` is the number of rows of the graph's ``x``. A per-model
    tally of probabilities >= 0.5 versus < 0.5 is printed at the end.

    Args:
        dir: directory searched (non-recursively) for ``*.npy`` files. The
            name shadows the builtin but is kept for keyword-call compatibility.
        csv_output: path of the CSV to write; its parent directory is created
            if the path has one.
        models: sequence of models called as ``model(graph)``.

    Returns:
        Mean over files of the per-file ensemble-average probability, or
        ``nan`` when the directory contains no ``.npy`` files.
    """
    files = sorted(glob.glob(f"{dir}/*.npy"))

    model_positive_counts = [0] * len(models)
    model_negative_counts = [0] * len(models)

    # dirname is "" for a bare filename, and os.makedirs("") raises.
    output_dir = os.path.dirname(csv_output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    score_sum = 0.0

    with open(csv_output, "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["filename", "average_score", "score_std", "number_atoms"])

        for file in files:
            # The graph does not depend on the model, so build it once per file.
            graph = organize_graph_and_add_weight(file, label=0).to(device)
            # Single graph: every node belongs to batch element 0.
            graph.batch = torch.zeros(graph.num_nodes, dtype=torch.long, device=device)
            number_atoms = graph.x.size(0)

            model_probs = []
            with torch.no_grad():
                for model_index, model in enumerate(models):
                    prob = torch.sigmoid(model(graph)).item()
                    model_probs.append(prob)

                    if prob >= 0.5:
                        model_positive_counts[model_index] += 1
                    else:
                        model_negative_counts[model_index] += 1

            prediction_mean = np.mean(model_probs)
            prediction_std = np.std(model_probs)

            score_sum += prediction_mean
            writer.writerow([file, prediction_mean, prediction_std, number_atoms])

    print("\nModel Predictions Summary:")
    for i, (pos, neg) in enumerate(zip(model_positive_counts, model_negative_counts), start=1):
        print(f"Model {i}: Positives = {pos}, Negatives = {neg}")

    if not files:
        # Nothing was scored; avoid dividing by zero.
        return float("nan")

    return score_sum / len(files)

In [19]:
# Score the graphs in each experiment's Spies directory with that
# experiment's ensemble. The csv_output_exp* names are read by later cells.
spies_exp1 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp1/Spies"
spies_exp2 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp2/Spies"
spies_exp3 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp3/Spies"
spies_exp4 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp4/Spies"
spies_exp5 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp5/Spies"

csv_output_exp1 = "GCN-5A_Exp1/Results/SpyCaptureRates.csv"
csv_output_exp2 = "GCN-5A_Exp2/Results/SpyCaptureRates.csv"
csv_output_exp3 = "GCN-5A_Exp3/Results/SpyCaptureRates.csv"
csv_output_exp4 = "GCN-5A_Exp4/Results/SpyCaptureRates.csv"
csv_output_exp5 = "GCN-5A_Exp5/Results/SpyCaptureRates.csv"

for graph_dir, results_csv, ensemble in (
    (spies_exp1, csv_output_exp1, models_exp1),
    (spies_exp2, csv_output_exp2, models_exp2),
    (spies_exp3, csv_output_exp3, models_exp3),
    (spies_exp4, csv_output_exp4, models_exp4),
    (spies_exp5, csv_output_exp5, models_exp5),
):
    # Overwritten on every pass; ends up holding the Exp5 value.
    spy_capture_rate = get_capture_rate(graph_dir, results_csv, ensemble)


Model Predictions Summary:
Model 1: Positives = 118, Negatives = 36
Model 2: Positives = 131, Negatives = 23
Model 3: Positives = 120, Negatives = 34
Model 4: Positives = 131, Negatives = 23
Model 5: Positives = 122, Negatives = 32
Model 6: Positives = 124, Negatives = 30
Model 7: Positives = 131, Negatives = 23
Model 8: Positives = 130, Negatives = 24
Model 9: Positives = 117, Negatives = 37
Model 10: Positives = 123, Negatives = 31
Model 11: Positives = 137, Negatives = 17
Model 12: Positives = 115, Negatives = 39
Model 13: Positives = 117, Negatives = 37
Model 14: Positives = 128, Negatives = 26
Model 15: Positives = 118, Negatives = 36
Model 16: Positives = 121, Negatives = 33
Model 17: Positives = 107, Negatives = 47
Model 18: Positives = 117, Negatives = 37
Model 19: Positives = 122, Negatives = 32
Model 20: Positives = 115, Negatives = 39
Model 21: Positives = 135, Negatives = 19
Model 22: Positives = 118, Negatives = 36
Model 23: Positives = 127, Negatives = 27
Model 24: Posit

In [20]:
import pandas as pd

def get_spy_info(csv):
    """Print and return summary statistics of a CSV's ``average_score`` column.

    Args:
        csv: path of a CSV file that has an ``average_score`` column. (The
            parameter name hides the ``csv`` module inside this function.)

    Returns:
        Tuple ``(mean, median, 25th percentile, 75th percentile, min, max)``.
    """
    average_scores = pd.read_csv(csv)["average_score"]

    # Central tendency and spread of the scores.
    avg = average_scores.mean()
    q50 = average_scores.quantile(0.5)
    q25 = average_scores.quantile(0.25)
    q75 = average_scores.quantile(0.75)
    lowest = average_scores.min()
    highest = average_scores.max()

    print(
        f"Mean: {avg:.6f}",
        f"50th Percentile (Median): {q50:.6f}",
        f"Bottom 25 Percentile: {q25:.6f}",
        f"Top 25 Percentile: {q75:.6f}",
        f"Min: {lowest:.6f}",
        f"Max: {highest:.6f}\n",
        sep="\n",
    )

    return avg, q50, q25, q75, lowest, highest

# Summarise the CSV currently bound to each csv_output_exp* name (set by the
# most recently run scoring cell) and keep the statistics as cut-offs for
# the labeling cells further down.
print("Experiment 1 Spy Results:")
spy_stats_exp1 = get_spy_info(csv_output_exp1)
(mean_score_exp1, percentile_50_exp1, percentile_25_exp1,
 percentile_75_exp1, min_score_exp1, max_score_exp1) = spy_stats_exp1

print("Experiment 2 Spy Results:")
spy_stats_exp2 = get_spy_info(csv_output_exp2)
(mean_score_exp2, percentile_50_exp2, percentile_25_exp2,
 percentile_75_exp2, min_score_exp2, max_score_exp2) = spy_stats_exp2

print("Experiment 3 Spy Results:")
spy_stats_exp3 = get_spy_info(csv_output_exp3)
(mean_score_exp3, percentile_50_exp3, percentile_25_exp3,
 percentile_75_exp3, min_score_exp3, max_score_exp3) = spy_stats_exp3

print("Experiment 4 Spy Results:")
spy_stats_exp4 = get_spy_info(csv_output_exp4)
(mean_score_exp4, percentile_50_exp4, percentile_25_exp4,
 percentile_75_exp4, min_score_exp4, max_score_exp4) = spy_stats_exp4

print("Experiment 5 Spy Results:")
spy_stats_exp5 = get_spy_info(csv_output_exp5)
(mean_score_exp5, percentile_50_exp5, percentile_25_exp5,
 percentile_75_exp5, min_score_exp5, max_score_exp5) = spy_stats_exp5

Experiment 1 Spy Results:
Mean: 0.747436
50th Percentile (Median): 0.794645
Bottom 25 Percentile: 0.671681
Top 25 Percentile: 0.876279
Min: 0.092567
Max: 0.978279

Experiment 2 Spy Results:
Mean: 0.770596
50th Percentile (Median): 0.823733
Bottom 25 Percentile: 0.683083
Top 25 Percentile: 0.910043
Min: 0.172504
Max: 0.971192

Experiment 3 Spy Results:
Mean: 0.780548
50th Percentile (Median): 0.819758
Bottom 25 Percentile: 0.708112
Top 25 Percentile: 0.901665
Min: 0.034102
Max: 0.964920

Experiment 4 Spy Results:
Mean: 0.734243
50th Percentile (Median): 0.799679
Bottom 25 Percentile: 0.649582
Top 25 Percentile: 0.869579
Min: 0.008715
Max: 0.972053

Experiment 5 Spy Results:
Mean: 0.743760
50th Percentile (Median): 0.813436
Bottom 25 Percentile: 0.649597
Top 25 Percentile: 0.889125
Min: 0.005725
Max: 0.976941



In [21]:
# Score the graphs in each experiment's Test/Positive directory with that
# experiment's ensemble. This rebinds csv_output_exp*, which later cells read.
test_positives_exp1 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp1/Test/Positive"
test_positives_exp2 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp2/Test/Positive"
test_positives_exp3 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp3/Test/Positive"
test_positives_exp4 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp4/Test/Positive"
test_positives_exp5 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp5/Test/Positive"

csv_output_exp1 = "GCN-5A_Exp1/Results/TestPositiveCaptureRates.csv"
csv_output_exp2 = "GCN-5A_Exp2/Results/TestPositiveCaptureRates.csv"
csv_output_exp3 = "GCN-5A_Exp3/Results/TestPositiveCaptureRates.csv"
csv_output_exp4 = "GCN-5A_Exp4/Results/TestPositiveCaptureRates.csv"
csv_output_exp5 = "GCN-5A_Exp5/Results/TestPositiveCaptureRates.csv"

for graph_dir, results_csv, ensemble in (
    (test_positives_exp1, csv_output_exp1, models_exp1),
    (test_positives_exp2, csv_output_exp2, models_exp2),
    (test_positives_exp3, csv_output_exp3, models_exp3),
    (test_positives_exp4, csv_output_exp4, models_exp4),
    (test_positives_exp5, csv_output_exp5, models_exp5),
):
    # The variable name is historical; it holds this directory's capture rate.
    spy_capture_rate = get_capture_rate(graph_dir, results_csv, ensemble)


Model Predictions Summary:
Model 1: Positives = 142, Negatives = 12
Model 2: Positives = 144, Negatives = 10
Model 3: Positives = 143, Negatives = 11
Model 4: Positives = 147, Negatives = 7
Model 5: Positives = 142, Negatives = 12
Model 6: Positives = 143, Negatives = 11
Model 7: Positives = 148, Negatives = 6
Model 8: Positives = 144, Negatives = 10
Model 9: Positives = 137, Negatives = 17
Model 10: Positives = 147, Negatives = 7
Model 11: Positives = 148, Negatives = 6
Model 12: Positives = 142, Negatives = 12
Model 13: Positives = 143, Negatives = 11
Model 14: Positives = 137, Negatives = 17
Model 15: Positives = 142, Negatives = 12
Model 16: Positives = 142, Negatives = 12
Model 17: Positives = 135, Negatives = 19
Model 18: Positives = 148, Negatives = 6
Model 19: Positives = 141, Negatives = 13
Model 20: Positives = 139, Negatives = 15
Model 21: Positives = 145, Negatives = 9
Model 22: Positives = 138, Negatives = 16
Model 23: Positives = 135, Negatives = 19
Model 24: Positives =

In [22]:
def apply_labeling_from_spies(csv, mean_score, percentile_50, percentile_25, percentile_75, min_score, max_score):
    """Label every score in ``csv`` against the given cut-offs and count each label.

    Three label columns are added to the CSV, which is rewritten in place:

    * ``min_max_label``: cut-offs ``min_score`` / ``mean_score`` / ``max_score``
    * ``percentile_label``: cut-offs ``percentile_25`` / ``percentile_50`` /
      ``percentile_75``
    * ``threshold_label``: ``"Negative"`` below 0.5, otherwise ``"Positive"``

    Every interval is closed on the left, so a score equal to a cut-off falls
    in the higher class. The returned counts are taken from the label columns
    themselves, so they always agree with the written labels and sum to the
    number of rows.

    Args:
        csv: path of a CSV with an ``average_score`` column. (The parameter
            name hides the ``csv`` module inside this function.)
        mean_score, percentile_50, percentile_25, percentile_75, min_score,
        max_score: cut-off values, in the order produced by ``get_spy_info``.

    Returns:
        ``(min_max_results, percentile_results, threshold_results)``: the
        first two are lists of four counts ordered StrongNegative,
        PseudoNegative, PseudoPositive, StrongPositive; the last is
        ``[negative_count, positive_count]``.
    """
    df = pd.read_csv(csv)

    labels_4 = ["StrongNegative", "PseudoNegative", "PseudoPositive", "StrongPositive"]
    labels_2 = ["Negative", "Positive"]

    def get_min_max_label(score):
        if score < min_score:
            return "StrongNegative"
        elif min_score <= score < mean_score:
            return "PseudoNegative"
        elif mean_score <= score < max_score:
            return "PseudoPositive"
        else:
            return "StrongPositive"

    def get_percentile_label(score):
        if score < percentile_25:
            return "StrongNegative"
        elif percentile_25 <= score < percentile_50:
            return "PseudoNegative"
        elif percentile_50 <= score < percentile_75:
            return "PseudoPositive"
        else:
            return "StrongPositive"

    df["min_max_label"] = df["average_score"].apply(get_min_max_label)
    df["percentile_label"] = df["average_score"].apply(get_percentile_label)
    df["threshold_label"] = df["average_score"].apply(lambda x: "Negative" if x < 0.5 else "Positive")

    def count_labels(column, labels):
        # Count from the label column so boundary scores are never dropped.
        counts = df[column].value_counts()
        return [int(counts.get(label, 0)) for label in labels]

    min_max_results = count_labels("min_max_label", labels_4)
    percentile_results = count_labels("percentile_label", labels_4)
    threshold_results = count_labels("threshold_label", labels_2)

    # Table 1: Min/Max and Percentile
    minmax_percentile_df = pd.DataFrame({
        "Label": labels_4,
        "Min_Max (Using Mean)": min_max_results,
        "Percentile (75-50-25)": percentile_results
    })

    # Table 2: Threshold 0.5
    threshold_df = pd.DataFrame({
        "Label": labels_2,
        "Threshold (0.5)": threshold_results
    })

    print(minmax_percentile_df,"\n", threshold_df)

    # Persist the label columns back into the same file.
    df.to_csv(csv, index=False)

    return min_max_results, percentile_results, threshold_results

def show_evaluation_results(min_max_all, percentile_all, threshold_all):
    """Print the per-label mean and standard deviation across experiments.

    Args:
        min_max_all: 2-D counts, one row per experiment and four columns
            (StrongNegative, PseudoNegative, PseudoPositive, StrongPositive).
        percentile_all: same layout as ``min_max_all``.
        threshold_all: 2-D counts, one row per experiment and two columns
            (Negative, Positive).

    Returns:
        None; three tables are printed.
    """
    four_way = ["StrongNegative", "PseudoNegative", "PseudoPositive", "StrongPositive"]
    two_way = ["Negative", "Positive"]

    def summarise(counts, labels, mean_column, std_column):
        # axis=0 aggregates over experiments, leaving one value per label.
        return pd.DataFrame({
            "Label": labels,
            mean_column: np.mean(counts, axis=0),
            std_column: np.std(counts, axis=0),
        })

    sections = [
        ("=== Min-Max Results ===",
         summarise(min_max_all, four_way, "Min-Max Mean", "Min-Max Std")),
        ("\n=== Percentile Results ===",
         summarise(percentile_all, four_way, "Percentile Mean", "Percentile Std")),
        ("\n=== Threshold Results ===",
         summarise(threshold_all, two_way, "Threshold Mean", "Threshold Std")),
    ]

    for heading, table in sections:
        print(heading)
        print(table)

In [23]:
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', None)

# Label the CSV currently bound to each csv_output_exp* name (set by the most
# recently run scoring cell) using that experiment's spy statistics.
labeling_exp1 = apply_labeling_from_spies(
    csv_output_exp1, mean_score_exp1, percentile_50_exp1, percentile_25_exp1,
    percentile_75_exp1, min_score_exp1, max_score_exp1,
)
labeling_exp2 = apply_labeling_from_spies(
    csv_output_exp2, mean_score_exp2, percentile_50_exp2, percentile_25_exp2,
    percentile_75_exp2, min_score_exp2, max_score_exp2,
)
labeling_exp3 = apply_labeling_from_spies(
    csv_output_exp3, mean_score_exp3, percentile_50_exp3, percentile_25_exp3,
    percentile_75_exp3, min_score_exp3, max_score_exp3,
)
labeling_exp4 = apply_labeling_from_spies(
    csv_output_exp4, mean_score_exp4, percentile_50_exp4, percentile_25_exp4,
    percentile_75_exp4, min_score_exp4, max_score_exp4,
)
labeling_exp5 = apply_labeling_from_spies(
    csv_output_exp5, mean_score_exp5, percentile_50_exp5, percentile_25_exp5,
    percentile_75_exp5, min_score_exp5, max_score_exp5,
)

min_max_results_exp1, percentile_results_exp1, threshold_results_exp1 = labeling_exp1
min_max_results_exp2, percentile_results_exp2, threshold_results_exp2 = labeling_exp2
min_max_results_exp3, percentile_results_exp3, threshold_results_exp3 = labeling_exp3
min_max_results_exp4, percentile_results_exp4, threshold_results_exp4 = labeling_exp4
min_max_results_exp5, percentile_results_exp5, threshold_results_exp5 = labeling_exp5

# Regroup by labeling scheme: each array has one row per experiment.
min_max_all, percentile_all, threshold_all = (
    np.array(list(per_scheme))
    for per_scheme in zip(labeling_exp1, labeling_exp2, labeling_exp3, labeling_exp4, labeling_exp5)
)

show_evaluation_results(min_max_all, percentile_all, threshold_all)

            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     0                     17
1  PseudoNegative                    26                     20
2  PseudoPositive                   115                     23
3  StrongPositive                    13                     94 
       Label  Threshold (0.5)
0  Negative                3
1  Positive              151
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     3                     20
1  PseudoNegative                    22                     20
2  PseudoPositive                   116                     32
3  StrongPositive                    13                     82 
       Label  Threshold (0.5)
0  Negative               10
1  Positive              144
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     0                     31
1  PseudoNegative                    42                     16
2  

In [24]:
# Score the graphs in each experiment's Test/CombinedUnlabeled directory with
# that experiment's ensemble. This rebinds csv_output_exp*, which later cells read.
test_unlabeled_exp1 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp1/Test/CombinedUnlabeled"
test_unlabeled_exp2 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp2/Test/CombinedUnlabeled"
test_unlabeled_exp3 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp3/Test/CombinedUnlabeled"
test_unlabeled_exp4 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp4/Test/CombinedUnlabeled"
test_unlabeled_exp5 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp5/Test/CombinedUnlabeled"

csv_output_exp1 = "GCN-5A_Exp1/Results/TestUnlabeledCaptureRates.csv"
csv_output_exp2 = "GCN-5A_Exp2/Results/TestUnlabeledCaptureRates.csv"
csv_output_exp3 = "GCN-5A_Exp3/Results/TestUnlabeledCaptureRates.csv"
csv_output_exp4 = "GCN-5A_Exp4/Results/TestUnlabeledCaptureRates.csv"
csv_output_exp5 = "GCN-5A_Exp5/Results/TestUnlabeledCaptureRates.csv"

for graph_dir, results_csv, ensemble in (
    (test_unlabeled_exp1, csv_output_exp1, models_exp1),
    (test_unlabeled_exp2, csv_output_exp2, models_exp2),
    (test_unlabeled_exp3, csv_output_exp3, models_exp3),
    (test_unlabeled_exp4, csv_output_exp4, models_exp4),
    (test_unlabeled_exp5, csv_output_exp5, models_exp5),
):
    # The variable name is historical; it holds this directory's capture rate.
    spy_capture_rate = get_capture_rate(graph_dir, results_csv, ensemble)


Model Predictions Summary:
Model 1: Positives = 267, Negatives = 656
Model 2: Positives = 291, Negatives = 632
Model 3: Positives = 278, Negatives = 645
Model 4: Positives = 253, Negatives = 670
Model 5: Positives = 258, Negatives = 665
Model 6: Positives = 260, Negatives = 663
Model 7: Positives = 255, Negatives = 668
Model 8: Positives = 249, Negatives = 674
Model 9: Positives = 218, Negatives = 705
Model 10: Positives = 274, Negatives = 649
Model 11: Positives = 289, Negatives = 634
Model 12: Positives = 259, Negatives = 664
Model 13: Positives = 262, Negatives = 661
Model 14: Positives = 244, Negatives = 679
Model 15: Positives = 243, Negatives = 680
Model 16: Positives = 279, Negatives = 644
Model 17: Positives = 171, Negatives = 752
Model 18: Positives = 270, Negatives = 653
Model 19: Positives = 254, Negatives = 669
Model 20: Positives = 214, Negatives = 709
Model 21: Positives = 274, Negatives = 649
Model 22: Positives = 218, Negatives = 705
Model 23: Positives = 205, Negative

In [25]:
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', None)

# Label the CSV currently bound to each csv_output_exp* name (set by the most
# recently run scoring cell) using that experiment's spy statistics.
labeling_exp1 = apply_labeling_from_spies(
    csv_output_exp1, mean_score_exp1, percentile_50_exp1, percentile_25_exp1,
    percentile_75_exp1, min_score_exp1, max_score_exp1,
)
labeling_exp2 = apply_labeling_from_spies(
    csv_output_exp2, mean_score_exp2, percentile_50_exp2, percentile_25_exp2,
    percentile_75_exp2, min_score_exp2, max_score_exp2,
)
labeling_exp3 = apply_labeling_from_spies(
    csv_output_exp3, mean_score_exp3, percentile_50_exp3, percentile_25_exp3,
    percentile_75_exp3, min_score_exp3, max_score_exp3,
)
labeling_exp4 = apply_labeling_from_spies(
    csv_output_exp4, mean_score_exp4, percentile_50_exp4, percentile_25_exp4,
    percentile_75_exp4, min_score_exp4, max_score_exp4,
)
labeling_exp5 = apply_labeling_from_spies(
    csv_output_exp5, mean_score_exp5, percentile_50_exp5, percentile_25_exp5,
    percentile_75_exp5, min_score_exp5, max_score_exp5,
)

min_max_results_exp1, percentile_results_exp1, threshold_results_exp1 = labeling_exp1
min_max_results_exp2, percentile_results_exp2, threshold_results_exp2 = labeling_exp2
min_max_results_exp3, percentile_results_exp3, threshold_results_exp3 = labeling_exp3
min_max_results_exp4, percentile_results_exp4, threshold_results_exp4 = labeling_exp4
min_max_results_exp5, percentile_results_exp5, threshold_results_exp5 = labeling_exp5

# Regroup by labeling scheme: each array has one row per experiment.
min_max_all, percentile_all, threshold_all = (
    np.array(list(per_scheme))
    for per_scheme in zip(labeling_exp1, labeling_exp2, labeling_exp3, labeling_exp4, labeling_exp5)
)

show_evaluation_results(min_max_all, percentile_all, threshold_all)

            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                   417                    758
1  PseudoNegative                   372                     58
2  PseudoPositive                   131                     49
3  StrongPositive                     3                     58 
       Label  Threshold (0.5)
0  Negative              684
1  Positive              239
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                   517                    757
1  PseudoNegative                   288                     66
2  PseudoPositive                   109                     56
3  StrongPositive                     9                     44 
       Label  Threshold (0.5)
0  Negative              685
1  Positive              238
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                   298                    773
1  PseudoNegative                   505                     54
2  

In [26]:
# Score the same IvanTestSet directory with every experiment's ensemble; only
# the output CSV differs per experiment. This rebinds csv_output_exp*, which
# later cells read.
test_ivan_exp1 = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-separate-graphs-5A/positive"
test_ivan_exp2 = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-separate-graphs-5A/positive"
test_ivan_exp3 = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-separate-graphs-5A/positive"
test_ivan_exp4 = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-separate-graphs-5A/positive"
test_ivan_exp5 = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-separate-graphs-5A/positive"

csv_output_exp1 = "GCN-5A_Exp1/Results/TestIvanCaptureRates.csv"
csv_output_exp2 = "GCN-5A_Exp2/Results/TestIvanCaptureRates.csv"
csv_output_exp3 = "GCN-5A_Exp3/Results/TestIvanCaptureRates.csv"
csv_output_exp4 = "GCN-5A_Exp4/Results/TestIvanCaptureRates.csv"
csv_output_exp5 = "GCN-5A_Exp5/Results/TestIvanCaptureRates.csv"

for graph_dir, results_csv, ensemble in (
    (test_ivan_exp1, csv_output_exp1, models_exp1),
    (test_ivan_exp2, csv_output_exp2, models_exp2),
    (test_ivan_exp3, csv_output_exp3, models_exp3),
    (test_ivan_exp4, csv_output_exp4, models_exp4),
    (test_ivan_exp5, csv_output_exp5, models_exp5),
):
    # The variable name is historical; it holds this directory's capture rate.
    spy_capture_rate = get_capture_rate(graph_dir, results_csv, ensemble)


Model Predictions Summary:
Model 1: Positives = 51, Negatives = 6
Model 2: Positives = 50, Negatives = 7
Model 3: Positives = 49, Negatives = 8
Model 4: Positives = 50, Negatives = 7
Model 5: Positives = 53, Negatives = 4
Model 6: Positives = 48, Negatives = 9
Model 7: Positives = 49, Negatives = 8
Model 8: Positives = 47, Negatives = 10
Model 9: Positives = 46, Negatives = 11
Model 10: Positives = 48, Negatives = 9
Model 11: Positives = 51, Negatives = 6
Model 12: Positives = 47, Negatives = 10
Model 13: Positives = 52, Negatives = 5
Model 14: Positives = 50, Negatives = 7
Model 15: Positives = 48, Negatives = 9
Model 16: Positives = 49, Negatives = 8
Model 17: Positives = 43, Negatives = 14
Model 18: Positives = 48, Negatives = 9
Model 19: Positives = 49, Negatives = 8
Model 20: Positives = 48, Negatives = 9
Model 21: Positives = 51, Negatives = 6
Model 22: Positives = 49, Negatives = 8
Model 23: Positives = 49, Negatives = 8
Model 24: Positives = 49, Negatives = 8
Model 25: Positiv

In [27]:
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', None)

# Label the CSV currently bound to each csv_output_exp* name (set by the most
# recently run scoring cell) using that experiment's spy statistics.
labeling_exp1 = apply_labeling_from_spies(
    csv_output_exp1, mean_score_exp1, percentile_50_exp1, percentile_25_exp1,
    percentile_75_exp1, min_score_exp1, max_score_exp1,
)
labeling_exp2 = apply_labeling_from_spies(
    csv_output_exp2, mean_score_exp2, percentile_50_exp2, percentile_25_exp2,
    percentile_75_exp2, min_score_exp2, max_score_exp2,
)
labeling_exp3 = apply_labeling_from_spies(
    csv_output_exp3, mean_score_exp3, percentile_50_exp3, percentile_25_exp3,
    percentile_75_exp3, min_score_exp3, max_score_exp3,
)
labeling_exp4 = apply_labeling_from_spies(
    csv_output_exp4, mean_score_exp4, percentile_50_exp4, percentile_25_exp4,
    percentile_75_exp4, min_score_exp4, max_score_exp4,
)
labeling_exp5 = apply_labeling_from_spies(
    csv_output_exp5, mean_score_exp5, percentile_50_exp5, percentile_25_exp5,
    percentile_75_exp5, min_score_exp5, max_score_exp5,
)

min_max_results_exp1, percentile_results_exp1, threshold_results_exp1 = labeling_exp1
min_max_results_exp2, percentile_results_exp2, threshold_results_exp2 = labeling_exp2
min_max_results_exp3, percentile_results_exp3, threshold_results_exp3 = labeling_exp3
min_max_results_exp4, percentile_results_exp4, threshold_results_exp4 = labeling_exp4
min_max_results_exp5, percentile_results_exp5, threshold_results_exp5 = labeling_exp5

# Regroup by labeling scheme: each array has one row per experiment.
min_max_all, percentile_all, threshold_all = (
    np.array(list(per_scheme))
    for per_scheme in zip(labeling_exp1, labeling_exp2, labeling_exp3, labeling_exp4, labeling_exp5)
)

show_evaluation_results(min_max_all, percentile_all, threshold_all)

            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     1                     10
1  PseudoNegative                    11                      4
2  PseudoPositive                    41                      9
3  StrongPositive                     4                     34 
       Label  Threshold (0.5)
0  Negative                6
1  Positive               51
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     4                      8
1  PseudoNegative                     4                      7
2  PseudoPositive                    48                     19
3  StrongPositive                     1                     23 
       Label  Threshold (0.5)
0  Negative                6
1  Positive               51
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     1                      8
1  PseudoNegative                    12                      9
2  