In [2]:
import torch
import torch.nn as nn
import numpy as np
from torch_geometric.data import Data
import glob
import os
import csv
from torch_geometric.nn import GATConv, global_mean_pool

# Define GAT model for batched data
class GAT(torch.nn.Module):
    """Single-layer graph-attention network that emits one logit per graph.

    Pipeline: GATConv (one head, 1-dimensional edge features) -> dropout ->
    mean pooling over the nodes of each graph -> batch norm -> dropout ->
    linear head with a single output unit.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of the node embedding produced by the GAT layer.
        dropout_p: Drop probability used by the (shared) dropout module.
    """

    def __init__(self, in_channels, out_channels, dropout_p=0.1):
        super().__init__()
        # The attribute names below are the state_dict keys, so they have to
        # stay stable for checkpoints loaded with load_state_dict().
        self.gat = GATConv(
            in_channels,
            out_channels,
            heads=1,
            concat=True,
            edge_dim=1,
        )
        # Swappable readout: global_max_pool or global_add_pool would also fit.
        self.pool = global_mean_pool
        self.dropout = nn.Dropout(p=dropout_p)
        self.norm = nn.BatchNorm1d(out_channels)
        self.linear = torch.nn.Linear(out_channels, 1)

    def forward(self, x, edge_index, edge_attr, batch):
        """Run the network on a (possibly batched) graph.

        Args:
            x: Node feature matrix.
            edge_index: Edge list passed straight to the GAT layer.
            edge_attr: Per-edge attributes passed straight to the GAT layer.
            batch: Graph assignment of every node, consumed by the pooling op.

        Returns:
            Tuple ``(logits, attn_weights)``: the output of the linear head and
            the attention weights exactly as returned by the GAT layer.
        """
        node_emb, attn_weights = self.gat(
            x, edge_index, edge_attr, return_attention_weights=True
        )
        # Dropout on node embeddings, then collapse nodes into one row per graph.
        graph_emb = self.pool(self.dropout(node_emb), batch)
        graph_emb = self.dropout(self.norm(graph_emb))
        return self.linear(graph_emb), attn_weights

def organize_graph_and_add_weight(file_path, label):
    """Load one saved graph and wrap it in a PyG ``Data`` object.

    The ``.npy`` file must contain a pickled dict with the keys
    ``'inverse_distance'`` (dense node-by-node weight matrix) and
    ``'encoded_matrix'`` (node feature matrix).

    Args:
        file_path: Path of the ``.npy`` file to read.
        label: Graph-level target, stored as a one-element float tensor in ``y``.

    Returns:
        ``Data`` with node features ``x``, ``edge_index`` listing every entry of
        the row-normalised weight matrix that is > 0, ``edge_attr`` holding the
        matching normalised weights, and ``y``.
    """
    # NOTE: allow_pickle=True unpickles arbitrary objects; only use on trusted files.
    payload = np.load(file_path, allow_pickle=True).item()
    raw_weights = payload['inverse_distance']
    raw_features = payload['encoded_matrix']

    node_features = torch.tensor(raw_features, dtype=torch.float32)
    weights = torch.tensor(raw_weights, dtype=torch.float32)

    # Row-normalise; the epsilon keeps all-zero rows from dividing by zero.
    row_totals = weights.sum(dim=1, keepdim=True)
    weights = weights / (row_totals + 1e-8)

    # Every strictly positive entry becomes an edge. nonzero() and boolean
    # indexing both walk the matrix in row-major order, so edges and weights align.
    has_edge = weights > 0

    return Data(
        x=node_features,
        edge_index=has_edge.nonzero(as_tuple=False).t(),
        edge_attr=weights[has_edge],
        y=torch.tensor([label], dtype=torch.float32),
    )
    
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Number of checkpoints (model_bin_1 .. model_bin_k) in every experiment's ensemble.
k = 50


def _load_ensemble(experiment, n_models):
    """Load the saved GAT checkpoints of one experiment, ready for inference.

    Args:
        experiment: Experiment number used in the checkpoint directory name.
        n_models: Number of checkpoints to load (files ``model_bin_1`` ..
            ``model_bin_{n_models}``).

    Returns:
        List of ``GAT`` models on ``device``, each switched to eval mode, in
        checkpoint order.
    """
    ensemble = []
    for i in range(1, n_models + 1):
        model = GAT(in_channels=37, out_channels=32).to(device)
        model_path = f"../../../Models/Cholesterol/GAT/GATModels-5A_exp{experiment}v2/Models/model_bin_{i}.pth"
        model.load_state_dict(torch.load(model_path, map_location=device))
        model.eval()  # inference only: fixes dropout and batch-norm statistics
        ensemble.append(model)
    return ensemble


# One ensemble per experiment. The earlier version ended with five extra
# load_state_dict() calls that reused the last model_path (experiment 5,
# checkpoint k) for every experiment's final model; they were mis-indented and
# would have overwritten the last model of ensembles 1-4, so they are removed.
models_exp1 = _load_ensemble(1, k)
models_exp2 = _load_ensemble(2, k)
models_exp3 = _load_ensemble(3, k)
models_exp4 = _load_ensemble(4, k)
models_exp5 = _load_ensemble(5, k)


In [3]:
def get_capture_rate(dir, csv_output, models):
    """Score every ``*.npy`` graph in a directory with a model ensemble.

    For each file the graph is loaded once, every model produces a sigmoid
    probability, and the per-file mean and standard deviation of those
    probabilities are written to ``csv_output`` together with the node count.
    A per-model tally of predictions >= 0.5 / < 0.5 is printed at the end.

    Args:
        dir: Directory searched (non-recursively) for ``*.npy`` files.
        csv_output: Path of the CSV to write; missing parent directories are
            created. Columns: filename, average_score, score_std, number_atoms.
        models: Sequence of models called as
            ``model(x, edge_index, edge_attr, batch=...)`` and returning
            ``(logit, attention)``.

    Returns:
        Mean of the per-file average scores, or ``nan`` when ``dir`` contains
        no ``.npy`` files.
    """
    files = sorted(glob.glob(f"{dir}/*.npy"))

    model_positive_counts = [0] * len(models)
    model_negative_counts = [0] * len(models)

    # dirname is "" for a bare filename and os.makedirs("") raises, so guard it.
    output_dir = os.path.dirname(csv_output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    score_total = 0.0

    with open(csv_output, "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["filename", "average_score", "score_std", "number_atoms"])

        for file in files:
            # The graph does not depend on the model: load it once per file
            # instead of once per (file, model) pair.
            graph = organize_graph_and_add_weight(file, label=0).to(device)
            number_atoms = graph.x.size(0)
            # Single graph -> every node belongs to batch element 0.
            batch = torch.zeros(number_atoms, dtype=torch.long, device=device)

            model_probs = []
            with torch.no_grad():
                for model_index, model in enumerate(models):
                    out, _ = model(graph.x, graph.edge_index, graph.edge_attr, batch=batch)
                    prob = torch.sigmoid(out).item()
                    model_probs.append(prob)

                    if prob >= 0.5:
                        model_positive_counts[model_index] += 1
                    else:
                        model_negative_counts[model_index] += 1

            prediction_mean = np.mean(model_probs)
            prediction_std = np.std(model_probs)

            score_total += prediction_mean
            writer.writerow([file, prediction_mean, prediction_std, number_atoms])

    print("\nModel Predictions Summary:")
    for i, (pos, neg) in enumerate(zip(model_positive_counts, model_negative_counts), start=1):
        print(f"Model {i}: Positives = {pos}, Negatives = {neg}")

    if not files:
        # Nothing was scored, so the rate is undefined rather than a crash.
        print(f"Warning: no .npy files found in {dir}")
        return float("nan")

    return score_total / len(files)

In [4]:
# Score the held-out "spy" graphs of each experiment with that experiment's
# ensemble. The csv_output_expN names are read by the cells that follow.
spies_exp1 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp1/Spies"
spies_exp2 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp2/Spies"
spies_exp3 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp3/Spies"
spies_exp4 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp4/Spies"
spies_exp5 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp5/Spies"

csv_output_exp1 = "GATModels-5A_exp1v2/NewResults/SpyCaptureRates.csv"
csv_output_exp2 = "GATModels-5A_exp2v2/NewResults/SpyCaptureRates.csv"
csv_output_exp3 = "GATModels-5A_exp3v2/NewResults/SpyCaptureRates.csv"
csv_output_exp4 = "GATModels-5A_exp4v2/NewResults/SpyCaptureRates.csv"
csv_output_exp5 = "GATModels-5A_exp5v2/NewResults/SpyCaptureRates.csv"

# spy_capture_rate ends up holding the value of the last experiment only.
for _graph_dir, _csv_path, _ensemble in (
    (spies_exp1, csv_output_exp1, models_exp1),
    (spies_exp2, csv_output_exp2, models_exp2),
    (spies_exp3, csv_output_exp3, models_exp3),
    (spies_exp4, csv_output_exp4, models_exp4),
    (spies_exp5, csv_output_exp5, models_exp5),
):
    spy_capture_rate = get_capture_rate(_graph_dir, _csv_path, _ensemble)


Model Predictions Summary:
Model 1: Positives = 124, Negatives = 30
Model 2: Positives = 135, Negatives = 19
Model 3: Positives = 130, Negatives = 24
Model 4: Positives = 134, Negatives = 20
Model 5: Positives = 127, Negatives = 27
Model 6: Positives = 129, Negatives = 25
Model 7: Positives = 133, Negatives = 21
Model 8: Positives = 133, Negatives = 21
Model 9: Positives = 139, Negatives = 15
Model 10: Positives = 132, Negatives = 22
Model 11: Positives = 126, Negatives = 28
Model 12: Positives = 138, Negatives = 16
Model 13: Positives = 132, Negatives = 22
Model 14: Positives = 129, Negatives = 25
Model 15: Positives = 131, Negatives = 23
Model 16: Positives = 137, Negatives = 17
Model 17: Positives = 129, Negatives = 25
Model 18: Positives = 133, Negatives = 21
Model 19: Positives = 124, Negatives = 30
Model 20: Positives = 130, Negatives = 24
Model 21: Positives = 132, Negatives = 22
Model 22: Positives = 129, Negatives = 25
Model 23: Positives = 121, Negatives = 33
Model 24: Posit

In [5]:
import pandas as pd

def get_spy_info(csv):
    """Print and return summary statistics of a capture-rate CSV.

    Args:
        csv: Path of a CSV file that has an ``average_score`` column.

    Returns:
        Tuple ``(mean, median, 25th percentile, 75th percentile, min, max)`` of
        the ``average_score`` column, in that order.
    """
    scores = pd.read_csv(csv)["average_score"]

    mean_score = scores.mean()
    percentile_50 = scores.quantile(0.5)
    percentile_25 = scores.quantile(0.25)
    percentile_75 = scores.quantile(0.75)
    min_score = scores.min()
    max_score = scores.max()

    # One line per statistic; the last entry carries the trailing blank line.
    report_lines = [
        f"Mean: {mean_score:.6f}",
        f"50th Percentile (Median): {percentile_50:.6f}",
        f"Bottom 25 Percentile: {percentile_25:.6f}",
        f"Top 25 Percentile: {percentile_75:.6f}",
        f"Min: {min_score:.6f}",
        f"Max: {max_score:.6f}\n",
    ]
    print("\n".join(report_lines))

    return mean_score, percentile_50, percentile_25, percentile_75, min_score, max_score

# Summarise the spy scores of every experiment. The resulting *_expN statistics
# are the cut-offs handed to apply_labeling_from_spies() further down.
# csv_output_expN must still point at the SpyCaptureRates CSVs when this runs.
print("Experiment 1 Spy Results:")
(mean_score_exp1, percentile_50_exp1, percentile_25_exp1,
 percentile_75_exp1, min_score_exp1, max_score_exp1) = get_spy_info(csv_output_exp1)

print("Experiment 2 Spy Results:")
(mean_score_exp2, percentile_50_exp2, percentile_25_exp2,
 percentile_75_exp2, min_score_exp2, max_score_exp2) = get_spy_info(csv_output_exp2)

print("Experiment 3 Spy Results:")
(mean_score_exp3, percentile_50_exp3, percentile_25_exp3,
 percentile_75_exp3, min_score_exp3, max_score_exp3) = get_spy_info(csv_output_exp3)

print("Experiment 4 Spy Results:")
(mean_score_exp4, percentile_50_exp4, percentile_25_exp4,
 percentile_75_exp4, min_score_exp4, max_score_exp4) = get_spy_info(csv_output_exp4)

print("Experiment 5 Spy Results:")
(mean_score_exp5, percentile_50_exp5, percentile_25_exp5,
 percentile_75_exp5, min_score_exp5, max_score_exp5) = get_spy_info(csv_output_exp5)

Experiment 1 Spy Results:
Mean: 0.748171
50th Percentile (Median): 0.808170
Bottom 25 Percentile: 0.649602
Top 25 Percentile: 0.893732
Min: 0.088230
Max: 0.995296

Experiment 2 Spy Results:
Mean: 0.743725
50th Percentile (Median): 0.840388
Bottom 25 Percentile: 0.588170
Top 25 Percentile: 0.919473
Min: 0.082657
Max: 0.986154

Experiment 3 Spy Results:
Mean: 0.775384
50th Percentile (Median): 0.841336
Bottom 25 Percentile: 0.703769
Top 25 Percentile: 0.905124
Min: 0.107967
Max: 0.987543

Experiment 4 Spy Results:
Mean: 0.723468
50th Percentile (Median): 0.798318
Bottom 25 Percentile: 0.614784
Top 25 Percentile: 0.894516
Min: 0.033989
Max: 0.989565

Experiment 5 Spy Results:
Mean: 0.765624
50th Percentile (Median): 0.853644
Bottom 25 Percentile: 0.643862
Top 25 Percentile: 0.930683
Min: 0.015046
Max: 0.998265



In [6]:
# Score the positive test graphs of each experiment.
# NOTE: csv_output_expN is rebound here, so after this cell those names no
# longer refer to the spy CSVs.
test_positives_exp1 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp1/Test/Positive"
test_positives_exp2 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp2/Test/Positive"
test_positives_exp3 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp3/Test/Positive"
test_positives_exp4 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp4/Test/Positive"
test_positives_exp5 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp5/Test/Positive"

csv_output_exp1 = "GATModels-5A_exp1v2/NewResults/TestPositiveCaptureRates.csv"
csv_output_exp2 = "GATModels-5A_exp2v2/NewResults/TestPositiveCaptureRates.csv"
csv_output_exp3 = "GATModels-5A_exp3v2/NewResults/TestPositiveCaptureRates.csv"
csv_output_exp4 = "GATModels-5A_exp4v2/NewResults/TestPositiveCaptureRates.csv"
csv_output_exp5 = "GATModels-5A_exp5v2/NewResults/TestPositiveCaptureRates.csv"

# The result keeps the historical name spy_capture_rate; only the value of the
# last experiment survives the loop.
for _graph_dir, _csv_path, _ensemble in (
    (test_positives_exp1, csv_output_exp1, models_exp1),
    (test_positives_exp2, csv_output_exp2, models_exp2),
    (test_positives_exp3, csv_output_exp3, models_exp3),
    (test_positives_exp4, csv_output_exp4, models_exp4),
    (test_positives_exp5, csv_output_exp5, models_exp5),
):
    spy_capture_rate = get_capture_rate(_graph_dir, _csv_path, _ensemble)


Model Predictions Summary:
Model 1: Positives = 121, Negatives = 33
Model 2: Positives = 136, Negatives = 18
Model 3: Positives = 133, Negatives = 21
Model 4: Positives = 141, Negatives = 13
Model 5: Positives = 132, Negatives = 22
Model 6: Positives = 134, Negatives = 20
Model 7: Positives = 138, Negatives = 16
Model 8: Positives = 136, Negatives = 18
Model 9: Positives = 141, Negatives = 13
Model 10: Positives = 131, Negatives = 23
Model 11: Positives = 132, Negatives = 22
Model 12: Positives = 140, Negatives = 14
Model 13: Positives = 143, Negatives = 11
Model 14: Positives = 133, Negatives = 21
Model 15: Positives = 135, Negatives = 19
Model 16: Positives = 133, Negatives = 21
Model 17: Positives = 138, Negatives = 16
Model 18: Positives = 137, Negatives = 17
Model 19: Positives = 121, Negatives = 33
Model 20: Positives = 137, Negatives = 17
Model 21: Positives = 134, Negatives = 20
Model 22: Positives = 132, Negatives = 22
Model 23: Positives = 121, Negatives = 33
Model 24: Posit

In [7]:
def _label_by_cutoffs(score, low, mid, high):
    """Map a score to one of four labels using three cut-offs (lower bounds inclusive)."""
    if score < low:
        return "StrongNegative"
    elif low <= score < mid:
        return "PseudoNegative"
    elif mid <= score < high:
        return "PseudoPositive"
    else:
        return "StrongPositive"


def apply_labeling_from_spies(csv, mean_score, percentile_50, percentile_25, percentile_75, min_score, max_score):
    """Label every row of a capture-rate CSV using spy-derived cut-offs.

    Three label columns are added to the file, which is rewritten in place:

    * ``min_max_label``    - cut-offs ``min_score`` / ``mean_score`` / ``max_score``
    * ``percentile_label`` - cut-offs ``percentile_25`` / ``percentile_50`` / ``percentile_75``
    * ``threshold_label``  - ``"Negative"`` below 0.5, otherwise ``"Positive"``

    The returned counts are taken from those label columns, so a score that
    sits exactly on a cut-off is counted in the same bucket it was labelled
    with and every list sums to the number of rows. (Counting with strict
    ``>`` / ``<`` comparisons, as was done before, silently dropped such rows.)

    Args:
        csv: Path of a CSV with an ``average_score`` column; overwritten with
            the three label columns added.
        mean_score, percentile_50, percentile_25, percentile_75, min_score,
        max_score: Cut-off values.

    Returns:
        Tuple ``(min_max_results, percentile_results, threshold_results)``.
        The first two are counts ordered StrongNegative, PseudoNegative,
        PseudoPositive, StrongPositive; the last is ``[negative, positive]``.
    """
    labels_4 = ["StrongNegative", "PseudoNegative", "PseudoPositive", "StrongPositive"]
    labels_2 = ["Negative", "Positive"]

    df = pd.read_csv(csv)
    scores = df["average_score"]

    df["min_max_label"] = scores.apply(
        lambda s: _label_by_cutoffs(s, min_score, mean_score, max_score)
    )
    df["percentile_label"] = scores.apply(
        lambda s: _label_by_cutoffs(s, percentile_25, percentile_50, percentile_75)
    )
    df["threshold_label"] = scores.apply(lambda x: "Negative" if x < 0.5 else "Positive")

    def count_labels(column, ordered_labels):
        # reindex() fixes the order and reports absent labels as 0.
        return df[column].value_counts().reindex(ordered_labels, fill_value=0).tolist()

    min_max_results = count_labels("min_max_label", labels_4)
    percentile_results = count_labels("percentile_label", labels_4)
    threshold_results = count_labels("threshold_label", labels_2)

    # Table 1: Min/Max and Percentile
    minmax_percentile_df = pd.DataFrame({
        "Label": labels_4,
        "Min_Max (Using Mean)": min_max_results,
        "Percentile (75-50-25)": percentile_results
    })

    # Table 2: Threshold 0.5
    threshold_df = pd.DataFrame({
        "Label": labels_2,
        "Threshold (0.5)": threshold_results
    })

    print(minmax_percentile_df,"\n", threshold_df)

    df.to_csv(csv, index=False)

    return min_max_results, percentile_results, threshold_results

def show_evaluation_results(min_max_all, percentile_all, threshold_all):
    """Print the mean and standard deviation of label counts across experiments.

    Args:
        min_max_all: Stacked min/max label counts, one row per experiment and
            one column per four-way label.
        percentile_all: Stacked percentile label counts, same layout.
        threshold_all: Stacked 0.5-threshold counts, one row per experiment and
            one column per two-way label.

    Returns:
        None. Three tables are printed to stdout.
    """
    four_way = ["StrongNegative", "PseudoNegative", "PseudoPositive", "StrongPositive"]
    two_way = ["Negative", "Positive"]

    def summarise(stacked, labels, mean_col, std_col):
        # axis=0 aggregates over experiments, leaving one value per label.
        return pd.DataFrame({
            "Label": labels,
            mean_col: np.mean(stacked, axis=0),
            std_col: np.std(stacked, axis=0),
        })

    # All tables are built before anything is printed.
    sections = [
        ("=== Min-Max Results ===",
         summarise(min_max_all, four_way, "Min-Max Mean", "Min-Max Std")),
        ("\n=== Percentile Results ===",
         summarise(percentile_all, four_way, "Percentile Mean", "Percentile Std")),
        ("\n=== Threshold Results ===",
         summarise(threshold_all, two_way, "Threshold Mean", "Threshold Std")),
    ]

    for heading, table in sections:
        print(heading)
        print(table)

In [8]:
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', None)

# Label the CSVs that csv_output_expN currently point to, using the spy
# statistics of the matching experiment as cut-offs. Each CSV is rewritten in
# place with the label columns added.
(min_max_results_exp1, percentile_results_exp1, threshold_results_exp1) = apply_labeling_from_spies(
    csv_output_exp1, mean_score_exp1, percentile_50_exp1, percentile_25_exp1,
    percentile_75_exp1, min_score_exp1, max_score_exp1)
(min_max_results_exp2, percentile_results_exp2, threshold_results_exp2) = apply_labeling_from_spies(
    csv_output_exp2, mean_score_exp2, percentile_50_exp2, percentile_25_exp2,
    percentile_75_exp2, min_score_exp2, max_score_exp2)
(min_max_results_exp3, percentile_results_exp3, threshold_results_exp3) = apply_labeling_from_spies(
    csv_output_exp3, mean_score_exp3, percentile_50_exp3, percentile_25_exp3,
    percentile_75_exp3, min_score_exp3, max_score_exp3)
(min_max_results_exp4, percentile_results_exp4, threshold_results_exp4) = apply_labeling_from_spies(
    csv_output_exp4, mean_score_exp4, percentile_50_exp4, percentile_25_exp4,
    percentile_75_exp4, min_score_exp4, max_score_exp4)
(min_max_results_exp5, percentile_results_exp5, threshold_results_exp5) = apply_labeling_from_spies(
    csv_output_exp5, mean_score_exp5, percentile_50_exp5, percentile_25_exp5,
    percentile_75_exp5, min_score_exp5, max_score_exp5)

# Stack the per-experiment counts: one row per experiment, one column per label.
min_max_all = np.array([min_max_results_exp1, min_max_results_exp2, min_max_results_exp3,
                        min_max_results_exp4, min_max_results_exp5])

percentile_all = np.array([percentile_results_exp1, percentile_results_exp2, percentile_results_exp3,
                           percentile_results_exp4, percentile_results_exp5])

threshold_all = np.array([threshold_results_exp1, threshold_results_exp2, threshold_results_exp3,
                          threshold_results_exp4, threshold_results_exp5])

show_evaluation_results(min_max_all, percentile_all, threshold_all)

            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     0                     36
1  PseudoNegative                    59                     37
2  PseudoPositive                    95                     31
3  StrongPositive                     0                     50 
       Label  Threshold (0.5)
0  Negative               19
1  Positive              135
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     3                     26
1  PseudoNegative                    46                     51
2  PseudoPositive                   101                     33
3  StrongPositive                     4                     44 
       Label  Threshold (0.5)
0  Negative               21
1  Positive              133
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     3                     48
1  PseudoNegative                    58                     36
2  

In [9]:
# Score the combined unlabeled test graphs of each experiment.
# NOTE: csv_output_expN is rebound here to the TestUnlabeledCaptureRates CSVs.
test_unlabeled_exp1 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp1/Test/CombinedUnlabeled"
test_unlabeled_exp2 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp2/Test/CombinedUnlabeled"
test_unlabeled_exp3 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp3/Test/CombinedUnlabeled"
test_unlabeled_exp4 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp4/Test/CombinedUnlabeled"
test_unlabeled_exp5 = "../../../Data/SplitData/Cholesterol/cholesterol-separate-graphs-5A_exp5/Test/CombinedUnlabeled"

csv_output_exp1 = "GATModels-5A_exp1v2/NewResults/TestUnlabeledCaptureRates.csv"
csv_output_exp2 = "GATModels-5A_exp2v2/NewResults/TestUnlabeledCaptureRates.csv"
csv_output_exp3 = "GATModels-5A_exp3v2/NewResults/TestUnlabeledCaptureRates.csv"
csv_output_exp4 = "GATModels-5A_exp4v2/NewResults/TestUnlabeledCaptureRates.csv"
csv_output_exp5 = "GATModels-5A_exp5v2/NewResults/TestUnlabeledCaptureRates.csv"

# The result keeps the historical name spy_capture_rate; only the value of the
# last experiment survives the loop.
for _graph_dir, _csv_path, _ensemble in (
    (test_unlabeled_exp1, csv_output_exp1, models_exp1),
    (test_unlabeled_exp2, csv_output_exp2, models_exp2),
    (test_unlabeled_exp3, csv_output_exp3, models_exp3),
    (test_unlabeled_exp4, csv_output_exp4, models_exp4),
    (test_unlabeled_exp5, csv_output_exp5, models_exp5),
):
    spy_capture_rate = get_capture_rate(_graph_dir, _csv_path, _ensemble)


Model Predictions Summary:
Model 1: Positives = 218, Negatives = 705
Model 2: Positives = 276, Negatives = 647
Model 3: Positives = 259, Negatives = 664
Model 4: Positives = 292, Negatives = 631
Model 5: Positives = 245, Negatives = 678
Model 6: Positives = 261, Negatives = 662
Model 7: Positives = 276, Negatives = 647
Model 8: Positives = 275, Negatives = 648
Model 9: Positives = 314, Negatives = 609
Model 10: Positives = 252, Negatives = 671
Model 11: Positives = 248, Negatives = 675
Model 12: Positives = 330, Negatives = 593
Model 13: Positives = 304, Negatives = 619
Model 14: Positives = 258, Negatives = 665
Model 15: Positives = 260, Negatives = 663
Model 16: Positives = 269, Negatives = 654
Model 17: Positives = 311, Negatives = 612
Model 18: Positives = 286, Negatives = 637
Model 19: Positives = 239, Negatives = 684
Model 20: Positives = 271, Negatives = 652
Model 21: Positives = 261, Negatives = 662
Model 22: Positives = 229, Negatives = 694
Model 23: Positives = 199, Negative

In [10]:
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', None)

# Same labelling/aggregation steps as before, applied to whatever CSVs
# csv_output_expN currently name. The cut-offs are still the spy statistics.
(min_max_results_exp1, percentile_results_exp1, threshold_results_exp1) = apply_labeling_from_spies(
    csv_output_exp1, mean_score_exp1, percentile_50_exp1, percentile_25_exp1,
    percentile_75_exp1, min_score_exp1, max_score_exp1)
(min_max_results_exp2, percentile_results_exp2, threshold_results_exp2) = apply_labeling_from_spies(
    csv_output_exp2, mean_score_exp2, percentile_50_exp2, percentile_25_exp2,
    percentile_75_exp2, min_score_exp2, max_score_exp2)
(min_max_results_exp3, percentile_results_exp3, threshold_results_exp3) = apply_labeling_from_spies(
    csv_output_exp3, mean_score_exp3, percentile_50_exp3, percentile_25_exp3,
    percentile_75_exp3, min_score_exp3, max_score_exp3)
(min_max_results_exp4, percentile_results_exp4, threshold_results_exp4) = apply_labeling_from_spies(
    csv_output_exp4, mean_score_exp4, percentile_50_exp4, percentile_25_exp4,
    percentile_75_exp4, min_score_exp4, max_score_exp4)
(min_max_results_exp5, percentile_results_exp5, threshold_results_exp5) = apply_labeling_from_spies(
    csv_output_exp5, mean_score_exp5, percentile_50_exp5, percentile_25_exp5,
    percentile_75_exp5, min_score_exp5, max_score_exp5)

# One row per experiment, one column per label.
min_max_all = np.array([min_max_results_exp1, min_max_results_exp2, min_max_results_exp3,
                        min_max_results_exp4, min_max_results_exp5])

percentile_all = np.array([percentile_results_exp1, percentile_results_exp2, percentile_results_exp3,
                           percentile_results_exp4, percentile_results_exp5])

threshold_all = np.array([threshold_results_exp1, threshold_results_exp2, threshold_results_exp3,
                          threshold_results_exp4, threshold_results_exp5])

show_evaluation_results(min_max_all, percentile_all, threshold_all)

            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                   266                    736
1  PseudoNegative                   519                     78
2  PseudoPositive                   138                     59
3  StrongPositive                     0                     50 
       Label  Threshold (0.5)
0  Negative              663
1  Positive              260
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                   227                    696
1  PseudoNegative                   553                    137
2  PseudoPositive                   138                     42
3  StrongPositive                     5                     48 
       Label  Threshold (0.5)
0  Negative              653
1  Positive              270
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                   291                    761
1  PseudoNegative                   515                     83
2  

In [11]:
# Score the external "Ivan" positive set with every experiment's ensemble.
# All five experiments read the same input directory; only the models and the
# output CSV differ.
# NOTE(review): these CSVs go to ".../Results/" while the other cells write to
# ".../NewResults/" - confirm this is intentional.
test_ivan_exp1 = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-separate-graphs-5A/positive"
test_ivan_exp2 = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-separate-graphs-5A/positive"
test_ivan_exp3 = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-separate-graphs-5A/positive"
test_ivan_exp4 = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-separate-graphs-5A/positive"
test_ivan_exp5 = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-separate-graphs-5A/positive"

csv_output_exp1 = "GATModels-5A_exp1v2/Results/TestIvanCaptureRates.csv"
csv_output_exp2 = "GATModels-5A_exp2v2/Results/TestIvanCaptureRates.csv"
csv_output_exp3 = "GATModels-5A_exp3v2/Results/TestIvanCaptureRates.csv"
csv_output_exp4 = "GATModels-5A_exp4v2/Results/TestIvanCaptureRates.csv"
csv_output_exp5 = "GATModels-5A_exp5v2/Results/TestIvanCaptureRates.csv"

# The result keeps the historical name spy_capture_rate; only the value of the
# last experiment survives the loop.
for _graph_dir, _csv_path, _ensemble in (
    (test_ivan_exp1, csv_output_exp1, models_exp1),
    (test_ivan_exp2, csv_output_exp2, models_exp2),
    (test_ivan_exp3, csv_output_exp3, models_exp3),
    (test_ivan_exp4, csv_output_exp4, models_exp4),
    (test_ivan_exp5, csv_output_exp5, models_exp5),
):
    spy_capture_rate = get_capture_rate(_graph_dir, _csv_path, _ensemble)


Model Predictions Summary:
Model 1: Positives = 40, Negatives = 17
Model 2: Positives = 41, Negatives = 16
Model 3: Positives = 46, Negatives = 11
Model 4: Positives = 44, Negatives = 13
Model 5: Positives = 45, Negatives = 12
Model 6: Positives = 46, Negatives = 11
Model 7: Positives = 45, Negatives = 12
Model 8: Positives = 45, Negatives = 12
Model 9: Positives = 48, Negatives = 9
Model 10: Positives = 46, Negatives = 11
Model 11: Positives = 44, Negatives = 13
Model 12: Positives = 49, Negatives = 8
Model 13: Positives = 48, Negatives = 9
Model 14: Positives = 45, Negatives = 12
Model 15: Positives = 44, Negatives = 13
Model 16: Positives = 46, Negatives = 11
Model 17: Positives = 46, Negatives = 11
Model 18: Positives = 46, Negatives = 11
Model 19: Positives = 40, Negatives = 17
Model 20: Positives = 45, Negatives = 12
Model 21: Positives = 42, Negatives = 15
Model 22: Positives = 39, Negatives = 18
Model 23: Positives = 39, Negatives = 18
Model 24: Positives = 45, Negatives = 12


In [12]:
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', None)

# Same labelling/aggregation steps once more, applied to whatever CSVs
# csv_output_expN currently name. The cut-offs are still the spy statistics.
(min_max_results_exp1, percentile_results_exp1, threshold_results_exp1) = apply_labeling_from_spies(
    csv_output_exp1, mean_score_exp1, percentile_50_exp1, percentile_25_exp1,
    percentile_75_exp1, min_score_exp1, max_score_exp1)
(min_max_results_exp2, percentile_results_exp2, threshold_results_exp2) = apply_labeling_from_spies(
    csv_output_exp2, mean_score_exp2, percentile_50_exp2, percentile_25_exp2,
    percentile_75_exp2, min_score_exp2, max_score_exp2)
(min_max_results_exp3, percentile_results_exp3, threshold_results_exp3) = apply_labeling_from_spies(
    csv_output_exp3, mean_score_exp3, percentile_50_exp3, percentile_25_exp3,
    percentile_75_exp3, min_score_exp3, max_score_exp3)
(min_max_results_exp4, percentile_results_exp4, threshold_results_exp4) = apply_labeling_from_spies(
    csv_output_exp4, mean_score_exp4, percentile_50_exp4, percentile_25_exp4,
    percentile_75_exp4, min_score_exp4, max_score_exp4)
(min_max_results_exp5, percentile_results_exp5, threshold_results_exp5) = apply_labeling_from_spies(
    csv_output_exp5, mean_score_exp5, percentile_50_exp5, percentile_25_exp5,
    percentile_75_exp5, min_score_exp5, max_score_exp5)

# One row per experiment, one column per label.
min_max_all = np.array([min_max_results_exp1, min_max_results_exp2, min_max_results_exp3,
                        min_max_results_exp4, min_max_results_exp5])

percentile_all = np.array([percentile_results_exp1, percentile_results_exp2, percentile_results_exp3,
                           percentile_results_exp4, percentile_results_exp5])

threshold_all = np.array([threshold_results_exp1, threshold_results_exp2, threshold_results_exp3,
                          threshold_results_exp4, threshold_results_exp5])

show_evaluation_results(min_max_all, percentile_all, threshold_all)

            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     1                     19
1  PseudoNegative                    24                      8
2  PseudoPositive                    32                      8
3  StrongPositive                     0                     22 
       Label  Threshold (0.5)
0  Negative               12
1  Positive               45
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     1                     14
1  PseudoNegative                    18                     15
2  PseudoPositive                    37                     12
3  StrongPositive                     1                     16 
       Label  Threshold (0.5)
0  Negative                9
1  Positive               48
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     1                     21
1  PseudoNegative                    24                      9
2  