In [2]:
import torch
import torch.nn as nn
import glob
import csv
import numpy as np
import os

# 3D CNN Model
class CNN3D(nn.Module):
    """3D CNN that maps a 37-channel voxel grid to two class probabilities.

    Layer stack (see ``forward``):
      * ``conv0``: 1x1x1 convolution, 37 -> 64 channels, then ReLU and 2x2x2 max-pool
      * ``conv1``: 3x3x3 convolution, 64 -> 64 channels, then ReLU and 2x2x2 max-pool
      * ``conv2``: 3x3x3 convolution, 64 -> 128 channels, then ReLU and 2x2x2 max-pool
      * ``fc1``/``fc2``/``fc3``: 3456 -> 256 -> 128 -> 2, followed by a softmax

    There are three stride-2 pooling steps and ``fc1`` expects
    ``128 * 3 * 3 * 3`` features, so every spatial dimension of the input must
    shrink to 3 after three halvings (e.g. 24 -> 12 -> 6 -> 3).

    Attribute names are kept unchanged because ``state_dict`` keys (and thus
    saved checkpoints) are derived from them.
    """

    def __init__(self):
        super().__init__()

        # TODO (original author's note): experiment with the number of output channels.
        self.conv0 = nn.Conv3d(in_channels=37, out_channels=64, kernel_size=1, stride=1, padding=0)
        self.conv1 = nn.Conv3d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool3d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv3d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool3d(kernel_size=2, stride=2, padding=0)

        # After the three pooling steps applied in forward(), the feature map
        # must be 128 channels x 3 x 3 x 3 for fc1 to accept it.
        # TODO (original author's note): try a hidden width larger than 256.
        self.fc1 = nn.Linear(128 * 3 * 3 * 3, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 2)  # two outputs: one logit per class

    def forward(self, x):
        """Return softmax class probabilities of shape ``(batch, 2)``.

        Args:
            x: tensor of shape ``(batch, 37, D, H, W)``.

        Raises:
            RuntimeError: if the pooled feature map does not have exactly
                ``128 * 3 * 3 * 3`` values per sample (raised by ``fc1``).
        """
        # Conv -> ReLU -> max-pool, three times (self.pool is reused for the first two).
        x = self.pool(torch.relu(self.conv0(x)))
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool2(torch.relu(self.conv2(x)))

        # Flatten per sample. The previous `x.view(-1, 3456)` let a wrong-sized
        # input spill into the batch axis and return a plausible-looking result
        # with the wrong number of rows; flattening from dim 1 keeps the batch
        # axis intact so fc1 rejects a mismatched feature count instead.
        x = torch.flatten(x, start_dim=1)

        # TODO (original author's note): try tanh instead of ReLU after fc1.
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return torch.nn.functional.softmax(self.fc3(x), dim=1)
    
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Number of checkpoints (model_bin_1.pth .. model_bin_k.pth) loaded per experiment.
k = 50


def _load_experiment_models(exp, n_models=k):
    """Load checkpoints ``model_bin_1`` .. ``model_bin_<n_models>`` of one experiment.

    Each checkpoint is loaded into a fresh ``CNN3D`` on ``device`` and switched
    to eval mode. The returned list is ordered by checkpoint number.
    """
    loaded = []
    for i in range(1, n_models + 1):
        model_path = f"../../../Models/Cholesterol/3DCNN/3DCholesterolModels-st_exp{exp}/Models/model_bin_{i}.pth"
        model = CNN3D().to(device)
        model.load_state_dict(torch.load(model_path, map_location=device))
        model.eval()
        loaded.append(model)
    return loaded


# One ensemble per experiment. The names models_exp1..models_exp5 are used by
# the evaluation cells further down.
# NOTE: the original cell ended with five mis-indented `load_state_dict` lines
# that re-loaded the last `model_path` (experiment 5, checkpoint k) into the
# last model of every experiment; they were removed because they would
# overwrite correctly loaded weights (and do not parse at that indentation).
models_exp1 = _load_experiment_models(1)
models_exp2 = _load_experiment_models(2)
models_exp3 = _load_experiment_models(3)
models_exp4 = _load_experiment_models(4)
models_exp5 = _load_experiment_models(5)


In [3]:
def evaluate_file(model, file_path, threshold=0.5):
    """Score one saved grid with one model.

    The array stored at ``file_path`` is loaded with ``np.load``; its axis 3 is
    moved to the front and a batch axis of size 1 is added before it is passed
    to ``model``. Element ``[0, 1]`` of the model output is taken as the
    positive-class probability.

    Args:
        model: a ``torch.nn.Module``; it is put into eval mode by this call.
        file_path: path to a ``.npy`` file holding a 4-D array.
        threshold: probabilities ``>= threshold`` are classified as 1.

    Returns:
        tuple: ``(predicted_class, prob, non_padded_rows)`` where
        ``predicted_class`` is 0 or 1, ``prob`` is a Python float, and
        ``non_padded_rows`` is the number of positions over the first three
        axes that have at least one non-zero value along axis 3 (callers store
        it as the atom count - presumably one occupied voxel per atom; confirm
        against the grid-generation code).
    """
    grid = np.load(file_path)

    non_padded_rows = np.sum(np.any(grid != 0, axis=(3)))

    # Put the input on the device the model actually lives on rather than on a
    # notebook-global `device`, so the function also works for a model that was
    # moved elsewhere. A parameter-free model falls back to the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    grid_tensor = torch.tensor(grid, dtype=torch.float32).permute(3, 0, 1, 2).unsqueeze(0)
    grid_tensor = grid_tensor.to(target_device)

    model.eval()
    with torch.no_grad():
        output = model(grid_tensor)

    prob = output[0, 1].item()

    predicted_class = int(prob >= threshold)

    return predicted_class, prob, non_padded_rows


In [4]:
def evaluate_directory(dir, csv_output, models):
    """Score every ``.npy`` file in a directory with an ensemble and write a CSV.

    For each file (in sorted order) every model is run through
    ``evaluate_file``; the mean and standard deviation of the per-model
    probabilities are written to ``csv_output`` together with the file name and
    the ``non_padded_rows`` value reported by ``evaluate_file``. A per-model
    tally of positive/negative predictions is printed at the end.

    Args:
        dir: directory that is searched (non-recursively) for ``*.npy`` files.
        csv_output: path of the CSV to (over)write; its parent directory is
            created when missing.
        models: non-empty sequence of models accepted by ``evaluate_file``.

    Returns:
        The mean over all files of the per-file mean probability.

    Raises:
        FileNotFoundError: if ``dir`` contains no ``.npy`` file (previously a
            ZeroDivisionError after an empty CSV had already been written).
        ValueError: if ``models`` is empty.
    """
    files = sorted(glob.glob(f"{dir}/*.npy"))
    if not files:
        raise FileNotFoundError(f"No .npy files found in directory: {dir}")
    if len(models) == 0:
        raise ValueError("`models` must contain at least one model")

    model_positive_counts = [0] * len(models)
    model_negative_counts = [0] * len(models)

    # os.path.dirname() is '' for a bare file name, and os.makedirs('') raises.
    output_dir = os.path.dirname(csv_output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    capture_rate = 0

    with open(csv_output, "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["filename", "average_score", "score_std", "number_atoms"])

        for file in files:
            model_probs = []
            for model_index, model in enumerate(models):
                predicted_class, prob, non_padded_rows = evaluate_file(model, file)
                model_probs.append(prob)

                if predicted_class == 1:
                    model_positive_counts[model_index] += 1
                else:
                    model_negative_counts[model_index] += 1

            prediction_mean = np.mean(model_probs)
            prediction_std = np.std(model_probs)

            capture_rate += prediction_mean
            # non_padded_rows is the value from the last model; it depends only
            # on the file, so every model reports the same number.
            writer.writerow([file, prediction_mean, prediction_std, non_padded_rows])

    capture_rate /= len(files)

    print("\nModel Predictions Summary:")
    for i, (pos, neg) in enumerate(zip(model_positive_counts, model_negative_counts), start=1):
        print(f"Model {i}: Positives = {pos}, Negatives = {neg}")

    # The "overlapping" rate used to be restricted by a (now disabled) filter on
    # non_padded_rows (55..65). Without that filter it is identical to
    # capture_rate; the line is still printed so the cell output is unchanged.
    print("Overlapping Capture Rate is", capture_rate)

    return capture_rate

In [5]:
# Score each experiment's spy set with that experiment's ensemble.
# The five experiments differ only in the experiment number embedded in the
# paths, so the paths are built from templates instead of five hand-edited copies.
_SPY_DIR_TEMPLATE = "../../../Data/SplitData/Cholesterol/cholesterol-grid-st_exp{exp}/Spies"
_SPY_CSV_TEMPLATE = "../../../Models/Cholesterol/3DCNN/3DCholesterolModels-st_exp{exp}/NewResults/SpyCaptureRates.csv"

spies_exp1, spies_exp2, spies_exp3, spies_exp4, spies_exp5 = [
    _SPY_DIR_TEMPLATE.format(exp=exp) for exp in range(1, 6)
]
# csv_output_exp1..5 are read by other cells (e.g. the get_spy_info calls), so
# these module-level names are kept.
csv_output_exp1, csv_output_exp2, csv_output_exp3, csv_output_exp4, csv_output_exp5 = [
    _SPY_CSV_TEMPLATE.format(exp=exp) for exp in range(1, 6)
]

# Keep every experiment's result; previously each call overwrote the same
# variable, so only experiment 5's value survived.
spy_capture_rates = {}
for exp, spy_dir, spy_csv, ensemble in zip(
    range(1, 6),
    (spies_exp1, spies_exp2, spies_exp3, spies_exp4, spies_exp5),
    (csv_output_exp1, csv_output_exp2, csv_output_exp3, csv_output_exp4, csv_output_exp5),
    (models_exp1, models_exp2, models_exp3, models_exp4, models_exp5),
):
    spy_capture_rates[exp] = evaluate_directory(spy_dir, spy_csv, ensemble)

# Backward compatibility: the original cell left experiment 5's value here.
spy_capture_rate = spy_capture_rates[5]


Model Predictions Summary:
Model 1: Positives = 712, Negatives = 58
Model 2: Positives = 711, Negatives = 59
Model 3: Positives = 701, Negatives = 69
Model 4: Positives = 720, Negatives = 50
Model 5: Positives = 715, Negatives = 55
Model 6: Positives = 710, Negatives = 60
Model 7: Positives = 706, Negatives = 64
Model 8: Positives = 708, Negatives = 62
Model 9: Positives = 703, Negatives = 67
Model 10: Positives = 711, Negatives = 59
Model 11: Positives = 717, Negatives = 53
Model 12: Positives = 709, Negatives = 61
Model 13: Positives = 715, Negatives = 55
Model 14: Positives = 713, Negatives = 57
Model 15: Positives = 720, Negatives = 50
Model 16: Positives = 724, Negatives = 46
Model 17: Positives = 696, Negatives = 74
Model 18: Positives = 724, Negatives = 46
Model 19: Positives = 712, Negatives = 58
Model 20: Positives = 713, Negatives = 57
Model 21: Positives = 710, Negatives = 60
Model 22: Positives = 701, Negatives = 69
Model 23: Positives = 717, Negatives = 53
Model 24: Posit

In [6]:
import pandas as pd

def get_spy_info(csv):
    """Summarise the ``average_score`` column of a results CSV.

    Reads the file at ``csv`` with pandas, prints the mean, median, 25th and
    75th percentiles, minimum and maximum of ``average_score`` (six decimals
    each, followed by one blank line) and returns the same six values.

    Returns:
        tuple: ``(mean, median, 25th percentile, 75th percentile, min, max)``.
    """
    avg_scores = pd.read_csv(csv)["average_score"]

    # (caption, value) pairs in both print order and return order.
    summary = (
        ("Mean", avg_scores.mean()),
        ("50th Percentile (Median)", avg_scores.quantile(0.5)),
        ("Bottom 25 Percentile", avg_scores.quantile(0.25)),
        ("Top 25 Percentile", avg_scores.quantile(0.75)),
        ("Min", avg_scores.min()),
        ("Max", avg_scores.max()),
    )

    rendered = [f"{caption}: {value:.6f}" for caption, value in summary]
    rendered[-1] += "\n"  # blank line after the last statistic
    for line in rendered:
        print(line)

    return tuple(value for _, value in summary)

# Spy-score statistics per experiment. They are read from whatever file
# csv_output_expN currently points at, and are used as labeling cutoffs by the
# apply_labeling_from_spies calls in other cells.
print("Experiment 1 Spy Results:")
(
    mean_score_exp1,
    percentile_50_exp1,
    percentile_25_exp1,
    percentile_75_exp1,
    min_score_exp1,
    max_score_exp1,
) = get_spy_info(csv_output_exp1)

print("Experiment 2 Spy Results:")
(
    mean_score_exp2,
    percentile_50_exp2,
    percentile_25_exp2,
    percentile_75_exp2,
    min_score_exp2,
    max_score_exp2,
) = get_spy_info(csv_output_exp2)

print("Experiment 3 Spy Results:")
(
    mean_score_exp3,
    percentile_50_exp3,
    percentile_25_exp3,
    percentile_75_exp3,
    min_score_exp3,
    max_score_exp3,
) = get_spy_info(csv_output_exp3)

print("Experiment 4 Spy Results:")
(
    mean_score_exp4,
    percentile_50_exp4,
    percentile_25_exp4,
    percentile_75_exp4,
    min_score_exp4,
    max_score_exp4,
) = get_spy_info(csv_output_exp4)

print("Experiment 5 Spy Results:")
(
    mean_score_exp5,
    percentile_50_exp5,
    percentile_25_exp5,
    percentile_75_exp5,
    min_score_exp5,
    max_score_exp5,
) = get_spy_info(csv_output_exp5)

Experiment 1 Spy Results:
Mean: 0.909196
50th Percentile (Median): 0.998402
Bottom 25 Percentile: 0.971694
Top 25 Percentile: 0.999265
Min: 0.000393
Max: 0.999589

Experiment 2 Spy Results:
Mean: 0.911292
50th Percentile (Median): 0.997469
Bottom 25 Percentile: 0.947971
Top 25 Percentile: 0.999269
Min: 0.014354
Max: 0.999578

Experiment 3 Spy Results:
Mean: 0.928312
50th Percentile (Median): 0.998486
Bottom 25 Percentile: 0.983632
Top 25 Percentile: 0.999377
Min: 0.001310
Max: 0.999662

Experiment 4 Spy Results:
Mean: 0.894196
50th Percentile (Median): 0.997700
Bottom 25 Percentile: 0.958346
Top 25 Percentile: 0.999255
Min: 0.000063
Max: 0.999621

Experiment 5 Spy Results:
Mean: 0.922306
50th Percentile (Median): 0.997718
Bottom 25 Percentile: 0.982572
Top 25 Percentile: 0.998905
Min: 0.000010
Max: 0.999436



In [7]:
# Score each experiment's held-out test positives with that experiment's ensemble.
_TEST_POSITIVE_DIR_TEMPLATE = "../../../Data/SplitData/Cholesterol/cholesterol-grid-st_exp{exp}/Test/Positive"
_TEST_POSITIVE_CSV_TEMPLATE = "../../../Models/Cholesterol/3DCNN/3DCholesterolModels-st_exp{exp}/NewResults/TestPositiveCaptureRates.csv"

test_positives_exp1, test_positives_exp2, test_positives_exp3, test_positives_exp4, test_positives_exp5 = [
    _TEST_POSITIVE_DIR_TEMPLATE.format(exp=exp) for exp in range(1, 6)
]
# NOTE: this rebinds csv_output_exp1..5, which the spy-evaluation cell set to
# the spy CSVs. Re-running the get_spy_info cell after this one would therefore
# read the test-positive CSVs instead; run the notebook top to bottom.
csv_output_exp1, csv_output_exp2, csv_output_exp3, csv_output_exp4, csv_output_exp5 = [
    _TEST_POSITIVE_CSV_TEMPLATE.format(exp=exp) for exp in range(1, 6)
]

# Keep every experiment's result; previously each call overwrote one variable
# (misleadingly named spy_capture_rate), so only experiment 5's value survived.
test_positive_capture_rates = {}
for exp, positives_dir, positives_csv, ensemble in zip(
    range(1, 6),
    (test_positives_exp1, test_positives_exp2, test_positives_exp3, test_positives_exp4, test_positives_exp5),
    (csv_output_exp1, csv_output_exp2, csv_output_exp3, csv_output_exp4, csv_output_exp5),
    (models_exp1, models_exp2, models_exp3, models_exp4, models_exp5),
):
    test_positive_capture_rates[exp] = evaluate_directory(positives_dir, positives_csv, ensemble)

# Backward compatibility: the original cell left experiment 5's value under this name.
spy_capture_rate = test_positive_capture_rates[5]


Model Predictions Summary:
Model 1: Positives = 723, Negatives = 47
Model 2: Positives = 718, Negatives = 52
Model 3: Positives = 719, Negatives = 51
Model 4: Positives = 727, Negatives = 43
Model 5: Positives = 720, Negatives = 50
Model 6: Positives = 727, Negatives = 43
Model 7: Positives = 714, Negatives = 56
Model 8: Positives = 716, Negatives = 54
Model 9: Positives = 718, Negatives = 52
Model 10: Positives = 718, Negatives = 52
Model 11: Positives = 727, Negatives = 43
Model 12: Positives = 718, Negatives = 52
Model 13: Positives = 725, Negatives = 45
Model 14: Positives = 724, Negatives = 46
Model 15: Positives = 723, Negatives = 47
Model 16: Positives = 724, Negatives = 46
Model 17: Positives = 723, Negatives = 47
Model 18: Positives = 722, Negatives = 48
Model 19: Positives = 719, Negatives = 51
Model 20: Positives = 723, Negatives = 47
Model 21: Positives = 724, Negatives = 46
Model 22: Positives = 715, Negatives = 55
Model 23: Positives = 724, Negatives = 46
Model 24: Posit

In [8]:
def apply_labeling_from_spies(csv, mean_score, percentile_50, percentile_25, percentile_75, min_score, max_score):
    """Label per-protein average scores using cutoffs derived from spy scores.

    The CSV at ``csv`` must provide the columns ``filename``, ``average_score``
    and ``number_atoms``. Rows are grouped by the first 7 characters of the
    file's base name (used as the protein id); ``average_score`` is averaged
    per group and the first ``number_atoms`` of each group is kept. Three label
    columns are added and the grouped table is written next to the input as
    ``<name>_labeled<ext>``:

    * ``min_max_label``: cutoffs ``min_score`` / ``mean_score`` / ``max_score``
    * ``percentile_label``: cutoffs ``percentile_25`` / ``percentile_50`` / ``percentile_75``
    * ``threshold_label``: ``"Negative"`` below 0.5, otherwise ``"Positive"``

    For the four-way labels a score below the first cutoff is
    ``StrongNegative``, below the second ``PseudoNegative``, below the third
    ``PseudoPositive`` and otherwise ``StrongPositive`` (lower bounds inclusive).

    Returns:
        tuple: ``(min_max_results, percentile_results, threshold_results)``;
        the first two are counts in the order StrongNegative, PseudoNegative,
        PseudoPositive, StrongPositive, the last is ``[negative, positive]``.
        The counts are taken from the label columns, so each list sums to the
        number of proteins. (Previously they were recomputed with strict
        ``>``/``<`` comparisons, which dropped scores lying exactly on a cutoff
        even though those rows were labelled.)
    """
    labels_4 = ["StrongNegative", "PseudoNegative", "PseudoPositive", "StrongPositive"]
    labels_2 = ["Negative", "Positive"]

    def four_way_label(score, low, mid, high):
        # Same decision order as the label definitions above.
        if score < low:
            return labels_4[0]
        if score < mid:
            return labels_4[1]
        if score < high:
            return labels_4[2]
        return labels_4[3]

    df = pd.read_csv(csv)

    # Protein id = first 7 characters of the file's base name.
    df['protein_id'] = df['filename'].apply(lambda x: os.path.basename(x)[:7])
    print(len(df['protein_id']), "is length of protein id's")

    # One row per protein: mean score, and the first number_atoms seen.
    df1_grouped_mean = df.groupby('protein_id').agg({
        'average_score': 'mean',
        'number_atoms': 'first'
    }).reset_index()

    scores = df1_grouped_mean["average_score"]
    df1_grouped_mean["min_max_label"] = scores.apply(
        lambda s: four_way_label(s, min_score, mean_score, max_score)
    )
    df1_grouped_mean["percentile_label"] = scores.apply(
        lambda s: four_way_label(s, percentile_25, percentile_50, percentile_75)
    )
    df1_grouped_mean["threshold_label"] = scores.apply(lambda x: "Negative" if x < 0.5 else "Positive")

    def count_labels(column, ordered_labels):
        # Count from the labels themselves so the tables always agree with the CSV.
        tally = df1_grouped_mean[column].value_counts()
        return [int(tally.get(label, 0)) for label in ordered_labels]

    min_max_results = count_labels("min_max_label", labels_4)
    percentile_results = count_labels("percentile_label", labels_4)
    threshold_results = count_labels("threshold_label", labels_2)

    # Table 1: Min/Max and Percentile
    minmax_percentile_df = pd.DataFrame({
        "Label": labels_4,
        "Min_Max (Using Mean)": min_max_results,
        "Percentile (75-50-25)": percentile_results
    })

    # Table 2: Threshold 0.5
    threshold_df = pd.DataFrame({
        "Label": labels_2,
        "Threshold (0.5)": threshold_results
    })

    print(minmax_percentile_df,"\n", threshold_df)

    # Only touch the file extension: str.replace(".csv", ...) also rewrote any
    # ".csv" inside directory names and returned the input path unchanged (so
    # the input was overwritten) when the name had no ".csv" at all.
    root, ext = os.path.splitext(csv)
    output_csv = f"{root}_labeled{ext}"
    df1_grouped_mean.to_csv(output_csv, index=False)
    print(f"Updated CSV written to: {output_csv}")

    return min_max_results, percentile_results, threshold_results

def show_evaluation_results(min_max_all, percentile_all, threshold_all):
    """Print per-label mean and standard deviation across experiments.

    Each argument is an array-like with one row per experiment and one column
    per label (four labels for ``min_max_all`` and ``percentile_all``, two for
    ``threshold_all``). Mean and ``np.std`` are taken over axis 0 and printed
    as three tables. Nothing is returned.
    """
    four_labels = ["StrongNegative", "PseudoNegative", "PseudoPositive", "StrongPositive"]
    two_labels = ["Negative", "Positive"]

    def summarise(values, labels, prefix):
        # One table: label, mean over experiments, std over experiments.
        return pd.DataFrame({
            "Label": labels,
            f"{prefix} Mean": np.mean(values, axis=0),
            f"{prefix} Std": np.std(values, axis=0),
        })

    # Build every table before printing anything, then print in a fixed order.
    sections = [
        ("=== Min-Max Results ===", summarise(min_max_all, four_labels, "Min-Max")),
        ("\n=== Percentile Results ===", summarise(percentile_all, four_labels, "Percentile")),
        ("\n=== Threshold Results ===", summarise(threshold_all, two_labels, "Threshold")),
    ]
    for heading, table in sections:
        print(heading)
        print(table)

In [9]:
# Widen pandas' console output so the summary tables are not wrapped.
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', None)

# Label the CSV that csv_output_expN currently points at, using experiment N's
# spy statistics as cutoffs.
min_max_results_exp1, percentile_results_exp1, threshold_results_exp1 = apply_labeling_from_spies(
    csv=csv_output_exp1,
    mean_score=mean_score_exp1,
    percentile_50=percentile_50_exp1,
    percentile_25=percentile_25_exp1,
    percentile_75=percentile_75_exp1,
    min_score=min_score_exp1,
    max_score=max_score_exp1,
)
min_max_results_exp2, percentile_results_exp2, threshold_results_exp2 = apply_labeling_from_spies(
    csv=csv_output_exp2,
    mean_score=mean_score_exp2,
    percentile_50=percentile_50_exp2,
    percentile_25=percentile_25_exp2,
    percentile_75=percentile_75_exp2,
    min_score=min_score_exp2,
    max_score=max_score_exp2,
)
min_max_results_exp3, percentile_results_exp3, threshold_results_exp3 = apply_labeling_from_spies(
    csv=csv_output_exp3,
    mean_score=mean_score_exp3,
    percentile_50=percentile_50_exp3,
    percentile_25=percentile_25_exp3,
    percentile_75=percentile_75_exp3,
    min_score=min_score_exp3,
    max_score=max_score_exp3,
)
min_max_results_exp4, percentile_results_exp4, threshold_results_exp4 = apply_labeling_from_spies(
    csv=csv_output_exp4,
    mean_score=mean_score_exp4,
    percentile_50=percentile_50_exp4,
    percentile_25=percentile_25_exp4,
    percentile_75=percentile_75_exp4,
    min_score=min_score_exp4,
    max_score=max_score_exp4,
)
min_max_results_exp5, percentile_results_exp5, threshold_results_exp5 = apply_labeling_from_spies(
    csv=csv_output_exp5,
    mean_score=mean_score_exp5,
    percentile_50=percentile_50_exp5,
    percentile_25=percentile_25_exp5,
    percentile_75=percentile_75_exp5,
    min_score=min_score_exp5,
    max_score=max_score_exp5,
)

# One row per experiment, one column per label.
min_max_all = np.array(
    [min_max_results_exp1, min_max_results_exp2, min_max_results_exp3, min_max_results_exp4, min_max_results_exp5]
)
percentile_all = np.array(
    [percentile_results_exp1, percentile_results_exp2, percentile_results_exp3, percentile_results_exp4, percentile_results_exp5]
)
threshold_all = np.array(
    [threshold_results_exp1, threshold_results_exp2, threshold_results_exp3, threshold_results_exp4, threshold_results_exp5]
)

show_evaluation_results(min_max_all, percentile_all, threshold_all)

770 is length of protein id's
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     1                     35
1  PseudoNegative                    20                     41
2  PseudoPositive                   133                     44
3  StrongPositive                     0                     34 
       Label  Threshold (0.5)
0  Negative                9
1  Positive              145
Updated CSV written to: ../../../Models/Cholesterol/3DCNN/3DCholesterolModels-st_exp1/NewResults/TestPositiveCaptureRates_labeled.csv
770 is length of protein id's
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     4                     26
1  PseudoNegative                    16                     44
2  PseudoPositive                   134                     45
3  StrongPositive                     0                     39 
       Label  Threshold (0.5)
0  Negative               10
1  Positive              14

In [10]:
# Score each experiment's combined unlabeled test set with that experiment's ensemble.
_TEST_UNLABELED_DIR_TEMPLATE = "../../../Data/SplitData/Cholesterol/cholesterol-grid-st_exp{exp}/Test/CombinedUnlabeled"
_TEST_UNLABELED_CSV_TEMPLATE = "../../../Models/Cholesterol/3DCNN/3DCholesterolModels-st_exp{exp}/NewResults/TestUnlabeledCaptureRates.csv"

test_unlabeled_exp1, test_unlabeled_exp2, test_unlabeled_exp3, test_unlabeled_exp4, test_unlabeled_exp5 = [
    _TEST_UNLABELED_DIR_TEMPLATE.format(exp=exp) for exp in range(1, 6)
]
# NOTE: this rebinds csv_output_exp1..5 (earlier cells pointed them at other
# result files). Cells that read these names see whichever assignment ran last,
# so run the notebook top to bottom.
csv_output_exp1, csv_output_exp2, csv_output_exp3, csv_output_exp4, csv_output_exp5 = [
    _TEST_UNLABELED_CSV_TEMPLATE.format(exp=exp) for exp in range(1, 6)
]

# Keep every experiment's result; previously each call overwrote one variable
# (misleadingly named spy_capture_rate), so only experiment 5's value survived.
test_unlabeled_capture_rates = {}
for exp, unlabeled_dir, unlabeled_csv, ensemble in zip(
    range(1, 6),
    (test_unlabeled_exp1, test_unlabeled_exp2, test_unlabeled_exp3, test_unlabeled_exp4, test_unlabeled_exp5),
    (csv_output_exp1, csv_output_exp2, csv_output_exp3, csv_output_exp4, csv_output_exp5),
    (models_exp1, models_exp2, models_exp3, models_exp4, models_exp5),
):
    test_unlabeled_capture_rates[exp] = evaluate_directory(unlabeled_dir, unlabeled_csv, ensemble)

# Backward compatibility: the original cell left experiment 5's value under this name.
spy_capture_rate = test_unlabeled_capture_rates[5]


Model Predictions Summary:
Model 1: Positives = 1811, Negatives = 2799
Model 2: Positives = 1791, Negatives = 2819
Model 3: Positives = 1846, Negatives = 2764
Model 4: Positives = 1707, Negatives = 2903
Model 5: Positives = 1906, Negatives = 2704
Model 6: Positives = 1679, Negatives = 2931
Model 7: Positives = 1830, Negatives = 2780
Model 8: Positives = 1660, Negatives = 2950
Model 9: Positives = 1754, Negatives = 2856
Model 10: Positives = 1727, Negatives = 2883
Model 11: Positives = 1755, Negatives = 2855
Model 12: Positives = 1811, Negatives = 2799
Model 13: Positives = 1720, Negatives = 2890
Model 14: Positives = 1666, Negatives = 2944
Model 15: Positives = 1855, Negatives = 2755
Model 16: Positives = 1767, Negatives = 2843
Model 17: Positives = 1819, Negatives = 2791
Model 18: Positives = 1766, Negatives = 2844
Model 19: Positives = 1766, Negatives = 2844
Model 20: Positives = 1717, Negatives = 2893
Model 21: Positives = 1796, Negatives = 2814
Model 22: Positives = 1768, Negative

In [11]:
# Widen pandas' console output so the summary tables are not wrapped.
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', None)

# Label the CSV that csv_output_expN currently points at, using experiment N's
# spy statistics as cutoffs.
min_max_results_exp1, percentile_results_exp1, threshold_results_exp1 = apply_labeling_from_spies(
    csv=csv_output_exp1,
    mean_score=mean_score_exp1,
    percentile_50=percentile_50_exp1,
    percentile_25=percentile_25_exp1,
    percentile_75=percentile_75_exp1,
    min_score=min_score_exp1,
    max_score=max_score_exp1,
)
min_max_results_exp2, percentile_results_exp2, threshold_results_exp2 = apply_labeling_from_spies(
    csv=csv_output_exp2,
    mean_score=mean_score_exp2,
    percentile_50=percentile_50_exp2,
    percentile_25=percentile_25_exp2,
    percentile_75=percentile_75_exp2,
    min_score=min_score_exp2,
    max_score=max_score_exp2,
)
min_max_results_exp3, percentile_results_exp3, threshold_results_exp3 = apply_labeling_from_spies(
    csv=csv_output_exp3,
    mean_score=mean_score_exp3,
    percentile_50=percentile_50_exp3,
    percentile_25=percentile_25_exp3,
    percentile_75=percentile_75_exp3,
    min_score=min_score_exp3,
    max_score=max_score_exp3,
)
# Fixed: the third target used to be misspelled `threshold_results_exp4z`, so
# `threshold_results_exp4` below still held the value left by an earlier cell
# and the experiment-4 row of `threshold_all` came from a different data set.
min_max_results_exp4, percentile_results_exp4, threshold_results_exp4 = apply_labeling_from_spies(
    csv=csv_output_exp4,
    mean_score=mean_score_exp4,
    percentile_50=percentile_50_exp4,
    percentile_25=percentile_25_exp4,
    percentile_75=percentile_75_exp4,
    min_score=min_score_exp4,
    max_score=max_score_exp4,
)
min_max_results_exp5, percentile_results_exp5, threshold_results_exp5 = apply_labeling_from_spies(
    csv=csv_output_exp5,
    mean_score=mean_score_exp5,
    percentile_50=percentile_50_exp5,
    percentile_25=percentile_25_exp5,
    percentile_75=percentile_75_exp5,
    min_score=min_score_exp5,
    max_score=max_score_exp5,
)

# One row per experiment, one column per label.
min_max_all = np.array([
    min_max_results_exp1,
    min_max_results_exp2,
    min_max_results_exp3,
    min_max_results_exp4,
    min_max_results_exp5
])

percentile_all = np.array([
    percentile_results_exp1,
    percentile_results_exp2,
    percentile_results_exp3,
    percentile_results_exp4,
    percentile_results_exp5
])

threshold_all = np.array([
    threshold_results_exp1,
    threshold_results_exp2,
    threshold_results_exp3,
    threshold_results_exp4,
    threshold_results_exp5
])

show_evaluation_results(min_max_all, percentile_all, threshold_all)

4610 is length of protein id's
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                   168                    721
1  PseudoNegative                   513                     95
2  PseudoPositive                   241                     57
3  StrongPositive                     0                     49 
       Label  Threshold (0.5)
0  Negative              562
1  Positive              360
Updated CSV written to: ../../../Models/Cholesterol/3DCNN/3DCholesterolModels-st_exp1/NewResults/TestUnlabeledCaptureRates_labeled.csv
4615 is length of protein id's
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                   347                    704
1  PseudoNegative                   334                     97
2  PseudoPositive                   242                     69
3  StrongPositive                     0                     53 
       Label  Threshold (0.5)
0  Negative              565
1  Positive             

In [12]:
# Score the same external positive set with every experiment's ensemble.
_IVAN_POSITIVE_DIR = "../../../Data/SplitData/Cholesterol/IvanTestSet/ivan-grid-st/positive"
_IVAN_CSV_TEMPLATE = "../../../Models/Cholesterol/3DCNN/3DCholesterolModels-st_exp{exp}/NewResults/TestIvanCaptureRates.csv"

# All five experiments read the same input directory; only the output CSV differs.
test_ivan_exp1 = test_ivan_exp2 = test_ivan_exp3 = test_ivan_exp4 = test_ivan_exp5 = _IVAN_POSITIVE_DIR
# NOTE: this rebinds csv_output_exp1..5 (earlier cells pointed them at other
# result files). Cells that read these names see whichever assignment ran last,
# so run the notebook top to bottom.
csv_output_exp1, csv_output_exp2, csv_output_exp3, csv_output_exp4, csv_output_exp5 = [
    _IVAN_CSV_TEMPLATE.format(exp=exp) for exp in range(1, 6)
]

# Keep every experiment's result; previously each call overwrote one variable
# (misleadingly named spy_capture_rate), so only experiment 5's value survived.
ivan_capture_rates = {}
for exp, ivan_dir, ivan_csv, ensemble in zip(
    range(1, 6),
    (test_ivan_exp1, test_ivan_exp2, test_ivan_exp3, test_ivan_exp4, test_ivan_exp5),
    (csv_output_exp1, csv_output_exp2, csv_output_exp3, csv_output_exp4, csv_output_exp5),
    (models_exp1, models_exp2, models_exp3, models_exp4, models_exp5),
):
    ivan_capture_rates[exp] = evaluate_directory(ivan_dir, ivan_csv, ensemble)

# Backward compatibility: the original cell left experiment 5's value under this name.
spy_capture_rate = ivan_capture_rates[5]


Model Predictions Summary:
Model 1: Positives = 52, Negatives = 5
Model 2: Positives = 52, Negatives = 5
Model 3: Positives = 50, Negatives = 7
Model 4: Positives = 54, Negatives = 3
Model 5: Positives = 52, Negatives = 5
Model 6: Positives = 49, Negatives = 8
Model 7: Positives = 50, Negatives = 7
Model 8: Positives = 49, Negatives = 8
Model 9: Positives = 49, Negatives = 8
Model 10: Positives = 49, Negatives = 8
Model 11: Positives = 52, Negatives = 5
Model 12: Positives = 52, Negatives = 5
Model 13: Positives = 48, Negatives = 9
Model 14: Positives = 53, Negatives = 4
Model 15: Positives = 50, Negatives = 7
Model 16: Positives = 51, Negatives = 6
Model 17: Positives = 52, Negatives = 5
Model 18: Positives = 52, Negatives = 5
Model 19: Positives = 52, Negatives = 5
Model 20: Positives = 52, Negatives = 5
Model 21: Positives = 47, Negatives = 10
Model 22: Positives = 50, Negatives = 7
Model 23: Positives = 52, Negatives = 5
Model 24: Positives = 51, Negatives = 6
Model 25: Positives 


Model Predictions Summary:
Model 1: Positives = 51, Negatives = 6
Model 2: Positives = 52, Negatives = 5
Model 3: Positives = 54, Negatives = 3
Model 4: Positives = 53, Negatives = 4
Model 5: Positives = 53, Negatives = 4
Model 6: Positives = 52, Negatives = 5
Model 7: Positives = 52, Negatives = 5
Model 8: Positives = 53, Negatives = 4
Model 9: Positives = 52, Negatives = 5
Model 10: Positives = 52, Negatives = 5
Model 11: Positives = 54, Negatives = 3
Model 12: Positives = 52, Negatives = 5
Model 13: Positives = 53, Negatives = 4
Model 14: Positives = 54, Negatives = 3
Model 15: Positives = 51, Negatives = 6
Model 16: Positives = 53, Negatives = 4
Model 17: Positives = 53, Negatives = 4
Model 18: Positives = 52, Negatives = 5
Model 19: Positives = 53, Negatives = 4
Model 20: Positives = 52, Negatives = 5
Model 21: Positives = 52, Negatives = 5
Model 22: Positives = 51, Negatives = 6
Model 23: Positives = 53, Negatives = 4
Model 24: Positives = 53, Negatives = 4
Model 25: Positives =

In [13]:
# Widen pandas' console output so the summary tables are not wrapped.
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', None)

# Label the CSV that csv_output_expN currently points at, using experiment N's
# spy statistics as cutoffs.
min_max_results_exp1, percentile_results_exp1, threshold_results_exp1 = apply_labeling_from_spies(
    csv=csv_output_exp1,
    mean_score=mean_score_exp1,
    percentile_50=percentile_50_exp1,
    percentile_25=percentile_25_exp1,
    percentile_75=percentile_75_exp1,
    min_score=min_score_exp1,
    max_score=max_score_exp1,
)
min_max_results_exp2, percentile_results_exp2, threshold_results_exp2 = apply_labeling_from_spies(
    csv=csv_output_exp2,
    mean_score=mean_score_exp2,
    percentile_50=percentile_50_exp2,
    percentile_25=percentile_25_exp2,
    percentile_75=percentile_75_exp2,
    min_score=min_score_exp2,
    max_score=max_score_exp2,
)
min_max_results_exp3, percentile_results_exp3, threshold_results_exp3 = apply_labeling_from_spies(
    csv=csv_output_exp3,
    mean_score=mean_score_exp3,
    percentile_50=percentile_50_exp3,
    percentile_25=percentile_25_exp3,
    percentile_75=percentile_75_exp3,
    min_score=min_score_exp3,
    max_score=max_score_exp3,
)
min_max_results_exp4, percentile_results_exp4, threshold_results_exp4 = apply_labeling_from_spies(
    csv=csv_output_exp4,
    mean_score=mean_score_exp4,
    percentile_50=percentile_50_exp4,
    percentile_25=percentile_25_exp4,
    percentile_75=percentile_75_exp4,
    min_score=min_score_exp4,
    max_score=max_score_exp4,
)
min_max_results_exp5, percentile_results_exp5, threshold_results_exp5 = apply_labeling_from_spies(
    csv=csv_output_exp5,
    mean_score=mean_score_exp5,
    percentile_50=percentile_50_exp5,
    percentile_25=percentile_25_exp5,
    percentile_75=percentile_75_exp5,
    min_score=min_score_exp5,
    max_score=max_score_exp5,
)

# One row per experiment, one column per label.
min_max_all = np.array(
    [min_max_results_exp1, min_max_results_exp2, min_max_results_exp3, min_max_results_exp4, min_max_results_exp5]
)
percentile_all = np.array(
    [percentile_results_exp1, percentile_results_exp2, percentile_results_exp3, percentile_results_exp4, percentile_results_exp5]
)
threshold_all = np.array(
    [threshold_results_exp1, threshold_results_exp2, threshold_results_exp3, threshold_results_exp4, threshold_results_exp5]
)

show_evaluation_results(min_max_all, percentile_all, threshold_all)

57 is length of protein id's
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     1                     15
1  PseudoNegative                    13                     14
2  PseudoPositive                    43                     13
3  StrongPositive                     0                     15 
       Label  Threshold (0.5)
0  Negative                5
1  Positive               52
Updated CSV written to: ../../../Models/Cholesterol/3DCNN/3DCholesterolModels-st_exp1/NewResults/TestIvanCaptureRates_labeled.csv
57 is length of protein id's
            Label  Min_Max (Using Mean)  Percentile (75-50-25)
0  StrongNegative                     1                     13
1  PseudoNegative                    11                     10
2  PseudoPositive                    45                     17
3  StrongPositive                     0                     17 
       Label  Threshold (0.5)
0  Negative                4
1  Positive               53
Upda