In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader
import glob
import matplotlib.pyplot as plt
import os
import csv

# Define the 2D CNN model in PyTorch
class CNN2D(nn.Module):
    """Three-stage 2D convolutional classifier that returns class probabilities.

    Each stage is a 3x3 convolution (padding 1) followed by ReLU and a 2x2 max
    pool; the stages produce 32, 64 and 128 channels. The flattened features go
    through a 128-unit hidden layer, dropout (p=0.5) and a final linear layer,
    and softmax is applied along dim 1.

    Args:
        input_channels: Number of channels of the input tensor.
        num_classes: Number of output classes.

    Note:
        ``fc1`` is sized for exactly ``128 * 4 * 27`` flattened features, so the
        feature map leaving the third pool must be 4 x 27 per channel.
    """

    def __init__(self, input_channels, num_classes):
        super().__init__()
        # Attribute names and creation order are deliberately unchanged: they
        # define the state_dict keys used by saved checkpoints.
        self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=128 * 4 * 27, out_features=128)  # Adjust based on input size
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features=128, out_features=num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return softmax probabilities (along dim 1) for the input batch ``x``."""
        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
        )
        # conv -> ReLU -> pool, three times over.
        for conv, pool in stages:
            x = pool(torch.relu(conv(x)))

        hidden = torch.relu(self.fc1(self.flatten(x)))
        logits = self.fc2(self.dropout(hidden))
        return self.softmax(logits)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Ensemble of loaded classifiers, in checkpoint order (model_bin_1 ... model_bin_k).
models = []

# Number of checkpoints to load.
k = 50

for i in range(1, k + 1):
    # Fresh network with the architecture the checkpoints were saved from.
    model = CNN2D(input_channels=1, num_classes=2).to(device)

    model_path = f"CholesterolModel/model_bin_{i}.pth"  # Update with correct path if needed

    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state_dict needs; it avoids executing arbitrary pickled
    # code from the checkpoint file and silences torch.load's FutureWarning.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)

    # Evaluation mode: disables dropout for inference.
    model.eval()

    models.append(model)


  model.load_state_dict(torch.load(model_path, map_location=device))


In [8]:
import numpy as np

def evaluate_file(model, file_path):
    """Run a single saved ``.npy`` array through ``model`` and summarise the output.

    The array is loaded with ``np.load``, converted to a float32 tensor, given
    two leading singleton axes and moved to the module-level ``device`` before
    the forward pass (executed under ``torch.no_grad()``).

    Args:
        model: Callable taking the prepared tensor and returning a 2-D tensor
            of scores (presumably softmax probabilities -- confirm against the
            model definition).
        file_path: Path to the ``.npy`` file.

    Returns:
        Tuple ``(predicted_class, confidence, positive_class_prob)``:
        the argmax along dim 1, the maximum value of the whole output tensor,
        and the value at ``output[0, 1]``.
    """
    # [None, None] prepends two size-1 axes (same as unsqueeze(0) twice).
    batch = torch.tensor(np.load(file_path), dtype=torch.float32)[None, None].to(device)

    with torch.no_grad():
        scores = model(batch)

    # try different thresholds and save predictions as well into excel sheet
    return (
        torch.argmax(scores, dim=1).item(),
        torch.max(scores).item(),
        scores[0, 1].item(),
    )

# # Example usage:
# file_path = "path_to_your_file.npy"  # Update with the actual file path
# pred_class, conf, pos_prob = evaluate_file(model, file_path)
# print(f"Predicted Class: {pred_class}, Confidence: {conf:.4f}, P(class 1): {pos_prob:.4f}")


In [9]:
import glob

def evaluate_directory(model, directory):
    """Evaluate every ``*.npy`` file directly inside ``directory`` with ``model``.

    Args:
        model: Model forwarded unchanged to ``evaluate_file``.
        directory: Directory whose ``*.npy`` files are evaluated (non-recursive).

    Returns:
        Tuple ``(results, false_negative_files)`` where ``results`` maps each
        file path to a dict with keys ``"Predicted Class"``, ``"Confidence"``
        and ``"Probability"``, and ``false_negative_files`` lists the paths
        whose predicted class is 0. The list name assumes every file in the
        directory is a true positive -- confirm for the directory being passed.
    """
    # Adjust the glob pattern as needed.
    outcomes = {
        npy_path: evaluate_file(model, npy_path)
        for npy_path in glob.glob(f"{directory}/*.npy")
    }

    results = {
        npy_path: {"Predicted Class": label, "Confidence": top_prob, "Probability": pos_prob}
        for npy_path, (label, top_prob, pos_prob) in outcomes.items()
    }
    false_negative_files = [
        npy_path for npy_path, entry in results.items() if entry["Predicted Class"] == 0
    ]
    return results, false_negative_files

# Evaluate every model of the ensemble on the spy files and report, per model,
# how many files were predicted as class 1 / class 0 and how many of those
# predictions were below the confidence threshold.
directory_path = "../../../Data/SplitData/Cholesterol/cholesterol-graph/Spies"  # Update with your validation directory
print(f"{'ModelID':<10}{'#ofPos':<10}{'#ofNeg':<10}{'UnconfPos':<12}{'UnconfNeg':<12}{'SpyCaptureRate':<12}")

# Predictions whose top score is below this value are counted as "unconfident".
CONFIDENCE_THRESHOLD = 0.85

# file path -> sum over all models of the class-1 probability for that file
file_labels = {}
file_raw_softmax = {}

model_id = 1
total_false_negative_files = []
for model in models:
    evaluation_results, false_negative_files = evaluate_directory(model, directory_path)
    total_false_negative_files.extend(false_negative_files)

    positives_count = 0
    unconfident_positive_count = 0
    false_negative_count = 0
    unconfident_negative_count = 0
    for file, result in evaluation_results.items():
        file_labels[file] = file_labels.get(file, 0) + result['Probability']

        is_unconfident = result['Confidence'] < CONFIDENCE_THRESHOLD
        if result['Predicted Class'] == 1:
            positives_count += 1
            if is_unconfident:
                unconfident_positive_count += 1
        elif result['Predicted Class'] == 0:
            false_negative_count += 1
            if is_unconfident:
                unconfident_negative_count += 1

    # Use the number of files actually evaluated instead of a hard-coded
    # directory size, so the rate stays correct if the directory changes.
    evaluated_count = len(evaluation_results)
    spy_capture_rate = positives_count / evaluated_count if evaluated_count else 0.0
    print(f"{model_id:<10}{positives_count:<10}{false_negative_count:<10}{unconfident_positive_count:<12}{unconfident_negative_count:<12}{spy_capture_rate:<12}")
    model_id += 1

overall_spy_capture_rate = 0

# Every file receives exactly one probability per model, so the per-file
# average divides by the number of models that were evaluated.
ensemble_size = len(models)

with open("spy_capture_rates.csv", "w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["filename", "average_score"])  # Header row

    print(f"\n{'FileName':<70}{'SpyCapture':<4}")
    for file, total_score in file_labels.items():
        average_score = total_score / ensemble_size
        overall_spy_capture_rate += average_score
        writer.writerow([file, average_score])
        print(f"{file:<70} {average_score:<4}")

file_count = len(file_labels)
print("Overall Spy Capture Rate:", (overall_spy_capture_rate / file_count) if file_count else 0.0)

# Next steps / notes:
# - Unlabeled files with a capture rate greater than 0.97: treat as potential positives.
# - Less than 0.06: potential negatives; anything in between stays unlabeled.
# - Greater than 0.48 and less than 0.97: likely positive, but still unlabeled.
# - Less than 0.48 and greater than 0.06: likely negative, but still unlabeled.
# - Take 6 colors (3 for positive and 3 for unlabeled) that are close to the median
#   (one median from each group) in the 4 groups and do a PCA plot or t-SNE plot.
# - For each category, mark the dataset that it comes from.
# - 2 PCAs: one for the adjacency matrix and one for the result.


ModelID   #ofPos    #ofNeg    UnconfPos   UnconfNeg   SpyCaptureRate
1         93        61        4           6           0.6038961038961039
2         85        69        5           3           0.551948051948052
3         100       54        8           4           0.6493506493506493
4         88        66        5           4           0.5714285714285714
5         90        64        8           7           0.5844155844155844
6         92        62        3           3           0.5974025974025974
7         82        72        4           5           0.5324675324675324
8         93        61        6           6           0.6038961038961039
9         100       54        2           7           0.6493506493506493
10        80        74        5           3           0.5194805194805194
11        102       52        3           4           0.6623376623376623
12        87        67        3           4           0.564935064935065
13        101       53        2           1           0.6

In [10]:
# Getting results for the cholesterol Test/Positive files: evaluate every model
# of the ensemble and report per-model prediction counts plus the per-file
# average class-1 probability.
file_labels = {}  # file path -> sum over all models of the class-1 probability
directory_path = "../../../Data/SplitData/Cholesterol/cholesterol-graph/Test/Positive"
print(f"{'ModelID':<10}{'#ofPos':<10}{'#ofNeg':<10}{'UnconfPos':<12}{'UnconfNeg':<12}{'PositiveRatio':<12}")

# Predictions whose top score is below this value are counted as "unconfident".
CONFIDENCE_THRESHOLD = 0.85

model_id = 1
total_false_negative_files = []
for model in models:
    evaluation_results, false_negative_files = evaluate_directory(model, directory_path)
    total_false_negative_files.extend(false_negative_files)

    positives_count = 0
    unconfident_positive_count = 0
    false_negative_count = 0
    unconfident_negative_count = 0
    for file, result in evaluation_results.items():
        file_labels[file] = file_labels.get(file, 0) + result['Probability']

        is_unconfident = result['Confidence'] < CONFIDENCE_THRESHOLD
        if result['Predicted Class'] == 1:
            positives_count += 1
            if is_unconfident:
                unconfident_positive_count += 1
        elif result['Predicted Class'] == 0:
            false_negative_count += 1
            if is_unconfident:
                unconfident_negative_count += 1

    # Use the number of files actually evaluated instead of a hard-coded
    # directory size, so the ratio stays correct if the directory changes.
    evaluated_count = len(evaluation_results)
    positive_ratio = positives_count / evaluated_count if evaluated_count else 0.0
    print(f"{model_id:<10}{positives_count:<10}{false_negative_count:<10}{unconfident_positive_count:<12}{unconfident_negative_count:<12}{positive_ratio:<12}")
    model_id += 1

print(f"\n{'FileName':<100}{'Probabilities':<20}")

overall_spy_capture_rate = 0

# Every file receives exactly one probability per model, so the per-file
# average divides by the number of models that were evaluated.
ensemble_size = len(models)

with open("test_positive_capture_rates.csv", "w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["filename", "average_score"])  # Header row

    for file, total_score in file_labels.items():
        average_score = total_score / ensemble_size
        overall_spy_capture_rate += average_score
        writer.writerow([file, average_score])

        print(f"{file:<100} {average_score:<20}")

file_count = len(file_labels)
print("Overall Positive Capture Rate:", (overall_spy_capture_rate / file_count) if file_count else 0.0)

# TODO: make a PCA cluster of the adjacency matrix for each dataset


ModelID   #ofPos    #ofNeg    UnconfPos   UnconfNeg   PositiveRatio
1         93        61        5           11          0.6038961038961039
2         93        61        5           12          0.6038961038961039
3         99        55        7           11          0.6428571428571429
4         103       51        10          8           0.6688311688311688
5         102       52        8           5           0.6623376623376623
6         107       47        8           7           0.6948051948051948
7         97        57        7           10          0.6298701298701299
8         96        58        5           8           0.6233766233766234
9         104       50        4           8           0.6753246753246753
10        111       43        6           7           0.7207792207792207
11        123       31        7           8           0.7987012987012987
12        103       51        6           8           0.6688311688311688
13        117       37        7           8           0.

In [11]:
# Getting results for the cholesterol Test/Unlabeled files: evaluate every model
# of the ensemble and report per-model prediction counts plus the per-file
# average class-1 probability.
file_labels = {}  # file path -> sum over all models of the class-1 probability
directory_path = "../../../Data/SplitData/Cholesterol/cholesterol-graph/Test/Unlabeled"
print(f"{'ModelID':<10}{'#ofPos':<10}{'#ofNeg':<10}{'UnconfPos':<12}{'UnconfNeg':<12}{'PositiveRatio':<12}")

# Predictions whose top score is below this value are counted as "unconfident".
CONFIDENCE_THRESHOLD = 0.85

model_id = 1
total_false_negative_files = []
for model in models:
    evaluation_results, false_negative_files = evaluate_directory(model, directory_path)
    total_false_negative_files.extend(false_negative_files)

    positives_count = 0
    unconfident_positive_count = 0
    false_negative_count = 0
    unconfident_negative_count = 0
    for file, result in evaluation_results.items():
        file_labels[file] = file_labels.get(file, 0) + result['Probability']

        is_unconfident = result['Confidence'] < CONFIDENCE_THRESHOLD
        if result['Predicted Class'] == 1:
            positives_count += 1
            if is_unconfident:
                unconfident_positive_count += 1
        elif result['Predicted Class'] == 0:
            false_negative_count += 1
            if is_unconfident:
                unconfident_negative_count += 1

    # Use the number of files actually evaluated instead of a hard-coded
    # directory size, so the ratio stays correct if the directory changes.
    evaluated_count = len(evaluation_results)
    positive_ratio = positives_count / evaluated_count if evaluated_count else 0.0
    print(f"{model_id:<10}{positives_count:<10}{false_negative_count:<10}{unconfident_positive_count:<12}{unconfident_negative_count:<12}{positive_ratio:<12}")
    model_id += 1

print(f"\n{'FileName':<100}{'Probabilities':<20}")

overall_spy_capture_rate = 0

# Every file receives exactly one probability per model, so the per-file
# average divides by the number of models that were evaluated.
ensemble_size = len(models)

with open("test_unlabeled_capture_rates.csv", "w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["filename", "average_score"])  # Header row

    for file, total_score in file_labels.items():
        average_score = total_score / ensemble_size
        overall_spy_capture_rate += average_score
        writer.writerow([file, average_score])

        print(f"{file:<100} {average_score:<20}")

file_count = len(file_labels)
print("Overall Positive Capture Rate:", (overall_spy_capture_rate / file_count) if file_count else 0.0)


ModelID   #ofPos    #ofNeg    UnconfPos   UnconfNeg   PositiveRatio
1         199       355       35          39          0.3592057761732852
2         155       399       24          22          0.27978339350180503
3         189       365       26          27          0.34115523465703973
4         180       374       22          15          0.3249097472924188
5         174       380       29          35          0.3140794223826715
6         174       380       23          26          0.3140794223826715
7         175       379       21          27          0.315884476534296
8         175       379       23          34          0.315884476534296
9         173       381       25          41          0.31227436823104693
10        215       339       27          31          0.388086642599278
11        249       305       28          27          0.44945848375451264
12        171       383       23          24          0.30866425992779783
13        223       331       29          22          