In [1]:
import sys
import os
from dotenv import load_dotenv
load_dotenv()
sys.path.append(os.getenv('BASE_DIR')) # this is the path to the git repo (ex. C:/Users/rupra/OneDrive/Documents/NGH/CNN-Architectures)
import random
from PIL import Image as pilImage
import cv2
from skimage.transform import rotate
import numpy as np
from sklearn.model_selection import KFold
import torch
from torch.utils.data import DataLoader, Dataset
from Efficientunet.efficientunet import get_efficientunet_b0
from torch.nn.functional import interpolate
from EnsembleUnet.evaluation import dice_coef, fpr, hausdorff_distance, tpr

class ImageMaskDataset(Dataset):
    def __init__(self, images, masks):
        self.images = images
        self.masks = masks

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        mask = self.masks[idx]
        return torch.tensor(image, dtype=torch.float32), torch.tensor(mask, dtype=torch.float32)


def dice_loss(scale=None):
    def fn(input, target):
        smooth = 1.

        if scale is not None:
            scaled = interpolate(input, scale_factor=scale, mode='bilinear', align_corners=False)
            iflat = scaled.view(-1)
        else:
            iflat = input.view(-1)

        tflat = target.view(-1)
        intersection = (iflat * tflat).sum()

        return 1 - ((2. * intersection + smooth) / (iflat.sum() + tflat.sum() + smooth))

    return fn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:

image_directory = os.getenv('ULTRASOUND_DIR')
mask_directory = os.getenv('ULTRASOUND_MASKS_DIR')
print(f"image directory: {image_directory}")
print(f"mask directory: {mask_directory}")

image_dataset = []  
mask_dataset = []
sliced_image_dataset = []
sliced_mask_dataset = []

SIZE = 1024

# Load and process images
images = os.listdir(image_directory)
for image_name in images:    
    if image_name.endswith('.png'):
        image = pilImage.open(os.path.join(image_directory, image_name))
        image = image.resize((SIZE, SIZE))
        image_dataset.append(np.array(image))

# Load and process masks
masks = os.listdir(mask_directory)
for image_name in masks:
    if image_name.endswith('.png'):
        mask = pilImage.open(os.path.join(mask_directory, image_name))
        mask = mask.resize((SIZE, SIZE))
        mask_dataset.append(np.array(mask))

# Process datasets
for i in range(len(image_dataset)):
    # Append the whole image and mask
    sliced_image_dataset.append(image_dataset[i])
    sliced_mask_dataset.append(mask_dataset[i])

    # # Random rotation
    # angle = random.randint(5, 10)
    # cw = random.choice([-1, 1])  # Randomly choose clockwise or counter-clockwise

    # # Contrast adjustment
    # adjust = random.choice([0, 1])
    # contrast = random.uniform(1, 2)  # Use float for more variety

    # if adjust:
    #     adjusted_image = cv2.convertScaleAbs(image_dataset[i], alpha=contrast, beta=0)
    #     rotated_image = rotate(adjusted_image, angle * cw)
    #     sliced_image_dataset.append(rotated_image)
    #     sliced_mask_dataset.append(rotate(mask_dataset[i], angle * cw))

# Normalize images (expand dimensions for model input)
sliced_image_dataset = np.array(sliced_image_dataset, dtype=np.float32) / 255.0 # Convert to float32 for normalization
print(f"normalization sanity check: {np.all((sliced_image_dataset >= 0) & (sliced_image_dataset <= 1))}")
sliced_image_dataset = np.expand_dims(sliced_image_dataset, axis=-1)  # Add channel dimension

# Rescale masks to 0 to 1 and ensure they are float32
sliced_mask_dataset = np.array(sliced_mask_dataset, dtype=np.float32) / 255.0  # Normalize masks
print(f"normalization sanity check: {np.all((sliced_mask_dataset >= 0) & (sliced_mask_dataset <= 1))}")
sliced_mask_dataset = np.expand_dims(sliced_mask_dataset, axis=-1)  # Add channel dimension if necessary

# Now permute the datasets to the format (batch_size, channels, height, width)
sliced_image_dataset = np.transpose(sliced_image_dataset, (0, 3, 1, 2))  # Change to (batch_size, 1, height, width)
sliced_mask_dataset = np.transpose(sliced_mask_dataset, (0, 3, 1, 2))  # Change to (batch_size, 1, height, width)

# Check shapes
print("Image dataset shape:", sliced_image_dataset.shape)  # Should be (batch_size, 1, height, width)
print("Mask dataset shape:", sliced_mask_dataset.shape)    # Should be (batch_size, 1, height, width)

print(f'Processed {len(sliced_image_dataset)} images and {len(sliced_mask_dataset)} masks.')

base_dir = os.getcwd()
print(f"base dir: {base_dir}")
f = open(f"{base_dir}/optimized3_output.txt", "a")
print("sliced image dataset: ", len(sliced_image_dataset), file=f)
f.close()


IMG_HEIGHT = sliced_image_dataset.shape[1]
IMG_WIDTH  = sliced_image_dataset.shape[2]
IMG_CHANNELS = sliced_image_dataset.shape[3]
print(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)

dataset = ImageMaskDataset(sliced_image_dataset, sliced_mask_dataset)



image directory: C:\Users\Jeff\Desktop\ultrasound\Ours\Data\Ultrasound_Images
mask directory: C:\Users\Jeff\Desktop\ultrasound\Ours\Data\Ultrasound_Masks
[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   

IndexError: list index out of range

In [3]:
n_splits = 3

kf = KFold(n_splits=n_splits, shuffle=True, random_state=0)

n_epochs = 10
batch_size = 4

In [4]:
train_loaders = []
test_loaders = []
alpha = 0.5

for fold, (train_idx, test_idx) in enumerate(kf.split(dataset)):
    train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
    test_sampler = torch.utils.data.SubsetRandomSampler(test_idx)

    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
    test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)
    train_loaders.append(train_loader)
    test_loaders.append(test_loader)
    
    efficientnet_model = get_efficientunet_b0(out_channels=1, concat_input=False, pretrained=False).to(device)

    print("loaded models")
    optimizer_efficientnet = torch.optim.Adam(efficientnet_model.parameters(), lr=1e-4)  # Increased learning rate
    criterion = dice_loss()

    # Training
    for epoch in range(n_epochs):
        efficientnet_model.train()
        running_loss = 0.0
        for images, masks in train_loader:
            images, masks = images.to(device), masks.to(device)

            optimizer_efficientnet.zero_grad()
            outputs_efficientnet = torch.clamp(efficientnet_model(images), 0, 1)
            efficientnet_loss = criterion(outputs_efficientnet, masks)
            efficientnet_loss.backward()
            optimizer_efficientnet.step()

            running_loss += efficientnet_loss.item()

        print(f'Fold [{fold+1}/{n_splits}], Epoch [{epoch+1}/{n_epochs}], Loss: {running_loss/(len(train_loader))}')

        # Evaluation after each 10 epochs
        if (epoch + 1) % 5 == 0:
            efficientnet_model.eval()
            epoch_dice_scores = []
            epoch_hausdorff_distances = []
            with torch.no_grad():
                for images, masks in test_loader:
                    images, masks = images.to(device), masks.to(device)
                    outputs = (efficientnet_model(images) > alpha).float().cpu().numpy()
                    
                    for i in range(outputs.shape[0]):
                        epoch_hausdorff_distances.append(hausdorff_distance(masks[i].cpu().numpy(), outputs[i]))
                        epoch_dice_scores.append(dice_coef(masks[i].cpu().numpy(), outputs[i]))

                avg_epoch_dice_coef = np.mean(epoch_dice_scores)
                avg_epoch_hausdorff = np.mean(epoch_hausdorff_distances)

            print(f'Fold [{fold+1}/{n_splits}], Epoch [{epoch+1}/{n_epochs}] - Average Dice Coef: {avg_epoch_dice_coef}, Average Hausdorff: {avg_epoch_hausdorff}')

    torch.save(efficientnet_model.state_dict(), os.path.join("./saved_models", f'efficientnet_model_fold_{fold+1}.pth'))
    
    # Evaluation
    efficientnet_model.eval()  
    dice_scores = []
    hausdorff_distances = []
    tprs = []
    fprs = []
    alpha = 0.5
    with torch.no_grad():
        for images, masks in test_loader:
            images, masks = images.to(device), masks.to(device)
            # print(efficientnet_model(images).float().cpu().numpy().mean().item())
            efficientnet_outputs = (efficientnet_model(images) > alpha).float().cpu().numpy()
            unique, counts = np.unique(efficientnet_outputs, return_counts=True)
            # print(f"Counts of unique values in efficientnet_outputs: {dict(zip(unique, counts))}")
            
            for i in range(efficientnet_outputs.shape[0]):
                y_true = masks[i].cpu().numpy()
                # print(f"y_true shape: {y_true.shape}")
                # print(f"efficientnet_outputs shape: {efficientnet_outputs[i].shape}")
                hausdorff_distances.append(hausdorff_distance(y_true, efficientnet_outputs[i]))
                dice_scores.append(dice_coef(y_true, efficientnet_outputs[i]))
                tprs.append(tpr(y_true, efficientnet_outputs[i]))
                fprs.append(fpr(y_true, efficientnet_outputs[i]))

    avg_dice_coef = np.mean(dice_scores)
    avg_tpr = np.mean(tprs)
    avg_hausdorff = np.mean(hausdorff_distances)
    avg_fpr = np.mean(fprs)

    print(f'Fold [{fold+1}/{n_splits}] - Average Dice Coef: {avg_dice_coef}, Average TPR: {avg_tpr}, Average FPR: {avg_fpr}, Average Hausdorff: {avg_hausdorff}')
    f = open(os.getenv('OPTIMIZED_OUTPUT_FILE'), "a")
    print(f'Fold [{fold+1}/{n_splits}] - Average Dice Coef: {avg_dice_coef}, Average TPR: {avg_tpr}, Average FPR: {avg_fpr}, Average Hausdorff: {avg_hausdorff}', file=f)
    f.close()

loaded models
Fold [1/3], Epoch [1/10], Loss: 0.5816128328442574
Fold [1/3], Epoch [2/10], Loss: 0.5006431083787571
Fold [1/3], Epoch [3/10], Loss: 0.46376766196706076
Fold [1/3], Epoch [4/10], Loss: 0.425171669233929
Fold [1/3], Epoch [5/10], Loss: 0.38776693222197617
Fold [1/3], Epoch [5/10] - Average Dice Coef: 0.33457350730895996, Average Hausdorff: inf
Fold [1/3], Epoch [6/10], Loss: 0.35088942132212897
Fold [1/3], Epoch [7/10], Loss: 0.31500781937079
Fold [1/3], Epoch [8/10], Loss: 0.28300589458508923
Fold [1/3], Epoch [9/10], Loss: 0.2527675425464457
Fold [1/3], Epoch [10/10], Loss: 0.22114550051364032
Fold [1/3], Epoch [10/10] - Average Dice Coef: 0.9064556360244751, Average Hausdorff: 99.11360833563154
Fold [1/3] - Average Dice Coef: 0.9064556360244751, Average TPR: 0.9566154479980469, Average FPR: 0.019108718261122704, Average Hausdorff: 99.11360833563153
loaded models
Fold [2/3], Epoch [1/10], Loss: 0.5626294883814725
Fold [2/3], Epoch [2/10], Loss: 0.4556665549224073
Fold [

## More Epochs

In [5]:
torch.save(train_loaders, 'train_loaders.pth')
torch.save(test_loaders, 'test_loaders.pth')

def load_data_loaders(train_path='train_loaders.pth', test_path='test_loaders.pth'):
    train_loaders = torch.load(train_path)
    test_loaders = torch.load(test_path)
    return train_loaders, test_loaders



In [6]:
train_loaders, test_loaders = load_data_loaders()
n_epochs = 200
additional_epochs = 100
alpha = 0.5

for fold, (train_idx, test_idx) in enumerate(kf.split(dataset)):
    train_loader = train_loaders[fold]
    test_loader = test_loaders[fold]

    model = get_efficientunet_b0(out_channels=1, concat_input=False, pretrained=False).to(device)
    model.load_state_dict(torch.load(f'./model_fold_{fold+1}_continued.pth'))
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-4)
    criterion = dice_loss()

    # Continue Training
    for epoch in range(n_epochs, n_epochs + additional_epochs):
        model.train()
        running_loss = 0.0
        for images, masks in train_loader:
            images, masks = images.to(device), masks.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            outputs = torch.clamp(outputs, 0, 1)
            loss = criterion(outputs, masks)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            
        print(f'Fold [{fold+1}/{n_splits}], Epoch [{epoch+1}/{n_epochs}] - Loss: {running_loss / len(train_loader)}')
        
        # Evaluation after each 25 epoch
        if (epoch + 1) % 25 == 0:
            model.eval()
            epoch_dice_scores = []
            epoch_hausdorff_distances = []
            with torch.no_grad():
                for images, masks in test_loader:
                    images, masks = images.to(device), masks.to(device)
                    outputs = (model(images) > alpha).float().cpu().numpy()
                    
                    for i in range(outputs.shape[0]):
                        epoch_hausdorff_distances.append(hausdorff_distance(masks[i].cpu().numpy(), outputs[i]))
                        epoch_dice_scores.append(dice_coef(masks[i].cpu().numpy(), outputs[i]))

            avg_epoch_dice_coef = np.mean(epoch_dice_scores)
            avg_epoch_hausdorff = np.mean(epoch_hausdorff_distances)

            print(f'Fold [{fold+1}/{n_splits}], Epoch [{epoch+1}/{n_epochs + additional_epochs}] - Average Dice Coef: {avg_epoch_dice_coef}, Average Hausdorff: {avg_epoch_hausdorff}')

    model_save_path = os.path.join("./", f'model_fold_{fold+1}_continued.pth')
    torch.save(model.state_dict(), model_save_path)
    print(f'Model saved at {model_save_path}')

    # Evaluation
    model.eval()
    dice_scores = []
    hausdorff_distances = []
    tprs = []
    fprs = []
    with torch.no_grad():
        for images, masks in test_loader:
            images, masks = images.to(device), masks.to(device)
            outputs = (model(images) > alpha).float().cpu().numpy()
            
            for i in range(outputs.shape[0]):
                hausdorff_distances.append(hausdorff_distance(masks[i].cpu().numpy(), outputs[i]))
                dice_scores.append(dice_coef(masks[i].cpu().numpy(), outputs[i]))
                tprs.append(tpr(masks[i].cpu().numpy(), outputs[i]))
                fprs.append(fpr(masks[i].cpu().numpy(), outputs[i]))

    avg_dice_coef = np.mean(dice_scores)
    avg_tpr = np.mean(tprs)
    avg_hausdorff = np.mean(hausdorff_distances)
    avg_fpr = np.mean(fprs)

    print(f'Fold [{fold+1}/{n_splits}] - Average Dice Coef: {avg_dice_coef}, Average TPR: {avg_tpr}, Average FPR: {avg_fpr}, Average Hausdorff: {avg_hausdorff}')
    f = open(f"C:/Users/Jeff/Desktop/ultrasound/CNN-Architectures-mobileunet-experimentation/MobileUNet/optimized3_output.txt", "a")
    print(f'Fold [{fold+1}/{n_splits}] - Average Dice Coef: {avg_dice_coef}, Average TPR: {avg_tpr}, Average FPR: {avg_fpr}, Average Hausdorff: {avg_hausdorff}', file=f)
    f.close()

  train_loaders = torch.load(train_path)
  test_loaders = torch.load(test_path)
  model.load_state_dict(torch.load(f'./model_fold_{fold+1}_continued.pth'))


FileNotFoundError: [Errno 2] No such file or directory: './model_fold_1_continued.pth'

In [4]:
import time
import matplotlib.pyplot as plt

def load_data_loaders(train_path='train_loaders.pth', test_path='test_loaders.pth'):
    train_loaders = torch.load(train_path)
    test_loaders = torch.load(test_path)
    return train_loaders, test_loaders

train_loaders, test_loaders = load_data_loaders()

model = get_efficientunet_b0(out_channels=1, concat_input=False, pretrained=False).to(device)
model.load_state_dict(torch.load('./saved_models/efficientnet_model_fold_1.pth'))
model.eval()
test_loader = test_loaders[0]
alpha = 0.5
batch_size = 4

dice_scores = []
hausdorff_distances = []
tprs = []
fprs = []
time_taken = []

with torch.no_grad():
    for j, (images, masks) in enumerate(test_loader):
        if j >= 30:
            break
        images, masks = images.to(device), masks.to(device)
        start_time = time.time()
        outputs = (model(images) > alpha).float().cpu().numpy()
        end_time = time.time()
        time_taken.append(end_time - start_time)
        print(f"Prediction time for batch {j} with batch size {batch_size}: {end_time - start_time} seconds")
        
        for i in range(outputs.shape[0]):
            dice_scores.append(dice_coef(masks[i].cpu().numpy(), outputs[i]))
            tprs.append(tpr(masks[i].cpu().numpy(), outputs[i]))
            fprs.append(fpr(masks[i].cpu().numpy(), outputs[i]))
            hausdorff_distances.append(hausdorff_distance(masks[i].cpu().numpy(), outputs[i]))
            
            mask = masks[i].cpu().numpy().transpose(1, 2, 0)
            image = images[i].cpu().numpy().transpose(1, 2, 0)
            prediction = outputs[i].transpose(1, 2, 0)
            
            plt.figure(figsize=(16, 8))
            plt.subplot(141)
            plt.title('Testing Image')
            plt.imshow(image[:,:,0], cmap='gray')
            plt.subplot(142)
            plt.title('Testing Label')
            plt.imshow(mask[:,:,0], cmap='gray')
            plt.subplot(143)
            plt.title('Prediction on test image')
            plt.imshow(prediction, cmap='gray')
            plt.subplot(144)
            plt.title("Overlayed Images")
            plt.imshow(mask.astype(float) / np.max(mask), cmap='gray')
            plt.imshow(prediction, cmap='jet', alpha=0.5)
            plt.savefig(f'C:/Users/Jeff/Desktop/ultrasound/new/CNN-Architectures/FastSCNN/overlay/fold3_{j}_{i}.png')
            plt.close()
            
    
average_dice_coef = np.mean(dice_scores)
average_tpr = np.mean(tprs)
average_fpr = np.mean(fprs)
average_hausdorff = np.mean(hausdorff_distances)
average_time_taken = np.mean(time_taken)/batch_size

print(f'Average Dice Coef: {average_dice_coef}, Average TPR: {average_tpr}, Average FPR: {average_fpr}, Average Hausdorff Distance: {average_hausdorff}, Average Prediction Time: {average_time_taken}')

  train_loaders = torch.load(train_path)
  test_loaders = torch.load(test_path)
  model.load_state_dict(torch.load('./saved_models/efficientnet_model_fold_1.pth'))


Prediction time for batch 0 with batch size 4: 0.05099916458129883 seconds
Prediction time for batch 1 with batch size 4: 0.05200004577636719 seconds
Prediction time for batch 2 with batch size 4: 0.05200004577636719 seconds
Prediction time for batch 3 with batch size 4: 0.05500030517578125 seconds
Prediction time for batch 4 with batch size 4: 0.05399966239929199 seconds
Prediction time for batch 5 with batch size 4: 0.051000118255615234 seconds
Prediction time for batch 6 with batch size 4: 0.05200028419494629 seconds
Prediction time for batch 7 with batch size 4: 0.05200004577636719 seconds
Prediction time for batch 8 with batch size 4: 0.05300092697143555 seconds
Prediction time for batch 9 with batch size 4: 0.05200004577636719 seconds
Prediction time for batch 10 with batch size 4: 0.05299973487854004 seconds
Prediction time for batch 11 with batch size 4: 0.05200028419494629 seconds
Prediction time for batch 12 with batch size 4: 0.053000450134277344 seconds
Prediction time for 