In [1]:
import os
import json
import math

# ! pip install seaborn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sn

from tqdm import tqdm

from PIL import Image
from sklearn.metrics import confusion_matrix, f1_score

from skimage import morphology, color

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms
from torchvision.transforms.functional import pad, to_pil_image, to_tensor
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch.utils.tensorboard import SummaryWriter

from utils.utils import PartialErosion, VGG16Binary

## Load individual models

In [27]:
# densenet_10_5_layers_unlocked_w_erosion.pth
# vgg_from_scratch_w_erosion_v2.pth
# densenet_25_10_layers_unlocked_no_erosion.pth

In [2]:
# DenseNet model 1
model1 = models.densenet121(weights="DenseNet121_Weights.IMAGENET1K_V1")
# Modify the model for binary classification
num_ftrs = model1.classifier.in_features
model1.classifier = nn.Linear(num_ftrs, 1)
model1.load_state_dict(torch.load('model/densenet_10_5_layers_unlocked_no_erosion.pth'))

# DenseNet model 2
model2 = models.densenet121(weights="DenseNet121_Weights.IMAGENET1K_V1")
# Modify the model for binary classification
num_ftrs = model2.classifier.in_features
model2.classifier = nn.Linear(num_ftrs, 1)
model2.load_state_dict(torch.load('model/densenet_25_10_layers_unlocked_no_erosion.pth'))

# VGG16 model trained from scratch
model3 = VGG16Binary(input_shape=(3, 100, 200), num_classes=1)
model3.load_state_dict(torch.load('model/vgg_from_scratch_w_erosion_v2.pth'))

<All keys matched successfully>

In [11]:
dataset_path = "data/all_preprocessed_data/"

# Transforms pipeline for train and validation data loaders
preprocess = transforms.Compose([
    transforms.Resize([100, ]),
    transforms.Lambda(
        lambda img: pad(img, padding=(0, 0, max(0, 200 - img.width), max(0, 100 - img.height)), 
                                              fill=(255, 255, 255))),
    transforms.CenterCrop((100, 200)),
#   (erosion followed by !dilation is operated on the grayscale image)
    PartialErosion(iterations=2),
#     Skeletonize(),
    transforms.ToTensor(),

#     transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.Grayscale(num_output_channels=3),
#     transforms.Grayscale(),
#     transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
#     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Load the entire dataset (since we don't have a train and validation split in the )
dataset = ImageFolder(dataset_path, transform=preprocess)

In [29]:
# Define the percentage of samples for the validation set
val_split = 0.20  # 20% for validation

# Calculate the sizes of the train and validation sets
val_size = int(val_split * len(dataset))
train_size = len(dataset) - val_size

print("Total number of images: ", len(dataset), 
      "\nValidation images: ", val_size)

# Create indices for validation set
indices = list(range(len(dataset)))
val_indices = indices[train_size:]

# Create subset samplers for validation set
val_sampler = SubsetRandomSampler(val_indices)

batch_size = 64

# Create data loader validation set
val_loader = DataLoader(dataset, batch_size=batch_size, sampler=val_sampler)

Total number of images:  446380 
Validation images:  89276


In [30]:
# Move the model to the device (CPU or GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')
device

device(type='cuda')

In [31]:
# Generate Predictions

all_models = [model1, model2, model3]
all_predictions = []
all_labels = []

# store models on GPU
for i in range(len(all_models)):
    all_models[i] = all_models[i].to(device)
    all_predictions.append([])

# Generate predictions using the trained base models

with torch.no_grad():
    with tqdm(enumerate(val_loader), total=math.ceil(val_size/batch_size)) as pbar:
        
        for batch_idx, (images, labels) in pbar:
            images = images.to(device)
            labels = labels.float().unsqueeze(1).to(device)

            all_labels.extend(labels)

            for i in range(len(all_models)):
                all_models[i].eval()

                outputs = all_models[i](images)
                predicted = torch.round(torch.sigmoid(outputs))
                
                all_predictions[i].extend(predicted)

100%|██████████| 1395/1395 [17:44<00:00,  1.31it/s]


In [32]:
all_predictions[0]

[tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([1.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([1.], device='cuda:0'),
 tensor([1.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([1.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([1.], device='cuda:0'),
 tensor([1.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([1.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([1.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor([0.], device='cuda:0'),
 tensor(

In [33]:
all_predictiall_predictions = []

for item in all_predictions:
    all_predictions_cpu.append([tensor.cpu().detach().numpy() for tensor in item])

all_labels_cpu = [tensor.cpu().detach().numpy() for tensor in all_labels]

In [34]:
all_predictions_cpu[0]

[array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([1.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([1.], dtype=float32),
 array([1.], dtype=float32),
 array([0.], dtype=float32),
 array([1.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([1.], dtype=float32),
 array([1.], dtype=float32),
 array([0.], dtype=float32),
 array([1.], dtype=float32),
 array([0.], dtype=float32),
 array([1.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([1.], dtype=float32),
 array([0.], d

In [35]:
all_labels_cpu[0]

array([1.], dtype=float32)

In [52]:
# Create Meta-Training Set

# Create the meta-training set by concatenating predicted outputs and ground truth labels
meta_train_input = np.concatenate(all_predictions_cpu, axis=1)
meta_train_labels = all_labels_cpu

# Train the Meta-Model

# Define the meta-model architecture
class MetaModel(nn.Module):
    def __init__(self):
        super(MetaModel, self).__init__()
        self.fc1 = nn.Linear(in_features=3, out_features=10)
        self.fc2 = nn.Linear(in_features=10, out_features=15)
        self.fc3 = nn.Linear(in_features=15, out_features=5)
        self.fc4 = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
        x = self.fc1(x)
        x = nn.functional.relu(x)
        x = self.fc2(x)
        x = nn.functional.relu(x)
        x = self.fc3(x)
        x = nn.functional.relu(x)
        x = self.fc4(x)
        return x

In [69]:
class DeeperHybridModel(nn.Module):
    def __init__(self, num_numeric_features, num_classes):
        super(DeeperHybridModel, self).__init__()

        # Image input (assuming image of size 64x64, and grayscale)
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten()  # Flatten the 3D tensor output from CNN
        )

        # Numeric input
        self.fc1 = nn.Sequential(
            nn.Linear(num_numeric_features, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
        )

        # Combined input
        self.fc2 = nn.Sequential(
            nn.Linear(2048 + 128, 256),  # Adjust based on previous layer outputs
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
            nn.Sigmoid()
        )

    def forward(self, image_input, numeric_input):
        x1 = self.cnn(image_input)
        x2 = self.fc1(numeric_input)
        x = torch.cat((x1, x2), dim=1)  # Concatenate the output tensors along dimension 1
        x = self.fc2(x)
        return x

In [60]:
# Train the meta-model using the meta-training set
meta_model = MetaModel()

# Define the loss function and optimizer for the meta-model
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(meta_model.parameters(), lr=0.001)

meta_model.train()
inputs = torch.tensor(meta_train_input, dtype=torch.float32)
labels = torch.tensor(meta_train_labels, dtype=torch.float32)

num_epochs = 10

for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = meta_model(inputs)
    outputs = torch.round(torch.sigmoid(outputs))
    loss = criterion(outputs, labels)
    
    print("Epoch: ", epoch+1, " Loss: ", loss.item())
    
    loss.backward()
    optimizer.step()

Epoch:  1  Loss:  0.6931471824645996
Epoch:  2  Loss:  0.6931471824645996
Epoch:  3  Loss:  0.6931471824645996
Epoch:  4  Loss:  0.6931471824645996
Epoch:  5  Loss:  0.6931471824645996
Epoch:  6  Loss:  0.6931471824645996
Epoch:  7  Loss:  0.6931471824645996
Epoch:  8  Loss:  0.6931471824645996
Epoch:  9  Loss:  0.6931471824645996
Epoch:  10  Loss:  0.6931471824645996


In [54]:
torch.save(meta_model.state_dict(), 'model/meta_model_ann.pth')

In [63]:
# Validate and Test

# Generate predictions using the trained meta-model
meta_predictions_validation = torch.round(torch.sigmoid(meta_model(inputs)))
meta_predictions_test = meta_model(test_set_input)

# Evaluate the performance of the stacked model
validate(meta_predictions_validation, validation_set.labels)
test(meta_predictions_test, test_set.labels)

NameError: name 'test_set_input' is not defined

In [67]:
(meta_predictions_validation != labels).sum().item()

89276

In [68]:
inputs

tensor([[0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        ...,
        [0., 1., 1.],
        [0., 0., 1.],
        [0., 1., 1.]])