In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision.datasets import ImageFolder
from tqdm import tqdm
from sklearn.metrics import classification_report
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Dataset wrapper around ImageFolder that returns (image, class index) pairs
# without doing any resizing of its own.
class CustomImageFolder(ImageFolder):
    """ImageFolder variant with an explicit ``__getitem__``.

    Images are read with the dataset's ``loader`` and are only modified by the
    ``transform`` / ``target_transform`` callables passed to the constructor;
    this class itself never resizes anything.
    """

    def __getitem__(self, index):
        """Load and return the sample stored at position ``index``.

        Args:
            index: Position of the sample in ``self.imgs``.

        Returns:
            A ``(img, target)`` tuple: the loaded (and, if a transform is
            configured, transformed) image and its class index.
        """
        path, target = self.imgs[index]
        img = self.loader(path)
        # Both callables are optional constructor arguments. Calling a missing
        # one would raise "'NoneType' object is not callable", so guard them.
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return img, target


In [3]:
# CNN classifier for single-channel images with two output classes
class ConvNet(nn.Module):
    """Five conv -> LeakyReLU -> max-pool stages followed by a 3-layer MLP head.

    Input is a batch of 1-channel images, shape ``(N, 1, H, W)``. Every stage
    halves the spatial size, so ``H`` and ``W`` must each be at least 32 for
    the fifth pooling step to have something to pool. The adaptive average
    pool then fixes the feature map at ``256 x 4 x 1`` (1024 values once
    flattened), which is what lets the linear head accept images of varying
    size. The output has shape ``(N, 2)`` and holds softmax probabilities.
    """

    def __init__(self):
        super().__init__()

        # Parameter-free layers; one instance of each is reused by every stage.
        self.act = nn.LeakyReLU(negative_slope=0.2)
        self.final_act = nn.Softmax(dim=1)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Feature extractor: 3x3 convolutions with padding 1 keep the spatial
        # size, so only the pooling layers shrink the feature maps.
        self.conv0 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv1 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)

        # Collapse whatever spatial size is left to a fixed 4x1 grid.
        self.adaptive = nn.AdaptiveAvgPool2d(output_size=(4, 1))
        self.flatten = nn.Flatten()

        # Classification head: 256 channels * 4 * 1 = 1024 input features.
        self.linear0 = nn.Linear(in_features=1024, out_features=256)
        self.linear1 = nn.Linear(in_features=256, out_features=64)
        self.linear2 = nn.Linear(in_features=64, out_features=2)

    def forward(self, x):
        """Return per-class softmax probabilities for the image batch ``x``."""
        conv_stages = (self.conv0, self.conv1, self.conv2, self.conv3, self.conv4)
        for conv in conv_stages:
            x = self.maxpool(self.act(conv(x)))

        x = self.flatten(self.adaptive(x))

        for hidden in (self.linear0, self.linear1):
            x = self.act(hidden(x))

        logits = self.linear2(x)
        return self.final_act(logits)

In [4]:
# Preprocessing pipeline: force a single grayscale channel, convert to a
# tensor, then normalise that channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Evaluation images, read through CustomImageFolder from the test directory.
dataset = CustomImageFolder(transform=transform, root="dataset/test_dataset")

# Batched iteration over the dataset; no shuffling or sampler is requested,
# so samples come out in dataset order.
batch_size = 8
dataloader = DataLoader(dataset, batch_size=batch_size)


In [5]:
# Build the network and restore its trained weights for evaluation
model = ConvNet()

# Use the first CUDA device when one is available, otherwise fall back to CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model.to(device)
# This notebook only runs inference, so put the module in evaluation mode.
model.eval()
# NOTE(review): the message says "Training" although this cell only prepares
# evaluation; the text is kept verbatim to match the recorded output.
print(f"Training on device: {device}")
# map_location remaps tensors that were saved from another device, so a
# checkpoint written on a GPU still loads on the CPU fallback chosen above.
# NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
model.load_state_dict(torch.load("models/cnn_v1_20epochs.pth", map_location=device))

Training on device: cuda:0


<All keys matched successfully>

In [6]:
# A sample is predicted positive when the model's probability for class
# index 1 exceeds this threshold.
threshold = 0.5

# Collect one boolean array per batch and join them once at the end;
# appending to a NumPy array inside the loop recopies it on every batch.
true_batches = []
pred_batches = []
with torch.no_grad():
    for inputs, labels in tqdm(dataloader):
        probs = model(inputs.to(device))
        # Ground truth: True where the label is class index 1. The labels are
        # never fed to the model, so they do not need to go to the device.
        true_batches.append(labels.cpu().numpy() == 1)
        pred_batches.append((probs[:, 1] > threshold).cpu().numpy())

# np.concatenate rejects an empty list, so fall back to empty boolean arrays
# when the loader yielded no batches.
y_true = np.concatenate(true_batches) if true_batches else np.array([], dtype=np.bool_)
y_pred = np.concatenate(pred_batches) if pred_batches else np.array([], dtype=np.bool_)
print(classification_report(y_true, y_pred, zero_division=0))

100%|██████████| 174/174 [00:41<00:00,  4.23it/s]

              precision    recall  f1-score   support

       False       0.87      0.60      0.71      1158
        True       0.21      0.54      0.31       232

    accuracy                           0.59      1390
   macro avg       0.54      0.57      0.51      1390
weighted avg       0.76      0.59      0.64      1390




