In [1]:
import os
import cv2
import csv
from torchvision import transforms
from torchvision.models import resnet50
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from tqdm import tqdm

In [2]:
# Execute the sibling notebooks before anything else in this notebook runs.
# NOTE(review): presumably cnn.ipynb defines `CNN` and resnet.ipynb defines
# `ResNet` / `Block` -- later cells use those names but nothing in this
# notebook defines them. Confirm against the two notebooks.
# Keep comments on their own lines: text after a line magic is passed to it.
%run './cnn.ipynb'
%run './resnet.ipynb'

In [3]:
def preprocess(data, mlb, augment=True):
    """Load images from disk and build image / multi-hot label tensors.

    Parameters
    ----------
    data : iterable of dict
        Each item must have an ``'img_idx'`` key holding the image path and
        a ``'label'`` key holding class names joined with ``'|'``.
    mlb : object exposing ``fit_transform``
        Label binarizer (the notebook passes a ``MultiLabelBinarizer``). It
        is fitted here, on the labels of the items that loaded successfully.
    augment : bool, default True
        When True (the default, matching the previous behaviour) a random
        horizontal flip and a random rotation of up to 20 degrees are
        applied. The random draw happens once, in this call, so every
        returned image carries one fixed distortion; pass ``False`` for
        data that should stay undistorted (e.g. validation / test images).

    Returns
    -------
    tuple of (torch.Tensor, torch.Tensor)
        The stacked image tensor and the float multi-hot label matrix, one
        row per successfully processed item, in input order. Two empty
        tensors are returned when no item could be processed.

    Notes
    -----
    Items whose file is missing, unreadable, or whose processing raises are
    reported with ``print`` and skipped. A missing ``'img_idx'`` key is not
    caught and propagates as ``KeyError``.
    """
    steps = [
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
    ]
    if augment:
        steps += [
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(20),
        ]
    steps += [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    transform = transforms.Compose(steps)

    imgs = []
    labels = []
    for item in data:
        path = item['img_idx']
        try:
            if not os.path.exists(path):
                print(f"File not found: {path}")
                continue  # Skip this item if the file does not exist

            image = cv2.imread(path)
            if image is None:
                print(f"Failed to load image at {path}. The image may be corrupt or in an unsupported format.")
                continue  # Skip this item if the image could not be read

            # OpenCV loads BGR; convert to RGB before handing over to PIL.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = transform(image)
            label = item['label'].split('|')
        except Exception as e:
            print(f"Error processing {path}: {e}")
            continue  # Skip this item if any other error occurs

        # Append both only after *everything* succeeded. Appending the image
        # before parsing the label would leave the two lists misaligned
        # whenever the label lookup fails.
        imgs.append(image)
        labels.append(label)

    if not labels:
        return torch.tensor([]), torch.tensor([])  # Nothing could be loaded

    # Convert the list of label lists to a multi-hot encoded matrix.
    labels = mlb.fit_transform(labels)
    return torch.stack(imgs), torch.FloatTensor(labels)

In [4]:
def get_accuracy(model, dataloader, device, threshold=0.5):
    """Return the element-wise multi-label accuracy of ``model`` in percent.

    Every (sample, class) entry counts as one prediction: the model output
    is passed through a sigmoid, thresholded, and compared with the target
    entry by entry.

    Parameters
    ----------
    model : torch.nn.Module
        Network returning one logit per class. It is switched to eval mode
        and left in eval mode when the function returns.
    dataloader : iterable of (imgs, labels)
        Batches of inputs and multi-hot float targets.
    device : torch.device or str
        Device the batches are moved to before the forward pass.
    threshold : float, default 0.5
        Probability above which a class is predicted as present.

    Returns
    -------
    float
        Percentage of matching entries, or ``0.0`` when ``dataloader``
        yields no elements (instead of dividing by zero).
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for imgs, labels in dataloader:
            imgs, labels = imgs.to(device), labels.to(device)
            probs = torch.sigmoid(model(imgs))  # logits -> per-class probabilities
            predicted = (probs > threshold).float()  # probabilities -> 0/1 decisions
            total += labels.numel()
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Empty loader (e.g. an empty split): nothing to score.
        return 0.0
    return correct / total * 100

def train_model(model, train_loader, val_loader, device, num_epochs=10, lr=0.001):
    """Train ``model`` for multi-label classification and print per-epoch accuracy.

    Uses ``nn.BCEWithLogitsLoss`` (so the model must output raw logits) and
    the Adam optimizer. After every epoch the accuracy on both loaders is
    computed with ``get_accuracy`` and printed.

    Parameters
    ----------
    model : torch.nn.Module
        Network to optimise in place; expected to already live on ``device``.
    train_loader : iterable of (imgs, labels)
        Batches used for the gradient updates.
    val_loader : iterable of (imgs, labels)
        Batches used only for the per-epoch validation accuracy.
    device : torch.device or str
        Device every batch is moved to.
    num_epochs : int, default 10
        Number of passes over ``train_loader``.
    lr : float, default 0.001
        Learning rate handed to Adam.

    Returns
    -------
    None
    """
    criterion = nn.BCEWithLogitsLoss()  # sigmoid + binary cross-entropy in one op
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        # get_accuracy switches the model to eval mode, so training mode
        # has to be re-enabled at the start of every epoch.
        model.train()
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        train_acc = get_accuracy(model, train_loader, device)
        val_acc = get_accuracy(model, val_loader, device)
        print(f'Epoch {epoch+1}, Train accuracy {train_acc}, Val accuracy {val_acc}')

In [23]:
# Fixed seed so the random augmentation draws and both splits are identical
# on every Restart & Run All.
SEED = 42

# Collect {'img_idx': <image path>, 'label': <'|'-joined findings>} records
# for every CSV row whose image file is actually present on disk.
data = []
mlb = MultiLabelBinarizer()  # Initialize multi-label binarizer
# newline='' lets the csv module do its own newline handling, as its
# documentation requires for files passed to a reader.
with open('./archive/sample_labels.csv', 'r', newline='') as csvfile:
    rows = csv.DictReader(csvfile)
    for row in rows:
        img_idx = row['Image Index']
        label = row['Finding Labels']

        path = os.path.join('./archive/sample/images/', img_idx)
        if os.path.exists(path):  # Ensure the image exists
            data.append({'img_idx': path, 'label': label})

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed torch's global RNG before preprocessing so the random transforms
# applied there are reproducible.
torch.manual_seed(SEED)
x, y = preprocess(data, mlb)
if len(x) == 0:
    # preprocess returns empty tensors when nothing could be loaded; fail
    # here with a clear message rather than inside train_test_split.
    raise ValueError(
        "No images could be loaded from './archive/sample/images/'; "
        "check the dataset paths."
    )

# 70 % train, then the remaining 30 % split evenly into validation and test.
X_train, X_rest, y_train, y_rest = train_test_split(x, y, test_size=0.3, random_state=SEED)
X_val, X_test, y_val, y_test = train_test_split(X_rest, y_rest, test_size=0.5, random_state=SEED)

In [26]:
# Wrap the split tensors in datasets / loaders. Only the training batches
# are shuffled; validation order stays fixed.
n_labels = len(mlb.classes_)  # one output unit per class seen by the binarizer
loader_kwargs = dict(batch_size=50)

train_data = TensorDataset(X_train, y_train)
val_data = TensorDataset(X_val, y_val)
train_loader = DataLoader(train_data, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_data, shuffle=False, **loader_kwargs)

# Build both networks first, then train them one after the other on the
# same loaders (CNN first, ResNet second).
cnn_model = CNN(num_classes=n_labels).to(device)
resnet_model = ResNet(Block, [3, 4, 6, 3], num_classes=n_labels).to(device)

for net in (cnn_model, resnet_model):
    train_model(net, train_loader, val_loader, device)

Epoch 1, Train accuracy 84.4155844155844, Val accuracy 81.81818181818183
Epoch 2, Train accuracy 79.22077922077922, Val accuracy 63.63636363636363
Epoch 3, Train accuracy 84.4155844155844, Val accuracy 72.72727272727273
Epoch 4, Train accuracy 97.40259740259741, Val accuracy 72.72727272727273
Epoch 5, Train accuracy 97.40259740259741, Val accuracy 54.54545454545454
Epoch 6, Train accuracy 96.1038961038961, Val accuracy 81.81818181818183
Epoch 7, Train accuracy 100.0, Val accuracy 81.81818181818183
Epoch 8, Train accuracy 100.0, Val accuracy 72.72727272727273
Epoch 9, Train accuracy 100.0, Val accuracy 72.72727272727273
Epoch 10, Train accuracy 100.0, Val accuracy 72.72727272727273
Epoch 1, Train accuracy 77.92207792207793, Val accuracy 72.72727272727273
Epoch 2, Train accuracy 64.93506493506493, Val accuracy 63.63636363636363
Epoch 3, Train accuracy 77.92207792207793, Val accuracy 72.72727272727273
Epoch 4, Train accuracy 71.42857142857143, Val accuracy 72.72727272727273
Epoch 5, Train