In [1]:
import os
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from torchinfo import summary
import torch.nn as nn
from PIL import Image

# Load a ResNet-18 with pre-trained weights.
# The weights enum replaces the deprecated boolean form (`weights=True`), which
# torchvision only accepts through its legacy-argument shim with a warning.
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Modify the first convolutional layer to accept 1-channel input.
# A freshly constructed Conv2d would be randomly initialised and throw away the
# pre-trained stem filters, so the new kernel is seeded from them: summing the
# three RGB kernels gives the same response the original layer would produce
# for a grayscale image replicated across R, G and B.
pretrained_conv1 = model.conv1
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
with torch.no_grad():
    # (64, 3, 7, 7) -> (64, 1, 7, 7)
    model.conv1.weight.copy_(pretrained_conv1.weight.sum(dim=1, keepdim=True))

# Get the number of features feeding the last layer
num_features = model.fc.in_features

# Replace the final fully connected layer so it emits one logit per dataset class (15)
model.fc = nn.Linear(num_features, 15)

summary(model)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 177MB/s]


Layer (type:depth-idx)                   Param #
ResNet                                   --
├─Conv2d: 1-1                            3,136
├─BatchNorm2d: 1-2                       128
├─ReLU: 1-3                              --
├─MaxPool2d: 1-4                         --
├─Sequential: 1-5                        --
│    └─BasicBlock: 2-1                   --
│    │    └─Conv2d: 3-1                  36,864
│    │    └─BatchNorm2d: 3-2             128
│    │    └─ReLU: 3-3                    --
│    │    └─Conv2d: 3-4                  36,864
│    │    └─BatchNorm2d: 3-5             128
│    └─BasicBlock: 2-2                   --
│    │    └─Conv2d: 3-6                  36,864
│    │    └─BatchNorm2d: 3-7             128
│    │    └─ReLU: 3-8                    --
│    │    └─Conv2d: 3-9                  36,864
│    │    └─BatchNorm2d: 3-10            128
├─Sequential: 1-6                        --
│    └─BasicBlock: 2-3                   --
│    │    └─Conv2d: 3-11                 73,728

In [2]:
# Root folder of the competition data; every other path is derived from it.
ROOT = "/kaggle/input/amia-public-challenge-2024"

# Image directories
train_img_path = f"{ROOT}/train/train"
test_img_path = f"{ROOT}/test/test"

# Annotation CSV files
train_annot_path = f"{ROOT}/train.csv"
test_annot_path = f"{ROOT}/test.csv"

In [3]:
from torch.utils.data import Dataset
import pandas as pd
import numpy as np
from tqdm import tqdm

class XRayImageDataset(Dataset):
    """Map-style dataset pairing PNG images on disk with labels from a CSV file.

    The CSV is read once at construction time. For each row, column 0 is used
    as the image file stem (``<img_dir>/<stem>.png``) and column 2 as the label
    (the ``class_id`` column).

    Args:
        annotations_file: Path to the CSV file with one row per sample.
        img_dir: Directory containing the ``.png`` images.
        mean: Mean passed to ``transforms.Normalize`` when no explicit
            ``transform_norm`` is given.
        std: Standard deviation passed to ``transforms.Normalize`` when no
            explicit ``transform_norm`` is given.
        transform_norm: Optional callable applied to the float image tensor.
            When provided it takes precedence over ``mean``/``std``.
        target_transform: Optional callable applied to the label.
    """

    def __init__(self, annotations_file, img_dir, mean=None, std=None, transform_norm=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.mean = mean
        self.std = std
        if transform_norm is not None:
            # Honour a caller-supplied transform instead of silently discarding it.
            self.transform_norm = transform_norm
        elif mean is not None and std is not None:
            self.transform_norm = transforms.Compose([
                transforms.Normalize(self.mean, self.std)
            ])
        else:
            # Without statistics there is nothing to normalise with; building
            # Normalize(None, None) would only fail later, on the first sample.
            self.transform_norm = None
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of rows in the annotations CSV."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load the sample at position ``idx`` and return ``(image, label)``.

        The image is decoded with ``read_image``, cast to float and passed
        through ``transform_norm`` (if any); the label is passed through
        ``target_transform`` (if any).
        """
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0]) + '.png'
        # NOTE(review): read_image presumably yields raw integer pixel values
        # (0-255 for 8-bit PNGs) and .float() does not rescale them - confirm
        # that mean/std were computed on that same scale.
        image = read_image(img_path).float()
        label = self.img_labels.iloc[idx, 2]  # class_id column
        if self.transform_norm is not None:
            image = self.transform_norm(image)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return image, label

In [4]:
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import datasets, transforms
from torchvision.io import read_image

# Training set: annotations CSV + image folder, normalised with the given mean/std.
train_data = XRayImageDataset(
    annotations_file=train_annot_path,
    img_dir=train_img_path,
    mean=0.04664242120787185,
    std=0.10213025072799406,
)
print(f'Training images: mean {train_data.mean}, std {train_data.std}')

# Batches of 12, reshuffled at every epoch.
train_dataloader = DataLoader(dataset=train_data, batch_size=12, shuffle=True)

Training images: mean 0.04664242120787185, std 0.10213025072799406


In [5]:
# Evaluation set: annotations CSV + image folder.
# NOTE(review): these are statistics of the test images themselves; evaluation
# data is normally normalised with the *training* mean/std so the model sees
# inputs on the scale it was trained on - confirm this is intentional.
test_data = XRayImageDataset(
    annotations_file=test_annot_path,
    img_dir=test_img_path,
    mean=0.07274098403775907,
    std=0.16118353533641264,
)
print(f'Testing images: mean {test_data.mean}, std {test_data.std}')

# No shuffling for evaluation: sample order does not affect the metrics, and a
# fixed order keeps batch contents reproducible between runs.
test_dataloader = DataLoader(test_data, batch_size=12, shuffle=False)

Testing images: mean 0.07274098403775907, std 0.16118353533641264


In [None]:
import torch.optim as optim
# Device configuration (use GPU if available).
# The model is moved first so the optimizer is constructed over parameters that
# already live on the target device, as recommended by the torch.optim docs.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Number of epochs
num_epochs = 2

# Training and evaluation loop: each epoch does one full pass over the training
# loader with weight updates, followed by one gradient-free pass over the test loader.
for epoch in range(num_epochs):
    # ---- Training phase ----
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in tqdm(train_dataloader, desc=f'Training Epoch {epoch+1}/{num_epochs}'):
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Statistics: the criterion returns a batch mean, so weight it by the
        # batch size to get a correct per-sample average over the whole epoch.
        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / total
    epoch_acc = correct / total

    print(f'Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.4f}')

    # ---- Evaluation phase ----
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in tqdm(test_dataloader, desc=f'Evaluating Epoch {epoch+1}/{num_epochs}'):
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Statistics (same per-sample weighting as in the training phase)
            test_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_loss /= total
    test_acc = correct / total

    print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}')

print('Training complete')

Train Ep. 1 1:  16%|█▌        | 611/3828 [06:17<33:09,  1.62it/s]