In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from tqdm import tqdm

# Define data transforms
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

# Load datasets
train_dataset = datasets.ImageFolder('./train', transform=data_transforms['train'])
val_dataset = datasets.ImageFolder('./val', transform=data_transforms['val'])
test_dataset = datasets.ImageFolder('./test', transform=data_transforms['val'])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define models to train
models_dict = {
    'resnet18': models.resnet18(pretrained=True),
    'alexnet': models.alexnet(pretrained=True),
    'vgg16': models.vgg16(pretrained=True),
    'squeezenet': models.squeezenet1_0(pretrained=True),
    'densenet': models.densenet121(pretrained=True),
}

# Modify each model’s classifier for binary classification (2 classes: NORMAL, PNEUMONIA)
for model_name, model in models_dict.items():
    if model_name == 'resnet18' or model_name == 'inception':
        model.fc = nn.Linear(model.fc.in_features, 2)
    elif model_name == 'densenet':
        model.classifier = nn.Linear(model.classifier.in_features, 2)
    elif model_name == 'alexnet' or model_name == 'vgg16':
        model.classifier[6] = nn.Linear(model.classifier[6].in_features, 2)
    elif model_name == 'squeezenet':
        model.classifier[1] = nn.Conv2d(512, 2, kernel_size=(1, 1), stride=(1, 1))

    models_dict[model_name] = model.to(device)

# Training function
def train_model(model, criterion, optimizer, num_epochs=5, use_aux=False):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)

            # Handle aux_logits for inception
            if use_aux:
                outputs, aux_outputs = outputs
                loss1 = criterion(outputs, labels)
                loss2 = criterion(aux_outputs, labels)
                loss = loss1 + 0.4 * loss2  # Weighting aux loss slightly less
            else:
                loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(train_loader)
        epoch_acc = 100 * correct / total
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')

# Validation function
def validate_model(model, criterion, use_aux=False):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # Handle aux_logits for inception
            if use_aux:
                outputs, _ = outputs

            loss = criterion(outputs, labels)
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss = running_loss / len(val_loader)
    val_acc = 100 * correct / total
    print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.2f}%')
    return val_acc

# Test function
def test_model(model, use_aux=False):
    model.eval()
    correct = 0 
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # Handle aux_logits for inception
            if use_aux:
                outputs, _ = outputs

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_acc = 100 * correct / total
    print(f'Test Accuracy: {test_acc:.2f}%')
    return test_acc

# Training and evaluating each model
criterion = nn.CrossEntropyLoss()
num_epochs = 10

for model_name, model in models_dict.items():
    print(f"\nTraining {model_name}...")
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # For inception, set `use_aux=True` to handle aux_logits
    use_aux = model_name == 'inception'
    train_model(model, criterion, optimizer, num_epochs=num_epochs, use_aux=use_aux)
    
    print(f"\nValidating {model_name}...")
    val_accuracy = validate_model(model, criterion, use_aux=use_aux)
    print(f"{model_name} - Validation Accuracy: {val_accuracy:.2f}%")

    print(f"\nTesting {model_name} on the test set...")
    test_accuracy = test_model(model, use_aux=use_aux)
    print(f"{model_name} - Test Accuracy: {test_accuracy:.2f}%")


Training inception...


  0%|          | 0/163 [00:02<?, ?it/s]


RuntimeError: Calculated padded input size per channel: (3 x 3). Kernel size: (5 x 5). Kernel size can't be greater than actual input size

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from tqdm import tqdm

# Define data transforms
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

# Load datasets
train_dataset = datasets.ImageFolder('./train', transform=data_transforms['train'])
val_dataset = datasets.ImageFolder('./val', transform=data_transforms['val'])
test_dataset = datasets.ImageFolder('./test', transform=data_transforms['test'])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define models to train
models_dict = {
   
    'inception': models.inception_v3(pretrained=True)
}

# Modify each model’s classifier for binary classification (2 classes: NORMAL, PNEUMONIA)
for model_name, model in models_dict.items():
    if model_name == 'resnet18' or model_name == 'inception':
        model.fc = nn.Linear(model.fc.in_features, 2)
    elif model_name == 'densenet':
        model.classifier = nn.Linear(model.classifier.in_features, 2)
    elif model_name == 'alexnet' or model_name == 'vgg16':
        model.classifier[6] = nn.Linear(model.classifier[6].in_features, 2)
    elif model_name == 'squeezenet':
        model.classifier[1] = nn.Conv2d(512, 2, kernel_size=(1, 1), stride=(1, 1))

    models_dict[model_name] = model.to(device)

# Training function
def train_model(model, criterion, optimizer, num_epochs=5, use_aux=False):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)

            # Handle aux_logits for inception
            if use_aux:
                outputs, aux_outputs = outputs
                loss1 = criterion(outputs, labels)
                loss2 = criterion(aux_outputs, labels)
                loss = loss1 + 0.4 * loss2  # Weighting aux loss slightly less
            else:
                loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(train_loader)
        epoch_acc = 100 * correct / total
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')

# Validation function
def validate_model(model, criterion, use_aux=False):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # Handle aux_logits for inception
            if use_aux:
                outputs, _ = outputs

            loss = criterion(outputs, labels)
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss = running_loss / len(val_loader)
    val_acc = 100 * correct / total
    print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.2f}%')
    return val_acc

# Test function
def test_model(model, use_aux=False):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # Handle aux_logits for inception
            if use_aux:
                outputs, _ = outputs

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_acc = 100 * correct / total
    print(f'Test Accuracy: {test_acc:.2f}%')
    return test_acc

# Training and evaluating each model
criterion = nn.CrossEntropyLoss()
num_epochs = 10

for model_name, model in models_dict.items():
    print(f"\nTraining {model_name}...")
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # For inception, set `use_aux=True` to handle aux_logits
    use_aux = model_name == 'inception'
    train_model(model, criterion, optimizer, num_epochs=num_epochs, use_aux=use_aux)
    
    print(f"\nValidating {model_name}...")
    val_accuracy = validate_model(model, criterion, use_aux=use_aux)
    print(f"{model_name} - Validation Accuracy: {val_accuracy:.2f}%")

    print(f"\nTesting {model_name} on the test set...")
    test_accuracy = test_model(model, use_aux=use_aux)
    print(f"{model_name} - Test Accuracy: {test_accuracy:.2f}%")




Training inception...


  0%|          | 0/163 [00:02<?, ?it/s]


RuntimeError: Calculated padded input size per channel: (3 x 3). Kernel size: (5 x 5). Kernel size can't be greater than actual input size