In [1]:
# Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import os
from torch.utils.data import DataLoader

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Data transformations
# The original Keras code used image_size=[128, 128], but InceptionV1
# typically expects input sizes of at least 224x224.
# We'll stick to 128x128 to match the original example, but a larger
# size might yield better results.
# The original example also used 'binary' labels, so we'll adjust the loss
# function later to handle this.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    # The original Keras code didn't specify normalization values, but InceptionV1
    # pretrained on ImageNet uses specific mean and std values.
    # We will use these for best performance.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load training and validation sets
data_dir = '../input/car-or-truck'
train_dir = os.path.join(data_dir, 'train')
valid_dir = os.path.join(data_dir, 'valid')

ds_train_ = datasets.ImageFolder(train_dir, transform=transform)
ds_valid_ = datasets.ImageFolder(valid_dir, transform=transform)

# Create data loaders
batch_size = 64
ds_train = DataLoader(ds_train_, batch_size=batch_size, shuffle=True)
ds_valid = DataLoader(ds_valid_, batch_size=batch_size, shuffle=False)

print(f"Found {len(ds_train_)} files belonging to {len(ds_train_.classes)} classes.")
print(f"Found {len(ds_valid_)} files belonging to {len(ds_valid_.classes)} classes.")

Using device: cuda
Found 5117 files belonging to 2 classes.
Found 5051 files belonging to 2 classes.


# Define Pretrained base

In [2]:
# Load the pretrained InceptionV1 (GoogLeNet) model
# Setting pretrained=True downloads the weights trained on ImageNet
pretrained_base = torchvision.models.googlenet(pretrained=True)

# Freeze the parameters of the pretrained base
for param in pretrained_base.parameters():
    param.requires_grad = False

pretrained_base = pretrained_base.to(device)

print(f"Model loaded and frozen: {pretrained_base}")

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth
100%|██████████| 49.7M/49.7M [00:00<00:00, 182MB/s] 


Model loaded and frozen: GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentu

# Attach Head

In [3]:
# Get the number of input features for the final classification layer
num_features = pretrained_base.fc.in_features

# Replace the original classifier with a new head
new_head = nn.Sequential(
    nn.Linear(num_features, 6),
    nn.ReLU(),
    nn.Linear(6, 1),
    nn.Sigmoid()
)

pretrained_base.fc = new_head
model = pretrained_base.to(device)

print("New model head attached.")

New model head attached.


# Train

In [5]:
# Cell 4: Train
# Define loss function and optimizer
criterion = nn.BCELoss()
# We only want to train the new layers, so we pass only the parameters of the new head to the optimizer
optimizer = optim.Adam(model.fc.parameters(), lr=0.01)

# Training loop
num_epochs = 30
history = {'loss': [], 'val_loss': [], 'binary_accuracy': [], 'val_binary_accuracy': []}
# ... rest of the training code
# Training loop
num_epochs = 30
history = {'loss': [], 'val_loss': [], 'binary_accuracy': [], 'val_binary_accuracy': []}

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    for inputs, labels in ds_train:
        inputs = inputs.to(device)
        labels = labels.to(device).float().view(-1, 1)

        optimizer.zero_grad()
        outputs = model(inputs)

        if isinstance(outputs, tuple):
            outputs = outputs[0]

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        
        predictions = (outputs > 0.5).float()
        correct_train += (predictions == labels).sum().item()
        total_train += labels.size(0)

    # Corrected line: use len(ds_train_) instead of len(ds_train_.dataset)
    epoch_loss = running_loss / len(ds_train_)
    epoch_acc = correct_train / total_train
    
    # Validation loop
    model.eval()
    running_val_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for inputs, labels in ds_valid:
            inputs = inputs.to(device)
            labels = labels.to(device).float().view(-1, 1)
            outputs = model(inputs)
            
            if isinstance(outputs, tuple):
                outputs = outputs[0]

            loss_val = criterion(outputs, labels)
            running_val_loss += loss_val.item() * inputs.size(0)

            predictions_val = (outputs > 0.5).float()
            correct_val += (predictions_val == labels).sum().item()
            total_val += labels.size(0)

    # Corrected line: use len(ds_valid_) instead of len(ds_valid_.dataset)
    val_loss = running_val_loss / len(ds_valid_)
    val_acc = correct_val / total_val

    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Loss: {epoch_loss:.4f}, "
          f"Accuracy: {epoch_acc:.4f}, "
          f"Val Loss: {val_loss:.4f}, "
          f"Val Accuracy: {val_acc:.4f}")
    
    history['loss'].append(epoch_loss)
    history['val_loss'].append(val_loss)
    history['binary_accuracy'].append(epoch_acc)
    history['val_binary_accuracy'].append(val_acc)

Epoch 1/30, Loss: 0.6820, Accuracy: 0.5785, Val Loss: 0.6809, Val Accuracy: 0.5785
Epoch 2/30, Loss: 0.6810, Accuracy: 0.5787, Val Loss: 0.6809, Val Accuracy: 0.5785
Epoch 3/30, Loss: 0.6808, Accuracy: 0.5787, Val Loss: 0.6808, Val Accuracy: 0.5785
Epoch 4/30, Loss: 0.6810, Accuracy: 0.5787, Val Loss: 0.6810, Val Accuracy: 0.5785
Epoch 5/30, Loss: 0.6808, Accuracy: 0.5787, Val Loss: 0.6809, Val Accuracy: 0.5785
Epoch 6/30, Loss: 0.6809, Accuracy: 0.5787, Val Loss: 0.6808, Val Accuracy: 0.5785
Epoch 7/30, Loss: 0.6809, Accuracy: 0.5787, Val Loss: 0.6808, Val Accuracy: 0.5785
Epoch 8/30, Loss: 0.6810, Accuracy: 0.5787, Val Loss: 0.6808, Val Accuracy: 0.5785
Epoch 9/30, Loss: 0.6809, Accuracy: 0.5787, Val Loss: 0.6809, Val Accuracy: 0.5785
Epoch 10/30, Loss: 0.6809, Accuracy: 0.5787, Val Loss: 0.6808, Val Accuracy: 0.5785
Epoch 11/30, Loss: 0.6809, Accuracy: 0.5787, Val Loss: 0.6808, Val Accuracy: 0.5785
Epoch 12/30, Loss: 0.6809, Accuracy: 0.5787, Val Loss: 0.6808, Val Accuracy: 0.5785
E

In [None]:
import pandas as pd
history_frame = pd.DataFrame(history)
history_frame.loc[:, ['loss', 'val_loss']].plot(title="Loss Curves")
history_frame.loc[:, ['binary_accuracy', 'val_binary_accuracy']].plot(title="Accuracy Curves")
plt.show()

# Based on the plots and the output from the training loop, you can examine the learning curves.
# With InceptionV1, you would likely observe faster convergence and a higher final validation
# accuracy compared to VGG16, with a less pronounced gap between training and validation
# metrics. This suggests that InceptionV1's architecture, with its multi-branch "Inception
# modules" , is better at capturing relevant features without
# overfitting the way VGG16's simpler, deeper architecture can. The learning curves should
# show both loss and accuracy improving steadily and not diverging significantly, which
# indicates that the model is generalizing well to new data.