In [26]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, models

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(14 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

In [27]:
# Load MNIST dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # resizing to fit ResNet's expected input size
    transforms.ToTensor()
])

In [28]:
original_train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(original_train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

In [29]:
# Define a custom dataset class for segmentation
class SegmentationMNISTDataset(torch.utils.data.Dataset):
    def __init__(self, mnist_dataset):
        self.mnist_dataset = mnist_dataset

    def __len__(self):
        return len(self.mnist_dataset)

    def __getitem__(self, idx):
        image, _ = self.mnist_dataset[idx]
        mask = (image > 0).float()
        
        return image, mask

train_dataset = SegmentationMNISTDataset(original_train_dataset)

In [30]:
train_dataset.__getitem__(0)

(tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]]),
 tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]]))

In [31]:
# Modify the ResNet model to accept one channel input
model = models.resnet18(pretrained=False)

model



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [24]:
# Modify the ResNet model to accept one channel input
model = models.resnet18(pretrained=False)
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

# Replace the final fully connected layer for segmentation
model.fc = nn.Sequential(
    nn.Conv2d(224, 1, kernel_size=1),  
    nn.Sigmoid()  # To get output in the range [0, 1]
)
model = model.cuda()


criterion = nn.BCELoss().cuda()
optimizer = optim.Adam(model.parameters(), lr=0.001)



In [10]:
num_epochs = 5
train_losses = []
train_accuracies = []

In [25]:
# Training loop
model.train()
for epoch in range(num_epochs):
    epoch_loss = 0.0
    for imgs, masks in train_dataset:
        imgs, masks = imgs.cuda(), masks.cuda()

        optimizer.zero_grad()
        
        # Add a batch dimension to the input image
        imgs = imgs.unsqueeze(0)  # Shape: (1, 1, 224, 224)
        print(imgs.detach().cpu().numpy().shape)
        outputs = model(imgs)
        
        # Ensure the outputs tensor has the correct shape (1, 224, 224)
        outputs = outputs.unsqueeze(0)
        print(outputs.detach().cpu().numpy().shape)
        
        
        loss = criterion(outputs.squeeze(0), masks)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    train_losses.append(epoch_loss / len(train_dataset))
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {train_losses[-1]}")

(1, 1, 224, 224)


RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [1, 512]

In [None]:
model.eval()
test_losses = []
test_accuracies = []

with torch.no_grad():
    test_loss = 0.0
    correct = 0
    total = 0
    for imgs, masks in test_loader:
        imgs, masks = imgs.cuda(), masks.cuda()
        
        outputs = model(imgs)
        outputs = outputs.view(outputs.size(0), 224, 224)
        loss = criterion(outputs, masks)
        
        test_loss += loss.item()
        correct += ((outputs > 0.5) == masks).sum().item()
        total += masks.numel()
    
    test_losses.append(test_loss / len(test_loader))
    test_accuracies.append(correct / total)
    print(f"Test Loss: {test_losses[-1]}, Test Accuracy: {test_accuracies[-1]}")


Accuracy of the model on the 10000 test images: 97.72%


# Exporting the model

In [None]:
# Save the model's state_dict
torch.save(model.state_dict(), './Models/resnet18_mnist_segmentation.pth')