# Image Segmentation with U-Net

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

import numpy as np
import matplotlib.pyplot as plt

## U-Net Architecture

U-Net is a convolutional network for biomedical image segmentation with an encoder-decoder structure.

In [None]:
class DoubleConv(nn.Module):
    """Two stacked 3x3 convolutions, each followed by batch norm and ReLU.

    Both convolutions use ``padding=1``, so the spatial size of the input is
    preserved; only the channel count changes
    (``in_channels`` -> ``out_channels`` -> ``out_channels``).
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = []
        # The first stage changes the channel count; the second keeps it.
        for stage_in in (in_channels, out_channels):
            stages.append(nn.Conv2d(stage_in, out_channels, kernel_size=3, padding=1))
            stages.append(nn.BatchNorm2d(out_channels))
            stages.append(nn.ReLU(inplace=True))
        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through both conv/BN/ReLU stages and return the result."""
        block = self.double_conv
        return block(x)

class Down(nn.Module):
    """Encoder step: 2x2 max pooling followed by a ``DoubleConv``."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        pool = nn.MaxPool2d(2)
        conv = DoubleConv(in_channels, out_channels)
        # Kept as a single Sequential so the pooling always runs first.
        self.maxpool_conv = nn.Sequential(pool, conv)

    def forward(self, x):
        """Return ``x`` after pooling and the double convolution."""
        pooled_and_convolved = self.maxpool_conv(x)
        return pooled_and_convolved

class Up(nn.Module):
    """Decoder step: upsample, merge with the skip connection, double conv.

    Args:
        in_channels: Channels of the low-resolution input ``x1``. The skip
            tensor ``x2`` is expected to carry ``in_channels // 2`` channels,
            so that the concatenation has ``in_channels`` channels again.
        out_channels: Channels produced by the final ``DoubleConv``.
        bilinear: If True, upsample with bilinear interpolation followed by a
            1x1 convolution; otherwise use a learned transposed convolution.
    """

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()
        if bilinear:
            # nn.Upsample leaves the channel count untouched, so a 1x1
            # convolution halves it here. Without it the concatenation in
            # forward() would carry in_channels + in_channels // 2 channels
            # and self.conv (built for in_channels) would reject it.
            self.up = nn.Sequential(
                nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
                nn.Conv2d(in_channels, in_channels // 2, kernel_size=1),
            )
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
        self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        """Upsample ``x1``, concatenate it with ``x2`` and convolve.

        Args:
            x1: Low-resolution feature map of shape (N, in_channels, H, W).
            x2: Skip-connection feature map from the encoder with
                ``in_channels // 2`` channels and roughly twice the spatial
                size of ``x1``.

        Returns:
            Feature map with ``out_channels`` channels and the spatial size
            of ``x2``.
        """
        x1 = self.up(x1)

        # Max pooling floors odd sizes, so the upsampled map can come out
        # smaller than the skip tensor; pad it so torch.cat lines up.
        diff_h = x2.size(2) - x1.size(2)
        diff_w = x2.size(3) - x1.size(3)
        if diff_h or diff_w:
            # F.pad takes (left, right, top, bottom) for the last two dims.
            x1 = F.pad(
                x1,
                [diff_w // 2, diff_w - diff_w // 2, diff_h // 2, diff_h - diff_h // 2],
            )

        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)

class UNet(nn.Module):
    """U-Net with three downsampling and three upsampling stages.

    The encoder widens the features 64 -> 128 -> 256 -> 512 while the decoder
    narrows them back to 64, reusing each encoder output as a skip
    connection. A final 1x1 convolution maps to ``out_channels``; no
    activation is applied, so the output is raw logits.

    Args:
        in_channels: Number of channels in the input image.
        out_channels: Number of channels in the output map.
        bilinear: Forwarded to every ``Up`` stage to select its upsampling
            mode.
    """

    def __init__(self, in_channels=3, out_channels=1, bilinear=True):
        super().__init__()
        c1, c2, c3, c4 = 64, 128, 256, 512

        # Encoder
        self.inc = DoubleConv(in_channels, c1)
        self.down1 = Down(c1, c2)
        self.down2 = Down(c2, c3)
        self.down3 = Down(c3, c4)

        # Decoder
        self.up1 = Up(c4, c3, bilinear)
        self.up2 = Up(c3, c2, bilinear)
        self.up3 = Up(c2, c1, bilinear)

        # 1x1 convolution producing the per-pixel logits
        self.outc = nn.Conv2d(c1, out_channels, kernel_size=1)

    def forward(self, x):
        """Return per-pixel logits for the input batch ``x``."""
        skip1 = self.inc(x)
        skip2 = self.down1(skip1)
        skip3 = self.down2(skip2)
        bottleneck = self.down3(skip3)

        # Each decoder stage is paired with the encoder output of matching depth.
        decoded = self.up1(bottleneck, skip3)
        decoded = self.up2(decoded, skip2)
        decoded = self.up3(decoded, skip1)
        return self.outc(decoded)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# 3 input channels, 1 output channel.
model = UNet(in_channels=3, out_channels=1)
model = model.to(device)
print(model)

## Synthetic Data for Demonstration

In [None]:
# Create synthetic image and mask data
class SyntheticSegmentationDataset(Dataset):
    """Random images paired with binary masks made of filled circles.

    Samples are generated on the fly from the global torch and NumPy RNGs, so
    the same index yields a different sample on every access, and the image
    is noise drawn independently of the mask.

    Args:
        num_samples: Number of samples reported by ``len()``.
        img_size: Height and width of the square images and masks. Must be
            greater than 40, because circle centres are drawn from
            ``[20, img_size - 20)``; smaller values make
            ``np.random.randint`` raise ``ValueError`` on access.
    """

    def __init__(self, num_samples=100, img_size=128):
        self.num_samples = num_samples
        self.img_size = img_size

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        """Return a freshly generated ``(image, mask)`` pair.

        Args:
            idx: Sample index in ``[-num_samples, num_samples)``. It is only
                range-checked; the generated sample does not depend on it.

        Returns:
            Tuple of a ``(3, img_size, img_size)`` float image and a
            ``(1, img_size, img_size)`` mask containing only 0 and 1.

        Raises:
            IndexError: If ``idx`` is out of range.
        """
        # Without this check, plain iteration over the dataset (which stops
        # on IndexError) would never terminate.
        if not -self.num_samples <= idx < self.num_samples:
            raise IndexError(
                f'index {idx} is out of range for dataset of size {self.num_samples}'
            )

        size = self.img_size

        # Generate synthetic image
        image = torch.randn(3, size, size)

        # Generate synthetic mask
        mask = torch.zeros(1, size, size)
        # The pixel coordinate grids are the same for every circle.
        y, x = np.ogrid[:size, :size]
        # Add some circles
        for _ in range(3):
            cx, cy = np.random.randint(20, size - 20, 2)
            r = np.random.randint(5, 20)
            inside = (x - cx) ** 2 + (y - cy) ** 2 <= r ** 2
            mask[0, torch.from_numpy(inside)] = 1

        return image, mask

# 100 synthetic 128x128 samples, served in shuffled mini-batches of 4.
train_dataset = SyntheticSegmentationDataset(
    num_samples=100,
    img_size=128,
)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=4,
    shuffle=True,
)

## Train the Model

In [None]:
# BCEWithLogitsLoss applies the sigmoid internally, so it is fed the model's
# raw outputs.
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 5
losses = []  # mean training loss of each epoch, in order

for epoch in range(epochs):
    model.train()
    epoch_loss = 0

    for images, masks in train_loader:
        images = images.to(device)
        masks = masks.to(device)

        # Clear gradients from the previous step before back-propagating.
        optimizer.zero_grad()
        loss = loss_fn(model(images), masks)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Average the summed batch losses over the number of batches.
    epoch_loss = epoch_loss / len(train_loader)
    losses.append(epoch_loss)
    print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}')

## Visualize Results

In [None]:
# Three panels: training loss, ground-truth mask, predicted mask.
plt.figure(figsize=(15, 4))

# Plot loss
plt.subplot(1, 3, 1)
plt.plot(losses)
plt.title('Training Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.grid(True)

# Visualize predictions
model.eval()
test_img, test_mask = train_dataset[0]
with torch.no_grad():
    # Add a batch dimension for the model, then turn logits into probabilities.
    pred_mask = torch.sigmoid(model(test_img.unsqueeze(0).to(device))).cpu()

# Show the target next to the prediction so the two can be compared.
plt.subplot(1, 3, 2)
plt.imshow(test_mask[0].numpy(), cmap='gray')
plt.title('Ground Truth Mask')

plt.subplot(1, 3, 3)
plt.imshow(pred_mask[0, 0].numpy(), cmap='gray')
plt.title('Predicted Segmentation Mask')
plt.colorbar()

plt.tight_layout()
plt.show()