# Dataset preparation

In [None]:
# importing libraries and modules

import pickle
import os
import numpy as np
from sklearn.model_selection import train_test_split
import cv2
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F

In [None]:
# Size every image and mask is resized to by load_and_preprocess_data.
IMAGE_SIZE = (320, 320)
# Number of samples per DataLoader batch.
BATCH_SIZE = 32
# EPOCHS = 10
# NOTE(review): LEARNING_RATE is not referenced by the training cell in this
# notebook, which passes lr=0.001 to Adam directly — confirm which is intended.
LEARNING_RATE = 1e-4
IMAGE_DIR = "/kaggle/input/m-rose-data/images/images"  # Path to the folder containing images
MASK_DIR = "/kaggle/input/m-rose-data/masked images/masked images"    # Path to the folder containing masks

# Notebook sanity check: displays how many entries each folder contains; the
# loader pairs files by sorted position, so the two counts should match.
len(os.listdir(IMAGE_DIR)), len(os.listdir(MASK_DIR))

In [None]:
def load_and_preprocess_data(image_dir, mask_dir, image_size):
    """Load image/mask pairs from two folders as normalised float32 arrays.

    Files are paired by position after sorting each folder's listing, so the
    two folders must hold the same number of files in matching sort order.

    Args:
        image_dir: Folder containing the input images.
        mask_dir: Folder containing the corresponding masks.
        image_size: Target size handed to ``cv2.resize`` for images and masks.

    Returns:
        A tuple ``(images, masks)`` of NumPy arrays. Images are read with the
        ``cv2.imread`` defaults, masks as single-channel grayscale; both are
        converted to float32 and divided by 255.

    Raises:
        ValueError: If the folders contain a different number of files.
        OSError: If a file cannot be decoded as an image by OpenCV.
    """
    image_paths = sorted(os.path.join(image_dir, fname) for fname in os.listdir(image_dir))
    mask_paths = sorted(os.path.join(mask_dir, fname) for fname in os.listdir(mask_dir))

    # zip() would silently drop the surplus files and could shift the pairing.
    if len(image_paths) != len(mask_paths):
        raise ValueError(
            f"Found {len(image_paths)} images in {image_dir!r} but "
            f"{len(mask_paths)} masks in {mask_dir!r}; the counts must match"
        )

    images = []
    masks = []

    for img_path, mask_path in zip(image_paths, mask_paths):
        img = cv2.imread(img_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise OSError(f"Could not read image file: {img_path!r}")
        if mask is None:
            raise OSError(f"Could not read mask file: {mask_path!r}")

        img = cv2.resize(img, image_size)
        # Nearest-neighbour keeps the original label values; the default
        # linear interpolation would blend them into fractional values
        # along object boundaries.
        mask = cv2.resize(mask, image_size, interpolation=cv2.INTER_NEAREST)

        images.append(img.astype('float32') / 255.0)
        masks.append(mask.astype('float32') / 255.0)

    return np.array(images), np.array(masks)

# Load the whole dataset into memory, then hold out 20% of the image/mask
# pairs for testing; the fixed random_state makes the split reproducible.
images, masks = load_and_preprocess_data(IMAGE_DIR, MASK_DIR, IMAGE_SIZE)
X_train, X_test, y_train, y_test = train_test_split(images, masks, test_size=0.2, random_state=42)

In [None]:
# import pickle

# # pickle.dump(X_train, open("X_train.p", "wb"))
# # pickle.dump(X_test, open("X_test.p", "wb"))
# pickle.dump(y_train, open("y_train.p", "wb"))
# pickle.dump(y_test, open("y_test.p", "wb"))


# # favorite_color = pickle.load(open("save.p", "rb"))

In [None]:
# Images and masks live in separate loaders that are consumed together with
# zip(), so batch i of an image loader must contain the same samples as batch i
# of the matching mask loader. With shuffle=True each loader draws its own
# random order, which pairs every image with some other sample's mask.
# Shuffling is therefore disabled; train_test_split has already shuffled the
# samples once, so the order is random but stays fixed across epochs.
# (Per-epoch reshuffling needs a single loader over paired samples, e.g. a
# torch.utils.data.TensorDataset of images and masks.)
images_train_loader = DataLoader(X_train, batch_size=BATCH_SIZE, shuffle=False)
masks_train_loader = DataLoader(y_train, batch_size=BATCH_SIZE, shuffle=False)
images_test_loader = DataLoader(X_test, batch_size=BATCH_SIZE, shuffle=False)
masks_test_loader = DataLoader(y_test, batch_size=BATCH_SIZE, shuffle=False)

# # checking the dimensions
# for images, masks in zip(images_test_loader, masks_test_loader):
#     images = images.permute(0,3,1,2)
#     print(images.shape, masks.shape)

# U-Net implementation

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class UNetModel(nn.Module):
    """U-Net style encoder/decoder producing per-pixel class logits.

    The network has four encoder stages (two 3x3 convolutions with ReLU, then
    2x2 max pooling), a bottleneck, and four decoder stages (2x2 transposed
    convolution, concatenation with the matching encoder output, two 3x3
    convolutions with ReLU). A final 1x1 convolution maps to ``num_classes``
    channels; no activation is applied, so the output is raw logits.

    Args:
        in_channels: Number of channels of the input tensor.
        num_classes: Number of output channels (one logit map per class).
        base_channels: Width of the first encoder stage; each deeper stage
            doubles it, up to ``16 * base_channels`` in the bottleneck.

    The defaults reproduce the original fixed 3 -> 64 ... 1024 -> 2 layout,
    with identical layer names and creation order.
    """

    def __init__(self, in_channels: int = 3, num_classes: int = 2, base_channels: int = 64) -> None:
        super().__init__()

        c1 = base_channels
        c2, c3, c4, c5 = c1 * 2, c1 * 4, c1 * 8, c1 * 16

        # encoder block (downsampling)
        self.enc_c1 = nn.Conv2d(in_channels, c1, kernel_size=3, padding=1)
        self.enc_c2 = nn.Conv2d(c1, c1, kernel_size=3, padding=1)

        self.enc_c3 = nn.Conv2d(c1, c2, kernel_size=3, padding=1)
        self.enc_c4 = nn.Conv2d(c2, c2, kernel_size=3, padding=1)

        self.enc_c5 = nn.Conv2d(c2, c3, kernel_size=3, padding=1)
        self.enc_c6 = nn.Conv2d(c3, c3, kernel_size=3, padding=1)

        self.enc_c7 = nn.Conv2d(c3, c4, kernel_size=3, padding=1)
        self.enc_c8 = nn.Conv2d(c4, c4, kernel_size=3, padding=1)

        # bottleneck block
        self.b1 = nn.Conv2d(c4, c5, kernel_size=3, padding=1)
        self.b2 = nn.Conv2d(c5, c5, kernel_size=3, padding=1)

        # decoder block: the first convolution of every stage takes twice the
        # upsampled width because the encoder skip tensor is concatenated.
        self.up1 = nn.ConvTranspose2d(c5, c4, kernel_size=2, stride=2)
        self.dec_c1 = nn.Conv2d(c5, c4, kernel_size=3, padding=1)
        self.dec_c2 = nn.Conv2d(c4, c4, kernel_size=3, padding=1)

        self.up2 = nn.ConvTranspose2d(c4, c3, kernel_size=2, stride=2)
        self.dec_c3 = nn.Conv2d(c4, c3, kernel_size=3, padding=1)
        self.dec_c4 = nn.Conv2d(c3, c3, kernel_size=3, padding=1)

        self.up3 = nn.ConvTranspose2d(c3, c2, kernel_size=2, stride=2)
        self.dec_c5 = nn.Conv2d(c3, c2, kernel_size=3, padding=1)
        self.dec_c6 = nn.Conv2d(c2, c2, kernel_size=3, padding=1)

        self.up4 = nn.ConvTranspose2d(c2, c1, kernel_size=2, stride=2)
        self.dec_c7 = nn.Conv2d(c2, c1, kernel_size=3, padding=1)
        self.dec_c8 = nn.Conv2d(c1, c1, kernel_size=3, padding=1)

        # output layer
        self.output_layer = nn.Conv2d(c1, num_classes, kernel_size=1)

    @staticmethod
    def _double_conv(x, first, second):
        """Apply two convolution layers, each followed by ReLU."""
        return F.relu(second(F.relu(first(x))))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return logits of shape (N, num_classes, H, W) for input (N, C, H, W).

        Raises:
            ValueError: If H or W is not divisible by 16. The four 2x poolings
                would otherwise round down and the upsampled tensors would no
                longer match the encoder outputs they are concatenated with.
        """
        height, width = x.shape[-2:]
        if height % 16 != 0 or width % 16 != 0:
            raise ValueError(
                "UNetModel needs input height and width divisible by 16, "
                f"got {height}x{width}"
            )

        # encoder: keep each stage's output for the skip connections
        skip1 = self._double_conv(x, self.enc_c1, self.enc_c2)
        skip2 = self._double_conv(F.max_pool2d(skip1, 2), self.enc_c3, self.enc_c4)
        skip3 = self._double_conv(F.max_pool2d(skip2, 2), self.enc_c5, self.enc_c6)
        skip4 = self._double_conv(F.max_pool2d(skip3, 2), self.enc_c7, self.enc_c8)

        # bottleneck
        features = self._double_conv(F.max_pool2d(skip4, 2), self.b1, self.b2)

        # decoder: upsample, concatenate the skip tensor on the channel axis,
        # then refine with two convolutions
        features = torch.cat([skip4, self.up1(features)], dim=1)
        features = self._double_conv(features, self.dec_c1, self.dec_c2)

        features = torch.cat([skip3, self.up2(features)], dim=1)
        features = self._double_conv(features, self.dec_c3, self.dec_c4)

        features = torch.cat([skip2, self.up3(features)], dim=1)
        features = self._double_conv(features, self.dec_c5, self.dec_c6)

        features = torch.cat([skip1, self.up4(features)], dim=1)
        features = self._double_conv(features, self.dec_c7, self.dec_c8)

        # raw logits: the loss function applies the softmax itself
        return self.output_layer(features)


In [None]:
from torch.utils.data import TensorDataset

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")
print(f"using device : {device}")

model = UNetModel().to(device)
criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for multi-class masks
# NOTE(review): the learning rate is hard-coded here although LEARNING_RATE
# (1e-4) is defined with the other constants — confirm which one is intended.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop

# Keep every image together with its own mask in one dataset so that shuffling
# reorders the pairs as a unit. Zipping two independently shuffled loaders
# would combine each image batch with the masks of unrelated samples.
train_loader = DataLoader(
    TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train)),
    batch_size=BATCH_SIZE,
    shuffle=True,
)

epochs = 30
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for i, (images, masks) in enumerate(train_loader, start=1):
        images, masks = images.to(device), masks.to(device)

        # Images are stored channels-last (N, H, W, C); Conv2d needs (N, C, H, W).
        images = images.permute(0, 3, 1, 2)

        # CrossEntropyLoss needs integer class indices. Round before casting:
        # a bare .long() truncates, turning every mask value below 1.0
        # (e.g. 0.99 from a slightly off-white pixel) into background.
        masks = masks.round().long()

        optimizer.zero_grad()
        outputs = model(images)  # Forward pass

        loss = criterion(outputs, masks)  # Compute loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights

        running_loss += loss.item()
        print(f"end of {i}th batch of {epoch+1}th epoch")

    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss / len(train_loader):.4f}")

## Evaluation

In [None]:
# evaluation UNet model

import torch

def calculate_accuracy(output, target):
    """Return the fraction of positions where the predicted class equals target.

    Args:
        output: Logits with the class scores along dimension 1.
        target: Class indices compared element-wise against the predictions.

    Returns:
        The share of matching elements as a Python float.
    """
    # The highest-scoring class along dim 1 is the prediction for each position.
    predicted_classes = output.argmax(dim=1)

    # 1.0 where prediction and target agree, 0.0 elsewhere.
    hits = torch.eq(predicted_classes, target).to(torch.float32)

    return (hits.sum() / hits.numel()).item()

def calculate_iou(output, target, num_classes):
    """Return the mean intersection-over-union across the classes present.

    Args:
        output: Logits with the class scores along dimension 1.
        target: Class indices with the shape of ``output`` minus dimension 1.
        num_classes: Classes ``0 .. num_classes - 1`` are evaluated.

    Returns:
        The mean IoU as a Python float, averaged over the classes that occur
        in the prediction or the target. Classes absent from both are skipped;
        if every class is skipped the result is NaN.
    """
    # Convert logits to predicted class indices
    preds = torch.argmax(output, dim=1)

    class_ious = []
    for cls in range(num_classes):
        pred_mask = preds == cls
        target_mask = target == cls

        # Pixel counts are taken as exact Python ints.
        union = (pred_mask | target_mask).sum().item()  # TP + FP + FN
        if union == 0:
            # The class appears in neither tensor, so its IoU is undefined.
            # It is left out of the average; storing NaN would make the mean
            # itself NaN.
            continue

        intersection = (pred_mask & target_mask).sum().item()  # TP
        class_ious.append(intersection / union)

    if not class_ious:
        return float('nan')
    return sum(class_ious) / len(class_ious)

In [None]:
def evaluate_model(model, test_images, test_masks, num_classes, device):
    """Evaluate a segmentation model and return its mean accuracy and mean IoU.

    Args:
        model: Network called on each image batch after it is permuted from
            (N, H, W, C) to (N, C, H, W).
        test_images: Iterable of image batches.
        test_masks: Iterable of mask batches, consumed in lockstep with
            ``test_images``; batch i of both must describe the same samples.
        num_classes: Number of classes passed on to ``calculate_iou``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(avg_accuracy, avg_iou)``: the per-batch metrics averaged with equal
        weight per batch. Batches whose IoU is NaN are left out of the IoU
        average. Either value is NaN when there was nothing to average.
    """
    import math

    model.eval()  # Set model to evaluation mode
    accuracy_sum = 0.0
    iou_sum = 0.0
    num_batches = 0
    num_iou_batches = 0

    with torch.no_grad():  # Disable gradient computation
        for images, masks in zip(test_images, test_masks):
            images = images.to(device)
            masks = masks.to(device)

            images = images.permute(0, 3, 1, 2)  # channels-last -> channels-first

            # Round float masks before casting: .long() alone truncates, which
            # turns every value below 1.0 into class 0.
            if masks.is_floating_point():
                masks = masks.round()
            masks = masks.long()

            outputs = model(images)

            accuracy_sum += calculate_accuracy(outputs, masks)
            num_batches += 1

            # A NaN batch IoU would otherwise make the whole average NaN.
            iou = calculate_iou(outputs, masks, num_classes)
            if not math.isnan(iou):
                iou_sum += iou
                num_iou_batches += 1

    # Report NaN instead of dividing by zero when no batch contributed.
    avg_accuracy = accuracy_sum / num_batches if num_batches else float('nan')
    avg_iou = iou_sum / num_iou_batches if num_iou_batches else float('nan')

    return avg_accuracy, avg_iou

In [None]:
# Evaluate the network trained in the training cell. `model` is deliberately
# not re-created here: a new UNetModel() has freshly initialised weights, so
# the metrics would describe an untrained network.
num_classes = 2  # Adjust based on your dataset

# Evaluate the model
accuracy, mean_iou = evaluate_model(model, images_test_loader, masks_test_loader, num_classes, device)

print(f"Accuracy: {accuracy:.4f}")
print(f"Mean IoU: {mean_iou:.4f}")

# Using a pre-trained model from a library (segmentation_models_pytorch)

In [5]:
import segmentation_models_pytorch as smp

# Build a U-Net from segmentation_models_pytorch on top of a ResNet-34 encoder
# (other encoder names such as 'efficientnet-b0' or 'vgg16' can be used).
model = smp.Unet(
    classes=1,  # one output channel
    activation='sigmoid',  # applied to the output layer
    encoder_weights='imagenet',  # start from pre-trained encoder weights
    encoder_name='resnet34',
)


In [None]:
import cv2
import torch
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms
import segmentation_models_pytorch as smp

# Load a pre-trained U-Net model
model = smp.Unet(
    encoder_name='resnet34',  # You can use 'efficientnet-b0', 'vgg16', etc.
    encoder_weights='imagenet',  # Pre-trained weights
    classes=1,  # Number of output classes
    activation='sigmoid'  # Activation function for the output layer
)

# Load and preprocess the image
image_path = 'your_image.jpg'  # Replace with your image path
image = cv2.imread(image_path, cv2.IMREAD_COLOR)  # Read the image
# cv2.imread returns None instead of raising when the file is missing or
# unreadable; fail here with the path rather than inside cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image at {image_path!r}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB
image = cv2.resize(image, (256, 256))  # Resize to 256x256

# Convert the image to a PyTorch tensor and normalize
preprocess = transforms.Compose([
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])
input_tensor = preprocess(image).unsqueeze(0)  # Add batch dimension

# Move the tensor to the appropriate device (CPU or GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
input_tensor = input_tensor.to(device)
model = model.to(device)

# Perform inference
model.eval()
with torch.no_grad():
    output = model(input_tensor)

# Get the output mask
output_mask = output.squeeze().cpu().numpy()  # Remove batch dimension and move to CPU

# Apply a threshold to get a binary mask
threshold = 0.5  # You can adjust this threshold
binary_mask = (output_mask > threshold).astype(np.uint8)

# Plot the original image and the segmentation mask
plt.figure(figsize=(10, 5))

# Original image
plt.subplot(1, 2, 1)
plt.title('Original Image')
plt.imshow(image)
plt.axis('off')

# Segmentation mask
plt.subplot(1, 2, 2)
plt.title('Segmentation Mask')
plt.imshow(binary_mask, cmap='gray')
plt.axis('off')

plt.show()