<a href="https://colab.research.google.com/github/MRameezU/ISIC2017-Unet/blob/main/notebooks/isic_cancer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Show the attached NVIDIA GPU, if any (the recorded run printed "command not found")
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


## 0- Get Setup

In [2]:
!pip install --upgrade torch
!pip install --upgrade torchvision

import torch
import torchvision
print(f"torch version: {torch.__version__}")
print(f"torchvision version: {torchvision.__version__}")

torch version: 2.5.1+cu121
torchvision version: 0.20.1+cu121


Set up device-agnostic code: use the GPU when one is available, otherwise fall back to the CPU.

In [3]:
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cpu'

## 1 - Getting Data

In [4]:
import requests
import zipfile
from pathlib import Path
from tqdm.notebook import tqdm

# Temporary working area for the downloaded archives and their extracted contents
data_path = Path("data/")
train_data_path, binary_mask_data_path = data_path / "train", data_path / "binary"

# Training data archive; it is stored under data_path using the file name from its URL
train_zip_url = "https://isic-challenge-data.s3.amazonaws.com/2017/ISIC-2017_Training_Data.zip"
train_zip_file = data_path / train_zip_url.rsplit("/", 1)[-1]

# Binary ground-truth mask archive, stored the same way
train_binary_zip_url = "https://isic-challenge-data.s3.amazonaws.com/2017/ISIC-2017_Training_Part1_GroundTruth.zip"
train_binary_zip_file = data_path / train_binary_zip_url.rsplit("/", 1)[-1]

def download_file(url, dest_path, chunk_size=1024 * 1024, timeout=30):
    """Download ``url`` to ``dest_path`` while showing a tqdm progress bar.

    Args:
        url (str): Address of the file to fetch.
        dest_path (Path): Destination file; its ``name`` labels the progress bar.
        chunk_size (int, optional): Number of bytes requested per streamed chunk.
            Defaults to 1 MiB, which keeps the Python-level loop cheap for
            multi-gigabyte archives.
        timeout (float, optional): Seconds to wait for the server to connect or
            to send more data before giving up. Defaults to 30.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # The context manager releases the streamed connection even when writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()  # Raise an error for bad responses
        total_size = int(response.headers.get('content-length', 0))
        with open(dest_path, "wb") as file, tqdm(
            desc=f"Downloading {dest_path.name}",
            # An unknown size is passed as None so tqdm shows a plain counter
            total=total_size or None,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if not chunk:
                    # Skip empty keep-alive chunks
                    continue
                file.write(chunk)
                bar.update(len(chunk))
    print(f"Download Complete: {dest_path}")

def extract_zip(file_path, extract_to):
    """Unpack every member of the zip archive at ``file_path`` into ``extract_to``.

    Args:
        file_path (Path): Zip archive to read; its ``name`` appears in the log line.
        extract_to (str or Path): Directory that receives the archive contents.
    """
    archive = zipfile.ZipFile(file_path, mode="r")
    try:
        print(f"Extracting {file_path.name} to {extract_to}")
        archive.extractall(extract_to)
    finally:
        # Always release the archive handle, even if extraction fails
        archive.close()
    print(f"Extraction Complete: {extract_to}")


In [5]:
# Fetch and unpack both archives unless both extraction folders are already present
if not (train_data_path.is_dir() and binary_mask_data_path.is_dir()):
    print(f"Preparing data directories at {data_path}")

    # (log line, archive URL, local zip file, extraction folder) for each archive
    download_jobs = (
        (f"Downloading Training Data from: {train_zip_url}",
         train_zip_url, train_zip_file, train_data_path),
        (f"Downloading Binary Mask Data from: {train_binary_zip_url}",
         train_binary_zip_url, train_binary_zip_file, binary_mask_data_path),
    )
    for log_line, zip_url, zip_file, target_dir in download_jobs:
        # Creating the extraction folder also creates data_path, where the zip is saved
        target_dir.mkdir(parents=True, exist_ok=True)
        print(log_line)
        download_file(zip_url, zip_file)
        extract_zip(zip_file, target_dir)
else:
    print(f"{train_data_path} and {binary_mask_data_path} directories already exist.")

Preparing data directories at data
Downloading Training Data from: https://isic-challenge-data.s3.amazonaws.com/2017/ISIC-2017_Training_Data.zip


Downloading ISIC-2017_Training_Data.zip:   0%|          | 0.00/5.80G [00:00<?, ?B/s]

Download Complete: data/ISIC-2017_Training_Data.zip
Extracting ISIC-2017_Training_Data.zip to data/train
Extraction Complete: data/train
Downloading Binary Mask Data from: https://isic-challenge-data.s3.amazonaws.com/2017/ISIC-2017_Training_Part1_GroundTruth.zip


Downloading ISIC-2017_Training_Part1_GroundTruth.zip:   0%|          | 0.00/8.89M [00:00<?, ?B/s]

Download Complete: data/ISIC-2017_Training_Part1_GroundTruth.zip
Extracting ISIC-2017_Training_Part1_GroundTruth.zip to data/binary
Extraction Complete: data/binary


The downloaded training data contains the images and their respective superpixel masks; the binary masks have to be downloaded separately.

## 1.1 Separating the Inputs and Outputs
Our train data folder contains both the training images and the superpixel masks, so we separate them into different folders.

In [6]:
import os
from pathlib import Path
import shutil


def organize_files(dataset_folder, image_output_folder, superpixel_output_folder):
    """
    Split a mixed folder into one folder of images and one of superpixel masks.

    Files directly inside ``dataset_folder`` whose name ends in ``.jpg`` are moved to
    ``image_output_folder``; files whose name ends in ``_superpixels.png`` are moved to
    ``superpixel_output_folder``. Anything else (other files, sub-directories) is left
    in place.

    Args:
        dataset_folder (str or Path): Folder containing both images and masks.
        image_output_folder (str or Path): Folder that receives the images.
        superpixel_output_folder (str or Path): Folder that receives the superpixel masks.
    """
    source_dir = Path(dataset_folder)
    images_dir = Path(image_output_folder)
    superpixels_dir = Path(superpixel_output_folder)

    # Both destinations are created up front (no error if they already exist)
    for out_dir in (images_dir, superpixels_dir):
        out_dir.mkdir(parents=True, exist_ok=True)

    for entry in source_dir.iterdir():
        if not entry.is_file():
            continue
        # Pick the destination from the file-name suffix
        if entry.name.endswith(".jpg"):
            target_dir = images_dir
        elif entry.name.endswith("_superpixels.png"):
            target_dir = superpixels_dir
        else:
            continue
        shutil.move(str(entry), str(target_dir / entry.name))

    print(f"Files have been organized. Images moved to {images_dir}, masks to {superpixels_dir}.")

if __name__ == "__main__":
    # Source folder inside the extracted training data: data/train/ISIC-2017_Training_Data
    dataset_folder = train_data_path / "ISIC-2017_Training_Data"
    image_output_folder = "ISIC-2017_Data/Images"
    superpixel_output_folder = "ISIC-2017_Data/Superpixel"

    organize_files(dataset_folder, image_output_folder, superpixel_output_folder)


Files have been organized. Images moved to ISIC-2017_Data/Images, masks to ISIC-2017_Data/Superpixel.


Moving Binary masks

In [7]:
import shutil
# Gather the binary ground-truth masks under the consolidated ISIC-2017_Data folder
source_dir = binary_mask_data_path.joinpath("ISIC-2017_Training_Part1_GroundTruth")
destination_dir = Path("ISIC-2017_Data") / "Binary"
destination_dir.mkdir(parents=True, exist_ok=True)  # no error if it already exists

for file in source_dir.iterdir():
    # Only plain files are moved; sub-directories stay where they are
    if not file.is_file():
        continue
    shutil.move(str(file), str(destination_dir / file.name))

## Deleting the Extras
Deleting the `data_path` folder to save storage

In [8]:
# The files we need were moved out of data_path above, so remove it to free storage
if not data_path.is_dir():
    print(f"Folder '{data_path}' does not exist.")
else:
    shutil.rmtree(data_path)
    print(f"Folder '{data_path}' and all its subdirectories have been deleted.")

Folder 'data' and all its subdirectories have been deleted.


## Result
The `ISIC-2017_Data` folder contains the dataset for the ISIC 2017 skin cancer segmentation project. The dataset is organized into the following subfolders:

## Structure

### Subfolders

1. **Images/**
   - Contains the original images used for training and validation.

2. **Superpixel/**
   - Contains the superpixel masks generated for each image.

3. **Binary/**
   - Contains the binary masks indicating the regions of interest in each image.


## 2 - Become one with the data (data preparation and exploration)

In [9]:
import os
def walk_through(dir_path:str):
  """Print, for ``dir_path`` and every folder below it, how many sub-folders and files it holds."""
  for current_dir, subdirs, files in os.walk(dir_path):
    n_dirs, n_files = len(subdirs), len(files)
    print(f"There are {n_dirs} directories and {n_files} images in '{current_dir}'.")

In [10]:
# NOTE: this rebinds `data_path` (previously the temporary "data/" download folder) to the
# organized dataset root. Re-running the earlier clean-up cell after this one would
# therefore delete ISIC-2017_Data instead of the download folder.
data_path=Path("ISIC-2017_Data")
walk_through(data_path)

There are 3 directories and 0 images in 'ISIC-2017_Data'.
There are 0 directories and 2000 images in 'ISIC-2017_Data/Binary'.
There are 0 directories and 2000 images in 'ISIC-2017_Data/Superpixel'.
There are 0 directories and 2000 images in 'ISIC-2017_Data/Images'.


### 2.1 Visualize Images
Let's write some code to:

1. Get all of the image paths using `pathlib.Path.glob()` to find all of the files ending in .jpg.
2. Pick a random image path using Python's `random.choice()`.
3. And since we're working with images, we'll open the random image path using `PIL.Image.open()` (PIL stands for Python Imaging Library).


In [None]:
import random
from PIL import Image

# Collect every .jpg that sits exactly one folder below the dataset root
image_path_list = [jpg_path for jpg_path in data_path.glob("*/*.jpg")]

# Pick one of them at random and open it
random_image_path = random.choice(image_path_list)
img = Image.open(random_image_path)

# Report where the image came from and how large it is
print(
    f"Random Image Path: {random_image_path}",
    f"Image height: {img.height}",
    f"Image width: {img.width}",
    sep="\n",
)

# Leaving the image as the last expression makes the notebook render it
img

In [12]:
# Draw three distinct positions from the image list
random_images_path_idx = random.sample(range(len(image_path_list)), 3)
random_images_path_idx

[404, 1953, 942]

In [13]:
# Input images, in sorted path order
image_path_list = sorted(data_path.glob("*/*.jpg"))
# Binary masks
binary_mask_list = sorted(data_path.glob("Binary/*.png"))
# Superpixel masks
superpixel_mask_list = sorted(data_path.glob("Superpixel/*.png"))
# Show how many masks of each kind were found
len(binary_mask_list), len(superpixel_mask_list)

(2000, 2000)

In [14]:
import random
from PIL import Image
import matplotlib.pyplot as plt
from typing import List

def plot_random_images(
    image_list: List[str],
    binary_mask_list: List[str],
    superpixel_mask_list: List[str],
    num_samples: int = 3
):
    """
    Plots a specified number of random images alongside their binary and superpixel masks.

    The three lists are indexed in parallel: entry ``i`` of each list is drawn on the
    same row. Every file is closed again as soon as it has been drawn.

    Args:
        image_list (List[str]): List of file paths to input images.
        binary_mask_list (List[str]): List of file paths to binary mask images.
        superpixel_mask_list (List[str]): List of file paths to superpixel mask images.
        num_samples (int, optional): Number of random samples to display. Defaults to 3.

    Raises:
        ValueError: If the lengths of `image_list`, `binary_mask_list`, and `superpixel_mask_list` are not equal.
        ValueError: If `num_samples` is smaller than 1.
        ValueError: If `num_samples` is greater than the number of available images.

    Example:
        ```python
        plot_random_images(
            image_list=["image1.jpg", "image2.jpg"],
            binary_mask_list=["binary1.png", "binary2.png"],
            superpixel_mask_list=["superpixel1.png", "superpixel2.png"],
            num_samples=2
        )
        ```
    """
    # Check for consistency in list lengths
    if len(image_list) != len(binary_mask_list) or len(image_list) != len(superpixel_mask_list):
        raise ValueError("All input lists must have the same length.")

    # A figure needs at least one row; fail with a clear message before touching matplotlib
    if num_samples < 1:
        raise ValueError("num_samples must be at least 1.")

    # Ensure the number of samples is valid
    if num_samples > len(image_list):
        raise ValueError("num_samples cannot be greater than the number of images available.")

    # Randomly sample indices
    idx = random.sample(range(len(image_list)), num_samples)

    # squeeze=False always yields a 2-D grid of axes, so a single sample needs no special case
    fig, axes = plt.subplots(
        nrows=num_samples, ncols=3, figsize=(15, 5 * num_samples), squeeze=False
    )

    for row_axes, sample_idx in zip(axes, idx):
        # Column order: original image, binary mask, superpixel mask
        panels = (
            ("Image", image_list[sample_idx]),
            ("Binary Mask", binary_mask_list[sample_idx]),
            ("Superpixel Mask", superpixel_mask_list[sample_idx]),
        )
        for ax, (label, path) in zip(row_axes, panels):
            # The context manager closes the file once the picture has been handed to matplotlib
            with Image.open(path) as picture:
                ax.imshow(picture)
                ax.set_title(f"{label}: {sample_idx},Size:{picture.size}")
            ax.axis("off")

    # Adjust layout and display
    plt.tight_layout()
    plt.show()


In [None]:
# Show three randomly chosen image / binary mask / superpixel mask triplets
plot_random_images(image_path_list, binary_mask_list, superpixel_mask_list, num_samples=3)

### 2.2 - Combining Masks
1. Weighted Average or Overlaying
2. Union
3. Intersection
4. Boundary Enhancement

In [17]:
import cv2
def load_masks(binary_mask_path_list, superpixel_mask_path_list,):
    """Load one randomly chosen pair of files as grayscale arrays.

    A single random index is drawn and the entries at that index of both lists are
    read with OpenCV in grayscale mode.

    Args:
        binary_mask_path_list: Paths (``str`` or ``Path``) of the first set of images.
        superpixel_mask_path_list: Paths (``str`` or ``Path``) of the second set of
            images; must have the same length as ``binary_mask_path_list``.

    Returns:
        tuple: ``(binary_mask, superpixel_mask)`` as returned by ``cv2.imread``.

    Raises:
        ValueError: If the two lists differ in length.
        FileNotFoundError: If OpenCV cannot read one of the selected files.
    """
    # Check for consistency in list lengths
    if len(binary_mask_path_list) != len(superpixel_mask_path_list):
        raise ValueError("The binary and superpixel mask lists must have the same length.")

    # Randomly select a single index shared by both lists
    idx = random.choice(range(len(binary_mask_path_list)))
    binary_mask_path = binary_mask_path_list[idx]
    superpixel_mask_path = superpixel_mask_path_list[idx]

    # str() keeps pathlib.Path entries working with OpenCV builds that only accept string filenames
    binary_mask = cv2.imread(str(binary_mask_path), cv2.IMREAD_GRAYSCALE)
    superpixel_mask = cv2.imread(str(superpixel_mask_path), cv2.IMREAD_GRAYSCALE)

    # cv2.imread reports failure by returning None instead of raising
    for path, mask in ((binary_mask_path, binary_mask), (superpixel_mask_path, superpixel_mask)):
        if mask is None:
            raise FileNotFoundError(f"Could not read image file: {path}")

    return binary_mask, superpixel_mask

# Pick one random (binary mask, superpixel mask) pair to experiment with
bm, sm = load_masks(binary_mask_list, superpixel_mask_list)

In [None]:
# cv2_imshow is imported from google.colab, so this cell presumably only runs inside Colab
from google.colab.patches import cv2_imshow
# Display the randomly selected binary mask
cv2_imshow(bm)

In [None]:
from google.colab.patches import cv2_imshow
# Display the superpixel mask that load_masks read for the same random index
cv2_imshow(sm)

### 2.2.1 - Weighted Average or Overlaying

In [20]:
import cv2
import numpy as np
from pathlib import Path

def weighted_average_combination(binary_mask,superpixel_mask,alpha=0.5):
  """Blend two 8-bit masks into one ``uint8`` array.

  Both inputs are scaled from [0, 255] to [0, 1], mixed as
  ``alpha * binary_mask + (1 - alpha) * superpixel_mask``, clipped to [0, 1]
  and scaled back to [0, 255].

  Args:
    binary_mask: Array weighted by ``alpha``.
    superpixel_mask: Array weighted by ``1 - alpha``.
    alpha (float, optional): Weight of ``binary_mask``. Defaults to 0.5.

  Returns:
    numpy.ndarray: The blended mask with dtype ``uint8``.
  """
  full_scale = 255.0

  # Work in float32 on a 0..1 scale
  binary_unit = binary_mask.astype(np.float32) / full_scale
  superpixel_unit = superpixel_mask.astype(np.float32) / full_scale

  blended = alpha * binary_unit + (1 - alpha) * superpixel_unit
  blended = np.clip(blended, 0, 1)

  # Back to the 0..255 range; the cast truncates fractional values
  rescaled = blended * 255
  return rescaled.astype(np.uint8)

In [None]:
# Blend the binary and superpixel masks with the default alpha of 0.5 and display the result
wac=weighted_average_combination(bm,sm)
cv2_imshow(wac)

Let's try to overlay the original image with each mask to better understand our data.

Overlaying the binary mask with the original image

In [28]:
# load_masks is reused here to fetch an image instead of a superpixel mask: passing the
# image paths as the second list returns the binary mask and the grayscale image for
# the same random index.
bm, img = load_masks(binary_mask_list, image_path_list)

In [None]:
# Display the image as loaded by load_masks (read in grayscale)
cv2_imshow(img)

In [None]:
# Overlay the binary mask on the grayscale image with equal weights and display the blend
wac=weighted_average_combination(bm,img)
cv2_imshow(wac)

Overlaying the superpixel mask with the original image

In [32]:
# Same trick with the roles swapped: the image paths stand in for the binary-mask list,
# so the call returns the grayscale image and the superpixel mask for one random index.
img, sm = load_masks(image_path_list, superpixel_mask_list)

In [None]:
# Overlay the grayscale image and the superpixel mask with equal weights and display the blend
wac=weighted_average_combination(img,sm)
cv2_imshow(wac)

## 3 - Creating our model

In [None]:
import torch
import torch.nn as nn


class conv_block(nn.Module):
  """Two consecutive 3x3 convolution -> batch norm -> ReLU stages.

  The convolutions use stride 1 and padding 1, so height and width are preserved;
  only the channel count changes from ``ch_in`` to ``ch_out``.

  Args:
    ch_in (int): Number of input channels.
    ch_out (int): Number of output channels of both convolutions.
  """
  def __init__(self,ch_in,ch_out):
    super().__init__()
    self.conv = nn.Sequential(
        nn.Conv2d(in_channels=ch_in,out_channels=ch_out,kernel_size=3,stride=1,padding=1,bias=True),
        nn.BatchNorm2d(ch_out),
        # The activation class is spelled nn.ReLU; nn.Relu does not exist
        nn.ReLU(inplace=True),
        nn.Conv2d(ch_out,ch_out,kernel_size=3,stride=1,padding=1,bias=True),
        nn.BatchNorm2d(ch_out),
        nn.ReLU(inplace=True)
    )
  def forward(self,x):
    """Apply both convolution stages to ``x`` and return the result."""
    return self.conv(x)

class conv_transpose(nn.Module):
  """Upsampling stage: double the spatial size, then 3x3 convolution -> batch norm -> ReLU.

  Args:
    ch_in (int): Number of input channels.
    ch_out (int): Number of output channels.
  """
  def __init__(self,ch_in,ch_out):
    super().__init__()
    stages = [
        nn.Upsample(scale_factor=2),
        nn.Conv2d(in_channels=ch_in,out_channels=ch_out,kernel_size=3,stride=1,padding=1,bias=True),
        nn.BatchNorm2d(num_features=ch_out),
        nn.ReLU(inplace=True),
    ]
    self.cov_trans = nn.Sequential(*stages)

  def forward(self,x):
    """Upsample ``x`` and return the convolved, normalized and activated result."""
    upsampled = self.cov_trans(x)
    return upsampled

In [35]:
from collections import OrderedDict
import torch
import torch.nn as nn

class UNET (nn.Module):
  """U-Net style encoder/decoder that maps an image to a per-pixel value in [0, 1].

  The encoder applies four conv blocks, each followed by 2x2 max pooling; the decoder
  mirrors it with four 2x transposed convolutions, concatenating the matching encoder
  output (skip connection) before each decoder block. Because the input is halved four
  times, its height and width should be divisible by 16 so that the upsampled maps
  line up with the encoder maps for concatenation.

  Args:
    in_channels (int): Number of channels of the input image. Defaults to 3.
    out_channels (int): Number of channels of the output map. Defaults to 1.
    init_features (int): Channel count of the first encoder block; it doubles at
      every encoder level. Defaults to 32.
  """
  def __init__(self,in_channels=3,out_channels=1,init_features=32):
    super().__init__()

    features=init_features
    # Encoder
    self.encoder1=UNET._block(in_channels,features,name="encoder_1")
    self.pool1=nn.MaxPool2d(kernel_size=2,stride=2)
    self.encoder2=UNET._block(features,features*2,name="encoder_2")
    self.pool2=nn.MaxPool2d(kernel_size=2,stride=2)
    self.encoder3=UNET._block(features*2,features*4,name="encoder_3")
    # No trailing comma here: it would store a tuple instead of a callable module
    self.pool3=nn.MaxPool2d(kernel_size=2,stride=2)
    self.encoder4=UNET._block(features*4,features*8,name="encoder_4")
    self.pool4=nn.MaxPool2d(kernel_size=2,stride=2)

    # Bottleneck
    self.bottleneck=UNET._block(features*8,features*16,name="bottleneck")

    # Decoder: every block receives the upsampled map concatenated with the skip
    # connection, hence twice the channel count on its input
    self.upconv4=nn.ConvTranspose2d(in_channels=features*16,out_channels=features*8,kernel_size=2,stride=2)
    self.decoder4=UNET._block((features*8)*2,features*8,name="decoder_4")

    self.upconv3=nn.ConvTranspose2d(in_channels=features*8,out_channels=features*4,kernel_size=2,stride=2)
    self.decoder3=UNET._block((features*4)*2,features*4,name="decoder_3")

    self.upconv2=nn.ConvTranspose2d(in_channels=features*4,out_channels=features*2,kernel_size=2,stride=2)
    self.decoder2=UNET._block((features*2)*2,features*2,name="decoder_2")

    self.upconv1=nn.ConvTranspose2d(in_channels=features*2,out_channels=features,kernel_size=2,stride=2)
    self.decoder1=UNET._block(features*2,features,name="decoder_1")

    # Output: 1x1 convolution down to the requested number of channels
    self.conv=nn.Conv2d(in_channels=features,out_channels=out_channels,kernel_size=1)

  def forward(self,x):
    """Run the network on ``x`` and return the sigmoid-activated segmentation map."""
    # encoder
    encoder_1=self.encoder1(x)
    encoder_2=self.encoder2(self.pool1(encoder_1))
    encoder_3=self.encoder3(self.pool2(encoder_2))
    encoder_4=self.encoder4(self.pool3(encoder_3))
    # bottleneck
    bottleneck=self.bottleneck(self.pool4(encoder_4))
    # decoder
    decoder_4=self.upconv4(bottleneck)
    decoder_4=torch.cat((decoder_4,encoder_4),dim=1)
    decoder_4=self.decoder4(decoder_4)

    decoder_3=self.upconv3(decoder_4)
    decoder_3=torch.cat(tensors=(decoder_3,encoder_3),dim=1)
    decoder_3=self.decoder3(decoder_3)

    decoder_2=self.upconv2(decoder_3)
    decoder_2=torch.cat(tensors=(decoder_2,encoder_2),dim=1)
    # Feed this level's concatenated map (not the previous level's output)
    decoder_2=self.decoder2(decoder_2)

    decoder_1=self.upconv1(decoder_2)
    decoder_1=torch.cat(tensors=(decoder_1,encoder_1),dim=1)
    decoder_1=self.decoder1(decoder_1)
    # final segmentation map
    return torch.sigmoid(self.conv(decoder_1))

  @staticmethod
  def _block(in_channels,features,name):
    """Build a named (3x3 conv -> batch norm -> ReLU) x 2 block.

    Args:
      in_channels (int): Channels entering the first convolution.
      features (int): Channels produced by both convolutions.
      name (str): Prefix used for the layer names inside the returned ``nn.Sequential``.

    Returns:
      nn.Sequential: The assembled block.
    """
    seq=nn.Sequential(
        OrderedDict([
            (name + "conv1",nn.Conv2d(in_channels=in_channels,out_channels=features,kernel_size=3,padding=1,bias=False)),
            (name + "norm1",nn.BatchNorm2d(num_features=features)),
            (name + "relu1",nn.ReLU(inplace=True)),

            (name + "conv2",nn.Conv2d(in_channels=features,out_channels=features,kernel_size=3,padding=1,bias=False)),
            (name + "norm2",nn.BatchNorm2d(num_features=features)),
            (name + "relu2",nn.ReLU(inplace=True)),
        ])
    )
    return seq

