<a href="https://colab.research.google.com/github/NahinAlam001/499B/blob/classification/Segmented.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import gdown
import zipfile

gdown.download('https://drive.google.com/uc?id=1JqEe1jdx5-wzDB_phKNvMIURPSpzWJGm', 'image.zip', quiet=False)
with zipfile.ZipFile('image.zip', 'r') as zip_ref:
  zip_ref.extractall()

Downloading...
From (original): https://drive.google.com/uc?id=1JqEe1jdx5-wzDB_phKNvMIURPSpzWJGm
From (redirected): https://drive.google.com/uc?id=1JqEe1jdx5-wzDB_phKNvMIURPSpzWJGm&confirm=t&uuid=f67411fe-beba-4838-8a49-c429aac1b33a
To: /content/image.zip
100%|██████████| 149M/149M [00:00<00:00, 279MB/s]


In [5]:
from pathlib import Path

image_path = Path('images')

In [6]:
import os

def walk_through_dir(dir_path):
  for dirpath, dirnames, filenames in os.walk(dir_path):
      print(f'There are {len(dirnames)} folders and {len(filenames)} files in {dirpath}')

walk_through_dir(image_path)

There are 3 folders and 0 files in images
There are 2 folders and 0 files in images/Atypical Nevus
There are 0 folders and 80 files in images/Atypical Nevus/masks
There are 0 folders and 80 files in images/Atypical Nevus/images
There are 2 folders and 0 files in images/Common Nevus
There are 0 folders and 80 files in images/Common Nevus/masks
There are 0 folders and 80 files in images/Common Nevus/images
There are 2 folders and 0 files in images/Melanoma
There are 0 folders and 40 files in images/Melanoma/masks
There are 0 folders and 40 files in images/Melanoma/images


In [7]:
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset, random_split
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [8]:
def ph2(dataset_dir, output_dir, class_names=("Melanoma", "Common Nevus", "Atypical Nevus"),
        image_ext='jpg', mask_ext='jpg', train_ratio=0.7, val_ratio=0.1, split_random=True, random_seed=None):

    if random_seed is not None:
        random.seed(random_seed)

    if train_ratio + val_ratio > 1:
        raise ValueError("train_ratio + val_ratio is too big.")

    dataset_dir = Path(dataset_dir) / "images"  # Assuming images are under 'images' folder
    train_val_test_images = []
    train_val_test_masks = []

    for class_name in class_names:
        class_dir = dataset_dir / class_name
        for image_path in class_dir.iterdir():
            if image_path.suffix.lower() == f".{image_ext}":
                mask_path = class_dir.parent / "masks" / f"{image_path.stem}_mask.{mask_ext}"
                train_val_test_images.append(image_path)
                train_val_test_masks.append(mask_path)

    train_val_test_data = list(zip(train_val_test_images, train_val_test_masks))

    train_count = int(len(train_val_test_data) * train_ratio)
    val_count = int(len(train_val_test_data) * val_ratio)

    if split_random:
        random.shuffle(train_val_test_data)

    train_data = train_val_test_data[:train_count]
    val_data = train_val_test_data[train_count:train_count + val_count]
    test_data = train_val_test_data[train_count + val_count:]

    __save_output(output_dir, train_data, val_data, test_data)

def __save_output(output_dir, train_data, val_data, test_data):
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    def create_dir(data_type):
        data_dir = output_dir / data_type
        data_dir.mkdir(parents=True, exist_ok=True)
        return data_dir

    def write_data(data_type, data, data_dir):
        if len(data) != 0:
            fp_images = open(data_dir.joinpath(f"{data_type}_images.txt"), 'w')
            fp_masks = open(data_dir.joinpath(f"{data_type}_masks.txt"), 'w')
            for item in data:
                fp_images.write("%s\n" % item[0])
                fp_masks.write("%s\n" % item[1])
            fp_images.close()
            fp_masks.close()

    train_dir = create_dir("train")
    val_dir = create_dir("val")
    test_dir = create_dir("test")

    write_data("train", train_data, train_dir)
    write_data("val", val_data, val_dir)

In [9]:
def __save_output(output_dir, train_data, val_data, test_data):
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    def create_dir(data_type):
        data_dir = output_dir / data_type
        data_dir.mkdir(parents=True, exist_ok=True)
        return data_dir

    def write_data(data_type, data, data_dir):
        if len(data) != 0:
            fp_images = open(data_dir.joinpath(f"{data_type}_images.txt"), 'w')
            fp_masks = open(data_dir.joinpath(f"{data_type}_masks.txt"), 'w')
            for item in data:
                fp_images.write("%s\n" % item[0])
                fp_masks.write("%s\n" % item[1])
            fp_images.close()
            fp_masks.close()

    train_dir = create_dir("train")
    val_dir = create_dir("val")
    test_dir = create_dir("test")

    write_data("train", train_data, train_dir)
    write_data("val", val_data, val_dir)

In [10]:
def get_data(data_dir, train=True):
    """Loads image and mask data from directories."""
    # Define transformations
    image_transform = transforms.Compose([
        transforms.Resize((577, 769)),  # Resize images to the same dimensions
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    mask_transform = transforms.Compose([
        transforms.Resize((577, 769)),  # Resize masks to the same dimensions
        transforms.ToTensor()
    ])

    classes = ['Melanoma', 'Common Nevus', 'Atypical Nevus']
    image_datasets = {x: datasets.ImageFolder(root='images' + "/" + x, transform=image_transform) for x in classes}
    mask_datasets = {x: datasets.ImageFolder(root='images' + "/" + x, transform=mask_transform) for x in classes}

    # Concatenate datasets for each class
    image_data = torch.utils.data.ConcatDataset([image_datasets[x] for x in classes])
    mask_data = torch.utils.data.ConcatDataset([mask_datasets[x] for x in classes])

    # Combine image and mask datasets
    dataset = [(image_data[i][0], mask_data[i][0].squeeze(), image_data[i][1]) for i in range(len(image_data))]

    if train:
        dataset = dataset[:int(0.7*len(dataset))]  # Considering only the training part

    return dataset

In [11]:
class CNNModel_Seg(nn.Module):
    def __init__(self, num_classes):
        super(CNNModel_Seg, self).__init__()
        # Change the number of input channels to 6 (3 for image, 3 for mask)
        self.conv1 = nn.Conv2d(6, 16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.pool3 = nn.MaxPool2d(2, 2)
        # Upsampling layers to upsample back to original image size
        self.upconv1 = nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2)
        self.upconv2 = nn.ConvTranspose2d(32, 16, kernel_size=2, stride=2)
        self.upconv3 = nn.ConvTranspose2d(16, num_classes, kernel_size=2, stride=2)

    def forward(self, x):
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        x = self.pool3(F.relu(self.bn3(self.conv3(x))))
        x = F.relu(self.upconv1(x))
        x = F.relu(self.upconv2(x))
        x = self.upconv3(x)
        return x.squeeze(1)

In [12]:
def train_model(model, train_data, val_data, optimizer, criterion, num_epochs, device):
    """Trains the CNN model with defined optimizer, criterion, and epochs."""
    train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=32, shuffle=False)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, masks, labels in train_loader:
            images, masks, labels = images.to(device), masks.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(torch.cat((images, masks), dim=1))

            # Ensure the output tensor has the same spatial dimensions as the masks
            outputs = F.interpolate(outputs, size=masks.shape[2:], mode='bilinear', align_corners=True)

            loss = criterion(outputs, masks)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        model.eval()
        eval_loss = 0.0
        with torch.no_grad():
            for images, masks, labels in val_loader:
                images, masks, labels = images.to(device), masks.to(device), labels.to(device)
                outputs = model(torch.cat((images, masks), dim=1))

                # Ensure the output tensor has the same spatial dimensions as the masks
                outputs = F.interpolate(outputs, size=masks.shape[2:], mode='bilinear', align_corners=True)

                eval_loss += criterion(outputs, masks).item()

        epoch_loss = running_loss / len(train_loader)
        eval_loss = eval_loss / len(val_loader)
        print(f"Epoch {epoch + 1}/{num_epochs} - Train Loss: {epoch_loss:.4f} - Val Loss: {eval_loss:.4f}")

In [13]:
full_dataset = get_data("images")
# Device selection
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Determine the lengths of the splits
total_size = len(full_dataset)
train_size = int(0.7 * total_size)  # 70% for training
valid_size = int(0.15 * total_size)  # 15% for validation
test_size = total_size - train_size - valid_size  # 15% for testing

# Create the splits
train_data, valid_data, test_data = random_split(full_dataset, [train_size, valid_size, test_size])

num_classes = 3

model = CNNModel_Seg(num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()

train_model(model, train_data, valid_data, optimizer, criterion, 10, device)

Epoch 1/10 - Train Loss: 1.9958 - Val Loss: 2.0602
Epoch 2/10 - Train Loss: 1.9853 - Val Loss: 2.0522
Epoch 3/10 - Train Loss: 1.9641 - Val Loss: 2.0395
Epoch 4/10 - Train Loss: 1.9626 - Val Loss: 2.0275
Epoch 5/10 - Train Loss: 1.9619 - Val Loss: 2.0180
Epoch 6/10 - Train Loss: 1.9596 - Val Loss: 2.0161
Epoch 7/10 - Train Loss: 1.9649 - Val Loss: 2.0139
Epoch 8/10 - Train Loss: 1.9453 - Val Loss: 2.0125
Epoch 9/10 - Train Loss: 1.9322 - Val Loss: 2.0114
Epoch 10/10 - Train Loss: 1.9075 - Val Loss: 2.0105


In [14]:
def evaluate_model(model, test_data, device):
    test_loader = DataLoader(test_data, batch_size=32, shuffle=False)
    model.eval()
    with torch.no_grad():
        y_pred = []  # List to store predicted class labels
        y_true = []  # List to store ground truth class labels
        for images, masks, labels in test_loader:
            images, masks = images.to(device), masks.to(device)
            outputs = model(torch.cat((images, masks), dim=1))

            # Process outputs for evaluation (assuming your model outputs probabilities)
            predicted_classes = torch.argmax(outputs, dim=1).cpu().numpy().flatten()
            y_pred.extend(predicted_classes)
            y_true.extend(masks.cpu().numpy().flatten())

    # Calculate evaluation metrics
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    print(f"Test Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")

In [None]:
# Save the model (optional, but recommended)
# torch.save(model.state_dict(), 'clssification.pt')  # Replace with your desired save path
# Evaluate the model on the test set
evaluate_model(model, test_data, device)