Malignant Melanocytic Proliferation detector through Super-Pixel Texture Feature Graph Neural Network

SLIC Superpixel Model Interactive Python Notebook

Keith Miller

In [1]:
import numpy as np
from trainingImageLoader import ImageLoader
import mahotas as mh
import cv2
from skimage.util import img_as_ubyte
import torch
from torch_geometric.data import Data
import torch_geometric.utils as pyg_utils
from torch_geometric.loader import DataLoader
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
import SLIC_graph


In [2]:
PIXEL_DISTANCE = 10
SUPERPIXEL_COUNT = 20

In [None]:
training_data_list = []
training_folder_ = 'ISIC-2017-training' # Image folder path (I used the ISIC folders directly from ISIC website)
training_metadata_ = 'challenge-2017-training_metadata.csv' # metadata csv file path (metadata cvs file provided with ISIC dataset)
training_data_loader = ImageLoader(training_folder_)
training_data_loader.load_metadata(training_metadata_)
for i in range(training_data_loader.num_images):
    image_, class_ = training_data_loader.iterate(return_type=cv2.IMREAD_COLOR)
    haralick_features = []
    '''
    scale_percent = 50 # percent of original size
    width = int(image_.shape[1] * scale_percent / 100)
    height = int(image_.shape[0] * scale_percent / 100)
    dim = (width, height)
    image_ = cv2.resize(image_, dim, interpolation = cv2.INTER_AREA)
    '''
    segments_, adjacency_matrix = SLIC_graph.slic_graph(image_, SUPERPIXEL_COUNT, 10)

    gray_image = cv2.cvtColor(image_, cv2.COLOR_BGR2GRAY)

    for label in range(SUPERPIXEL_COUNT):
        mask = (segments_ == label)
        patch = gray_image * mask
        patch_cropped = patch[mask]

        if len(patch_cropped) == 0:
             feats = np.zeros(26)  # default zero vector for empty patches
        else:
            try: #there are a couple of images that give errors, I need to kick them out, but for now I am using a try/except block
                feats_mean = mh.features.haralick(patch, ignore_zeros=True, distance=PIXEL_DISTANCE).mean(axis=0)
                feats_max = mh.features.haralick(patch, ignore_zeros=True, distance=PIXEL_DISTANCE).max(axis=0)
                feats = np.concat(feats_mean, feats_max, axis = 1)
            except:
                feats = np.zeros(26)

        haralick_features.append(feats)
    haralick_features = np.array(haralick_features)

    edge_index = pyg_utils.dense_to_sparse(torch.tensor(adjacency_matrix))[0]
    data = Data(x=torch.tensor(haralick_features, dtype=torch.float),
                edge_index=edge_index,
                y=torch.tensor([class_], dtype=torch.long))  # label is 0 or 1

    # Add to dataset
    training_data_list.append(data)    

0
ISIC_0000000.jpg
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
1
ISIC_0000001.jpg
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


: 

In [None]:
test_data_list = []
test_folder_ = 'ISIC-images_2017_test' # Image folder path (I used the ISIC folders directly from ISIC website)
test_metadata_ = 'challenge-2017-test_metadata.csv' # metadata csv file path (metadata cvs file provided with ISIC dataset)
test_data_loader = ImageLoader(test_folder_)
test_data_loader.load_metadata(test_metadata_)
for i in range(test_data_loader.num_images):
    image_, class_ = test_data_loader.iterate(cv2.IMREAD_COLOR_RGB)
    haralick_features = []
    '''
    scale_percent = 50 # percent of original size
    width = int(image_.shape[1] * scale_percent / 100)
    height = int(image_.shape[0] * scale_percent / 100)
    dim = (width, height)
    image_ = cv2.resize(image_, dim, interpolation = cv2.INTER_AREA)
    '''
    segments_, adjacency_matrix = SLIC_graph.slic_graph(image_, SUPERPIXEL_COUNT, 10)

    gray_image = cv2.cvtColor(image_, cv2.COLOR_BGR2GRAY)

    for label in range(SUPERPIXEL_COUNT):
        mask = (segments_ == label)
        patch = gray_image * mask
        patch_cropped = patch[mask]

        if len(patch_cropped) == 0:
            feats = np.zeros(26)  # default zero vector for empty patches
        else:
            try: #there are a couple of images that give errors, I need to kick them out, but for now I am using a try/except block
            # Compute Haralick features and take mean over directions
                feats_mean = mh.features.haralick(patch, ignore_zeros=True, distance=PIXEL_DISTANCE).mean(axis=0)
                feats_max = mh.features.haralick(patch, ignore_zeros=True, distance=PIXEL_DISTANCE).max(axis=0)
                feats = np.concat(feats_mean, feats_max, axis = 1)
            except:
                feats = np.zeros(26)

        haralick_features.append(feats)
    haralick_features = np.array(haralick_features)

    edge_index = pyg_utils.dense_to_sparse(torch.tensor(adjacency_matrix))[0]
    data = Data(x=torch.tensor(haralick_features, dtype=torch.float),
                edge_index=edge_index,
                y=torch.tensor([class_], dtype=torch.long))  # label is 0 or 1

    # Add to dataset
    test_data_list.append(data) 

<bound method NDFrame.head of           isic_id attribution copyright_license  age_approx  \
0    ISIC_0001769   Anonymous              CC-0        15.0   
1    ISIC_0001852   Anonymous              CC-0        10.0   
2    ISIC_0001871   Anonymous              CC-0        15.0   
3    ISIC_0003462   Anonymous              CC-0        15.0   
4    ISIC_0003539   Anonymous              CC-0        15.0   
..            ...         ...               ...         ...   
145  ISIC_0015443   Anonymous              CC-0        50.0   
146  ISIC_0015445   Anonymous              CC-0        50.0   
147  ISIC_0015483   Anonymous              CC-0        50.0   
148  ISIC_0015496   Anonymous              CC-0        20.0   
149  ISIC_0015627   Anonymous              CC-0        15.0   

    anatom_site_general anatom_site_special benign_malignant  \
0                   NaN                 NaN           benign   
1                   NaN                 NaN           benign   
2                   N

In [None]:
#going to be 120%, I don't understand what's going on here 

# GNN model definition
class GridGNNClassifier(torch.nn.Module):
    def __init__(self, num_features, hidden_channels, num_classes=2):
        super(GridGNNClassifier, self).__init__()
        self.conv1 = GCNConv(num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.fc = torch.nn.Linear(hidden_channels, num_classes)

    def forward(self, x, edge_index, batch):
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        x = global_mean_pool(x, batch)  # Corrected pooling
        x = self.fc(x)
        return F.log_softmax(x, dim=-1)

# Instantiate model
model = GridGNNClassifier(num_features=13, hidden_channels=32)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# DataLoader for batching (e.g., batch size = 32)
loader = DataLoader(training_data_list, batch_size=128, shuffle=True)

model.train()
for epoch in range(500):
    total_loss = 0
    for batch_data in loader:
        optimizer.zero_grad()
        out = model(batch_data.x, batch_data.edge_index, batch_data.batch)
        loss = F.nll_loss(out, batch_data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(loader)
    print(f'Epoch {epoch+1}, Avg Loss: {avg_loss:.4f}')

Epoch 1, Avg Loss: 17.1356
Epoch 2, Avg Loss: 3.1339
Epoch 3, Avg Loss: 0.8737
Epoch 4, Avg Loss: 0.5386
Epoch 5, Avg Loss: 0.5204
Epoch 6, Avg Loss: 0.5102
Epoch 7, Avg Loss: 0.5215
Epoch 8, Avg Loss: 0.6076
Epoch 9, Avg Loss: 0.5310
Epoch 10, Avg Loss: 0.4998
Epoch 11, Avg Loss: 0.5279
Epoch 12, Avg Loss: 0.5228
Epoch 13, Avg Loss: 0.5482
Epoch 14, Avg Loss: 0.5427
Epoch 15, Avg Loss: 0.5003
Epoch 16, Avg Loss: 0.5435
Epoch 17, Avg Loss: 0.5216
Epoch 18, Avg Loss: 0.5068
Epoch 19, Avg Loss: 0.4899
Epoch 20, Avg Loss: 0.4888
Epoch 21, Avg Loss: 0.5062
Epoch 22, Avg Loss: 0.4875
Epoch 23, Avg Loss: 0.4977
Epoch 24, Avg Loss: 0.5047
Epoch 25, Avg Loss: 0.4942
Epoch 26, Avg Loss: 0.4996
Epoch 27, Avg Loss: 0.4972
Epoch 28, Avg Loss: 0.5037
Epoch 29, Avg Loss: 0.4886
Epoch 30, Avg Loss: 0.4958
Epoch 31, Avg Loss: 0.4911
Epoch 32, Avg Loss: 0.5055
Epoch 33, Avg Loss: 0.5201
Epoch 34, Avg Loss: 0.4976
Epoch 35, Avg Loss: 0.4915
Epoch 36, Avg Loss: 0.4874
Epoch 37, Avg Loss: 0.4980
Epoch 38,

In [None]:
model.eval()
test_loader = DataLoader(test_data_list, batch_size=100)

correct = 0
total = 0
for batch_data in test_loader:
    preds = model(batch_data.x, batch_data.edge_index, batch_data.batch).argmax(dim=1)
    correct += (preds == batch_data.y).sum().item()
    total += batch_data.y.size(0)

accuracy = correct / total
print(f'Test Accuracy: {accuracy:.4f}')

Test Accuracy: 0.8000
