In [1]:
'''
Usage:
1. Directory with data sets must be placed in directory named "input"
2. Input directory must be in the same directory as the .ipynb file with this sript
3. Inside each data set directory there must be three directories named: images, labels and masks.
4. Filenames of labels and masks files must be the same as image filename.
'''

'\nUsage:\n1. Directory with data sets must be placed in directory named "input"\n2. Input directory must be in the same directory as the .ipynb file with this sript\n3. Inside each data set directory there must be three directories named: images, labels and masks.\n4. Filenames of labels and masks files must be the same as image filename.\n'

In [2]:
import numpy as np
import os
import collections
import matplotlib.pyplot as plt
import skimage.future.graph as skigraph
import shutil
import pickle
import keras
import tensorflow as tf
import networkx as nx
import time
import sys

from kgcnn.literature.GCN import make_model
from kgcnn.utils.data import ragged_tensor_from_nested_numpy
from sklearn.model_selection import train_test_split
from sklearn.cluster import DBSCAN
from kgcnn.utils.learning import LinearLearningRateScheduler
from PIL import Image, ImageOps
import radiomics
import numpy.ma as ma
import nrrd
import warnings
from collections import defaultdict

In [3]:
# script parameterss
DATA_SET_DIR_NAME = "data_set_2"
PYRADIOMICS_FEATURES = [
    "original_firstorder_Mean",
    "original_firstorder_Variance",
    "original_glcm_ClusterTendency",
    "original_glcm_Correlation",
    "original_ngtdm_Contrast",
    "original_glrlm_RunEntropy",
    "original_gldm_DependenceEntropy",
    "original_gldm_SmallDependenceEmphasis",
    "original_glrlm_GrayLevelNonUniformity",
    "original_ngtdm_Busyness",
    "original_glszm_ZoneEntropy",
    "original_glszm_SizeZoneNonUniformity"
]

# globals
DATA_DIR_PATH = f"./input/{DATA_SET_DIR_NAME}"
IMAGES_DIR_PATH = f"{DATA_DIR_PATH}/images"
SUPERPIXELS_LABELS_DIR_PATH = f"{DATA_DIR_PATH}/superpixels_labels"
MASKS_DIR_PATH = f"{DATA_DIR_PATH}/masks"

In [4]:
# utils
def save_object(filename, obj):
    obj_file = open(filename, "wb")
    pickle.dump(obj, obj_file)
    obj_file.close()
    
def load_object(filename):
    obj_file = open(filename, "rb")
    obj = pickle.load(obj_file)
    obj_file.close()
    
    return obj

class Graph:
    def __init__(self, file_id, rag, image, superpixels_labels, mask):
        self.file_id = file_id
        self.rag = rag
        self.image = image
        self.superpixels_labels = superpixels_labels
        self.mask = mask

class Subgraph:
    def __init__(self, rag, graph, middle_superpixel_label, label):
        self.rag = rag
        self.graph = graph
        self.middle_superpixel_label = middle_superpixel_label
        self.label = label
        self.unnormalized_edge_indices = None
        self.normalized_edge_indices = None
        self.edges = None
        self.nodes = None
        
def assign_labels(graph):
    white_pixels_count = defaultdict(int)
    total_pixels_count = defaultdict(int)
    
    for (i, row) in enumerate(graph.superpixels_labels):
        for (j, superpixel_label) in enumerate(row):
            total_pixels_count[superpixel_label] += 1
            if graph.mask[i][j] == 1:
                white_pixels_count[superpixel_label] += 1
      
    for node in graph.rag:
        graph.rag.nodes[node]['label'] = 1.0 if white_pixels_count[node] / total_pixels_count[node] >= 0.65 else 0.0
            
            
def assign_features(graph):
    unique_superpixels_labels = np.unique(graph.superpixels_labels)
    
    for superpixel_label in unique_superpixels_labels:
        superpixel_label_mask = (graph.superpixels_labels == superpixel_label).astype(int)
        NRRD_DIRECTORY_PATH = "./output/nrrd"
        os.makedirs(NRRD_DIRECTORY_PATH, exist_ok=True)
        
        nrrd.write(f"{NRRD_DIRECTORY_PATH}/{superpixel_label}_image.nrrd", graph.image)
        nrrd.write(f"{NRRD_DIRECTORY_PATH}/{superpixel_label}_superpixel_label_mask.nrrd", superpixel_label_mask)
        nrrd_image_path = os.path.join(NRRD_DIRECTORY_PATH, str(superpixel_label) + "_image.nrrd")
        nrrd_superpixel_label_mask_path = os.path.join(NRRD_DIRECTORY_PATH, str(superpixel_label) + "_superpixel_label_mask.nrrd")
        
        extractor = radiomics.featureextractor.RadiomicsFeatureExtractor()
        try:             
            result = extractor.execute(nrrd_image_path, nrrd_superpixel_label_mask_path)
        except Exception as exception:
            os.makedirs(f"./output/nrrd/failed/{graph.filed_id}", exist_ok=True)
            print(superpixel_label)
            print(exception)
            print("FAILED")
            nrrd.write(f"{NRRD_DIRECTORY_PATH}/failed/{graph.filename}/{superpixel_label}_superpixel_label_mask.nrrd", superpixel_label_mask)
                
        for feature in PYRADIOMICS_FEATURES:
            graph.rag.nodes[superpixel_label][feature] = result[feature]
    
def process_images():
    filenames = os.listdir(IMAGES_DIR_PATH)
    graphs = list()
    os.makedirs("./output/expected", exist_ok=True)
    os.makedirs("./output/generated", exist_ok=True)
    
    for (file_count, filename) in enumerate(filenames, start=1):
        print(f"Processing files: {file_count}/{len(filenames)}")
        
        file_id = os.path.splitext(filename)[0]
        
        try:
            image = np.array(ImageOps.grayscale(Image.open(f"{IMAGES_DIR_PATH}/{filename}")))
            mask = np.array(ImageOps.grayscale(Image.open(f"{MASKS_DIR_PATH}/{file_id}.bmp")))
            superpixels_labels = np.fromfile(f"{SUPERPIXELS_LABELS_DIR_PATH}/{file_id}.dat", dtype=np.dtype((np.int32, image.shape)))[0]
        except FileNotFoundError as error: 
            print(error)
            
        rag = skigraph.rag_mean_color(image, superpixels_labels)
        graphs.append(Graph(file_id, rag, image, superpixels_labels, mask))
        assign_labels(graphs[-1])
        assign_features(graphs[-1])
        
        expected = Image.fromarray((mask*255).astype(np.uint8))
        expected.save(f"./output/expected/{file_id}.png")
        
    print("All files have been processed")
    
    return graphs

def split_into_subgraphs(graphs):
    subgraphs = []
    for graph in graphs:
        for node in graph.rag.nodes:
            nodes = [neighbor for neighbor in graph.rag.neighbors(node)] + [node]
            rag = graph.rag.subgraph(nodes)
            label = graph.rag.nodes[node]['label']
            subgraphs.append(Subgraph(rag, graph, node, label))
            
    return subgraphs

def normalize_edge_indices(edge_indices):
    result = []
    flat_list = []
    for sublist in edge_indices:
        for item in sublist:
            flat_list.append(item)
            
    flat_list.sort()
    flat_list = list(dict.fromkeys(flat_list))
    change = {key:value for (value, key) in enumerate(flat_list)}
    
    for sublist in edge_indices:
        temp = []
        for item in sublist:
            temp.append(change[item])
        result.append(temp)    
    
    return result

def prepare_data(subgraphs):
    nodes = []
    edge_indices = []
    edges = []
    labels = []
    
    for subgraph in subgraphs:
        node_features = []
        
        for node in subgraph.rag.nodes:
            node_features.append([subgraph.rag.nodes[node][feature] for feature in PYRADIOMICS_FEATURES])
            
        nodes.append(node_features)
        edges.append([[1.0] for edge in subgraph.rag.edges.data()])
        unnormalized_edge_indices = [list(index) for index in subgraph.rag.edges]
        edge_indices.append(normalize_edge_indices(unnormalized_edge_indices))
        labels.append(subgraph.label)
        
        subgraph.nodes = nodes[-1]
        subgraph.unnormalized_edge_indices = unnormalized_edge_indices
        subgraph.normalized_edge_indices = edge_indices[-1]
        subgraph.edges = edges[-1]

    return nodes, edge_indices, edges, np.array(labels)

'''
def generate_segmented_images_from_predictions:
    predicted_images = {}

    for (i, subgraph) in enumerate(subgraphs_test):
        if subgraph.rag.file_id not in predicted_images:
            predicted_images[subgraph.rag.file_id] = np.copy(subgraph.rag.superpixels_labels)
        predicted_images[subgraph.rag.file_id][ predicted_images[subgraph.rag.file_id] == subgraph.superpixel_label] = predictions[i] - 2

    for key in predicted_images.keys():
        predicted_images[key][predicted_images[key] == -1] = 1
        predicted_images[key][predicted_images[key] == -2] = 0
        predicted_images[key]=predicted_images[key]*255
        im = Image.fromarray(predicted_images[key].astype(np.uint8))
        im.save(f"predictions/{key}.png")
'''

'\ndef generate_segmented_images_from_predictions:\n    predicted_images = {}\n\n    for (i, subgraph) in enumerate(subgraphs_test):\n        if subgraph.rag.file_id not in predicted_images:\n            predicted_images[subgraph.rag.file_id] = np.copy(subgraph.rag.superpixels_labels)\n        predicted_images[subgraph.rag.file_id][ predicted_images[subgraph.rag.file_id] == subgraph.superpixel_label] = predictions[i] - 2\n\n    for key in predicted_images.keys():\n        predicted_images[key][predicted_images[key] == -1] = 1\n        predicted_images[key][predicted_images[key] == -2] = 0\n        predicted_images[key]=predicted_images[key]*255\n        im = Image.fromarray(predicted_images[key].astype(np.uint8))\n        im.save(f"predictions/{key}.png")\n'

In [5]:
import logging
logger = logging.getLogger("radiomics")
logger.setLevel(logging.ERROR)

rags = process_images()

Processing files: 1/140
Processing files: 2/140
[Errno 2] No such file or directory: './input/data_set_2/superpixels_labels/7087195078.dat'
Processing files: 3/140
[Errno 2] No such file or directory: './input/data_set_2/superpixels_labels/6719056356.dat'
Processing files: 4/140


AttributeError: 'Graph' object has no attribute 'filed_id'

In [None]:
rags_train, rags_test = train_test_split(rags, train_size=0.8, random_state=1)

subgraphs_train = split_into_subgraphs(rags_train)
subgraphs_test = split_into_subgraphs(rags_test)

nodes_train, edge_indices_train, edges_train, labels_train =  prepare_data(subgraphs_train)
nodes_test, edge_indices_test, edges_test, labels_test =  prepare_data(subgraphs_test)

nodes_train = ragged_tensor_from_nested_numpy(nodes_train)
edges_train = ragged_tensor_from_nested_numpy(edges_train)
edge_indices_train = ragged_tensor_from_nested_numpy(edge_indices_train)

nodes_test = ragged_tensor_from_nested_numpy(nodes_test)
edges_test = ragged_tensor_from_nested_numpy(edges_test)
edge_indices_test = ragged_tensor_from_nested_numpy(edge_indices_test)

xtrain = nodes_train, edges_train, edge_indices_train
xtest = nodes_test, edges_test, edge_indices_test
ytrain = labels_train
ytest = labels_test
print([x.shape for x in xtrain])
print([x.shape for x in xtest])
print(ytrain.shape, ytest.shape)

In [None]:
model = make_model(
    name = "GCN",
    inputs = [{'shape': (None, 12), 'name': "node_attributes", 'dtype': 'float32', 'ragged': True},
            {'shape': (None, 1), 'name': "edge_attributes", 'dtype': 'float32', 'ragged': True},
            {'shape': (None, 2), 'name': "edge_indices", 'dtype': 'int64', 'ragged': True}],
    input_embedding = {"node": {"input_dim": 55, "output_dim": 64},
                       "edge": {"input_dim": 10, "output_dim": 64}},
    output_embedding =  'graph',
    output_mlp = {"use_bias": [True, True, False], "units": [140, 70, 1],
                "activation": ['relu', 'relu', 'sigmoid']},
    gcn_args = {"units": 64, "use_bias": True, "activation": 'relu', "pooling_method": 'mean', 
                "normalize_by_weights": False},
    depth = 1
)

# Set learning rate and epochs
learning_rate_start = 1e-3
learning_rate_stop = 1e-4
epo = 150
epomin = 100
epostep = 10

# Compile model with optimizer and loss
optimizer = tf.keras.optimizers.Adam(lr=learning_rate_start)
cbks = LinearLearningRateScheduler(learning_rate_start, learning_rate_stop, epomin, epo)
model.compile(loss='binary_crossentropy',
              optimizer=optimizer,
              weighted_metrics=['accuracy'])
print(model.summary())

# Start and time training
start = time.process_time()
hist = model.fit(xtrain, ytrain,
                 epochs=epo,
                 batch_size=32,
                 callbacks=[cbks],
                 validation_freq=epostep,
                 validation_data=(xtest, ytest),
                 verbose=2
                 )
stop = time.process_time()
print("Print Time for taining: ", stop - start)

# Get loss from history
trainlossall = np.array(hist.history['accuracy'])
testlossall = np.array(hist.history['val_accuracy'])
acc_valid = testlossall[-1]

# Plot loss vs epochs
plt.figure()
plt.plot(np.arange(trainlossall.shape[0]), trainlossall, label='Training ACC', c='blue')
plt.plot(np.arange(epostep, epo + epostep, epostep), testlossall, label='Test ACC', c='red')
plt.scatter([trainlossall.shape[0]], [acc_valid], label="{0:0.4f} ".format(acc_valid), c='red')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Interaction Network Loss')
plt.legend(loc='upper right', fontsize='x-large')
plt.show()

In [None]:
probability = model.predict(xtest)
predictions = np.round(probability)
print(predictions)
print(ytest)
comparison = np.concatenate((predictions, ytest, probability), axis=1)
# print(comparison)

hole = 0
true = 0
false = 0
hole2 = 0
true2 = 0
false2 = 0
for value in comparison:
    if value[1] == 1.:
        hole += 1
        if value[0] == 0.:
            false +=1
        else:
            true +=1
    if value[1] == 0.:
        hole2 += 1
        if value[0] == 1.:
            false2 +=1
        else:
            true2 +=1
print(f"{true}/{hole}")
print(f"{false}/{hole}")
print(f"{true2}/{hole2}")
print(f"{false2}/{hole2}")