In [1]:
PREFIX = './fragments/Frag1/'

In [3]:
import glob

import numpy
import PIL.Image

image = numpy.stack([ numpy.array(PIL.Image.open(file)) for file in glob.glob(f"{PREFIX}/surface_volume/*.tif") ])

image.shape

(65, 8181, 6330)

In [81]:
img = numpy.random.rand(2, 64, 64, 64)
img.shape

import cvtda.topology
import gtda.images

fe = cvtda.topology.GeometryExtractor(n_jobs = 1)
fe.fit_transform(img).shape

MultidimensionalGeometryExtractor: 100%|██████████| 2/2 [00:01<00:00,  1.47it/s]
MultidimensionalGeometryExtractor: 100%|██████████| 2/2 [00:01<00:00,  1.46it/s]


(2, 2097)

In [2]:
import numpy
import PIL.Image

mask = numpy.array(PIL.Image.open(f"{PREFIX}/mask.png"))
print(mask.min(), mask.max(), mask.shape)

target = numpy.array(PIL.Image.open(f"{PREFIX}/inklabels.png"))
print(target.min(), target.max(), target.shape)

0 1 (8181, 6330)
0 1 (8181, 6330)


In [3]:
import torchvision.tv_tensors
import torchvision.transforms.v2

VAL_X = (1088, 2368)
VAL_Y = (3456, 5376)

def train_test_split(image):
    WINDOW = 128
    HALF = (WINDOW // 2)

    train_images, train_labels, train_idxs = [], [], []
    test_images, test_labels, test_idxs = [], [], []
    for i in range(image.shape[0] - WINDOW):
        if i % HALF != 0:
            continue
        for j in range(image.shape[1] - WINDOW):
            if j % HALF != 0:
                continue
            if mask[i][j] == 0:
                continue

            img = image[i:i + WINDOW, j:j + WINDOW]
            label = target[i:i + WINDOW, j:j + WINDOW]

            if (i > VAL_Y[0]) and (i < VAL_Y[1]) and (j > VAL_X[0]) and (j < VAL_X[1]):
                test_images.append(img)
                test_labels.append(label)
                test_idxs.append((i, j))
            else:
                train_images.append(img)
                train_labels.append(label)
                train_idxs.append((i, j))

    train_images = numpy.stack(train_images)
    train_labels = numpy.stack(train_labels)
    test_images = numpy.stack(test_images)
    test_labels = numpy.stack(test_labels)
    # print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)

    RESIZE_FACTOR = 2
    WINDOW = WINDOW // RESIZE_FACTOR
    HALF = (WINDOW // 2)

    transform = torchvision.transforms.v2.Resize((WINDOW, WINDOW))

    def resize(images, labels, idxs):
        images = torchvision.tv_tensors.Image(images)
        labels = torchvision.tv_tensors.Mask(labels)
        images, labels = transform(images, labels)
        idxs = [(i // RESIZE_FACTOR, j // RESIZE_FACTOR) for (i, j) in idxs]
        return images.numpy(), labels.numpy(), idxs

    train_images, train_labels, train_idxs = resize(train_images, train_labels, train_idxs)
    test_images, test_labels, test_idxs = resize(test_images, test_labels, test_idxs)
    # print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)

    return (train_images, train_labels, train_idxs), (test_images, test_labels, test_idxs)

In [4]:
import re
import os

import numpy
import PIL.Image

import cvtda.utils
import cvtda.topology

def calc_features(file):
    layer = re.match(r"(.*).surface_volume.(.*)\.tif", file).groups()[1]
    if os.path.exists(f"{PREFIX}/3d/{layer}/test_features.npy"):
        train_features = numpy.load(f"{PREFIX}/3d/{layer}/train_features.npy")
        test_features = numpy.load(f"{PREFIX}/3d/{layer}/test_features.npy")
            
        remover = cvtda.utils.DuplicateFeaturesRemover()
        train_features = remover.fit_transform(train_features)
        test_features = remover.transform(test_features)
        return train_features, test_features

    image = numpy.array(PIL.Image.open(file)) / 65535
    (train_images, train_labels, train_idxs), (test_images, test_labels, test_idxs) = train_test_split(image)
    print(layer, train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)

    extractor = cvtda.topology.FeatureExtractor(
        n_jobs = 1,
        num_radial_filtrations = 3,
        binarizer_thresholds = [ 0.3, 0.7 ],
        only_get_from_dump = False,
        return_diagrams = False
    )
    extractor = extractor.fit(train_images, f"{PREFIX}/3d/{layer}/train")
    train_features = extractor.transform(train_images, f"{PREFIX}/3d/{layer}/train")
    test_features = extractor.transform(test_images, f"{PREFIX}/3d/{layer}/test")

    numpy.save(f"{PREFIX}/3d/{layer}/train_features.npy", train_features)
    numpy.save(f"{PREFIX}/3d/{layer}/test_features.npy", test_features)
    return train_features, test_features

In [5]:
import glob

import tqdm
import joblib

results = list(
    joblib.Parallel(n_jobs = -1)(
        joblib.delayed(calc_features)(file)
        for file in tqdm.tqdm(glob.glob(f"{PREFIX}/surface_volume/*.tif"))
    )
)

100%|██████████| 65/65 [00:34<00:00,  1.89it/s]


In [6]:
train_features = numpy.hstack([ train for (train, _) in results ])
test_features = numpy.hstack([ test for (test, _) in results ])

print(train_features.shape, test_features.shape)

(6521, 374833) (6521, 374833)


In [7]:
import gc

del results
gc.collect()

31

In [8]:
(train_images, train_masks, train_idxs), (test_images, test_masks, test_idxs) = train_test_split(numpy.array(PIL.Image.open(f"{PREFIX}/surface_volume/00.tif")) / 65535)

train_images.shape, test_images.shape, train_masks.shape, test_masks.shape

((6521, 64, 64), (551, 64, 64), (6521, 64, 64), (551, 64, 64))

In [9]:
import cvtda.segmentation

nn_train = cvtda.segmentation.Dataset(train_images, train_features, train_masks)
nn_test = cvtda.segmentation.Dataset(test_images, test_features, test_masks)

In [10]:
del train_images
del train_features
del train_masks

del test_images
del test_features
del test_masks

gc.collect()

16

In [11]:
unet = cvtda.segmentation.MiniUnet(with_images = False, with_features = True)
unet.fit(nn_train, nn_test)
y_pred_proba = unet.predict_proba(nn_test)

Parameters: 1547362025


  0%|          | 0/100 [12:49<?, ?it/s]


AssertionError: Size mismatch between tensors

In [9]:
import cvtda.utils

remover = cvtda.utils.DuplicateFeaturesRemover()
train_features = remover.fit_transform(train_features)
test_features = remover.transform(test_features)

print(train_features.shape, test_features.shape)

  8%|▊         | 30163/370541 [03:27<39:05, 145.15it/s, partition_by=0, num_features=1, duplicates=0] 


KeyboardInterrupt: 

In [None]:
import cvtda.segmentation

cvtda.segmentation.segment(
    train_images, train_features, train_labels,
    test_images, test_features, test_labels,
    dump_name = f"{PREFIX}/predictions", remove_cross_maps = True
)

In [None]:
import numpy
import matplotlib.pyplot as plt

def restore(imgs, idxs, ax):
    restored = numpy.zeros((image.shape[0] // RESIZE_FACTOR, image.shape[1] // RESIZE_FACTOR))

    for img, (i, j) in zip(imgs, idxs):
        i_from, i_to = i + PADDING, i + WINDOW - PADDING
        j_from, j_to = j + PADDING, j + WINDOW - PADDING
        img_part = img[PADDING:-PADDING, PADDING:-PADDING]
        restored[i_from:i_to, j_from:j_to] = img_part

    ax.imshow(restored, cmap = 'gray')

fig, axes = plt.subplots(1, 4, figsize = (20, 5))

axes[0].set_title("Target")
restore(test_labels, test_idxs, axes[0])

axes[1].set_title("Only images")
restore(numpy.squeeze(numpy.load(f'{PREFIX}/predictions/images.npy')), test_idxs, axes[1])

axes[2].set_title("Only topological")
restore(numpy.squeeze(numpy.load(f'{PREFIX}/predictions/topological.npy')), test_idxs, axes[2])

axes[3].set_title("Combined")
restore(numpy.squeeze(numpy.load(f'{PREFIX}/predictions/combined.npy')), test_idxs, axes[3])