## Data Exploration

#[Synthetic Data from h5file](#synthetic-data-directly-from-dataset)

[Visualize](#plot-images-for-various-of-the-deformations-within-the-h5-file)

[Random classifier](#find-the-random-guess-by-counting-the-number-of-zero-slices)

#[Synthetic Data entering models](#synthetic-data-from-the-inputs-to-the-models)

-> This function can be found in the file visualize_model_data and that might be better

In [4]:
import os
import random
import numpy as np
import matplotlib.pyplot as plt
import sys
import sys
sys.path.append('/usr/bmicnas02/data-biwi-01/jeremy_students/lschlyter/4dflowmri_anomaly_detection')
from config import system as config_sys
sys.path.append(f'{config_sys.project_code_root}/src/helpers')
# Path to data project
data_project = config_sys.project_code_root + '/data'

# Helpers
from utils import make_dir_safely
from helpers.data_loader import load_syntetic_data
from config import system as config_sys

#### Synthetic data directly from dataset

Look depending on the metric (image-wise, 2D-slice, patient-wise) the different number of sick or healthy patients, also depending on the tasks

This synthetic dataset was created using the synthetic_anomalies file in the helpers


You have 7 patients in the validation, times 7 deformations, times 64 slices

In [8]:
preprocess_method = 'masked_slice'
synthetic_data_note = '_without_rotation_with_cs_skip_updated_ao_S10_balanced_decreased_interpolation_factor_cube_3' # 'without_noise' or '' or 'decreased_interpolation_factor'
idx_start=41
idx_end=51
z_slices = 64
syn_data = load_syntetic_data(preprocess_method =preprocess_method, idx_start=idx_start, idx_end=idx_end, sys_config = config_sys, note = synthetic_data_note)


syn_images = syn_data['images']
syn_labels = syn_data['masks']
syn_images.shape,syn_labels.shape

2024-10-08 16:56:08,977 Loading data from: /usr/bmicnas02/data-biwi-01/jeremy_students/lschlyter/4dflowmri_anomaly_detection/data/masked_slice_anomalies_images_from_41_to_51__without_rotation_with_cs_skip_updated_ao_S10_balanced_decreased_interpolation_factor_cube_3.hdf5


Already preprocessed this configuration. Loading now...


((4480, 32, 32, 24, 4), (4480, 32, 32, 24, 4))

In [5]:
if synthetic_data_note == 'without_noise':
    deformation_dict = {'None': 0, 'deformation': 1, 'patch_interpolation': 2, 'poisson_with_mixing': 3, 'poisson_without_mixing': 4}
else:
    deformation_dict = {'None': 0, 'noise': 1, 'deformation': 2, 'hollow_circle': 3
                    , 'patch_interpolation': 4, 'poisson_with_mixing': 5, 'poisson_without_mixing': 6}

deformation_viz_path = os.path.join(data_project, "synthetic_anomalies")
make_dir_safely(deformation_viz_path)
                    

#### Plot images for various of the deformations within the file

In [10]:
for i in range(len(deformation_dict)):

    deformation_type = list(deformation_dict.keys())[i]
    print(deformation_type)
    n_subjects = idx_end - idx_start
    range_of_indices = np.arange(deformation_dict[deformation_type]*n_subjects*z_slices, deformation_dict[deformation_type]*n_subjects*z_slices + n_subjects*z_slices)

    # Randomly select n images from the range of indices
    n = 32
    random.seed(42)
    random.shuffle(range_of_indices)
    range_of_indices = range_of_indices

    # Create a figure and subplot grid
    fig, axes = plt.subplots(nrows=8, ncols=8, figsize=(18, 14))

    # Iterate through the indices and plot the images and masks
    for i, ax in enumerate(axes.flatten()):
        if i % 2 == 0:
            try:
                t =random.choice(np.unique(np.where(syn_labels[range_of_indices[i]] != 0)[2]))
            except:
                t = 3
            
            im = ax.imshow(syn_images[range_of_indices[i], :, :, t, 0])
            # Make colorbar
            cbar = fig.colorbar(im, ax=ax)
        else:
            im = ax.imshow(syn_labels[range_of_indices[i -1], :, :, t, 0])
            # Make colorbar
            cbar = fig.colorbar(im, ax=ax)
        # Remove ticks
        ax.set_xticks([])
        ax.set_yticks([])

    # Give a title to the figure
    fig.suptitle("Synthetic Anomalies: {} Deformation".format(deformation_type), fontsize=20)
    plt.tight_layout()
    # Save figure
    plt.savefig(os.path.join(deformation_viz_path, "{}_synthetic_anomalies_{}_images_and_masks.png".format(synthetic_data_note,deformation_type)))
    plt.close()

        



None
noise
deformation
hollow_circle
patch_interpolation
poisson_with_mixing
poisson_without_mixing


#### Find the random guess by counting the number of zero slices

In [11]:
deformation_dict
deformation_type = list(deformation_dict.keys())[0]
range_of_indices = np.arange(deformation_dict[deformation_type]*7*64, deformation_dict[deformation_type]*7*64 + 7*64)

In [12]:
metric_formats = ['pixelwise', 'imagewise', '2Dslice']
# For the whole dataset

for metric_format in metric_formats:
    # If pixelwise, count the number of pixels that are anomalous
    if metric_format == 'pixelwise':
        print("------------------------------------------------------------------------------")
        # Count the number of anomalous pixels in the whole dataset
        num_anomalous_pixels = np.sum(syn_labels[:] != 0)
        print("Number of anomalous pixels in the whole dataset: {}".format(num_anomalous_pixels))
        # Take ratio of anomalous pixels to total pixels
        ratio_anomalous_pixels = num_anomalous_pixels / np.prod(syn_labels.shape)
        print("Ratio of anomalous pixels to total pixels: {}".format(ratio_anomalous_pixels))
    # If imagewise, count the number of images that are anomalous
    elif metric_format == 'imagewise':
        print("------------------------------------------------------------------------------")
        num_anomalous_images =np.max(syn_labels, axis=(1,2,3,4)) # Tells you if one is in there
        print("Number of anomalous images in the whole dataset: {}".format(np.sum(num_anomalous_images)))
        # Take ratio of anomalous images to total images
        ratio_anomalous_images = np.sum(num_anomalous_images) / syn_labels.shape[0]
        print("Ratio of anomalous images to total images: {}".format(ratio_anomalous_images))
    # If 2Dslice, count the number of 2D slices that are anomalous
    elif metric_format == '2Dslice':
        print("------------------------------------------------------------------------------")
        num_anomalous_2D_slices = np.sum(np.max(syn_labels, axis=(1,2,4)) != 0)
        print("Number of anomalous 2D slices in the whole dataset: {}".format(num_anomalous_2D_slices))
        # Take ratio of anomalous 2D slices to total 2D slices (n_images, 24)
        ratio_anomalous_2D_slices = num_anomalous_2D_slices / np.prod((syn_labels.shape[0],syn_labels.shape[3]))
        print("Ratio of anomalous 2D slices to total 2D slices: {}".format(ratio_anomalous_2D_slices))


------------------------------------------------------------------------------
Number of anomalous pixels in the whole dataset: 9816596
Ratio of anomalous pixels to total pixels: 0.022290084475562685
------------------------------------------------------------------------------
Number of anomalous images in the whole dataset: 3756.0
Ratio of anomalous images to total images: 0.8383928571428572
------------------------------------------------------------------------------
Number of anomalous 2D slices in the whole dataset: 49763
Ratio of anomalous 2D slices to total 2D slices: 0.46282552083333334


In [13]:
# We want to do the same thing for each deformation type
for i in range(len(deformation_dict)):
    print("------------------------------------------------------------------------------")
    print("Deformation type: {}".format(list(deformation_dict.keys())[i]))
    for metric_format in metric_formats:
        deformation_type = list(deformation_dict.keys())[i]
        # Get the range of indices for the deformation type
        range_of_indices = np.arange(deformation_dict[deformation_type]*7*64, deformation_dict[deformation_type]*7*64 + 7*64)
        # If pixelwise, count the number of pixels that are anomalous
        if metric_format == 'pixelwise':
            print("------------------------------------------------------------------------------")
            print("Metric format: {}".format(metric_format))
            # Count the number of anomalous pixels in the whole dataset
            num_anomalous_pixels = np.sum(syn_labels[range_of_indices] != 0)
            print("Number of anomalous pixels in the {} dataset: {}".format(deformation_type,num_anomalous_pixels))
            # Take ratio of anomalous pixels to total pixels
            ratio_anomalous_pixels = num_anomalous_pixels / np.prod(syn_labels[range_of_indices].shape)
            print("Ratio of anomalous pixels to total pixels: {}".format(ratio_anomalous_pixels))
        # If imagewise, count the number of images that are anomalous
        elif metric_format == 'imagewise':
            print("------------------------------------------------------------------------------")
            print("Metric format: {}".format(metric_format))
            num_anomalous_images = np.max(syn_labels[range_of_indices], axis=(1,2,3,4))
            print("Number of anomalous images in the {} dataset: {}".format(deformation_type, np.sum(num_anomalous_images)))
            # Take ratio of anomalous images to total images
            ratio_anomalous_images = np.sum(num_anomalous_images) / syn_labels[range_of_indices].shape[0]
            print("Ratio of anomalous images to total images: {}".format(ratio_anomalous_images))
        # If 2Dslice, count the number of 2D slices that are anomalous
        elif metric_format == '2Dslice':
            print("------------------------------------------------------------------------------")
            print("Metric format: {}".format(metric_format))
            num_anomalous_2D_slices = np.sum(np.max(syn_labels[range_of_indices], axis=(1,2,4)) != 0)
            print("Number of anomalous 2D slices in the {} dataset: {}".format(deformation_type,num_anomalous_2D_slices))
            # Take ratio of anomalous 2D slices to total 2D slices (n_images, 24)
            ratio_anomalous_2D_slices = num_anomalous_2D_slices / np.prod((syn_labels[range_of_indices].shape[0],syn_labels[range_of_indices].shape[3]))
            print("Ratio of anomalous 2D slices to total 2D slices: {}".format(ratio_anomalous_2D_slices))


------------------------------------------------------------------------------
Deformation type: None
------------------------------------------------------------------------------
Metric format: pixelwise
Number of anomalous pixels in the None dataset: 0
Ratio of anomalous pixels to total pixels: 0.0
------------------------------------------------------------------------------
Metric format: imagewise
Number of anomalous images in the None dataset: 0.0
Ratio of anomalous images to total images: 0.0
------------------------------------------------------------------------------
Metric format: 2Dslice
Number of anomalous 2D slices in the None dataset: 0
Ratio of anomalous 2D slices to total 2D slices: 0.0
------------------------------------------------------------------------------
Deformation type: noise
------------------------------------------------------------------------------
Metric format: pixelwise
Number of anomalous pixels in the noise dataset: 1075200
Ratio of anomalous pix

In [44]:
# We want to do the same thing but removing the poisson anomalies
# Find the range of the deformation types you wish to remove
deformation_types_to_remove = ['poisson_with_mixing']#['patch_interpolation']#['poisson_with_mixing', 'poisson_without_mixing']
# Find the range of indices for the deformation types you wish to remove
range_of_indices_to_remove = []
for i in range(len(deformation_types_to_remove)):
    range_of_indices_to_remove.append(np.arange(deformation_dict[deformation_types_to_remove[i]]*7*64, deformation_dict[deformation_types_to_remove[i]]*7*64 + 7*64))
# Find the range of indices for the deformation types you wish to keep
range_of_indices_to_keep = np.arange(0, 7*64*len(deformation_dict))
# Find the range of indices for the deformation types you wish to keep
range_of_indices_to_keep = np.delete(range_of_indices_to_keep, range_of_indices_to_remove)
# Find the range of indices for the deformation types you wish to keep
range_of_indices_to_keep = np.sort(range_of_indices_to_keep)


metric_formats = ['pixelwise', 'imagewise', '2Dslice']
# For the  dataset
print("------------------------------------------------------------------------------")
print(" Dataset without {}:".format(deformation_types_to_remove))
for metric_format in metric_formats:
    # If pixelwise, count the number of pixels that are anomalous
    if metric_format == 'pixelwise':
        print("------------------------------------------------------------------------------")
        # Count the number of anomalous pixels in the  dataset
        num_anomalous_pixels = np.sum(syn_labels[range_of_indices_to_keep] != 0)
        print("Number of anomalous pixels in the  dataset: {}".format(num_anomalous_pixels))
        # Take ratio of anomalous pixels to total pixels
        ratio_anomalous_pixels = num_anomalous_pixels / np.prod(syn_labels[range_of_indices_to_keep].shape)
        print("Ratio of anomalous pixels to total pixels: {}".format(ratio_anomalous_pixels))
    # If imagewise, count the number of images that are anomalous
    elif metric_format == 'imagewise':
        print("------------------------------------------------------------------------------")
        num_anomalous_images =np.max(syn_labels[range_of_indices_to_keep], axis=(1,2,3,4)) # Tells you if one is in there
        print("Number of anomalous images in the  dataset: {}".format(np.sum(num_anomalous_images)))
        # Take ratio of anomalous images to total images
        ratio_anomalous_images = np.sum(num_anomalous_images) / syn_labels[range_of_indices_to_keep].shape[0]
        print("Ratio of anomalous images to total images: {}".format(ratio_anomalous_images))
    # If 2Dslice, count the number of 2D slices that are anomalous
    elif metric_format == '2Dslice':
        print("------------------------------------------------------------------------------")
        num_anomalous_2D_slices = np.sum(np.max(syn_labels[range_of_indices_to_keep], axis=(1,2,4)) != 0)
        print("Number of anomalous 2D slices in the  dataset: {}".format(num_anomalous_2D_slices))
        # Take ratio of anomalous 2D slices to total 2D slices (n_images, 24)
        ratio_anomalous_2D_slices = num_anomalous_2D_slices / np.prod((syn_labels[range_of_indices_to_keep].shape[0],syn_labels.shape[3]))
        print("Ratio of anomalous 2D slices to total 2D slices: {}".format(ratio_anomalous_2D_slices))






------------------------------------------------------------------------------
 Dataset without ['poisson_with_mixing']:
------------------------------------------------------------------------------
Number of anomalous pixels in the  dataset: 4563656
Ratio of anomalous pixels to total pixels: 0.017270799667116195
------------------------------------------------------------------------------
Number of anomalous images in the  dataset: 2240.0
Ratio of anomalous images to total images: 0.8333333333333334
------------------------------------------------------------------------------
Number of anomalous 2D slices in the  dataset: 27714
Ratio of anomalous 2D slices to total 2D slices: 0.42959449404761907
