# Data configuration for the foot reconstruction project

## Environment settings

### Library imports

In [None]:
import numpy as np
import random
import matplotlib.pyplot as plt
import matplotlib.pylab as plb
import os
import pandas as pd

# data extraction
import glob

# import CT scans (.dcm)
import pydicom as dicom

# import masks (.bmp)
from PIL import Image

# create gifs
import imageio

# plot the gifs
# install the library using this command: pip install git+https://github.com/tensorflow/docs
import tensorflow_docs.vis.embed as embed

# resizing and basic image processing
import cv2

# display progress bar
from tqdm import tqdm

# data augmentation library
from albumentations import HorizontalFlip, VerticalFlip, Rotate

# model
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

# training
import time

# test
from operator import add
import imageio
from sklearn.metrics import accuracy_score, f1_score, jaccard_score, precision_score, recall_score

### Hyperparameters

In [None]:
# Fraction of the patient list that `create_dataset_files` sends to the
# training split (patients with index below round(n * TRAIN_PROPORTION)).
TRAIN_PROPORTION = 0.6
# Cumulative upper bound of the validation split: patients between the training
# bound and round(n * VALIDATION_PROPORTION) go to validation, the rest to test.
VALIDATION_PROPORTION = 0.8
# Root folder that `get_foot_file_paths` globs for the per-foot directories.
global_path = "C:/Users/arthu/Downloads/data/NN_Pieds/"

### Utility functions

In [None]:
def print_image(image):
    """Display `image` with a gray colormap and its colorbar."""
    rendered = plt.imshow(image, cmap='gray')
    # Attach the colorbar to the image that was just drawn.
    plt.colorbar(rendered)
    plt.show()

In [None]:
def create_dir(path):
    """Create the directory `path`, including missing parents, if needed.

    Calling it on an already existing directory is a no-op.

    Args:
        path: directory to create.

    Raises:
        FileExistsError: if `path` exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of testing os.path.exists first.
    os.makedirs(path, exist_ok=True)

Define the `fill_hole` function: it takes an array of images, fills the shapes in each image, and returns an array of the filled images.

In [None]:
def fill_hole(images):
    """Flood-fill each image from its top-left corner and binarise the inverse.

    Every image is copied, flood-filled with 255 starting at pixel (0, 0),
    then inverted with `cv2.bitwise_not`. The output pixel is 1 where the
    inverted image is non-zero and 0 elsewhere.

    Args:
        images: sequence of 2-D images accepted by `cv2.floodFill`.

    Returns:
        A `uint8` array stacking one binary image per input image; an empty
        `uint8` array when `images` is empty.
    """
    filled = []

    for image in images:
        # floodFill needs a mask 2 pixels larger than the image in each
        # dimension; size it per image so mixed image sizes are supported.
        height, width = image.shape[:2]
        flood_mask = np.zeros((height + 2, width + 2), np.uint8)
        flooded = image.copy()
        cv2.floodFill(flooded, flood_mask, (0, 0), 255)
        inverted = cv2.bitwise_not(flooded)
        filled.append((inverted > 0).astype(np.uint8))

    if not filled:
        # Nothing to process: return an empty array instead of failing.
        return np.empty((0,), dtype=np.uint8)

    return np.array(filled)

In [None]:
def load_data_path(path, id):
    """Return the sorted CT-scan and calcaneum-mask file paths of one foot.

    Args:
        path: folder holding the `CTscans_<id>` and `masques_<id>` directories.
        id: identifier string appended to both directory names.

    Returns:
        A `(X, y)` tuple: sorted `.dcm` paths from `CTscans_<id>` and sorted
        `.bmp` paths from `masques_<id>/calcaneum`, with backslashes replaced
        by forward slashes.
    """
    def _listing(*parts):
        # Glob below `path` and normalise Windows separators before sorting.
        pattern = os.path.join(path, *parts)
        return sorted(found.replace('\\', '/') for found in glob.glob(pattern))

    scan_dir = 'CTscans_' + id
    mask_dir = 'masques_' + id
    scans = _listing(scan_dir, "*.dcm")
    masks = _listing(mask_dir, 'calcaneum', "*.bmp")

    return scans, masks

In [None]:
def augment_data(images, masks, save_path, augment=False):
    """Save each CT slice and its mask as 512x512 PNG files, optionally augmented.

    For every `(image, mask)` pair the DICOM pixel array and the mask image are
    read, optionally augmented, resized and written to
    `<save_path>/image/<name>_<k>.png` and `<save_path>/mask/<name>_<k>.png`.
    `<name>` is the DICOM file name without extension and `k` is 0 for the
    original sample, then 1, 2, 3 for the horizontal flip, vertical flip and
    rotation when `augment` is truthy.

    Args:
        images: paths of the DICOM files, using `/` as separator.
        masks: paths of the mask images, aligned with `images`.
        save_path: folder that already contains `image` and `mask` sub-folders.
        augment: when truthy, also save the three augmented variants.
    """
    size = (512, 512)

    for image_file, mask_file in tqdm(zip(images, masks), total=len(images)):
        # file name without directory and extension
        name = image_file.split("/")[-1].split('.')[0]

        image = dicom.dcmread(image_file).pixel_array
        mask = np.array(Image.open(mask_file))

        samples = [(image, mask)]
        if augment:
            transforms = (
                HorizontalFlip(p=1.0),  # p is the probability of applying the transform
                VerticalFlip(p=1.0),
                Rotate(limit=45, p=1.0),  # random rotation of at most 45 degrees
            )
            for transform in transforms:
                augmented = transform(image=image, mask=mask)
                samples.append((augmented["image"], augmented["mask"]))

        for index, (sample_image, sample_mask) in enumerate(samples):
            resized_image = cv2.resize(sample_image, size)
            # Nearest-neighbour keeps the mask labels discrete; the default
            # bilinear interpolation would blend them along the edges.
            resized_mask = cv2.resize(sample_mask, size, interpolation=cv2.INTER_NEAREST)

            # image and mask share the file name, in different sub-folders
            file_name = "{}_{}.png".format(name, index)
            cv2.imwrite(os.path.join(save_path, "image", file_name), resized_image)
            cv2.imwrite(os.path.join(save_path, "mask", file_name), resized_mask)

In [None]:
def sample_stack(stack, rows=6, cols=6, start_with=10, show_every=3):
    """Plot a `rows` x `cols` grid of slices taken from `stack`.

    Args:
        stack: indexable collection of 2-D images.
        rows: number of subplot rows.
        cols: number of subplot columns.
        start_with: index of the first slice shown.
        show_every: step between two consecutive slices shown.

    Raises:
        IndexError: if a requested slice index is outside `stack`.
    """
    # squeeze=False keeps `axes` two-dimensional even when rows or cols is 1.
    _, axes = plt.subplots(rows, cols, figsize=[20, 22], squeeze=False)
    # `axes.flat` walks the grid row by row, which is valid for any grid shape
    # (indexing with `i / rows` and `i % rows` only worked when rows == cols).
    for i, axis in enumerate(axes.flat):
        ind = start_with + i * show_every
        axis.set_title(f'slice {ind}')
        axis.imshow(stack[ind], cmap='gray')
        axis.axis('off')
    plt.show()

In [None]:
def get_foot_file_paths(global_path):
    """Return the sorted entries found directly under `global_path`.

    Args:
        global_path: root folder; `"/*"` is appended to build the glob pattern.

    Returns:
        A NumPy array of paths with backslashes replaced by forward slashes,
        in lexicographic order.
    """
    entries = glob.glob(str(global_path + "/*"))
    # glob returns entries in arbitrary order; sort so that indices into the
    # result are stable across calls and platforms.
    return np.array(sorted(entry.replace('\\', '/') for entry in entries))

In [None]:
def get_foot_subfile_paths(foot_file_paths):
    """Return the first two sorted entries of every foot folder.

    The entries of each folder are sorted lexicographically. The first one is
    returned as the CT-scan path and the second one as the mask path.
    NOTE(review): this assumes the CT folder sorts before the mask folder
    (true for `CTscans_*` / `masques_*` names) - confirm against the data.

    Args:
        foot_file_paths: iterable of foot folder paths.

    Returns:
        A `(CTscan_paths, Masks_paths)` tuple of NumPy arrays of paths.

    Raises:
        IndexError: if a foot folder holds fewer than two entries.
    """
    ct_dirs = []
    mask_dirs = []
    for foot_dir in foot_file_paths:
        # glob order is arbitrary, so sort before picking entries by position.
        entries = sorted(
            entry.replace('\\', '/') for entry in glob.glob(str(foot_dir + "/*"))
        )
        ct_dirs.append(entries[0])
        mask_dirs.append(entries[1])

    return np.array(ct_dirs), np.array(mask_dirs)

In [None]:
def create_masks_addresses(mask_paths):
    """List the `.bmp` files of every sub-folder of a mask folder.

    Sub-folders and the files inside them are both sorted, so that column `i`
    of the result refers to the file at the same rank in every sub-folder.

    Args:
        mask_paths: path of the folder holding one sub-folder per mask set.

    Returns:
        A NumPy array with one row per sub-folder, each row holding that
        sub-folder's `.bmp` paths (backslashes replaced by forward slashes).
        NOTE(review): rows only stack into a 2-D array when every sub-folder
        holds the same number of files; recent NumPy versions raise
        `ValueError` otherwise.
    """
    sub_dirs = sorted(
        entry.replace('\\', '/') for entry in glob.glob(str(mask_paths + "/*"))
    )
    addresses = [
        np.array(sorted(
            bmp.replace('\\', '/') for bmp in glob.glob(str(sub_dir + "/*.bmp"))
        ))
        for sub_dir in sub_dirs
    ]

    return np.array(addresses)

In [None]:
def create_compressed_mask(addresses):
    """Merge, slice by slice, the masks of every row of `addresses`.

    For each column `i`, the images at `addresses[j][i]` are summed over all
    rows `j` into a 512x512 buffer, and every strictly positive pixel is then
    set to 1.

    Args:
        addresses: 2-D array of mask file paths indexed as `[row][slice]`.

    Returns:
        A `uint8` array holding one merged mask per column of `addresses`.
    """
    n_slices = addresses.shape[1]
    merged = []
    for slice_idx in tqdm(range(n_slices)):
        accumulator = np.zeros((512, 512))
        for row in addresses:
            # in-place add keeps the 512x512 float buffer
            np.add(accumulator, np.array(Image.open(row[slice_idx])), out=accumulator)

        # any pixel covered by at least one mask becomes 1
        accumulator[accumulator > 0] = 1
        merged.append(accumulator)

    stacked = np.array(merged)
    return stacked.astype(np.uint8)

In [None]:
def create_compressed_mask_file(compressed_masks, name, save_path):
    """Write every mask of `compressed_masks` as `<save_path><name>_<k>.png`.

    `k` starts at 1. `save_path` is concatenated directly with the file name,
    so it must end with a path separator to designate a folder.

    Args:
        compressed_masks: array indexed as `[mask, row, column]`.
        name: prefix of the file names.
        save_path: path prefix prepended to every file name.
    """
    # cv2.resize takes dsize as (width, height), i.e. (columns, rows); passing
    # (rows, columns) would transpose the dimensions of non-square masks.
    size = (compressed_masks.shape[2], compressed_masks.shape[1])

    for idx, mask in enumerate(compressed_masks):
        resized_mask = cv2.resize(mask, size)

        # file numbering starts at 1
        mask_file = str(save_path + "{}_{}.png".format(name, idx + 1))

        cv2.imwrite(mask_file, resized_mask)

In [None]:
def create_CTscans_file(global_path, save_path, name, id):
    """Convert the DICOM slices of one foot to 512x512 PNG files.

    The CT-scan folder is entry `id` of the first array returned by
    `get_foot_subfile_paths(get_foot_file_paths(global_path))`. Its `.dcm`
    files are sorted, the last one is dropped, and the others are written to
    `<save_path><name>_<k>.png` with `k` starting at 1.

    Args:
        global_path: root folder holding the per-foot folders.
        save_path: path prefix prepended to every file name (must end with a
            path separator to designate a folder).
        name: prefix of the file names.
        id: index of the foot in the listing of `global_path`.

    Raises:
        IndexError: if the CT-scan folder holds no `.dcm` file.
    """
    size = (512, 512)
    scan_dir = get_foot_subfile_paths(get_foot_file_paths(global_path))[0][id]
    # glob order is arbitrary: sort so the dropped file and the numbering
    # are deterministic.
    scan_paths = sorted(
        found.replace('\\', '/') for found in glob.glob(str(scan_dir + "/*.dcm"))
    )
    # The last slice is discarded.
    # NOTE(review): presumably because it has no matching mask - confirm.
    del scan_paths[-1]

    for idx, scan_path in enumerate(scan_paths, start=1):
        resized_scan = cv2.resize(dicom.dcmread(scan_path).pixel_array, size)

        scan_file = str(save_path + "{}_{}.png".format(name, idx))

        cv2.imwrite(scan_file, resized_scan)

In [None]:
def create_dataset_files(addresses, train_proportion=0.6, validation_proportion=0.8):
    """Write the mask and CT-scan PNG files of every patient into its split.

    With `n = len(addresses)`, patient `idx` goes to `train` when
    `0 <= idx < round(n * train_proportion)`, to `validation` when it lies
    between that bound and `round(n * validation_proportion)`, and to `test`
    from that second bound up to `n`. Each range is checked independently.

    Args:
        addresses: per-patient arrays of mask paths indexed `[row][slice]`.
        train_proportion: fraction of patients bounding the training split.
        validation_proportion: cumulative fraction bounding the validation split.
    """
    total = len(addresses)
    train_lim = np.round(total * train_proportion)
    valid_lim = np.round(total * validation_proportion)
    # (split folder, inclusive lower bound, exclusive upper bound)
    splits = (
        ('train', 0, train_lim),
        ('validation', train_lim, valid_lim),
        ('test', valid_lim, total),
    )

    for idx in tqdm(range(total)):
        patient_addresses = addresses[idx]
        compressed_masks = create_compressed_mask(addresses=patient_addresses)
        # characters 1-2 of the first mask file name give the patient number
        num = patient_addresses[0][0].split('/')[-1][1:3]
        mask_name = str(num + '_mask')
        scan_name = str(num + '_CTscan')

        for split, lower, upper in splits:
            if not (lower <= idx < upper):
                continue
            split_root = 'data/foot_data/' + split
            create_compressed_mask_file(compressed_masks=compressed_masks, name=mask_name, save_path=split_root + '/mask/')
            create_CTscans_file(global_path=global_path, name=scan_name, save_path=split_root + '/image/', id=idx)

In [None]:
def create_gifs(CTscan, mask, data_number=None):
    """Save CT-scan and mask GIFs, plus a GIF showing both side by side.

    Three files are written in `./images_notebooks/gif/`:
    `CTscan_<data_number>.gif`, `Mask_<data_number>.gif` and
    `CTscan_Mask_<data_number>.gif`.

    Args:
        CTscan: sequence of CT-scan frames.
        mask: array of mask frames; it is multiplied by 255 before saving.
            NOTE(review): presumably expected to hold 0/1 values - confirm.
        data_number: identifier inserted in the three file names.
    """
    gif_dir = './images_notebooks/gif/'
    scan_gif = gif_dir + 'CTscan_{}.gif'.format(data_number)
    mask_gif = gif_dir + 'Mask_{}.gif'.format(data_number)
    combined_gif = gif_dir + 'CTscan_Mask_{}.gif'.format(data_number)

    # Create the gifs of the CT scan and of the masks
    imageio.mimsave(scan_gif, CTscan, fps=30)
    imageio.mimsave(mask_gif, mask*255, fps=30)

    # Re-open the two files just written (not a fixed, machine-specific path)
    # and close readers and writer even if a frame fails to process.
    with imageio.get_reader(scan_gif) as scan_reader, \
            imageio.get_reader(mask_gif) as mask_reader, \
            imageio.get_writer(combined_gif) as writer:
        # If they don't have the same number of frames, take the shorter
        number_of_frames = min(scan_reader.get_length(), mask_reader.get_length())

        for _ in range(number_of_frames):
            scan_frame = scan_reader.get_next_data()
            mask_frame = mask_reader.get_next_data()
            # put the two frames side by side
            writer.append_data(np.hstack((scan_frame, mask_frame)))

### Loading the data paths

### Create the dataset (with data augmentation)

Create the directories for the dataset.

In [None]:
# One folder per split, each with an `image` and a `mask` sub-folder.
upper_files = ['train', 'validation', 'test']
lower_files = ['image', 'mask']

for upper_file in upper_files:
    for lower_file in lower_files:
        create_dir('data/foot_data/{}/{}/'.format(upper_file, lower_file))

In [None]:
# List the entries found directly under `global_path` (one per foot).
foot_file_paths = get_foot_file_paths(global_path)

In [None]:
# X: first entry of every foot folder (returned as the CT-scan paths),
# y: second entry of every foot folder (returned as the mask paths).
X, y = get_foot_subfile_paths(foot_file_paths)

In [None]:
# For every mask folder in `y`, collect the array of its mask file paths.
addresses = [create_masks_addresses(patient) for patient in tqdm(y)]

In [None]:
# Write the mask and CT-scan PNG files of every patient into the
# train / validation / test folders.
create_dataset_files(addresses, TRAIN_PROPORTION, VALIDATION_PROPORTION)

In [None]:
def _natural_path_key(path):
    """Return a sort key that orders the digit runs of `path` numerically."""
    import re

    # re.split with a capturing group alternates text and digit runs, so
    # `x_2.png` sorts before `x_10.png`.
    return [int(part) if part.isdigit() else part for part in re.split(r'(\d+)', path)]


def create_foot_gif(number):
    """Build the CT-scan, mask and side-by-side GIFs of foot `number`.

    CT-scan slices are read from `<global_path>pied_<number>/CTscans_<number>/`
    and masks from `data/foot_data/train/mask/`, the folder filled by
    `create_dataset_files`. The frames are then handed to `create_gifs`.

    Args:
        number: foot identifier used in the folder names and GIF file names.
    """
    # Both placeholders take the same foot number; formatting two `{}` with a
    # single argument raised IndexError.
    scan_pattern = os.path.join(global_path, "pied_{0}/CTscans_{0}/*.dcm".format(number))
    scan_paths = sorted(found.replace('\\', '/') for found in glob.glob(scan_pattern))
    CTscan = [dicom.dcmread(path).pixel_array for path in tqdm(scan_paths)]

    # Mask files are numbered without zero padding, so order them numerically
    # to keep the frames in slice order.
    mask_paths = sorted(glob.glob("data/foot_data/train/mask/*.png"), key=_natural_path_key)
    # NOTE(review): only the first 740 masks are used - presumably the slice
    # count of one foot; confirm.
    # Masks are passed unscaled: create_gifs multiplies them by 255, and
    # scaling twice wraps uint8 values back to 1.
    masks = [np.array(Image.open(path)) for path in tqdm(mask_paths[0:740])]

    create_gifs(CTscan=np.array(CTscan), mask=np.array(masks), data_number=number)

In [None]:
# Inspect the first 740 training mask paths, in lexicographic order.
sorted(list(map(lambda x: x.replace('\\', '/'),glob.glob(r"C:\Users\arthu\OneDrive\Documents\MASV2\Biomechanics of the musculoskeletal system\Projet\Foot-image-segmentation-project_ME-482\data\foot_data\train\mask/*.png"))))[0:740]

In [None]:
# Embed the side-by-side CT-scan / mask GIF numbered '50' in the notebook output.
embed.embed_file('./images_notebooks/gif/CTscan_Mask_{}.gif'.format('50'))

In [None]:
# 3-D scatter plot of the non-zero voxels of `masks`.
# NOTE(review): `masks` is not defined at module level in the visible code
# (it is a local of `create_foot_gif`) - confirm where it comes from.
plt.rcParams["figure.figsize"] = [7.00, 3.50]
plt.rcParams["figure.autolayout"] = True
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# nonzero() returns one index array per axis; this rebinds the module-level
# `y` that previously held the mask folder paths.
z, x, y = masks.nonzero()
# colour each point by its index along the first axis
ax.scatter(x, y, z, c=z, cmap='gray', alpha=1)
plt.show()