In [2]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
import glob
import shutil

Categories:
- 0: background
- 1: crystal
- 2: loop
- 3: liquor

In [3]:
def rename_folder(path, dataset_name, type_name):
    assert str(type_name) in ["recon", "gt"]
    filenames = glob.glob(path + "*.tif")
    print("DATASET:", dataset_name, "TYPE:", type_name, "N_FILES:", len(filenames))
    for n, f in enumerate(filenames):
        new_name = str(dataset_name) + "_" + str(type_name) + "_" + str(n).zfill(5) + ".tif"
        os.rename(f, path + new_name)

In [4]:
path = "/dls/science/users/lqg38422/DATA/13769/recon/"
filenames = glob.glob(path + "*.tif")
dataset_name = "13769"
type_name = "recon"
for n, f in enumerate(filenames):
    new_name = str(dataset_name) + "_" + str(type_name) + "_" + str(n).zfill(5) + ".tif"
    os.rename(f, path + new_name)
    
path = "/dls/science/users/lqg38422/DATA/13769/gt/"
filenames = glob.glob(path + "*.tif")
dataset_name = "13769"   
type_name = "gt"  
for n, f in enumerate(filenames):
    new_name = str(dataset_name) + "_" + str(type_name) + "_" + str(n).zfill(5) + ".tif"
    os.rename(f, path + new_name)

In [12]:
def pad_tensors(tensor, shape):
    h, w = tensor.shape
    nh, nw = shape
    up_down = (int(np.floor((nh - h)/2)), int(np.ceil((nh - h)/2)))
    left_right = (int(np.floor((nw - w)/2)), int(np.ceil((nw - w)/2)))
    tensor = np.pad(tensor, (up_down, left_right), 'constant')
    return tensor

In [13]:
def normalise_im(im):
    return (im - im.min())/(im.max() - im.min())

In [14]:
def pad_folder(path, shape, normalise=True):
    filenames = glob.glob(path)
    for f in filenames:
        im = Image.open(f)
        im = np.array(im)
        if normalise:
            im = normalise_im(im)
        im = pad_tensors(im, shape)
        im = Image.fromarray(im)
        im.save(f)

In [16]:
# Before starting empty train folder
!rm /dls/tmp/lqg38422/TRAIN/recon/*
!rm /dls/tmp/lqg38422/TRAIN/gt/*

!rm /dls/tmp/lqg38422/VALIDATION/recon/*
!rm /dls/tmp/lqg38422/VALIDATION/gt/*

rm: cannot remove ‘/dls/tmp/lqg38422/VALIDATION/recon/*’: No such file or directory
rm: cannot remove ‘/dls/tmp/lqg38422/VALIDATION/gt/*’: No such file or directory


In [17]:
import time

# Paths
train_path_reco = "/dls/tmp/lqg38422/TRAIN/recon/*"
train_path_gt = "/dls/tmp/lqg38422/TRAIN/gt/*"

val_path_reco = "/dls/tmp/lqg38422/VALIDATION/recon/*"
val_path_gt = "/dls/tmp/lqg38422/VALIDATION/gt/*"

# Select training/validation datasets
train_datasets = ["13068", "13076", "13246", "13270", "13295", "13551", "13724", "13737", "13769", "14253"] #14116 FBP
#val_datasets = ["13737"]
test_datasets = ["13284"]

# Parameters
shape = (900, 900)

# Add training data
t = time.time()
for dataset in train_datasets:
    print(f"--- DATASET {dataset} ---")
    
    recon_path = f"/dls/science/users/lqg38422/DATA/{dataset}/recon/"
    gt_path = f"/dls/science/users/lqg38422/DATA/{dataset}/gt/"
    
    """
    # 1. Rename
    print("Renaming...")
    recon_path = f"/dls/science/users/lqg38422/DATA/{dataset}/recon/"
    rename_folder(recon_path, dataset, "recon")
    gt_path = f"/dls/science/users/lqg38422/DATA/{dataset}/gt/"
    rename_folder(gt_path, dataset, "gt")
    print("Done!")
    """
    
    # 2. Copy files
    print("Copying recon...")
    for filename in glob.glob(recon_path + "*"):
        shutil.copy(filename, train_path_reco[:-1])
    print("Copying gt...")
    for filename in glob.glob(gt_path + "*"):
        shutil.copy(filename, train_path_gt[:-1])
    print("Done!")
    
# 3. Pad all files
print("Padding recon...")
pad_folder(train_path_reco, shape, normalise=True)
print("Padding gt...")
pad_folder(train_path_gt, shape, normalise=False)

res = time.time() - t
print("Training data time:", res)


# Add validation data
t = time.time()
for dataset in val_datasets:
    print(f"--- DATASET {dataset} ---")
    
    recon_path = f"/dls/science/users/lqg38422/DATA/{dataset}/recon/"
    gt_path = f"/dls/science/users/lqg38422/DATA/{dataset}/gt/"
    
    """
    # 1. Rename
    print("Renaming...")
    recon_path = f"/dls/science/users/lqg38422/DATA/{dataset}/recon/"
    rename_folder(recon_path, dataset, "recon")
    gt_path = f"/dls/science/users/lqg38422/DATA/{dataset}/gt/"
    rename_folder(gt_path, dataset, "gt")
    print("Done!")
    """
    
    # 2. Copy files
    print("Copying recon...")
    for filename in glob.glob(recon_path + "*"):
        shutil.copy(filename, val_path_reco[:-1])
    print("Copying gt...")
    for filename in glob.glob(gt_path + "*"):
        shutil.copy(filename, val_path_gt[:-1])
    print("Done!")
    
# 3. Pad all files
print("Padding recon...")
pad_folder(val_path_reco, shape, normalise=True)
print("Padding gt...")
pad_folder(val_path_gt, shape, normalise=False)

res = time.time() - t
print("Validation data time:", res)

--- DATASET 13068 ---
Copying recon...
Copying gt...
Done!
--- DATASET 13076 ---
Copying recon...
Copying gt...
Done!
--- DATASET 13246 ---
Copying recon...
Copying gt...
Done!
--- DATASET 13270 ---
Copying recon...
Copying gt...
Done!
--- DATASET 13295 ---
Copying recon...
Copying gt...
Done!
--- DATASET 13551 ---
Copying recon...
Copying gt...
Done!
--- DATASET 13724 ---
Copying recon...
Copying gt...
Done!
--- DATASET 14253 ---
Copying recon...
Copying gt...
Done!
Padding recon...
Padding gt...
Training data time: 466.56566071510315
--- DATASET 13737 ---
Copying recon...
Copying gt...
Done!
Padding recon...
Padding gt...
Validation data time: 89.50831747055054


In [16]:
# Set recon to float16

def normalise(im):
    return (im - im.min())/(im.max() - im.min())

def recon_to_float16(path):
    for filename in glob.glob(path):
        im = np.array(Image.open(filename)).astype(np.uint16)
        #im = normalise(im)
        im = Image.fromarray(im)
        im.save(filename)

In [17]:
path = "/dls/science/users/lqg38422/DATA/13076/recon/*"
recon_to_float16(path)

In [22]:
# 13284 works - crystal is 2 loop is 1

path = "/dls/science/users/lqg38422/DATA/13284/gt/*"

vol = []
for file in glob.glob(path):
    im = np.array(Image.open(file))
    vol.append(im)
vol = np.stack(vol)
idx_1 = vol == 1
idx_2 = vol == 2
vol[idx_1] = 2
vol[idx_2] = 1
filenames = glob.glob(path)
for n in range(len(vol)):
    im = vol[n].astype(np.uint8)
    im = Image.fromarray(im)
    im.save(filenames[n])

In [21]:
# 13076 - fix reconstruction dimensions order - convert uint8 to >u2 + correct categories

def gt_to_uint8(path):
    for filename in glob.glob(path):
        im = np.array(Image.open(filename))
        im = im.astype(np.uint8)
        im = Image.fromarray(im).convert('RGB').convert('L')
        im.save(filename)
               
path = "/dls/science/users/lqg38422/DATA/13076/gt/*"
gt_to_uint8(path)

In [9]:
# 13270 - there's a 4 in image 606 - values are [0,2,3,4] - if != 0 -=1

path = "/dls/science/users/lqg38422/DATA/13270/gt/*"

vol = []
for file in glob.glob(path):
    im = np.array(Image.open(file))
    vol.append(im)
vol = np.stack(vol)
np.unique(vol)
vol[vol != 0] -= 1
filenames = glob.glob(path)
for n in range(len(vol)):
    im = vol[n].astype(np.uint8)
    im = Image.fromarray(im)
    im.save(filenames[n])