# **Libraries & functions**

In [1]:
import os
import sys
import numpy as np
from PIL import Image
from tqdm import tqdm
import numpy as np
from PIL import Image
import glob
import random
from google.colab import drive
from tqdm import tqdm

# Define the project directory path
project_dir = '/content/gdrive/MyDrive/'

# The project directory lives on Google Drive, so it only exists once Drive is
# mounted. Mount it when the directory is missing so the notebook also works
# after "Restart and run all"; an already-mounted Drive is left untouched.
if not os.path.isdir(project_dir):
    drive.mount('/content/gdrive')

In [10]:
def raw_2_tiff(raw_file_path, output_folder, width, height, num_slices, dtype, byte_order, base_name):
    """Convert a headerless raw volume into a stack of 8-bit TIFF slices.

    The file is read as a flat array of ``dtype`` samples, reshaped to
    ``(num_slices, height, width)`` and rescaled to 0-255 using the minimum and
    maximum of the *whole* volume, so all slices share the same grey scale.
    Slice ``i`` is written to ``<output_folder>/<base_name><i:04d>.tif``.

    Args:
        raw_file_path: Path of the raw binary file.
        output_folder: Folder receiving the slices (created if missing).
        width: Slice width in pixels.
        height: Slice height in pixels.
        num_slices: Number of slices stored in the file.
        dtype: NumPy dtype of one sample (e.g. ``np.uint16``).
        byte_order: Endianness of the samples: ``'little'``/``'<'``,
            ``'big'``/``'>'`` or ``'native'``/``'='``. ``None`` means native.
        base_name: Prefix of the generated file names.

    Raises:
        ValueError: If ``byte_order`` is not recognised or the number of
            samples in the file differs from ``width * height * num_slices``.
    """
    # Apply the requested endianness to the sample dtype; without this the
    # file is always decoded in the machine's native byte order.
    order_codes = {
        'little': '<', '<': '<',
        'big': '>', '>': '>',
        'native': '=', '=': '=',
    }
    if byte_order is None:
        order_code = '='
    else:
        order_code = order_codes.get(str(byte_order).lower())
        if order_code is None:
            raise ValueError(
                f"Unsupported byte_order {byte_order!r}; expected 'little', 'big' or 'native'"
            )
    raw_dtype = np.dtype(dtype).newbyteorder(order_code)

    # Read the raw file
    with open(raw_file_path, 'rb') as f:
        raw_data = np.frombuffer(f.read(), dtype=raw_dtype)

    # Check if the size matches the expected size
    expected_size = width * height * num_slices
    if raw_data.size != expected_size:
        raise ValueError(f"Expected raw data size {expected_size}, but got {raw_data.size}")

    # Reshape the data into a 3D array
    raw_data = raw_data.reshape((num_slices, height, width))

    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Nothing to normalise or save for an empty volume
    if raw_data.size == 0:
        return

    # Global intensity range used to map every slice to 8-bit
    lowest = float(raw_data.min())
    span = float(raw_data.max()) - lowest

    # Save each slice as a .tif file with a progress bar. Normalising one slice
    # at a time avoids holding a float64 copy of the entire volume in memory.
    for i in tqdm(range(num_slices), desc="Saving slices"):
        if span > 0:
            slice_data = ((raw_data[i].astype(np.float64) - lowest) / span * 255).astype(np.uint8)
        else:
            # Constant volume: avoid 0/0 and write a uniformly black slice
            slice_data = np.zeros((height, width), dtype=np.uint8)
        slice_image = Image.fromarray(slice_data)
        file_name = f"{base_name}{i:04d}.tif"
        slice_image.save(os.path.join(output_folder, file_name))


def tiff_2_npy(project_dir, tiff_folder, dataset_name, npy_name, seed=None):
    """Load paired image/mask slices and store them as two ``.npy`` arrays.

    Images are read from ``<project_dir>/<tiff_folder>/<dataset_name>/images``
    and masks from the sibling ``masks`` folder. Files are paired by their
    position in the sorted listings, the pairs are shuffled together, and the
    results are saved to ``<project_dir>/npy_data/<npy_name>/`` as
    ``<dataset_name>_img.npy`` (images converted to RGB) and
    ``<dataset_name>_mask.npy`` (masks as stored on disk).

    Args:
        project_dir: Root folder of the project.
        tiff_folder: Folder (inside ``project_dir``) holding the datasets.
        dataset_name: Dataset sub-folder containing ``images`` and ``masks``.
        npy_name: Name of the output folder created under ``npy_data``.
        seed: Optional seed making the shuffle reproducible. With ``None``
            (default) the module-level ``random`` generator is used.

    Raises:
        ValueError: If the number of image files and mask files differ.
    """
    input_dir = os.path.join(project_dir, tiff_folder, dataset_name)
    save_directory = os.path.join(project_dir, 'npy_data', npy_name)
    rockname_img = dataset_name + "_img.npy"
    rockname_mask = dataset_name + "_mask.npy"

    def _list_files(folder):
        # Keep regular, non-hidden files only: sub-folders and hidden entries
        # (e.g. ".ipynb_checkpoints") are not slices and cannot be opened.
        return sorted(
            os.path.join(folder, name)
            for name in os.listdir(folder)
            if not name.startswith('.') and os.path.isfile(os.path.join(folder, name))
        )

    # Sorted image paths and sorted mask paths; pairing relies on both folders
    # sorting into the same slice order.
    img_files = _list_files(os.path.join(input_dir, 'images'))
    mask_files = _list_files(os.path.join(input_dir, 'masks'))

    # Check if the number of image files matches the number of mask files
    if len(img_files) != len(mask_files):
        raise ValueError("The number of image files does not match the number of mask files")

    # Combine image and mask file paths into tuples
    file_pairs = list(zip(img_files, mask_files))

    # Shuffle the pairs (image and mask stay together)
    if seed is None:
        random.shuffle(file_pairs)
    else:
        random.Random(seed).shuffle(file_pairs)

    img = []
    mask = []

    for img_path, mask_path in tqdm(file_pairs, desc="Loading images and masks"):
        # Context managers close each file as soon as its pixels are copied
        with Image.open(img_path) as img_file:
            img.append(np.array(img_file.convert('RGB')))
        with Image.open(mask_path) as mask_file:
            mask.append(np.array(mask_file))

    # Create save directory if it does not exist
    os.makedirs(save_directory, exist_ok=True)

    # Save the images and masks as numpy arrays
    np.save(os.path.join(save_directory, rockname_img), np.asarray(img))
    np.save(os.path.join(save_directory, rockname_mask), np.asarray(mask))

# **Raw 2 tiff**

In [5]:
# Define the name of the folder containing the raw files. All data will be saved in TIFF format.
# The expected data directory structure will be as follows:
# tiff_folder
# |_sample1
# |  |_images
# |     |_images0000.tif
# |     |_images0001.tif
# |     |_...
# |  |_masks
# |     |_mask1.tif
# |     |_mask2.tif
# |     |_...
# |  |_...
# ...

# Geometry and sample encoding of the raw volume
width, height = 976, 1014
num_slices = 601
dtype = np.uint16
byte_order = 'little'

# Folder and file naming
tiff_folder = 'data_test'
dataset_name = "Sample2_raw"
slice_name = 'images'

# input_dir is the path of the .raw file itself; output_dir is the folder
# that receives the exported slices.
input_dir = os.path.join(project_dir, 'code', 'DinoV2', dataset_name + '.raw')
output_dir = os.path.join(project_dir, tiff_folder, 'sample2', 'images')

raw_2_tiff(
    raw_file_path=input_dir,
    output_folder=output_dir,
    width=width,
    height=height,
    num_slices=num_slices,
    dtype=dtype,
    byte_order=byte_order,
    base_name=slice_name,
)

Saving slices: 100%|██████████| 601/601 [00:08<00:00, 69.07it/s]


# **Tiff 2 npy**

In [11]:
# Run the following to load the tiff files and save them in npy format.
# The expected data directory structure will be as follows:
# npy_name
# |  |sample1_img.npy
# |  |sample1_mask.npy
# |  |sample2_img.npy
# |  |sample2_mask.npy
# ...

npy_name = "som_name"  # Name of the folder that will contain the npy data
dataset_name = "sample2"
tiff_folder = 'data_test'

tiff_2_npy(
    project_dir=project_dir,
    tiff_folder=tiff_folder,
    dataset_name=dataset_name,
    npy_name=npy_name,
)

Loading images and masks: 100%|██████████| 601/601 [00:14<00:00, 40.21it/s]
