In [11]:
import os
import logging
from datetime import datetime, timezone

import tifffile as tiff
import pandas as pd
import numpy as np

from flame import TileData
from flame import FLAMEImage

## Inputs

In [None]:
# Root directory that is walked recursively to find the raw TIF images.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
INPUT_DIREC = "/mnt/d/data/raw"
# Directory that this notebook creates if it does not exist yet.
OUTPUT_DIREC = "/mnt/d/data/processed/250507_CAREtraining_DS_v1"

### Initializing Logger instance

In [3]:
# Each run logs to its own file, named after the local start time
# (YYYYmmdd-HHMMSS) and placed in the current working directory.
_log_filename = f"{datetime.now().strftime('%Y%m%d-%H%M%S')}_logger.log"

# Configure the root logger: everything from DEBUG upwards goes to the file.
logging.basicConfig(
    level=logging.DEBUG,
    encoding="utf-8",
    filename=_log_filename,
)

# Named logger used by the rest of the notebook.
logger = logging.getLogger("main")

### Getting all TIF files from input directory

In [None]:
# Record in the log which directory the TIF images are read from.
logger.info(f"Reading TIF images from {INPUT_DIREC}")

In [8]:
# Make sure the output directory exists.
# - makedirs also creates missing parent folders.
# - exist_ok=True makes the cell safe to re-run when the folder is already there.
try:
    os.makedirs(OUTPUT_DIREC, exist_ok=True)
except OSError:
    # Only filesystem errors are handled here (permissions, a regular file
    # occupying the path, read-only mount, ...). logger.exception attaches the
    # traceback; the error is re-raised so the notebook stops at this cell.
    logger.exception(f"Could not create output directory ({OUTPUT_DIREC})")
    raise

In [13]:
# Map every directory below INPUT_DIREC to a TIF file found in it.
# NOTE(review): the dict is keyed by directory, so only ONE file per directory
# is retained (the last match). With sorted file names that is the
# lexicographically last TIF — confirm this is intended.
file_list = {}
for root, dirs, files in os.walk(INPUT_DIREC):
    # Sorting `dirs` in place fixes the traversal order of os.walk, so the
    # ids derived from this mapping are reproducible between runs.
    dirs.sort()
    for f in sorted(files):
        # Match on the real extension (case-insensitive) rather than on a
        # substring, so names like "scan.tif.bak" are not picked up.
        if f.lower().endswith((".tif", ".tiff")):
            file_list[root] = f

### Indexing paths that will be added to MLflow dataset

In [28]:
# Build the image paths relative to INPUT_DIREC plus a running integer id for
# each of them. os.path.relpath is used instead of counting path components,
# so a trailing separator or a non-normalised INPUT_DIREC cannot shift the
# cut-off point and drop the first relative component.
names = [
    os.path.relpath(os.path.join(root, filename), INPUT_DIREC)
    for root, filename in file_list.items()
]
ids = list(range(len(names)))

print(ids)
print('\n'.join(names))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
S268_250409_CAREtraining_PL/CAREtraining_ExVivo_scalp/Mosaic01_2x2_FOV600_z130_3Ch/Im_00004.tif
S268_250409_CAREtraining_PL/CAREtraining_ExVivo_scalp/Mosaic02_1x7_FOV1100_z65_3Ch/Im_00007.tif
S268_250409_CAREtraining_PL/CAREtraining_ExVivo_scalp/Mosaic03_2x2_FOV600_z65_3Ch/Im_00004.tif
S268_250409_CAREtraining_PL/CAREtraining_ExVivo_scalp/Mosaic04_2x2_FOV600_z70_3Ch/Im_00004.tif
S268_250409_CAREtraining_PL/CAREtraining_ExVivo_scalp/Mosaic05_2x2_FOV600_z75_3Ch/Im_00004.tif
S268_250409_CAREtraining_PL/CAREtraining_ExVivo_scalp/Mosaic06_2x2_FOV600_z85_3Ch/Im_00004.tif
S268_250409_CAREtraining_PL/coumarin6-0.3LP_ExVivo_slide/Image01_FOV600_z70_32Sp/Im_00001.tif
S268_250409_CAREtraining_PL/coumarin6-0.3LP_ExVivo_slide/Image02_FOV600_z70_32A1/Im_00001.tif
S268_250409_CAREtraining_PL/coumarin6-0.3LP_ExVivo_slide/Image03_FOV600_z70_32A0/Im_00001.tif
S268_250409_CAREtraining_PL/coumarin6_ExVivo_slide/Image01_FOV600_z70_32Sp/Im_0

In [44]:
# Single-column table holding one relative image path per row.
image_index = pd.DataFrame(names, columns=["image"])

In [47]:
# Use the numeric ids as the row index and label that index 'id'.
image_index = image_index.set_index(pd.Index(ids, name='id'))

In [48]:
# Display the index table as this cell's output.
image_index

Unnamed: 0_level_0,image
id,Unnamed: 1_level_1
0,S268_250409_CAREtraining_PL/CAREtraining_ExViv...
1,S268_250409_CAREtraining_PL/CAREtraining_ExViv...
2,S268_250409_CAREtraining_PL/CAREtraining_ExViv...
3,S268_250409_CAREtraining_PL/CAREtraining_ExViv...
4,S268_250409_CAREtraining_PL/CAREtraining_ExViv...
5,S268_250409_CAREtraining_PL/CAREtraining_ExViv...
6,S268_250409_CAREtraining_PL/coumarin6-0.3LP_Ex...
7,S268_250409_CAREtraining_PL/coumarin6-0.3LP_Ex...
8,S268_250409_CAREtraining_PL/coumarin6-0.3LP_Ex...
9,S268_250409_CAREtraining_PL/coumarin6_ExVivo_s...


In [50]:
# Destination of the index table: <current working directory>/datasets/raw_image_index.csv
CSVPATH = os.path.join(os.getcwd(), "datasets", "raw_image_index.csv")
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists first (no-op when it is already there).
os.makedirs(os.path.dirname(CSVPATH), exist_ok=True)
image_index.to_csv(CSVPATH)