In [None]:
import os
import logging
from datetime import datetime, timezone

import tifffile as tiff
import pandas as pd
import numpy as np

from flame import TileData
from flame import FLAMEImage
from flame.io import get_unshared_path

## Inputs

In [None]:
# Root directory that is scanned (recursively) for raw TIF images.
INPUT_DIREC = "/mnt/d/data/raw"
# Directory holding the dataset bookkeeping files, including the image index CSV.
DATASET_DIREC = "/mnt/d/code/Balu_CARE/datasets"
IMAGE_INDEX_PATH = os.path.join(DATASET_DIREC, "raw_image_index.csv")

# Fail with an explicit exception rather than `assert`: assertions are removed
# when Python runs with -O, which would turn a missing index into a confusing
# error inside pd.read_csv.
if not os.path.isfile(IMAGE_INDEX_PATH):
    raise FileNotFoundError(f"Image index could not be found at {IMAGE_INDEX_PATH}")
IMAGE_INDEX = pd.read_csv(IMAGE_INDEX_PATH)

# The cells below read these three columns; check for them up front so a
# malformed index is reported before the input directory is walked.
_REQUIRED_INDEX_COLUMNS = ("id", "image", "imgarr_nframes")
_missing_index_columns = [c for c in _REQUIRED_INDEX_COLUMNS if c not in IMAGE_INDEX.columns]
if _missing_index_columns:
    raise KeyError(
        f"Image index at {IMAGE_INDEX_PATH} is missing required column(s): {_missing_index_columns}"
    )

# When True, the stored `imgarr_nframes` values of already-indexed images are
# discarded and replaced by NaN before the index is rewritten.
RESET_IMAGEARR_NFRAMES = True

In [None]:
# Preview the first five rows of the index that was just loaded.
IMAGE_INDEX.head(n=5)

In [None]:
# Number of images already recorded in the index (row count).
IMAGE_INDEX.shape[0]

In [None]:
# Paths of the images that are already part of the index.
PREINDEXED_IMAGES = IMAGE_INDEX['image']

# Frame counts for the already-indexed images. An independent object is used
# (copy / fresh Series) instead of assigning through a column selected from
# IMAGE_INDEX: slice-assigning into that selection is chained assignment, which
# triggers SettingWithCopyWarning, silently mutates the loaded frame, and
# upcasts or fails when the column was parsed as an integer dtype.
if RESET_IMAGEARR_NFRAMES:
    # Discard every stored value: one NaN per existing row, float dtype.
    PREINDEXED_NFRAMES = pd.Series(np.nan, index=IMAGE_INDEX.index, name='imgarr_nframes')
else:
    PREINDEXED_NFRAMES = IMAGE_INDEX['imgarr_nframes'].copy()


### Initializing Logger instance

In [None]:
logger = logging.getLogger("main")

# logging.basicConfig opens the log file immediately and raises
# FileNotFoundError when the parent directory is missing, so make sure
# "<cwd>/logs" exists before configuring the file handler.
log_direc = os.path.join(os.getcwd(), "logs")
os.makedirs(log_direc, exist_ok=True)

# One log file per run, named after the local start time.
# NOTE: basicConfig does nothing if the root logger already has handlers
# (e.g. when this cell is re-run in the same kernel).
logging.basicConfig(
    filename=os.path.join(log_direc, f"{datetime.now().strftime('%Y%m%d-%H%M%S')}_logger.log"),
    encoding="utf-8",
    level=logging.DEBUG
)

### Getting all TIF files from input directory

In [None]:
# Sanity check of the membership test used when scanning the input directory:
# the first indexed path must be found among the indexed paths.
# `.iloc[0]` is positional and the length guard keeps the cell from raising on
# an empty index (it then simply evaluates to False).
len(PREINDEXED_IMAGES) > 0 and PREINDEXED_IMAGES.iloc[0] in PREINDEXED_IMAGES.values

In [None]:
# Record in the run log which directory is about to be scanned.
logger.info("Reading TIF images from %s", INPUT_DIREC)

In [None]:
# Collect every TIF file under INPUT_DIREC that is not yet in the index.
# `file_list` maps a newly assigned integer id -> path as returned by
# get_unshared_path(INPUT_DIREC, <full path>).

# Paths already present in the index. A set gives constant-time membership
# tests instead of scanning the whole column for every file found on disk.
known_paths = set(PREINDEXED_IMAGES.values)

# First id to hand out. Starting at the row count alone would collide with an
# existing id whenever the stored ids are not exactly 0..n-1 (gaps, 1-based
# ids, removed rows), so never start below max(existing id) + 1.
idx = len(IMAGE_INDEX)
max_existing_id = IMAGE_INDEX['id'].max() if idx else np.nan
if pd.notna(max_existing_id):
    idx = max(idx, int(max_existing_id) + 1)

TIF_SUFFIXES = (".tif", ".tiff")

file_list = {}
for root, dirs, files in os.walk(INPUT_DIREC):
    # os.walk yields entries in arbitrary order; sorting `dirs` in place (which
    # steers the top-down traversal) and the file names makes the id
    # assignment reproducible between runs.
    dirs.sort()
    for f in sorted(files):
        # Match on the real file extension (case-insensitive) rather than a
        # substring, so names such as "scan.tif.bak" are not picked up.
        if not f.lower().endswith(TIF_SUFFIXES):
            continue
        this_path = get_unshared_path(INPUT_DIREC, os.path.join(root, f))
        if this_path in known_paths:
            continue  # already indexed
        file_list[idx] = this_path
        idx += 1

In [None]:
# Number of TIF files found on disk that were not yet in the index.
len(file_list)

### Indexing paths that will be added to the MLflow dataset

In [None]:
# Build the three parallel columns of the updated index: the rows that were
# already indexed come first, followed by one entry per newly discovered file.
new_ids = file_list.keys()
new_paths = file_list.values()

ids = [*IMAGE_INDEX['id'], *new_ids]
names = [*PREINDEXED_IMAGES, *new_paths]
# Newly discovered images have no frame count yet, so they get NaN.
nframes = [*PREINDEXED_NFRAMES, *(np.nan for _ in file_list)]

print(f"Number of ids: {len(ids)}")
print('\n'.join(names))

In [None]:

# Assemble the updated index table: one row per image, with its path and its
# (possibly still unknown) frame count.
index_columns = dict(image=names, imgarr_nframes=nframes)
image_index = pd.DataFrame(index_columns)

In [None]:
# Use the collected ids as the row index and label it 'id', which is the
# header the index column gets when the table is written to CSV.
id_index = pd.Index(ids, name='id')
image_index = image_index.set_index(id_index)

In [None]:
# Display the updated index (existing rows plus the newly added images)
# for a visual check before it is written back to disk.
image_index

In [None]:
# Persist the updated index over the file it was loaded from.
# The CSV is first written to a sibling temporary file and then swapped into
# place with os.replace, so an interrupted or failed write cannot leave a
# truncated index behind at IMAGE_INDEX_PATH.
tmp_index_path = f"{IMAGE_INDEX_PATH}.tmp"
image_index.to_csv(tmp_index_path)
os.replace(tmp_index_path, IMAGE_INDEX_PATH)
logger.info("Wrote %d index rows to %s", len(image_index), IMAGE_INDEX_PATH)