# Cell crop

This notebook uses the outputs of both cell_segmentation.ipynb and cell_area_analysis.ipynb to crop normalized cell images from the original images of the dataset.

### Imports

In [1]:
import os
import sys
import json

import cv2 as cv
import pandas as pd
from tqdm import tqdm

sys.path.insert(0, "../../packages/python")
from models import cell_segmentation as segmentators

2025-06-08 16:40:38.299501: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-08 16:40:38.306575: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749411638.314582  185294 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749411638.317055  185294 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1749411638.324412  185294 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

### Definitions

In [2]:
sys.path.insert(0, "../../")
from config import DATASETS_PATH

# Side length, in pixels, of the square image every crop is resized to.
IMG_TARGET_SIDE = 200

# Dataset selection.
# DATASET: 'ina' or 'onion_cell_merged' (the only values the crop loop accepts).
# DATASET_SECTION: 'train', 'valid' or 'test' for 'onion_cell_merged'; '' for 'ina'.
# Alternative configuration for the 'ina' dataset:
# DATASET = 'ina'
# DATASET_SECTION = ''
DATASET = 'onion_cell_merged'
# DATASET_SECTION = 'test'
# DATASET_SECTION = 'train'
DATASET_SECTION = 'valid'

# Input paths
# Full field-of-view images the crops are cut from.
IMAGES_PATH = os.path.join(DATASETS_PATH, "full_fov", DATASET, "images",DATASET_SECTION)
# Per-image CSV files with the SAM detections (bounding boxes and cell ids).
CSV_PATH = os.path.join(DATASETS_PATH, "cropped", DATASET, "data", DATASET_SECTION)
# JSON file with the area data of every dataset, shared by all datasets and sections.
JSON_PATH = os.path.join(DATASETS_PATH, "cropped",'datasets_area_data.json')

# Output paths
# Directory the cropped cell images are written to.
CROPS_PATH = os.path.join(DATASETS_PATH, "cropped",DATASET,"images",DATASET_SECTION)




                


In [3]:
# NOTE(review): inactive cell - every line below is commented out, so nothing
# here is executed. It assigns the same names as the Definitions cell above,
# but builds the paths from MEDIA_PATH / TEMP_PATH; presumably an earlier
# directory layout kept for reference - confirm before deleting.

# sys.path.insert(0, "../../")
# from config import MEDIA_PATH, TEMP_PATH

# IMG_TARGET_SIDE = 200

# # Dataset selection
# DATASET = 'ina+' # Change this to 'onion_cell_merged' if needed
# DATASET_SECTION = '' # Change this to 'train', 'valid' or 'test' for the onion_cell_merged dataset, or leave empty for 'ina'

# # Specific paths
# IMAGES_PATH = os.path.join(MEDIA_PATH, f"images/{DATASET}/images/{DATASET_SECTION}")
# CSV_PATH = os.path.join(MEDIA_PATH, f"cropped_images/{DATASET}/data/{DATASET_SECTION}")
# CROPS_PATH = os.path.join(MEDIA_PATH, f"cropped_images/{DATASET}/images/{DATASET_SECTION}")
# JSON_PATH = os.path.join(TEMP_PATH, 'datasets_area_data.json')

### List of elements to use

In [4]:
# Names of the per-image CSV files holding the SAM detections, in sorted order.
csvs = os.listdir(CSV_PATH)
csvs.sort()

# Names of the full images the crops are cut from, in sorted order.
images = os.listdir(IMAGES_PATH)
images.sort()

# Area data loaded from JSON: nested by dataset and then by image group, each
# group carrying fields such as 'lado_cuadrado'.
with open(JSON_PATH, 'r') as f:
    area_data = json.load(f)

In [5]:
# Top-level keys of the area data; the crop loop indexes it with DATASET.
area_data.keys()

dict_keys(['ina_sam', 'onion_cell_merged'])

In [6]:
# Image groups recorded under the 'ina_sam' entry.
area_data['ina_sam'].keys()

dict_keys(['Abril2023'])

In [7]:
# Image groups recorded under 'onion_cell_merged'; the crop loop picks one
# using the first character of the image filename.
area_data['onion_cell_merged'].keys()

dict_keys(['A', 'B', 'C', 'D', 'E', 'F'])

In [8]:
# Fields stored for a single group; the crop loop only reads 'lado_cuadrado'.
area_data['onion_cell_merged']['A'].keys()

dict_keys(['area_promedio', 'lado_cuadrado', 'diff_area', 'dif_lado'])

### Dataset generation

In [9]:
# NOTE(review): inactive cell - every line below is commented out, so nothing
# here is executed. It is an earlier version of the crop loop in the next
# cell. It indexes area_data with 'INA' / the group letter directly, keys
# that the area_data loaded above does not have at its top level.

# resize_factor = IMG_TARGET_SIDE/area_data['INA']['lado_cuadrado']

# if not os.path.exists(CROPS_PATH):
#     os.makedirs(CROPS_PATH)


# for image in tqdm(images):

#     image_name, image_type =  image.split('.')
#     image_group = image_name[0] if image_name[0].isalpha() else "INA"
#     image_side = area_data[image_group]['lado_cuadrado']
#     image_resize_factor = int(resize_factor * image_side)

#     img = cv.imread(os.path.join(IMAGES_PATH, image))

#     df = pd.read_csv(os.path.join(CSV_PATH, f"{image_name}.csv"))
#     df_bbox = df[df['image'] == image_name][['x', 'y', 'w', 'h', 'cell_id']]

#     for _, row in df_bbox.iterrows():
#         cell_id = row['cell_id']
#         x, y, w, h = row['x'], row['y'], row['w'], row['h']
#         x, y, w, h = segmentators.CellMaskGenerator.adjust_bbox(segmentators.CellMaskGenerator, x, y, w, h, image_resize_factor*image_resize_factor, img.shape[1], img.shape[0])

#         crop = cv.resize(img[y:y+h, x:x+w], (IMG_TARGET_SIDE, IMG_TARGET_SIDE))
#         output_path = os.path.join(CROPS_PATH, f"{image_name}_{cell_id}.png")
#         cv.imwrite(output_path, crop)


                

In [10]:


# Crop every detected cell out of each full image, resize it to
# IMG_TARGET_SIDE x IMG_TARGET_SIDE and save it as a PNG in CROPS_PATH.

# Maps each supported DATASET value to its key inside area_data. The JSON
# stores the 'ina' dataset under 'ina_sam', so indexing area_data with DATASET
# directly raises KeyError for 'ina'.
AREA_DATA_KEYS = {"ina": "ina_sam", "onion_cell_merged": "onion_cell_merged"}
if DATASET not in AREA_DATA_KEYS:
    # Validated once up front instead of on every loop iteration.
    raise ValueError(f"Unknown dataset {DATASET}")
dataset_area_data = area_data[AREA_DATA_KEYS[DATASET]]

# IMG_TARGET_SIDE divided by the 'lado_cuadrado' of the ina_sam / Abril2023
# group: that group maps to IMG_TARGET_SIDE and the other groups scale
# proportionally to their own 'lado_cuadrado'.
resize_factor = IMG_TARGET_SIDE/area_data['ina_sam']["Abril2023"]['lado_cuadrado']

os.makedirs(CROPS_PATH, exist_ok=True)

for image in tqdm(images):
    # splitext only strips the final extension, so filenames that contain
    # additional dots no longer break the unpacking.
    image_name = os.path.splitext(image)[0]

    # 'ina' has a single group; for 'onion_cell_merged' the group is the first
    # character of the filename.
    image_group = "Abril2023" if DATASET == "ina" else image_name[0]
    image_side = dataset_area_data[image_group]['lado_cuadrado']
    image_resize_factor = int(resize_factor * image_side)

    image_path = os.path.join(IMAGES_PATH, image)
    img = cv.imread(image_path)
    if img is None:
        # cv.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than later on img.shape.
        raise OSError(f"Could not read image {image_path}")

    # Detections for this image: one row per cell with its bounding box.
    df = pd.read_csv(os.path.join(CSV_PATH, f"{image_name}.csv"))
    df_bbox = df[df['image'] == image_name][['x', 'y', 'w', 'h', 'cell_id']]

    for _, row in df_bbox.iterrows():
        cell_id = row['cell_id']
        x, y, w, h = row['x'], row['y'], row['w'], row['h']
        # adjust_bbox is called through the class, with the class itself as
        # the first argument. img.shape[1] / img.shape[0] are the image width
        # and height.
        # NOTE(review): the squared image_resize_factor is presumably the
        # target bbox area - confirm against CellMaskGenerator.adjust_bbox.
        x, y, w, h = segmentators.CellMaskGenerator.adjust_bbox(
            segmentators.CellMaskGenerator,
            x, y, w, h,
            image_resize_factor*image_resize_factor,
            img.shape[1], img.shape[0],
        )

        crop = cv.resize(img[y:y+h, x:x+w], (IMG_TARGET_SIDE, IMG_TARGET_SIDE))
        output_path = os.path.join(CROPS_PATH, f"{image_name}_{cell_id}.png")
        cv.imwrite(output_path, crop)



100%|██████████| 129/129 [00:07<00:00, 16.29it/s]
