# 1. Import necessary packages

In [None]:
import os

import turmoric
from turmoric.image_process import nd2_to_tif
from turmoric.utils import recursively_get_all_filepaths
from turmoric.apply_thresholds import apply_all_thresh

from pathlib import Path

import pandas as pd
import numpy as np
import tifffile as tiff
import matplotlib.pyplot as plt
from pathlib import Path
from skimage import filters, io
from skimage.segmentation import clear_border
from skimage.morphology import remove_small_objects
from skimage.measure import block_reduce, label, regionprops
from skimage import exposure
from scipy import ndimage

from turmoric.cell_analysis import apply_regionprops_recursively
from turmoric.utils import organize_files_without_leakage

from PIL import Image

from sklearn.model_selection import train_test_split
import shutil

import vampire

### Organizing your images properly makes it much more likely that the code will run successfully. The directory layout should look like this:

```
~/Data
  ├── region_1
      ├── treatment_1
      ├── treatment_2
      └── treatment_3
  ├── region_2
      ├── treatment_1
      ├── treatment_2
      └── treatment_3
```

# 2. If images are saved as ND2 files, convert to TIFs

In [None]:
path_to_nd2s = "/Users/nelsschimek/Documents/nancelab/Data/110225_inhibitors_bv-2_sheets/" # Put in the path to the directory that holds your images here


# Gather the paths of the .nd2 files under path_to_nd2s
# (presumably including subfolders, going by the helper's name).
file_list = recursively_get_all_filepaths(path_to_nd2s, file_type="nd2")

# Convert each ND2 file; nd2_to_tif is given the containing folder and the
# bare file name as two separate arguments.
for file in file_list:

    path_name = Path(file)  
    file_name = os.path.basename(file)
    nd2_to_tif(path_name.parent, file_name)
    

# 3. Determine the best thresholding method

#### This section is possibly the most important part of this entire process, as it determines the quality of data used for analysis. We will be tuning a few different parameters, namely:

1. The threshold method being used
2. The minimum size for excluding objects in the image, which are likely imaging artifacts and not cells
3. The maximum size for excluding objects from the image; large objects are likely overlapping cells that the thresholding method cannot correctly separate into multiple objects

#### The goal of this process is to minimize the background in the image while including as many real individual cells as possible

In [None]:
# Folder holding the TIFs used to compare thresholding methods
# (a single condition folder is used here).
path_to_tifs = "/Users/nelsschimek/Documents/nancelab/Data/110225_inhibitors_bv-2_sheets/control-24h"
# Gather the paths of the .tif files under path_to_tifs.
tif_files = recursively_get_all_filepaths(path_to_tifs, file_type='tif')

In [None]:
# Display the collected paths to confirm the expected files were found.
tif_files

#### First, we will try a few different thresholding methods to see if any are clearly best at separating out cells. You must also tell the function which channel of the image the cells of interest are in. 

#### After looking through the images, there should be 1-3 thresholds that are best at capturing the cells in the original image. We will now look at the minimum and maximum object size parameters

#### There are some parameters that you can tune in the ***create_microglia_mask*** function, specifically:

1. max_obj_size (default is 50,000). Any detected objects that are more than max_obj_size pixels in area will be removed by the threshold. This is particularly useful if the threshold algorithm cannot separate clusters of cells into individual objects

2. min_obj_size (default is 250). Any detected objects that have an area of less than min_obj_size pixels will be removed by the threshold. This is useful when there is background signal that the threshold detects as an object but doesn't correspond to a cell. 

In [None]:
def create_microglia_mask(image, threshold_method=filters.threshold_li, max_obj_size=50000, min_obj_size=250, channel=1):
    """
    Build a binary mask of individual cells from one image.

    Parameters:
    - image: Either a 2-D single-channel array, or a 3-D channel-first
      array (channel, rows, cols).
    - threshold_method: Callable that takes the 2-D image and returns a
      scalar intensity threshold (e.g. a skimage.filters.threshold_*
      function).
    - max_obj_size: Objects with at least this many pixels are discarded.
    - min_obj_size: Objects with fewer than this many pixels are discarded.
    - channel: Plane to use when `image` is 3-D. Defaults to 1, the value
      that used to be hard-coded.

    Returns:
    - 2-D boolean array that is True inside the retained objects, with
      interior holes filled.

    Raises:
    - ValueError: If `image` is neither 2-D nor 3-D.
    """
    if image.ndim == 3:
        image = image[channel, :, :]  # channel that holds the cells of interest
    elif image.ndim != 2:
        raise ValueError(
            f"Expected a 2-D or 3-D image, got an array with {image.ndim} dimensions"
        )

    threshold = threshold_method(image)
    foreground = image > threshold

    # Label the connected components and discard any that touch the image
    # border.
    labeled = clear_border(label(foreground))

    # On a label image, remove_small_objects keeps only the labels with at
    # least `min_size` pixels, so this isolates the oversized objects.
    oversized = remove_small_objects(labeled, min_size=max_obj_size)

    # Keep every labelled pixel that is not part of an oversized object.
    # Label ids are compared with 0 here: comparing them with the intensity
    # threshold (as was done before) silently drops objects whenever the
    # threshold is larger than the label ids.
    kept = (labeled > 0) & (oversized == 0)

    # Drop the remaining objects that are too small to be cells.
    kept = remove_small_objects(kept, min_size=min_obj_size)

    return ndimage.binary_fill_holes(kept)

In [None]:
# Read every TIF into memory, in sorted path order so that the images and
# the masks built from them stay aligned.
cell_images = [tiff.imread(f) for f in sorted(tif_files)]

In [None]:
# Build one mask per image for each candidate thresholding method.
# Uncomment the Otsu / IsoData lines to compare those methods as well.
mean_masks = [create_microglia_mask(f, threshold_method=filters.threshold_mean) for f in cell_images]
li_masks = [create_microglia_mask(f, threshold_method=filters.threshold_li) for f in cell_images]
#otsu_masks = [create_microglia_mask(f, threshold_method=filters.threshold_otsu) for f in cell_images]
#isodata_masks = [create_microglia_mask(f, threshold_method=filters.threshold_isodata) for f in cell_images]

In [None]:
# Side-by-side comparison: one row per image, with the contrast-enhanced
# original, the mean-threshold mask and the Li-threshold mask.
columns = 3
rows = len(li_masks)

# squeeze=False keeps `axes` two-dimensional even when there is only one
# image, so every `ax` in the loop below is a row of three Axes.
fig, axes = plt.subplots(rows, columns, figsize=(20, 50), squeeze=False)

# Label the columns once, on the top row (skipped if there are no rows).
for top_ax, title in zip(axes[0] if rows else [], ("Enhanced original", "Mean threshold", "Li threshold")):
    top_ax.set_title(title)

for ax, original, li, mean in zip(axes, cell_images, li_masks, mean_masks):

    raw_image = original[1,:,:]  # same channel the masks were built from
    # Adaptive histogram equalisation is for display only; the masks were
    # computed from the unmodified intensities.
    enhanced_im = exposure.equalize_adapthist(raw_image, clip_limit=0.03)
    ax[0].imshow(enhanced_im, cmap="gray")
    ax[1].imshow(mean, cmap="gray")
    ax[2].imshow(li, cmap="gray")

# 4. Applying chosen threshold to all images

In [None]:
def apply_li_threshold(input_folder, output_folder, channel, size):
    """
    Applies Li thresholding to all .tif images in the input folder
    (and subfolders)
    and saves the binary masks in the output folder.

    The folder structure below `input_folder` is mirrored below
    `output_folder`; each mask is written as `<image name>_li_thresh.npy`.
    A file that cannot be processed is reported and skipped so that one bad
    image does not stop the batch.

    Parameters:
    - input_folder: Path to the folder containing .tif images.
    - output_folder: Path to save the processed binary masks.
    - channel: Index of the channel (first axis of a 3-D image) that holds
      the cells of interest. Ignored for images that are already 2-D.
    - size: Minimum size of objects to retain in the binary mask.
    """
    if not os.path.isdir(input_folder):
        print(f"Error: Input folder '{input_folder}' does not exist.")
        return

    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Walk through all files and subfolders
    for root, _, files in os.walk(input_folder):
        for file in files:
            # Split on the real extension rather than replacing every
            # ".tif" occurrence inside the name.
            stem, extension = os.path.splitext(file)
            if extension != ".tif":
                continue

            input_path = os.path.join(root, file)

            # Create corresponding output subfolder
            relative_path = os.path.relpath(root, input_folder)
            output_subfolder = os.path.join(output_folder, relative_path)
            os.makedirs(output_subfolder, exist_ok=True)

            output_path = os.path.join(output_subfolder,
                                       f"{stem}_li_thresh.npy")

            try:
                img = io.imread(input_path)

                # Pick the plane holding the cells of interest.
                microglia_im = img if img.ndim == 2 else img[channel, :, :]

                # create_microglia_mask reads plane 1 of a channel-first
                # stack, so hand it a two-plane stack whose plane 1 is the
                # selected channel. Passing the bare 2-D plane made it index
                # a third axis that does not exist, and every file failed.
                stacked = np.stack((microglia_im, microglia_im))
                binary_li = create_microglia_mask(stacked, min_obj_size=size)

                # Save the binary mask as .npy
                np.save(output_path, binary_li)

            except Exception as e:
                # Best-effort batch: report the failure and keep going.
                print(f"Error processing {input_path}: {e}")

    print(f"Processing completed. Results are saved in '{output_folder}'.")


In [None]:
# input_folder is blank here: set it to the folder of TIFs to threshold
# before running this cell (apply_li_threshold reports an error and returns
# if the folder does not exist).
input_folder = ""
# Folder that will receive the .npy masks.
thresholded_images = "/Users/nelsschimek/Documents/nancelab/Data/gaby_data/tifs/li_thresh/Opt"

# channel=1 selects the second plane of each image.
apply_li_threshold(input_folder=input_folder, output_folder=thresholded_images, channel=1, size=73)

In [None]:
# Region properties requested for every object; this tuple is handed to
# apply_regionprops_recursively in the next cell.
props_list = ('area', 'bbox_area', 'centroid', 'convex_area',
              'eccentricity', 'equivalent_diameter', 'euler_number',
              'extent', 'filled_area', 'major_axis_length',
              'minor_axis_length', 'orientation', 'perimeter', 'solidity')

In [None]:
# Measure the listed properties for the masks under thresholded_images.
regionprops_df = apply_regionprops_recursively(input_folder=thresholded_images, properties_list=props_list)
# Derived shape descriptors:
#   circularity  = 4*pi*area / perimeter^2  (1 for a perfect circle)
#   aspect_ratio = major axis length / minor axis length
# NOTE(review): a zero perimeter or zero minor axis would give inf/NaN here —
# confirm such objects cannot reach this table.
regionprops_df['circularity'] = 4*np.pi*regionprops_df.area/regionprops_df.perimeter**2
regionprops_df['aspect_ratio'] = regionprops_df.major_axis_length/regionprops_df.minor_axis_length
regionprops_df


In [None]:
# Save the measurements; the relative path writes into the current working
# directory, and index=False leaves out the row index column.
regionprops_df.to_csv("gaby_testing_csv.csv", index=False)

In [None]:

def convert_npy_to_tif_recursive(input_dir, output_dir):
    """
    Mirror a tree of .npy arrays as .tif images.

    Every .npy file found anywhere below `input_dir` is loaded and written
    to the same relative location below `output_dir`, with its extension
    changed to .tif. A file that fails to convert is reported and skipped.

    Parameters:
    -----------
    input_dir : str or Path
        Root directory that is searched for .npy files
    output_dir : str or Path
        Root directory that receives the .tif files (created if missing)

    Returns:
    --------
    int : Number of files successfully converted

    Raises:
    -------
    ValueError : If `input_dir` does not exist
    """
    src_root, dst_root = Path(input_dir), Path(output_dir)

    if not src_root.exists():
        raise ValueError(f"Input directory does not exist: {input_dir}")

    dst_root.mkdir(parents=True, exist_ok=True)

    n_done = 0

    for src_file in src_root.rglob("*.npy"):
        # Same location relative to the root, with the extension swapped.
        rel = src_file.relative_to(src_root)
        dst_file = (dst_root / rel).with_suffix('.tif')
        dst_file.parent.mkdir(parents=True, exist_ok=True)

        try:
            array = np.load(src_file)
            Image.fromarray(array).save(dst_file)

            n_done += 1
            print(f"Converted: {rel}")

        except Exception as e:
            # Report and carry on with the remaining files.
            print(f"Error converting {src_file}: {str(e)}")

    print(f"\nConversion complete! {n_done} files converted.")
    return n_done


In [None]:
# Both paths are left blank here: fill in the folder of .npy masks and the
# folder that should receive the .tif files before running this cell.
convert_npy_to_tif_recursive(input_dir="", output_dir="")

In [None]:
# Define your base directory and target directories for training and testing
# Define your base directory and target directories for training and testing
base_dir = "/Users/nelsschimek/Documents/nancelab/Data/gaby_data/tifs/"
train_dir = "/Users/nelsschimek/Documents/nancelab/Data/gaby_data/tifs/training"
test_dir = "/Users/nelsschimek/Documents/nancelab/Data/gaby_data/tifs/testing"

# Define a list of subfolder names or patterns to look for
# (images are looked up under base_dir/<group>/<treatment condition>)
treatment_conditions = ["Acet", "Amin", "HC", "Meth", "Opt"]
groups = ["converted_tiffs"]

# Create training and testing directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

In [None]:
def _save_image_or_quadrants(src_path, dest_dir, file, split):
    """Copy `file` into `dest_dir`, or save its four quadrants there when `split` is true."""
    if not split:
        # Just copy the file as is
        shutil.copy(src_path, os.path.join(dest_dir, file))
        return

    # Read the pixels inside the context manager so the file handle is closed.
    with Image.open(src_path) as img:
        img_array = np.array(img)

    # Only the first two axes are split, so multi-channel images work too.
    h, w = img_array.shape[:2]
    quadrants = [
        img_array[:h//2, :w//2],      # Top-left (quad1)
        img_array[:h//2, w//2:],      # Top-right (quad2)
        img_array[h//2:, :w//2],      # Bottom-left (quad3)
        img_array[h//2:, w//2:]       # Bottom-right (quad4)
    ]

    filename_without_ext, ext = os.path.splitext(file)
    for i, quad in enumerate(quadrants, 1):
        new_filename = f"{filename_without_ext}_quad{i}{ext}"
        Image.fromarray(quad).save(os.path.join(dest_dir, new_filename))


def train_test_split_generic(base_dir, train_dir, test_dir, groups, treatment_conditions, test_size=0.2, split=True, random_state=None):
    """
    Split the images of every group/condition folder into training and
    testing sets, optionally cutting each image into four quadrants.

    The split is made on whole source images before any quadrants are cut,
    so all quadrants of one image land on the same side of the split.

    Parameters:
    - base_dir: Root folder; images are read from
      base_dir/<group>/<condition>.
    - train_dir: Folder that receives ALL training images (written flat,
      without group/condition subfolders).
    - test_dir: Root folder for test images, written to
      test_dir/<group>/<condition>.
    - groups: Iterable of group folder names.
    - treatment_conditions: Iterable of condition folder names.
    - test_size: Passed to sklearn's train_test_split.
    - split: If True, save the four quadrants of each image
      (<name>_quad1..4<ext>); if False, copy the files unchanged.
    - random_state: Passed to train_test_split. Set an integer to get the
      same split on every run; the default None keeps the previous random
      behaviour.
    """
    for group in groups:
        for condition in treatment_conditions:
            condition_path = os.path.join(base_dir, group, condition)
            if not os.path.isdir(condition_path):
                continue

            print(f'processing {group} {condition}')
            print(condition_path)

            # Regular, non-hidden files only: hidden entries such as
            # .DS_Store and nested folders cannot be opened as images.
            # Sorted so that a fixed random_state gives a fixed split.
            files = sorted(
                name for name in os.listdir(condition_path)
                if not name.startswith('.')
                and os.path.isfile(os.path.join(condition_path, name))
            )
            if len(files) < 2:
                # Too few files to fill both sides of the split.
                print(f'skipping {group} {condition}: need at least 2 files to split, found {len(files)}')
                continue

            train_slices, test_slices = train_test_split(
                files, test_size=test_size, random_state=random_state)

            # Create subdirectories for training and testing
            train_subdir = os.path.join(train_dir, group, condition)
            test_subdir = os.path.join(test_dir, group, condition)
            os.makedirs(train_subdir, exist_ok=True)
            os.makedirs(test_subdir, exist_ok=True)

            # NOTE(review): training images go flat into train_dir, not into
            # train_subdir. This is unchanged from the previous behaviour and
            # presumably intentional so that a single model can be fitted on
            # the whole training folder — confirm. Files with the same name
            # in different conditions would overwrite each other there.
            for file in train_slices:
                _save_image_or_quadrants(
                    os.path.join(condition_path, file), train_dir, file, split)

            for file in test_slices:
                _save_image_or_quadrants(
                    os.path.join(condition_path, file), test_subdir, file, split)

In [None]:
# Run the split: 20% of the images in each condition go to testing, and
# split=True saves every image as four quadrants.
train_test_split_generic(base_dir=base_dir,
                         train_dir=train_dir,
                         test_dir=test_dir,
                         groups=groups,
                         treatment_conditions=treatment_conditions,
                         test_size=0.2, 
                         split=True)

In [None]:
# Training image folder (same location as train_dir above).
image_set_path = "/Users/nelsschimek/Documents/nancelab/Data/gaby_data/tifs/training"

# Run VAMPIRE's property extraction on the training images.
vampire.extraction.extract_properties(image_set_path)

In [None]:
# One-row table describing the model to fit: where the images are, where to
# write the output, the model name, and the number of contour points and
# shape clusters.
# NOTE(review): num_pc is NaN, presumably so VAMPIRE chooses the number of
# principal components itself — confirm against the VAMPIRE documentation.
build_info_df = pd.DataFrame({
    'img_set_path': [image_set_path],
    'output_path': [image_set_path],
    'model_name': ['li'],
    'num_points': [50],
    'num_clusters': [5],
    'num_pc': [np.nan]
})

In [None]:
# Fit the model(s) described by build_info_df.
vampire.quickstart.fit_models(build_info_df)

In [None]:
# NOTE(review): the pickle name is hard-coded. It presumably matches what
# fit_models wrote for model_name 'li' with 50 points and 5 clusters —
# check the actual file name in image_set_path if any of those settings change.
model_path = os.path.join(image_set_path, 'model_li_(50_5_29)__.pickle')
vampire_model = vampire.util.read_pickle(model_path)

In [None]:

main_path = "/Users/nelsschimek/Documents/nancelab/Data/gaby_data/tifs/testing/converted_tiffs"

# Conditions to score with the fitted model, one table row each.
# "Opt" was commented out in the original table; add it to this list to
# include it again.
condition_names = ["Acet", "Amin", "HC", "Meth"]
condition_dirs = [f"{main_path}/{name}" for name in condition_names]

# Each condition is read from, and its results written to, its own folder;
# every row uses the same model file.
apply_info_df = pd.DataFrame({
    'img_set_path': condition_dirs,
    'model_path': [model_path] * len(condition_names),
    'output_path': list(condition_dirs),
    'img_set_name': condition_names,
})

In [None]:
# Apply the fitted model to every image set listed in apply_info_df.
vampire.quickstart.transform_datasets(apply_info_df)