# Installations

In [None]:
# %pip install "opencv-python-headless<4.3"
# %pip install cellpose
# %pip install cytoself

# %pip install --upgrade scikit-image

# Imports

In [None]:
import os
import torch
import numpy as np
import random
from config import SEED
from preprocessing import preprocess_images
import logging
import pandas as pd
from src.config import RAW_FOLDER_ROOT, PROCESSED_FOLDER_ROOT


# Seed the random number generators with the shared project seed so repeated
# runs of this notebook start from the same RNG state.
np.random.seed(SEED)
random.seed(SEED)
# torch keeps its own generator, independent of NumPy and the stdlib `random`
# module, so it has to be seeded separately.
torch.manual_seed(SEED)

%reload_ext autoreload
%autoreload 2
%aimport

In [None]:
# Send INFO-and-above log records for this run to a file under ./logs.
log_file_path = "./logs/preprocessing.log"
# basicConfig(filename=...) opens the file immediately and does not create
# missing parent directories, so make sure the folder exists first.
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
logging.basicConfig(filename=log_file_path, level=logging.INFO,
                    format="%(asctime)s %(levelname)s %(message)s",
                    datefmt="%Y-%m-%d %H:%M:%S")

# Record whether torch can see a CUDA device for this run.
logging.info(f"Is GPU available: {torch.cuda.is_available()}")

# Entries under RAW_FOLDER_ROOT that must not be processed.
folder_to_ignore = ["220714", "220629_neurons_12markers_unstressed", "220707_neurons_12markers_stressed"]
# Filter instead of list.remove(): remove() raises ValueError when an ignored
# name is not present in the directory listing.
raw_files = [name for name in os.listdir(RAW_FOLDER_ROOT) if name not in folder_to_ignore]


# One row per raw entry, one column per cell line. Every cell starts at -1,
# a placeholder that stays in place until a count is written for that
# (entry, cell line) pair.
cols = ["WT", "FUS", "TDP43", "TBK1", "OPTN"]
dead_cells_count_df = pd.DataFrame(
    np.full((len(raw_files), len(cols)), -1, dtype=int),
    columns=cols,
    index=raw_files,
)


def _make_output_filename_formatter(cell_line):
    """Return a callable mapping (filename, ext) to "<filename>_<cell_line><ext>".

    The cell line is captured when the formatter is created, so the result
    does not change if the loop variable is rebound before the callable is
    invoked.
    """
    def format_output_filename(filename, ext):
        return f"{filename}_{cell_line}{ext}"

    return format_output_filename


# Destination of the running dead-cell-count table. The original path
# interpolated the builtin `id`, which produced a file literally named
# "dead_cells_counts_<built-in function id>".
dead_cells_counts_path = "./dead_cells_counts.csv"

# Walk RAW_FOLDER_ROOT/<raw_f>/<cell_line>/<condition> and preprocess every
# condition folder into the mirrored location under PROCESSED_FOLDER_ROOT.
for raw_f in raw_files:
    input_folder_root = os.path.join(RAW_FOLDER_ROOT, raw_f)
    output_folder_root = os.path.join(PROCESSED_FOLDER_ROOT, raw_f)

    print(f"[{raw_f}] Processing folder")
    logging.info(f"[{raw_f}] Processing folder")

    # Plain files sitting next to the raw folders are skipped; their row in
    # the counts table keeps its initial value.
    if not os.path.isdir(input_folder_root):
        print(f"[{raw_f}] Skipping non-folder")
        logging.info(f"[{raw_f}] Skipping non-folder")
        continue

    # Every sub-directory of a raw folder is treated as one cell line.
    cell_lines = [f for f in os.listdir(input_folder_root) if os.path.isdir(os.path.join(input_folder_root, f))]

    print(f"[{raw_f}] Cell lines detected: {cell_lines}")
    logging.info(f"[{raw_f}] Cell line detected: {cell_lines}")

    for cell_line in cell_lines:
        print(f"[{raw_f}] Cell line: {cell_line}")
        logging.info(f"[{raw_f}] Cell line: {cell_line}")

        input_folder_root_cell_line = os.path.join(input_folder_root, cell_line)

        # Every sub-directory of a cell-line folder is treated as one condition.
        conditions = [f for f in os.listdir(input_folder_root_cell_line) if os.path.isdir(os.path.join(input_folder_root_cell_line, f))]

        print(f"[{raw_f}, {cell_line}] Conditions: {conditions}")
        logging.info(f"[{raw_f}] Conditions: {conditions}")

        # Input and output folder lists are index-aligned, one pair per condition.
        input_folders = [os.path.join(input_folder_root_cell_line, c) for c in conditions]
        output_folders = [os.path.join(output_folder_root, cell_line, c) for c in conditions]

        print(f"Input folders: {input_folders}")
        logging.info(f"Input folders: {input_folders}")

        format_output_filename = _make_output_filename_formatter(cell_line)

        # The value returned by preprocess_images is stored as the dead-cell
        # count for this (raw folder, cell line) pair.
        # NOTE(review): nucleus_channel=-1 presumably selects the last
        # channel - confirm against preprocess_images.
        dead_cells_count = preprocess_images(input_folders, output_folders,
                                             format_output_filename=format_output_filename,
                                             nucleus_channel=-1)
        dead_cells_count_df.loc[raw_f, cell_line] = dead_cells_count
        # Rewrite the CSV after every cell line so partial results are on
        # disk if a later folder fails.
        dead_cells_count_df.to_csv(dead_cells_counts_path)
        