# Installations

In [None]:
# %pip install "opencv-python-headless<4.3"
# %pip install cellpose
# %pip install cytoself

# %pip install --upgrade scikit-image

# Imports

In [None]:
import os
import numpy as np
import pandas as pd
import random
import logging
import torch

from config import SEED
from preprocessing import preprocess_images_spd



# Seed the global random number generators of the standard library and of
# NumPy with the project-wide SEED imported from config.
random.seed(SEED)
np.random.seed(SEED)

%reload_ext autoreload
%autoreload 2
%aimport

In [None]:
# Driver cell: walks ./data/raw/<raw folder>/<cell line>/<panel>/<condition>
# and hands every panel's condition folders to preprocess_images_spd, which
# is given matching output folders under ./data/processed/spd2/.


def _list_subdirs(parent):
    """Return the names of the immediate sub-directories of `parent`, sorted."""
    # os.listdir yields entries in arbitrary order; sorting keeps the
    # processing order (and the log) stable from one run to the next.
    return sorted(
        name for name in os.listdir(parent)
        if os.path.isdir(os.path.join(parent, name))
    )


def _make_output_filename_formatter(panel, cell_line):
    """Build the callback that tags an output file name with panel and cell line."""
    # A factory pins `panel` and `cell_line` to the values of the current
    # iteration instead of closing over the loop variables themselves.
    def format_output_filename(filename, ext):
        return f"{filename}_{panel}_{cell_line}{ext}"

    return format_output_filename


log_file_path = "./logs/preprocessing_perturbations_spd2.log"
# basicConfig(filename=...) opens the log file right away and raises
# FileNotFoundError when the directory is missing, so create it first.
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
logging.basicConfig(filename=log_file_path, level=logging.INFO,
                    format="%(asctime)s %(levelname)s %(message)s",
                    datefmt="%Y-%m-%d %H:%M:%S")

raw_folder_root = "./data/raw/"
processed_folder_root = "./data/processed/spd2/"

logging.info(f"Is GPU available: {torch.cuda.is_available()}")

# raw_files = os.listdir(raw_folder_root)
raw_folders = ["./SpinningDisk/Perturbations"]

for raw_f in raw_folders:
    input_folder_root = os.path.join(raw_folder_root, raw_f)
    output_folder_root = os.path.join(processed_folder_root, raw_f)

    logging.info(f"[{raw_f}] Processing folder")

    if not os.path.isdir(input_folder_root):
        logging.info(f"[{raw_f}] Skipping non-folder")
        continue

    cell_lines = _list_subdirs(input_folder_root)
    logging.info(f"[{raw_f}] Cell line detected: {cell_lines}")

    for cell_line in cell_lines:
        logging.info(f"[{raw_f} {cell_line}] Cell line: {cell_line}")

        input_folder_root_cell_line = os.path.join(input_folder_root, cell_line)

        panels = _list_subdirs(input_folder_root_cell_line)
        logging.info(f"[{raw_f}, {cell_line}] Panels: {panels}")

        for panel in panels:
            logging.info(f"[{raw_f} {cell_line} {panel}] Panel: {panel}")

            input_folder_root_panel = os.path.join(input_folder_root_cell_line, panel)

            # 'experiment setup' sits next to the condition folders but is
            # not a condition itself, so it is excluded.
            conditions = [cond for cond in _list_subdirs(input_folder_root_panel)
                          if cond != 'experiment setup']
            logging.info(f"[{raw_f} {cell_line} {panel}] Conditions: {conditions}")

            input_folders = [os.path.join(input_folder_root_panel, cond)
                             for cond in conditions]
            # The output tree has no panel level; the panel is carried in the
            # output file name instead (see _make_output_filename_formatter).
            output_folders = [os.path.join(output_folder_root, cell_line, cond)
                              for cond in conditions]

            print(f"Input folders: {input_folders}")
            logging.info(f"Input folders: {input_folders}")

            preprocess_images_spd(
                input_folders, output_folders,
                format_output_filename=_make_output_filename_formatter(panel, cell_line),
                show=False)
