In [42]:
#################### Params ###############

# Root locations; every other path below is derived from NOVA_HOME.
NOVA_HOME = '/home/projects/hornsteinlab/Collaboration/NOVA'
NOVA_DATA_HOME = f'{NOVA_HOME}/input'

# Input metrics table (directory + file name) and the CSV that stores the per-marker bounds.
preprocessing_path = f"{NOVA_HOME}/outputs/preprocessing/ManuscriptFinalData_80pct/dNLS_new_CLEAN/brenner/"
csv_name = 'raw_metrics030625_all.csv'
mappings_filepath = f"{NOVA_HOME}/manuscript/markers_focus_boundries/280725.csv"
imgs_path = f'{NOVA_DATA_HOME}/images/raw/'

# Column holding the focus score, and the side length images are fitted to before display.
metric_name = 'Target_Sharpness_Brenner'
img_shape = 1024

# Step of the percentile grid (as a fraction), and the percentile edges used for report pages.
percentiles_resolution = 0.0001
percentile_ranges_for_reports = [
    0, 0.1, 0.2, 0.3, 0.5, 0.7, 1, 2, 5, 10, 15, 20, 30, 40, 60, 75, 80, 85,
    90, 95, 98, 99, 99.5, 99.7, 99.8, 99.9, 100,
]

In [None]:
import os
import sys
import concurrent.futures
import numpy as np
import pandas as pd
import cv2
from PIL import Image
import matplotlib.pyplot as plt
from datetime import datetime
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.gridspec import GridSpec

# Export NOVA_HOME and add it to sys.path so that the project's `src` package
# (imported at the bottom of this cell) can be resolved.
os.environ['NOVA_HOME'] = NOVA_HOME
sys.path.insert(1, os.getenv("NOVA_HOME"))
print(f"NOVA_HOME: {os.getenv('NOVA_HOME')}")

# Export NOVA_DATA_HOME as well (presumably read by the project code -- TODO confirm).
os.environ['NOVA_DATA_HOME'] = NOVA_DATA_HOME
print(f"NOVA_DATA_HOME: {os.getenv('NOVA_DATA_HOME')}")

# Project-local helpers; these imports must stay after the sys.path.insert above.
from src.preprocessing.preprocessing_utils import get_image_focus_quality 
from src.preprocessing.preprocessing_utils import rescale_intensity, fit_image_shape

# Load the autoreload extension and reload modules before each cell execution (mode 2).
# NOTE(review): this runs after the project imports above; consider moving it before them.
%reload_ext autoreload
%autoreload 2
# With no arguments, %aimport only lists the modules autoreload tracks / skips.
%aimport

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [82]:
!ls "/home/projects/hornsteinlab/Collaboration/NOVA/outputs/preprocessing/ManuscriptFinalData_80pct/neuronsDay8_new/brenner/"


log240625_all.txt
log300625_b78910.txt
raw_metrics240625_all.csv
raw_metrics240625_all.csv_checkpoint_batch1
raw_metrics240625_all.csv_checkpoint_batch2
raw_metrics240625_all.csv_checkpoint_batch3
raw_metrics300625_b78910.csv
raw_metrics300625_b78910.csv_checkpoint_batch10
raw_metrics300625_b78910.csv_checkpoint_batch7
raw_metrics300625_b78910.csv_checkpoint_batch8
raw_metrics300625_b78910.csv_checkpoint_batch9


# Utils

In [32]:
def get_metrics(tile, as_string=False):
    """
    Compute the focus score of an image via ``get_image_focus_quality``.

    Parameters:
        tile: Image passed straight to ``get_image_focus_quality``.
        as_string (bool): When True, return a display string instead of the raw score.

    Returns:
        The raw score, or the string "Brenner: <score rounded to 3 decimals>".
    """
    brenner_score = get_image_focus_quality(tile)
    return f"Brenner: {round(brenner_score, 3)}" if as_string else brenner_score

def show_images(df, max_samples = 10):
    """
    Display the images listed in ``df.Path`` one after another.

    Parameters:
        df (pd.DataFrame): Table with a 'Path' column of image paths.
        max_samples (int or None): Number of images to show; None shows all of them.

    Returns:
        None
    """
    image_paths = df.Path.values
    has_limit = max_samples is not None

    for ind, path in enumerate(image_paths):
        print(ind)
        if has_limit and ind >= max_samples:
            print(f"Stopping at {ind}. There are {len(image_paths)} images in total")
            break

        # Target image: resolve against imgs_path; any "MOmaps" substring is replaced by "NOVA".
        target_path = os.path.join(imgs_path, path).replace("MOmaps","NOVA")
        show_processed_tif(target_path)

        # Disabled: also show the matching DAPI image of the same site.
        # path_l = target_path.split("/")
        # path_l[-2] = 'DAPI'

        # file_name = path_l[-1].split("_")
        # dapi_file_name = "_".join([file_name[0], 'w1confDAPI', file_name[-1]])
        # dapi_file_name = "/".join([*path_l[:-1], dapi_file_name])
        # print(dapi_file_name)

        # show_processed_tif(dapi_file_name)
        print('--------------------------------')
        
def init_mappings(markers=[], filepath=None):
    """
    Load the marker -> bounds table from disk, or start a fresh empty one.

    Parameters:
        markers: Index labels for a newly created table.
        filepath (str or None): CSV to load; used only when it is given and exists.

    Returns:
        pd.DataFrame: The table read from ``filepath`` (first column as index), or an
        all-NaN frame with columns 'Lower_bound' / 'Upper_bound' indexed by ``markers``.
    """
    saved_file_available = filepath is not None and os.path.exists(filepath)
    if saved_file_available:
        return pd.read_csv(filepath, index_col=0)

    return pd.DataFrame(index=markers, columns=['Lower_bound', 'Upper_bound'])
        
def save_to_mapping(filepath, mappings, marker, value, is_upper_bound):
    """
    Store one bound for ``marker`` in ``mappings`` and write the whole table to CSV.

    Parameters:
        filepath (str): Destination CSV path (overwritten on every call).
        mappings (pd.DataFrame): Bounds table; modified in place.
        marker: Row label to update.
        value: Bound value to store.
        is_upper_bound (bool): True writes 'Upper_bound', False writes 'Lower_bound'.

    Returns:
        None
    """
    if is_upper_bound:
        bound_column = 'Upper_bound'
    else:
        bound_column = 'Lower_bound'
    mappings.loc[marker, bound_column] = value

    mappings.to_csv(filepath)
    print(f"File saved to {filepath}")

def show_label(path):
    """Return the last seven '/'-separated components of ``path`` as a list."""
    return path.split("/")[-7:]

def process_tif(path):
    """
    Read an image file and prepare it for display / focus scoring.

    The image is read with OpenCV (any bit depth), fitted to
    ``(img_shape, img_shape)`` with ``fit_image_shape`` and then passed through
    ``rescale_intensity``.

    Parameters:
        path (str): Path to the image file.

    Returns:
        ndarray: Processed image.

    Raises:
        OSError: If OpenCV could not read the file (``cv2.imread`` returned None).
    """
    img = cv2.imread(path, cv2.IMREAD_ANYDEPTH)
    # cv2.imread does not raise on a missing/corrupt file, it returns None; fail here with
    # the offending path instead of an obscure error further down the pipeline.
    if img is None:
        raise OSError(f"Could not read image (missing or unreadable file): {path}")

    img = fit_image_shape(img, (img_shape, img_shape))
    # rescale pixel intensities
    img = rescale_intensity(img)
    return img
    
def show_processed_tif(path):
    """
    Load, process and display one image with the tile grid drawn on top.

    Prints the Brenner score and the processed image shape; the figure title is
    the label list returned by ``show_label(path)``.

    Parameters:
        path (str): Path to the image file.

    Returns:
        None
    """
    processed = process_tif(path)
    print(get_metrics(processed, True))

    # Draw everything through the Axes object returned by plt.subplots.
    fig, ax = plt.subplots(figsize=(7,7))
    ax.imshow(processed, cmap='gray')
    put_tiles_grid(image=processed, ax=ax)
    ax.axis('off')
    # Set after put_tiles_grid so this title replaces the one it sets.
    ax.set_title(show_label(path), color='purple')

    print(f"Img shape: {processed.shape}")
    plt.show()

def put_tiles_grid(image, ax, num_blocks=10):
    """
    Overlay a dashed ``num_blocks`` x ``num_blocks`` grid on ``ax``.

    The grid spans the full extent of ``image`` (taken from ``image.shape``), so it
    stays aligned for any image size. If ``image`` has no ``shape`` attribute the
    previous hard-coded 1000x1000 extent is used.

    Parameters:
        image: Image being displayed on ``ax``; only its shape is used.
        ax: Matplotlib Axes to draw on.
        num_blocks (int): Number of blocks per side (default 10, i.e. 100 blocks).

    Returns:
        None
    """
    shape = getattr(image, 'shape', None)
    if shape is not None and len(shape) >= 2:
        height, width = shape[0], shape[1]
    else:
        height, width = 1000, 1000  # legacy default extent

    row_step = height / num_blocks
    col_step = width / num_blocks

    for i in range(1, num_blocks):
        # Draw horizontal dashed lines
        ax.plot([0, width], [i * row_step, i * row_step], linestyle='--', lw=1, alpha=0.5, color='pink')

        # Draw vertical dashed lines
        ax.plot([i * col_step, i * col_step], [0, height], linestyle='--', lw=1, alpha=0.5, color='pink')

    # Remove x and y axis ticks
    ax.set_xticks([])
    ax.set_yticks([])

    # Default title on the target axes; callers typically overwrite it afterwards.
    ax.set_title(f'Image with Dashed Grid of {num_blocks ** 2} Blocks')

def update_all_mappings(mappings, thresholds, df):
    """
    Convert per-marker percentile thresholds into metric values and store them.

    For every marker in ``mappings.index`` that has a non-None entry in
    ``thresholds``, the (lower, upper) percentiles are looked up in the marker's
    ``describe()`` output and written, rounded to 2 decimals, to the
    'Lower_bound' / 'Upper_bound' columns. Reads the notebook globals
    ``metric_name`` and ``percentiles_to_describe``.

    Parameters:
        mappings (pd.DataFrame): Bounds table indexed by marker; modified in place.
        thresholds (dict): marker -> (lower_percentile, upper_percentile) or None.
        df (pd.DataFrame): Metrics table with a 'Marker' column.

    Returns:
        pd.DataFrame: The same ``mappings`` object.
    """
    for marker in mappings.index:
        # Markers without a usable threshold entry keep their current bounds.
        if marker not in thresholds or thresholds[marker] is None:
            continue

        marker_rows = df.loc[df['Marker'] == marker]
        marker_percentiles = marker_rows[metric_name].describe(percentiles=percentiles_to_describe)

        lower_label = f'{thresholds[marker][0]}%'
        mappings.loc[marker, "Lower_bound"] = round(marker_percentiles[lower_label], 2)
        upper_label = f'{thresholds[marker][1]}%'
        mappings.loc[marker, "Upper_bound"] = round(marker_percentiles[upper_label], 2)

    return mappings

def create_histogram_report_by_batch(df: pd.DataFrame, all_markers: list) -> None:
    """
    Write 'Marker_histogram_by_batch.pdf' with one per-batch histogram page per marker.

    Reads the notebook globals ``metric_name``, ``percentiles_to_describe`` and
    ``thresholds`` (every marker in ``all_markers`` must have an entry there).

    Parameters:
        df (pd.DataFrame): DataFrame containing the images data.
        all_markers (list): List of unique markers.

    Returns:
        None
    """
    report_name = 'Marker_histogram_by_batch.pdf'
    with PdfPages(report_name) as pdf:
        for marker in all_markers:
            print(marker)
            marker_rows = df.loc[df['Marker'] == marker]
            marker_percentiles = marker_rows[metric_name].describe(percentiles=percentiles_to_describe)

            # Annotate the marker's chosen lower/upper percentiles; x-axis spans 0.1%-99.9%.
            marker_low = thresholds[marker][0]
            marker_high = thresholds[marker][1]
            create_histogram(
                marker_rows,
                marker_percentiles,
                low_perc=marker_low,
                high_perc=marker_high,
                overlay_group=['Batch'],
                x_min=0.1,
                x_max=99.9,
            )
            plt.title(marker)
            pdf.savefig()
            plt.close()
            
def create_histogram(
    df_marker: pd.DataFrame, 
    percentiles: pd.Series, 
    low_perc: float = 0.5, 
    high_perc: float = 99.9, 
    x_min: float = None, 
    x_max: float = None, 
    overlay_group: list = None,
    plot_base: bool = True,
    actual_x_limits: tuple = None
) -> None:
    """
    Draw a histogram of one marker's ``metric_name`` values on the current figure.

    Parameters:
        df_marker (pd.DataFrame): Rows of a single marker.
        percentiles (pd.Series): ``describe()`` output; indexed by labels such as '50%'.
        low_perc (float): Lower percentile to mark (red dot).
        high_perc (float): Upper percentile to mark (orange dot).
        x_min (float, optional): Percentile used as the left edge of the histogram range.
        x_max (float, optional): Percentile used as the right edge of the histogram range.
        overlay_group (list, optional): Columns to group by; one overlaid histogram per group.
        plot_base (bool): Whether to draw the histogram of all rows.
        actual_x_limits (tuple, optional): Explicit (min, max) metric values for the range;
            takes precedence over ``x_min`` / ``x_max``.

    Returns:
        None
    """
    assert low_perc < high_perc, "'low_perc' must be less than 'high_perc'"

    # Histogram range: explicit limits win, then percentile-based limits, else automatic.
    if actual_x_limits:
        hist_range = actual_x_limits
    elif x_min is not None and x_max is not None:
        hist_range = (percentiles[f'{x_min}%'], percentiles[f'{x_max}%'])
    else:
        hist_range = None

    # Base histogram over all rows
    if plot_base:
        base_color = plt.cm.tab10(range(1))[0]
        plt.hist(df_marker[metric_name].values, bins=100, range=hist_range, color=base_color,
                 alpha=0.3, label='Brenner scores')

    # One overlaid histogram per group
    if overlay_group is not None:
        grouped_data = df_marker.groupby(overlay_group)
        group_keys = grouped_data.groups.keys()
        palette = plt.cm.tab10(range(len(group_keys)))

        for color, key in zip(palette, group_keys):
            # Multi-column keys are tuples and get joined into a single legend label.
            group_label = ' - '.join(map(str, key)) if isinstance(key, tuple) else key
            group_values = grouped_data.get_group(key)[metric_name].values
            plt.hist(group_values, bins=100, range=hist_range, alpha=0.4, label=group_label, color=color)

    # Percentile markers: median, then the upper and lower percentiles
    for perc, dot_color in ((50, 'yellow'), (high_perc, 'orange'), (low_perc, 'red')):
        plt.scatter(percentiles[f'{perc}%'], 0.5, color=dot_color, s=12, label=f'{perc}th percentile')

    # Legend without duplicate entries (the last handle of each label is kept)
    handles, labels = plt.gca().get_legend_handles_labels()
    unique_entries = dict(zip(labels, handles))
    plt.legend(unique_entries.values(), unique_entries.keys())

def generate_marker_reports(
    df: pd.DataFrame, 
    all_markers: list, 
    output_folder: str, 
    percentiles_to_describe: list, 
    percentile_ranges: list, 
    max_samples: int
) -> None:
    """
    Generate one PDF report per marker, with histograms and sample images.

    Each report ('output_report_<marker>.pdf' in ``output_folder``) contains:
      1. histograms overlaid by Condition, Batch, Rep and CellLine,
      2. one histogram per cell line overlaid by (CellLine, Condition),
      3. one page per consecutive pair in ``percentile_ranges`` with counts and up to
         ``max_samples`` example images whose score falls in that range.

    Reads the notebook globals ``metric_name`` and ``imgs_path``.

    Parameters:
        df (pd.DataFrame): DataFrame containing the data.
        all_markers (list): List of unique markers.
        output_folder (str): Path to save the output PDFs (created if missing).
        percentiles_to_describe (list): Percentiles (fractions in [0, 1]) passed to ``describe``;
            must produce the labels '0%', '97%' and every value in ``percentile_ranges``.
        percentile_ranges (list): Increasing percentile edges for the image pages.
        max_samples (int): Maximum number of images to display per range.

    Returns:
        None
    """
    os.makedirs(output_folder, exist_ok=True)

    for marker in all_markers:
        print(marker)
        df_marker = df.loc[df['Marker'] == marker]
        percentiles = df_marker[metric_name].describe(percentiles=percentiles_to_describe)
        # Percentile rows only ("<p>%"); used to map an image's score to its nearest percentile.
        percentile_rows = percentiles[[key for key in percentiles.keys() if '%' in key]]

        # Define the actual x-axis limits for consistent base and overlay histograms
        actual_x_limits = (percentiles['0%'], percentiles['97%'])

        pdf_path = os.path.join(output_folder, f'output_report_{marker}.pdf')
        with PdfPages(pdf_path) as pdf:
            # Generate histograms with overlays for different groups
            groups = ['Condition', 'Batch', 'Rep', 'CellLine']
            for group in groups:
                create_histogram(
                    df_marker,
                    percentiles,
                    low_perc=0,
                    high_perc=97,
                    actual_x_limits=actual_x_limits,
                    overlay_group=[group]
                )
                plt.title(f"Histogram with Overlay by {group}")
                pdf.savefig()  # Save current figure to the PDF
                plt.close()

            # Combined base histogram and overlay for each cell line
            for CL in np.unique(df_marker['CellLine']):
                create_histogram(
                    df_marker,
                    percentiles,
                    low_perc=0,
                    high_perc=97,
                    actual_x_limits=actual_x_limits,
                )

                # Overlay the specific cell line; the percentile markers stay those of the
                # whole marker so every page shares the same reference points.
                df_tmp = df_marker.loc[df_marker['CellLine'] == CL]
                create_histogram(
                    df_tmp,
                    percentiles,
                    low_perc=0,
                    high_perc=97,
                    actual_x_limits=actual_x_limits,
                    overlay_group=['CellLine', 'Condition'], plot_base = False
                )
                plt.title(f"Histogram for (Cell Line: {CL})")
                # Save the combined plot to the PDF
                pdf.savefig()
                plt.close()

            # One page per consecutive percentile range: counts plus sample images
            for i in range(len(percentile_ranges) - 1):
                per_min = np.round(percentile_ranges[i], 2)
                per_max = np.round(percentile_ranges[i + 1], 2)
                assert per_min < per_max, "Percentile range minimum must be less than the maximum."
                threshold = percentiles[f'{per_min}%']
                threshold_second = percentiles[f'{per_max}%']

                c = (df_marker[metric_name] >= threshold) & (df_marker[metric_name] <= threshold_second)
                # Shuffle deterministically so the shown samples are reproducible.
                df_marker_filtered = df_marker[c].sample(frac=1, random_state=1)

                text_output = (f'Images between %{per_min} - {per_max}%\n'
                               f"Number of {marker} images in threshold {threshold} "
                               f"({per_min}%) and {threshold_second} ({per_max}%): "
                               f"{len(df_marker_filtered)}\n\n"
                               f"{df_marker_filtered['CellLine'].value_counts().to_string()}\n\n"
                               f"{df_marker_filtered['Condition'].value_counts().to_string()}\n\n")

                fig = plt.figure(figsize=(12, 8))
                gs = GridSpec(3, 1, figure=fig, height_ratios=[1, 2, 0.1])
                text_ax = fig.add_subplot(gs[0, :])
                text_ax.axis('off')
                text_ax.text(0.01, 0.99, text_output, ha='left', va='top', fontsize=12, wrap=True)

                filtered_paths = df_marker_filtered['Path'].values
                num_images = min(max_samples, len(filtered_paths))

                # A range can contain no images (percentiles are interpolated); a grid with
                # zero columns is invalid, so in that case only the text summary is emitted.
                if num_images > 0:
                    img_gs = gs[1].subgridspec(1, num_images, wspace=0.1)

                    for ind, path in enumerate(filtered_paths[:num_images]):
                        # Resolve against the images root (as show_images does), not the
                        # report output folder; absolute paths are unaffected by the join.
                        target_path = os.path.join(imgs_path, path)
                        img = process_tif(target_path)

                        ax = fig.add_subplot(img_gs[0, ind])
                        ax.imshow(img, cmap='gray')
                        put_tiles_grid(image=img, ax=ax)
                        ax.axis('off')

                        labels = show_label(path)
                        # Score the image once and reuse it for both the nearest-percentile
                        # lookup and the title text.
                        brenner_value = get_metrics(img)
                        perc_brenner = abs(percentile_rows - brenner_value).idxmin()
                        ax.set_title(
                            f"{labels[1]}, {labels[3]}, Brenner: {round(brenner_value, 3)}, {perc_brenner}",
                            color='purple', fontsize=10
                        )

                plt.tight_layout()
                pdf.savefig(fig)
                plt.close(fig)

# Main

In [None]:
# Load the per-image metrics table. Later cells read its 'Marker', 'Path', 'CellLine',
# 'Condition', 'Batch', 'Rep' and 'Batch_Rep' columns plus the column named by metric_name.
df = pd.read_csv(os.path.join(preprocessing_path, csv_name))
# Bare expression: display the loaded frame.
df

In [85]:
# df['Path'].replace("/home/projects/hornsteinlab/Collaboration/MOmaps/input", "/home/projects/hornsteinlab/Collaboration/NOVA/input")

# df['Path'] = df['Path'].str.replace(r'/home/projects/hornsteinlab/Collaboration/MOmaps/input', '/home/projects/hornsteinlab/Collaboration/NOVA/input', regex=False)
# df['RootFolder'] = df['RootFolder'].str.replace(r'/home/projects/hornsteinlab/Collaboration/MOmaps/input', '/home/projects/hornsteinlab/Collaboration/NOVA/input', regex=False)


See existing brenner cutoffs as percentiles of the new data you are currently working on

In [36]:
import pandas as pd
import numpy as np

def calculate_percentile_bounds(df_data: pd.DataFrame, df_bounds: pd.DataFrame) -> pd.DataFrame:
    """
    Express existing per-marker bounds as percentiles of a (new) dataset.

    For every marker (row) of ``df_bounds``, compute the percentage of that marker's
    'Target_Sharpness_Brenner' values in ``df_data`` that lie strictly below the
    'Lower_bound' and below the 'Upper_bound'.

    Parameters:
        df_data (pd.DataFrame): Metrics table with 'Marker' and
            'Target_Sharpness_Brenner' columns.
        df_bounds (pd.DataFrame): Bounds table indexed by marker with 'Lower_bound'
            and 'Upper_bound' columns.

    Returns:
        pd.DataFrame: Indexed by 'Marker' with columns 'Lower_percentile' and
        'Upper_percentile' (0-100). A value is NaN when the marker has no rows in
        ``df_data`` or when the corresponding bound is missing (NaN).
    """
    def _percent_below(values: pd.Series, bound) -> float:
        # `values < NaN` is False everywhere, which would silently report 0% for an
        # unset bound; report NaN instead, as for a marker without data.
        if values.empty or pd.isna(bound):
            return np.nan
        return (values < bound).mean() * 100

    records = []
    for marker, row in df_bounds.iterrows():
        # Subset the data for the current marker
        subset = df_data.loc[df_data["Marker"] == marker, "Target_Sharpness_Brenner"]
        records.append({
            "Marker": marker,
            "Lower_percentile": _percent_below(subset, row["Lower_bound"]),
            "Upper_percentile": _percent_below(subset, row["Upper_bound"]),
        })

    # Explicit columns keep set_index working when df_bounds has no rows.
    columns = ["Marker", "Lower_percentile", "Upper_percentile"]
    return pd.DataFrame(records, columns=columns).set_index("Marker")

new INDI Batches 1-3

Brenner from JIRA:
#['DAPI' [0.5, 98.85], 'HNRNPA1' [0.5, 99.45], 'LSM14A' [0.01 ,99.2], 'Calreticulin' [3.7, 99.9],'GM130' [0, 98],

#'PEX14' [1, 98.1], 'SNCA' [6, 98], 'CD41' [1.1, 93],'NONO' [2 ,97.9], 'SON' [1.5, 94.8], 'DCP1A'[0.1, 96.8], 'NEMO' [1 ,98.5],
#'FUS' [0, 99.3], 'NCL' [0.6, 99.3], 'KIF5A' [2, 98], 'SQSTM1' [2, 98.3], 'Tubulin' [2, 99]

 'TIA1' [2 , 99.5]
'TOMM20' [6 , 99]  
'mitotracker' [0.5 , 98.7] 
'PML' [0 , 99.5] 
'TDP43' [6 , 99.6] 
'CLTC' [1 , 99.9] 
'PSD95' [1 , 99.6]
'Phalloidin' [5 , 99.9] 
'ANXA11' [2 , 99.7]
'LAMP1' [51 , 99.5] 
'FMRP'[4 , 99], 
'G3BP1' [1 , 95],
'PURA' [12 , 96]

In [87]:
# existing_df_new_indi123 = pd.read_csv("/home/projects/hornsteinlab/Collaboration/NOVA/manuscript/markers_focus_boundries/markers_focus_boundries_newINDI.csv", index_col=0)
# existing_df_new_indi123

# calculate_percentile_bounds(df, existing_df_new_indi123)

In [88]:
# Replace underscores with hyphens in marker names; overwrites df['Marker'] in place.
df["Marker"] = df["Marker"].str.replace("_", "-")

In [89]:
df.Path.iloc[0]

'/home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch7/FUSHeterozygous/panelH/Untreated/rep1/DAPI/r07c07f110-ch1t1.tiff'

In [None]:
df['Marker'].value_counts()

In [None]:
# Number of rows per marker (not referenced again in the code visible here).
counts = df['Marker'].value_counts()
# Unique markers in order of first appearance in df.
all_markers = df['Marker'].unique()
print(len(all_markers))
print(f'all_markers: {all_markers}')

# Split the marker list into two halves, one per reviewer.
yehudas_markers = all_markers[:len(all_markers)//2]
welmoeds_markers = all_markers[len(all_markers)//2:]

print(f'yehudas_markers: {yehudas_markers}')
print(f'welmoeds_markers: {welmoeds_markers}')


In [43]:
# Load the saved bounds table if mappings_filepath exists; otherwise start an empty
# (all-NaN) table indexed by all_markers.
mappings = init_mappings(markers=all_markers, filepath=mappings_filepath)
mappings

Unnamed: 0,Lower_bound,Upper_bound
DAPI,,
HNRNPA1,,
LSM14A,,
Calreticulin,,
GM130,,
PEX14,,
SNCA,,
CD41,,
NONO,,
SON,,


In [44]:
# Percentile grid (fractions in [0, 1]) passed to Series.describe(percentiles=...).
# Built with np.linspace rather than np.arange: with a fractional step the length of an
# arange result is not numerically stable and its last value can overshoot 1.0, which
# describe() rejects. linspace hits both endpoints exactly.
num_percentile_steps = int(round(1 / percentiles_resolution))
percentiles_to_describe = np.linspace(0, 1, num_percentile_steps + 1)

# Create Brenner reports

Run this block if you want to generate Brenner reports

In [14]:
# output_folder = f"{preprocessing_path}/brenner_reports{exp}_{datetime.now().strftime('%Y-%m-%d')}"
# print('output_folder is', output_folder)
# generate_marker_reports(df, all_markers, output_folder, percentiles_to_describe, percentile_ranges_for_reports, max_samples=3)

# Examine and set Brenner one by one (Option 1)

In [None]:
#################### SAFE ZONE TO CHANGE ###############

marker = 'DAPI' # MARKERNAME
# [marker] Options:
# ['DAPI' 'HNRNPA1' 'LSM14A' 'Calreticulin' 'GM130' 'PEX14' 'SNCA' 'CD41'
#  'NONO' 'SON' 'DCP1A' 'NEMO' 'FUS' 'NCL' 'KIF5A' 'SQSTM1' 'Tubulin' 'TIA1'
#  'TOMM20' 'mitotracker' 'PML' 'TDP43' 'CLTC' 'PSD95' 'Phalloidin' 'ANXA11'
#  'LAMP1' 'FMRP' 'G3BP1' 'PURA']

# [per] Options: 0-100 (must match a label produced by describe(), e.g. 98 -> '98%')
per = 98 # percentile threshold 

# [per_second_bound] Options: 0-100
# *Optional! if you want to view images between per and another threshold
per_second_bound = None

max_samples = 3 # set max number of images (in threshold) to show

# [is_upper_bound] Options:
# True: upper bound
# False: lower bound
is_upper_bound = True 

show_percentile_plot = False

###################### END OF SAFE ZONE ###################





#########################################

# Default second bound: everything above `per` for an upper bound, everything below it
# for a lower bound.
if per_second_bound is None:
    per_second_bound = 100 if is_upper_bound else 0

        
print(f"marker = {marker}, per: {per}% (per_second_bound={per_second_bound}%), max_samples = {max_samples}, is upper bound: {is_upper_bound}")


# Percentiles of the metric for the selected marker only.
df_marker = df.loc[df['Marker'] == marker]
percentiles = df_marker[metric_name].describe(percentiles=percentiles_to_describe)

# Metric values at the two requested percentiles.
threshold = percentiles[f'{per}%']
threshold_second = percentiles[f'{per_second_bound}%']

# Select the images between the two thresholds (inclusive on both sides).
if is_upper_bound:
    c = (df_marker[metric_name]>=threshold) & (df_marker[metric_name]<=threshold_second)
else:
    c = (df_marker[metric_name]<=threshold) & (df_marker[metric_name]>=threshold_second) 

# threshold
df_marker_filtered = df_marker[c]


# shuffle (fixed seed, so the same images are shown on every run)
df_marker_filtered = df_marker_filtered.sample(frac=1, random_state=1)

print(f"Number of {marker} images in threshold {threshold} ({per}%) (and {threshold_second} ({per_second_bound}%)): {len(df_marker_filtered)}")
print("\n\n")
print(df_marker_filtered['CellLine'].value_counts().to_string())
print(df_marker_filtered['Condition'].value_counts().to_string())

if show_percentile_plot:
    plt.figure(figsize=(15,6))
    # [4:-1] drops the count/mean/std/min entries and the final max, leaving the percentile rows.
    plt.plot(percentiles.keys().to_numpy()[4:-1], percentiles.values[4:-1])
    plt.ylabel('value')
    plt.xlabel('percentile')
    plt.xticks(rotation=90)
    plt.show()

show_images(df_marker_filtered, max_samples=max_samples)    

# NOTE: runs on every execution of this cell -- examining a percentile also writes it
# (rounded to 2 decimals) into the mappings table and overwrites the CSV at mappings_filepath.
save_to_mapping(mappings_filepath, mappings, marker, round(threshold,2), is_upper_bound)


# Examine Brenners and set the threshold in the next block (option 2)

1. Examine Brenners (write the thresholds in the next block)

In [None]:
#################### SAFE ZONE TO CHANGE ###############

marker = 'G3BP1' # MARKERNAME
# Previous [marker] Options:
# ['DAPI' [0.5, 98.85], 'HNRNPA1' [0.5, 99.45], 'LSM14A' [0.01 ,99.2], 'Calreticulin' [3.7, 99.9],'GM130' [0, 98],
#'PEX14' [1, 98.1], 'SNCA' [6, 98], 'CD41' [1.1, 93],'NONO' [2 ,97.9], 'SON' [1.5, 94.8], 'DCP1A'[0.1, 96.8], 'NEMO' [1 ,98.5], 
#'FUS' [0, 99.3], 'NCL' [0.6, 99.3],'KIF5A' [2, 98] 'SQSTM1' [2, 98.3], 'Tubulin' [2, 99
#'TIA1' [2 , 99.5]
#  'TOMM20' [6, 99] 'mitotracker' [0.5 , 98.7] 'PML' [0, 99.5] 'TDP43' [6 , 99.6] 'CLTC' [1, 99.9] 'PSD95' [1, 99.6]
# 'Phalloidin' [5, 99.9] 'ANXA11' [2, 99.7] 'LAMP1' [60, 99.5] 'FMRP'[4, 99], 'G3BP1' [1, 95],'PURA' [12,96]]

#previous --> New_brenners_B7_10_INDI:
### 'DAPI' [0.5, 98.85],--> [0.5, 96]
### 'HNRNPA1' [0.5, 99.45], --> [0.15, 99.9]
###'LSM14A' [0.01 ,99.2], --> [0, 99.5]
###'Calreticulin' [3.7, 99.9] --> [1, 100]
###'GM130' [0, 98] -->[0, 98]
###'PEX14' [1, 98.1] --> [1.5, 93.2]
###'SNCA' [6, 98] --> [26, 95.03]  !!!!!!
###'CD41' [1.1, 93] --> [0.5, 91]
###'NONO' [2 ,97.9] --> [3, 94.3]
###'SON' [1.5, 94.8] -->[2, 93.7]
###'DCP1A'[0.1, 96.8] -->[0.05, 99]
###'NEMO' [1 ,98.5] --> [0.12, 99.9]
###'FUS' [0, 99.3] --> [0, 77.8] !!!!! ### Affected by batch7, as I saw by imaging that most of the staining was dim!
###'NCL' [0.6, 99.3] --> [0.6, 99.74]
###'KIF5A' [2, 98] --> [1, 97]
###'SQSTM1' [2, 98.3] --> [2, 97.5]
###'Tubulin' [2, 99] --> [4, 98]
###'TIA1' [2 , 99.5] --> [1.1, 96.1]
###'TOMM20' [6, 99] --> [5, 95]
###'mitotracker' [0.5 , 98.7] --> [1, 96.19]
###'PML' [0, 99.5] --> [0.8, 87.3] !!!!
###'TDP43' [6 , 99.6] --> [1.5, 89.5] !!!
###'CLTC' [1, 99.9] --> [1, 99]
###'PSD95' [1, 99.6]  --> [0.2, 96.8]
###'Phalloidin' [5, 99.9] --> [5, 98.6]
###'ANXA11' [2, 99.7] --> [1, 87.05] !!!
###'LAMP1' [51, 99.5] -->[10.5, 93] !!!!!
###'FMRP'[4, 99] --> [2, 94.58]
###'G3BP1' [1, 95] --> [1, 93.9]
#'PURA' [12,96]] --> [4, 96] !!!

# Percentile window to inspect (must match labels produced by describe(), e.g. 0 -> '0%').
per_min =0
per_max =50
# Subset of the marker's images to look at; combined below as f'{batch}/{rep}' and
# compared against the Batch_Rep column.
batch='batch1'
rep='rep2'
cell_line='WT'

max_samples = 80

###################### END OF SAFE ZONE ###################



# Rows of the selected marker, restricted to one batch/rep and one cell line.
df_marker = df.loc[df['Marker'] == marker]
df_marker=df_marker[(df_marker.Batch_Rep==f'{batch}/{rep}') & (df_marker.CellLine==cell_line)]

# NOTE: percentiles are computed on the restricted subset, not on the whole marker.
percentiles = df_marker[metric_name].describe(percentiles=percentiles_to_describe)

print(f'Showing images between %{per_min} - {per_max}')
threshold = percentiles[f'{per_min}%']
threshold_second = percentiles[f'{per_max}%']

# Images whose metric lies between the two thresholds (inclusive).
c = (df_marker[metric_name]>=threshold) & (df_marker[metric_name]<=threshold_second) 

# threshold
df_marker_filtered = df_marker[c]
# shuffle (fixed seed, so the same images are shown on every run)
df_marker_filtered = df_marker_filtered.sample(frac=1, random_state=1)
# df_marker_filtered.index = range(len(df_marker_filtered))

print(f"Number of {marker} images in threshold {threshold} ({per_min}%) (and {threshold_second} ({per_max}%)): {len(df_marker_filtered)}")
print("\n")
print(df_marker_filtered['CellLine'].value_counts().to_string())
print("\n")
print(df_marker_filtered['Condition'].value_counts().to_string())
print("\n")
# Unlike option 1, this cell only displays images; nothing is written to the mappings CSV.
show_images(df_marker_filtered, max_samples=max_samples)    

In [None]:
df.groupby(['Marker','Condition','CellLine','Batch_Rep']).count()

2. Setting Brenners in the csv

In [None]:
mappings_filepath

In [None]:
# new indi B78910 
# marker -> (lower percentile, upper percentile), both on a 0-100 scale.
# Each number is turned into a describe() label via f'{value}%' by update_all_mappings,
# so it must be one of the percentiles listed in percentiles_to_describe.
thresholds = {
    'DAPI': (0.5, 96),
    'HNRNPA1':(0.15, 99.9),
    'LSM14A': (0, 99.5),
    'Calreticulin': (1, 100),
    'GM130': (0, 98),
    'PEX14': (1.5, 93.2),
    'SNCA': (26, 95.03),
    'CD41': (0.5, 91),
    'NONO': (3, 94.3),
    'SON': (2, 93.7),
    'DCP1A': (0.05, 99),
    'NEMO': (0.12, 99.9),
    'FUS': (0, 77.8),
    'NCL': (0.6, 99.74),
    'KIF5A': (1, 97),
    'SQSTM1': (2, 97.5),
    'Tubulin': (4, 98),
    'TIA1': (1.1, 96.1),
    'TOMM20': (5, 95),
    'mitotracker': (1, 96.19),
    'PML': (0.8, 87.3),
    'TDP43': (1.5, 89.5),
    'CLTC': (1, 99),
    'PSD95': (0.2, 96.8),
    'Phalloidin': (5, 98.6),
    'ANXA11': (1, 87.05),
    'LAMP1': (10.5, 93),
    'FMRP': (2, 94.58),
    'G3BP1': (1, 93.9),
    'PURA': (4, 96)
} 

## Alyssa Coyne
# thresholds = {
#  "DAPI": (0, 100),
#  "TDP43": (1, 100),
#  "Map2": (0, 100),
#  "DCP1A": (0, 100),
#  "POM121":(0, 100),
#  "Nup62":
#  "FUS": (1, 100),
# }

# thresholds = {
#     "DAPI":(0.1,75),
#     "HNRNPA1": (0,59.67),
#     "LSM14A":(0.2,57),
#     "Calreticulin":(5,99.15),
#     "GM130":(0,65),
#     "PEX14":(1,90),
# "SNCA":(5,91.6),
# "CD41":(0.2,80),
# "NONO":(5.5,85.5),
# "SON":(0.5,84.1),
# "DCP1A":(0,47),
# "NEMO":(0,56.5),
# "FUS":(0,84),
# "NCL":(5,82.64),
# "KIF5A":(0.8,91),
# "SQSTM1":(1,96.1),
# "Tubulin":(0,98),
# "TIA1":(0.2,77.5),
# "TOMM20":(0,90),
# "mitotracker":(0.1,90),
# "PML":(0,68),
# "TDP43":(0.2,74),
# "CLTC":(1.1,97),
# "PSD95":(0,81),
# "Phalloidin":(0,92),
# "ANXA11":(0,93),
# "LAMP1":(0.1,87),
# "FMRP":(1,71),
# "G3BP1":(1,74),
# "PURA":(0.5,85),
# } 

## Exp3
# thresholds = {
#     "Stress-initiation": (0, 99),
#     "Aberrant-splicing": (1, 97),
#     "Autophagy": (5, 97),
#     "DAPI": (0, 100),
#     "Apoptosis": (0.27, 95),
#     "impaired-Autophagosome": (5, 99),
#     "Cytoskeleton": (10, 99.8),
#     "DNA-damage-P53BP1": (0.3, 90),
#     "DNA-damage-pH2Ax": (7, 85),
#     "mature-Autophagosome": (2, 98),
#     "Necrosis": (4, 95),
#     "Neuronal-activity": (5, 90),
#     "Nuclear-speckles-SC35": (0.2, 85),
#     "Nuclear-speckles-SON": (0.2, 85),
#     "Parthanatos-early": (0, 85),
#     "Parthanatos-late": (5, 95),
#     "Protein-degradation": (5, 92),
#     "Senescence-signaling": (10, 90),
#     "Splicing-factories": (0, 95),
#     "TDP-43": (20, 99.2),
#     "Ubiquitin-levels": (2, 90),
#     "UPR-ATF4": (20, 99.7), 
#     "UPR-ATF6": (3, 100),
#     "UPR-IRE1a": (2, 98),
#     "Necroptosis-pMLKL": (5, 90),
#     "Necroptosis-HMGB1": (0.01, 90),
# }
## Exp 4 
# thresholds = {
#     "Stress-initiation": (0.3, 98),
#     "Aberrant-splicing": (2, 97),
#     "Autophagy": (0, 98),
#     "DAPI": (0.2, 98.3),
#     "Apoptosis": (0.5, 95),
#     "impaired-Autophagosome": (13, 99.5),
#     "Cytoskeleton": (10, 99.9),
#     "DNA-damage-P53BP1": (0.3, 92),
#     "DNA-damage-pH2Ax": (7, 95),
#     "mature-Autophagosome": (5, 98.85),
#     "Necrosis": (7.2, 98),
#     "Neuronal-activity": (7, 94),
#     "Nuclear-speckles-SC35": (0.1, 92),
#     "Nuclear-speckles-SON": (0.2, 85),
#     "Parthanatos-early": (0, 99),
#     "Parthanatos-late": (5, 99),
#     "Protein-degradation": (2, 98),
#     "Senescence-signaling": (4, 99.75),
#     "Splicing-factories": (0, 95),
#     "TDP-43": (20, 99.8),
#     "Ubiquitin-levels": (0.2, 90),
#     "UPR-ATF4": (20, 99.75),
#     "UPR-ATF6": (3, 100),
#     "UPR-IRE1a": (2, 98),
#     "Necroptosis-pMLKL": (2, 95),
#     "Necroptosis-HMGB1": (0.2, 90),
# }

# Convert the percentile thresholds into metric values for every marker in `mappings`,
# then overwrite the CSV at mappings_filepath with the updated table.
mappings = update_all_mappings(mappings, thresholds, df)
mappings.to_csv(mappings_filepath)
mappings

In [None]:
mappings_filepath

In [None]:
# Rebinds the global mappings_filepath to the '/MOmaps_Noam/MOmaps/' tree. Any cell run
# after this one (e.g. save_to_mapping / mappings.to_csv) will write to the new location.
mappings_filepath=mappings_filepath.replace('/NOVA/','/MOmaps_Noam/MOmaps/')
mappings_filepath