In [1]:
# Root directory of the rendered UMAP figures. It is passed to extract_umap_data and
# image paths are later built as os.path.join(base_dir, <folder_path>, <image_name> + ".png").
base_dir = '/home/labs/hornsteinlab/Collaboration/MOmaps/outputs/vit_models/finetuned_model/figures/funova/UMAPs'
# NOVA project directory: exported as the NOVA_HOME environment variable and inserted
# into sys.path in the import cell (presumably so project modules can be imported).
NOVA_HOME = '/home/labs/hornsteinlab/Collaboration/NOVA_GAL/NOVA'

In [2]:
import os
import sys
import pandas as pd 
import numpy as np
import re
import fnmatch
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.image as mpimg
import gc

os.environ['NOVA_HOME'] = NOVA_HOME
sys.path.insert(1, os.getenv('NOVA_HOME'))
%load_ext autoreload
%autoreload 2

from interactive_umaps_utils import extract_umap_data


In [3]:
# Individual cell lines, in the column order used by the per-cell-line reports.
cell_lines_order = [
    "Control-1001733",
    "Control-1017118",
    "Control-1025045",
    "Control-1048087",
    "C9orf72-HRE-1008566",
    "C9orf72-HRE-981344",
    "TDP--43-G348V-1057052",
    "TDP--43-N390D-1005373",
]
conditions = ["Untreated", "stress"]

# Accepted values: every individual entry plus the pooled "all_*" label.
valid_cell_lines = set(cell_lines_order) | {"all_cell_lines"}
valid_conditions = set(conditions) | {"all_conditions"}

# Figure sub-folder name -> integer umap_type code (0, 1, 2 in this order).
folder_mapping = {
    folder_name: umap_code
    for umap_code, folder_name in enumerate(
        ("SINGLE_MARKERS", "MULTIPLE_MARKERS", "MULTIPLEX_MARKERS")
    )
}

# Accepted marker labels: the pooled label, the marker-group labels, then single markers.
valid_markers = (
    ["all_markers"]
    + [
        "DNA_RNA_DEFECTS_MARKERS",
        "PROTEOSTASIS_MARKERS",
        "NEURONAL_CELL_DEATH_SENESCENCE_MARKERS",
        "SYNAPTIC_NEURONAL_FUNCTION_MARKERS",
    ]
    + [
        "DAPI",
        "Stress-initiation",
        "mature-Autophagosome",
        "Cytoskeleton",
        "Ubiquitin-levels",
        "UPR-IRE1a",
        "UPR-ATF4",
        "UPR-ATF6",
        "impaired-Autophagosome",
        "Autophagy",
        "Aberrant-splicing",
        "Parthanatos-late",
        "Nuclear-speckles-SC35",
        "Splicing-factories",
        "TDP-43",
        "Nuclear-speckles-SON",
        "DNA-damage-pH2Ax",
        "Parthanatos-early",
        "Necrosis",
        "Necroptosis-HMGB1",
        "Neuronal-activity",
        "DNA-damage-P53BP1",
        "Apoptosis",
        "Necroptosis-pMLKL",
        "Protein-degradation",
        "Senescence-signaling",
    ]
)

# Batch identifiers are kept as strings.
batches = [str(batch_number) for batch_number in range(1, 5)]

In [10]:
# Build the table that every report below filters. Judging by the displayed output it
# has the columns folder_path, image_name, umap_type, batch, rep, cell_line, condition,
# markers and coloring (presumably one row per UMAP image found under base_dir; confirm
# against extract_umap_data).
df = extract_umap_data(
    base_dir=base_dir,
    folder_mapping=folder_mapping,
    valid_cell_lines=valid_cell_lines,
    valid_conditions=valid_conditions,
    valid_markers=valid_markers,
    batches=batches
)

In [11]:
# Display the extracted table as the cell output.
df

Unnamed: 0,folder_path,image_name,umap_type,batch,rep,cell_line,condition,markers,coloring
0,SINGLE_MARKERS/Batch2_all_reps_C9orf72-HRE-100...,TDP-43,0,2,all_reps,C9orf72-HRE-1008566,all_conditions,TDP-43,CONDITIONS
1,SINGLE_MARKERS/Batch2_all_reps_C9orf72-HRE-100...,Nuclear-speckles-SC35,0,2,all_reps,C9orf72-HRE-1008566,all_conditions,Nuclear-speckles-SC35,CONDITIONS
2,SINGLE_MARKERS/Batch2_all_reps_C9orf72-HRE-100...,Ubiquitin-levels,0,2,all_reps,C9orf72-HRE-1008566,all_conditions,Ubiquitin-levels,CONDITIONS
3,SINGLE_MARKERS/Batch2_all_reps_C9orf72-HRE-100...,UPR-ATF4,0,2,all_reps,C9orf72-HRE-1008566,all_conditions,UPR-ATF4,CONDITIONS
4,SINGLE_MARKERS/Batch2_all_reps_C9orf72-HRE-100...,Senescence-signaling,0,2,all_reps,C9orf72-HRE-1008566,all_conditions,Senescence-signaling,CONDITIONS
...,...,...,...,...,...,...,...,...,...
2504,MULTIPLEX_MARKERS/Batch1_all_reps_all_cell_lin...,umap2,2,1,all_reps,all_cell_lines,Untreated,all_markers,MULTIPLEX_CELL_LINES
2505,MULTIPLEX_MARKERS/Batch4_all_reps_all_cell_lin...,umap2,2,4,all_reps,all_cell_lines,all_conditions,PROTEOSTASIS_MARKERS,MULTIPLEX_CELL_LINES
2506,MULTIPLEX_MARKERS/Batch1_all_reps_all_cell_lin...,umap2,2,1,all_reps,all_cell_lines,all_conditions,all_markers,MULTIPLEX_CELL_LINES
2507,MULTIPLEX_MARKERS/Batch1_all_reps_Control-1001...,umap2,2,1,all_reps,Control-1001733,all_conditions,all_markers,MULTIPLEX_CONDITIONS


In [12]:
# Sorted distinct cell_line labels present in the table.
np.unique(df["cell_line"])

array(['C9orf72-HRE-1008566', 'C9orf72-HRE-1008566,C9orf72-HRE-981344',
       'C9orf72-HRE-981344', 'Control-1001733',
       'Control-1001733,Control-1017118,Control-1025045,Control-1048087',
       'Control-1001733,Control-1017118,Control-1025045,Control-1048087,C9orf72-HRE-1008566,C9orf72-HRE-981344',
       'Control-1001733,Control-1017118,Control-1025045,Control-1048087,C9orf72-HRE-1008566,C9orf72-HRE-981344,TDP--43-G348V-1057052,TDP--43-N390D-1005373',
       'Control-1001733,Control-1017118,Control-1025045,Control-1048087,TDP--43-G348V-1057052,TDP--43-N390D-1005373',
       'Control-1017118', 'Control-1025045', 'Control-1048087',
       'TDP--43-G348V-1057052',
       'TDP--43-G348V-1057052,TDP--43-N390D-1005373',
       'TDP--43-N390D-1005373', 'all_cell_lines'], dtype=object)

<!-- ## UMAP0 -->

In [13]:
# Keep the rows with umap_type code 0, then print the distinct values of every
# column except the first one (folder_path).
dumap0 = df.loc[df["umap_type"] == 0]
for column_name, column_values in dumap0.iloc[:, 1:].items():
    print(column_name, np.unique(column_values))

image_name ['Aberrant-splicing' 'Apoptosis' 'Autophagy' 'Cytoskeleton' 'DAPI'
 'DNA-damage-P53BP1' 'DNA-damage-pH2Ax' 'Necroptosis-HMGB1'
 'Necroptosis-pMLKL' 'Necrosis' 'Neuronal-activity'
 'Nuclear-speckles-SC35' 'Nuclear-speckles-SON' 'Parthanatos-early'
 'Parthanatos-late' 'Protein-degradation' 'Senescence-signaling'
 'Splicing-factories' 'Stress-initiation' 'TDP-43' 'UPR-ATF4' 'UPR-ATF6'
 'UPR-IRE1a' 'Ubiquitin-levels' 'impaired-Autophagosome'
 'mature-Autophagosome']
umap_type [0]
batch ['1' '2' '3' '4']
rep ['all_reps']
cell_line ['C9orf72-HRE-1008566' 'C9orf72-HRE-981344' 'Control-1001733'
 'Control-1001733,Control-1017118,Control-1025045,Control-1048087'
 'Control-1001733,Control-1017118,Control-1025045,Control-1048087,C9orf72-HRE-1008566,C9orf72-HRE-981344'
 'Control-1001733,Control-1017118,Control-1025045,Control-1048087,TDP--43-G348V-1057052,TDP--43-N390D-1005373'
 'Control-1017118' 'Control-1025045' 'Control-1048087'
 'TDP--43-G348V-1057052' 'TDP--43-G348V-1057052,TDP--43-

<!-- By cell line -->

In [8]:
# UMAP0 per-marker reports: one PDF per marker, one page per batch.
# Each page is a 2 x len(cell_lines_order) grid of pre-rendered UMAP images:
#   row 0 -> coloring "CONDITIONS", row 1 -> coloring "REPS"; one column per cell line.
dumap0 = df[df["umap_type"] == 0]
markers = dumap0["markers"].unique()

output_dir = "umap0_reports"
os.makedirs(output_dir, exist_ok=True)

row_colorings = ("CONDITIONS", "REPS")
n_cell_lines = len(cell_lines_order)

for marker in markers:
    pdf_path = os.path.join(output_dir, f"umap0_report_{marker}.pdf")
    with PdfPages(pdf_path) as pdf:
        for batch in batches:
            marker_batch_df = dumap0[(dumap0["markers"] == marker) & (dumap0["batch"] == batch)]
            if marker_batch_df.empty:
                # Nothing recorded for this marker/batch pair: no page.
                continue

            # Only the "all_reps" images are placed on the page.
            all_reps_df = marker_batch_df[marker_batch_df["rep"] == "all_reps"]

            fig, axes = plt.subplots(nrows=2, ncols=n_cell_lines, figsize=(n_cell_lines * 10, 25))
            fig.suptitle(f"Marker: {marker} | Batch: {batch}", fontsize=48)
            plt.subplots_adjust(hspace=0.1, wspace=0.05, top=0.8, bottom=0.08, left=0.05, right=0.95)

            for row_idx, coloring in enumerate(row_colorings):
                for col_idx, cell_line in enumerate(cell_lines_order):
                    ax = axes[row_idx, col_idx]
                    if row_idx == 0:
                        # Column headers live on the top row only.
                        ax.set_title(cell_line, fontsize=32)
                    ax.axis("off")

                    candidates = all_reps_df[
                        (all_reps_df["cell_line"] == cell_line) & (all_reps_df["coloring"] == coloring)
                    ]
                    if candidates.empty:
                        continue  # Leave the panel blank.

                    first_match = candidates.iloc[0]
                    img_path = os.path.join(base_dir, first_match["folder_path"], first_match["image_name"] + ".png")
                    ax.imshow(mpimg.imread(img_path))

            pdf.savefig(fig)
            plt.close(fig)
            gc.collect()  # Release the figure's memory before the next page

    print(f"PDF report generated: {pdf_path}")


PDF report generated: umap0_reports/umap0_report_TDP-43.pdf
PDF report generated: umap0_reports/umap0_report_Nuclear-speckles-SC35.pdf
PDF report generated: umap0_reports/umap0_report_Ubiquitin-levels.pdf
PDF report generated: umap0_reports/umap0_report_UPR-ATF4.pdf
PDF report generated: umap0_reports/umap0_report_Senescence-signaling.pdf
PDF report generated: umap0_reports/umap0_report_Neuronal-activity.pdf
PDF report generated: umap0_reports/umap0_report_Protein-degradation.pdf
PDF report generated: umap0_reports/umap0_report_Autophagy.pdf
PDF report generated: umap0_reports/umap0_report_Splicing-factories.pdf
PDF report generated: umap0_reports/umap0_report_Nuclear-speckles-SON.pdf
PDF report generated: umap0_reports/umap0_report_Parthanatos-late.pdf
PDF report generated: umap0_reports/umap0_report_Necrosis.pdf
PDF report generated: umap0_reports/umap0_report_Necroptosis-HMGB1.pdf
PDF report generated: umap0_reports/umap0_report_Parthanatos-early.pdf
PDF report generated: umap0_repo

In [14]:
# UMAP0 cell-line comparison reports: one PDF per batch, one page per marker.
# Each page is a single row of pre-rendered UMAP images. The five "all_cell_lines"
# panels are always present; the Control+C9 / Control+TDP panels are added only when
# the corresponding combined cell-line set exists for that marker and batch.
# Relies on `dumap0` (the umap_type == 0 subset of `df`) from an earlier cell.
output_dir = "umap0_cell_lines_reports"
os.makedirs(output_dir, exist_ok=True)

# Exact comma-joined cell_line labels of the two combined sets
special_set_1 = "Control-1001733,Control-1017118,Control-1025045,Control-1048087,C9orf72-HRE-1008566,C9orf72-HRE-981344"
special_set_2 = "Control-1001733,Control-1017118,Control-1025045,Control-1048087,TDP--43-G348V-1057052,TDP--43-N390D-1005373"

# Batches/markers actually present in the data. A dedicated name is used for the
# batches so the notebook-level `batches` list from the configuration cell is not
# overwritten; sorting makes the PDF generation order deterministic.
report_batches = sorted(dumap0["batch"].unique())
markers = dumap0["markers"].unique()

# All possible panels (9 total), keyed in left-to-right display order.
all_columns = {
    "col1": {"condition": "all_conditions", "cell_lines": "all_cell_lines", "coloring": "CONDITIONS", "cell_line_title": "All Cell Lines"},
    "col2": {"condition": "all_conditions", "cell_lines": "all_cell_lines", "coloring": "CELL_LINES", "cell_line_title": "All Cell Lines"},
    "col3": {"condition": "all_conditions", "cell_lines": "all_cell_lines", "coloring": "CELL_LINES_CONDITIONS", "cell_line_title": "All Cell Lines"},
    "col4": {"condition": "Untreated", "cell_lines": "all_cell_lines", "coloring": "CELL_LINES", "cell_line_title": "All Cell Lines"},
    "col5": {"condition": "stress", "cell_lines": "all_cell_lines", "coloring": "CELL_LINES", "cell_line_title": "All Cell Lines"},
    "col6": {"condition": "Untreated", "cell_lines": special_set_1, "coloring": "CELL_LINES", "cell_line_title": "Control + C9 Lines"},
    "col7": {"condition": "stress", "cell_lines": special_set_1, "coloring": "CELL_LINES", "cell_line_title": "Control + C9 Lines"},
    "col8": {"condition": "Untreated", "cell_lines": special_set_2, "coloring": "CELL_LINES", "cell_line_title": "Control + TDP Lines"},
    "col9": {"condition": "stress", "cell_lines": special_set_2, "coloring": "CELL_LINES", "cell_line_title": "Control + TDP Lines"},
}
base_column_keys = ("col1", "col2", "col3", "col4", "col5")

for batch in report_batches:
    pdf_path = os.path.join(output_dir, f"umap0_report_cell_lines_batch_{batch}.pdf")
    with PdfPages(pdf_path) as pdf:
        for marker in markers:
            # Rows for this marker and batch, "all_reps" images only
            marker_batch_df = dumap0[(dumap0["markers"] == marker) & (dumap0["batch"] == batch) &
                                     (dumap0["rep"] == "all_reps")]
            if marker_batch_df.empty:
                continue  # No page for this marker

            # Select the panels to draw: base panels always, special-set panels only
            # when that combined cell_line label occurs in the data.
            cell_lines_set = set(marker_batch_df["cell_line"].unique())
            active_columns = {key: value for key, value in all_columns.items() if (
                key in base_column_keys or
                (key in ["col6", "col7"] and special_set_1 in cell_lines_set) or
                (key in ["col8", "col9"] and special_set_2 in cell_lines_set)
            )}

            # At least the five base panels are active, so `axes` is always an array.
            num_cols = len(active_columns)
            fig, axes = plt.subplots(nrows=1, ncols=num_cols, figsize=(num_cols * 10, 10))
            fig.suptitle(f"Batch: {batch} | Marker: {marker}", fontsize=48)
            fig.subplots_adjust(hspace=0.1, wspace=0.05, top=0.8, bottom=0.08, left=0.05, right=0.95)

            for ax, col_info in zip(axes, active_columns.values()):
                # Exact match on condition, coloring and the panel's cell_line label
                filtered_df = marker_batch_df[
                    (marker_batch_df["condition"] == col_info["condition"]) &
                    (marker_batch_df["coloring"] == col_info["coloring"]) &
                    (marker_batch_df["cell_line"] == col_info["cell_lines"])
                ]

                ax.set_title(f"{col_info['condition']} | {col_info['coloring']} \n {col_info['cell_line_title']}", fontsize=32)
                ax.axis("off")

                if not filtered_df.empty:
                    first_match = filtered_df.iloc[0]
                    img_path = os.path.join(base_dir, first_match["folder_path"], first_match["image_name"] + ".png")
                    ax.imshow(mpimg.imread(img_path))

            pdf.savefig(fig)
            plt.close(fig)
            gc.collect()  # Free memory

    print(f"PDF report generated: {pdf_path}")


PDF report generated: umap0_cell_lines_reports/umap0_report_cell_lines_batch_2.pdf
PDF report generated: umap0_cell_lines_reports/umap0_report_cell_lines_batch_1.pdf
PDF report generated: umap0_cell_lines_reports/umap0_report_cell_lines_batch_4.pdf
PDF report generated: umap0_cell_lines_reports/umap0_report_cell_lines_batch_3.pdf


<!-- Stress Initiation -->

In [9]:
# Stress-initiation reports: one single-page PDF per batch that has a UMAP built on
# the pooled Control lines or on the pooled TDP lines. Each page shows four panels:
# Controls untreated / stressed, TDP lines untreated / stressed.
# Relies on `dumap0` (the umap_type == 0 subset of `df`) from an earlier cell.
output_dir = "stress_initiation_reports"
os.makedirs(output_dir, exist_ok=True)

# Comma-joined cell_line labels of the two pooled sets
control_set = "Control-1001733,Control-1017118,Control-1025045,Control-1048087"
tdp_set = "TDP--43-G348V-1057052,TDP--43-N390D-1005373"

stress_df = dumap0[dumap0["markers"] == "Stress-initiation"]

# Batches in which at least one of the two pooled sets occurs
valid_batches = stress_df[stress_df["cell_line"].isin([control_set, tdp_set])]["batch"].unique()

# Panel layout, left to right: (cell_line label, condition, panel title)
column_specs = [
    (control_set, "Untreated", "All Controls – Untreated"),
    (control_set, "stress", "All Controls – Stress"),
    (tdp_set, "Untreated", "TDP Lines – Untreated"),
    (tdp_set, "stress", "TDP Lines – Stress"),
]
column_definitions = {
    f"col{position}": {"cell_lines": line_label, "condition": condition_name, "coloring": "CELL_LINES", "title": panel_title}
    for position, (line_label, condition_name, panel_title) in enumerate(column_specs, start=1)
}

for batch in valid_batches:
    pdf_path = os.path.join(output_dir, f"stress_initiation_report_batch_{batch}.pdf")
    with PdfPages(pdf_path) as pdf:
        batch_df = stress_df[stress_df["batch"] == batch]
        if batch_df.empty:
            continue  # Skip if no data

        fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(40, 10))
        fig.suptitle(f"Stress Initiation Report | Batch: {batch}", fontsize=48)
        plt.subplots_adjust(hspace=0.1, wspace=0.05, top=0.8, bottom=0.08, left=0.05, right=0.95)

        for i, col_info in enumerate(column_definitions.values()):
            # Exact match on all three attributes of the panel
            matches_condition = batch_df["condition"] == col_info["condition"]
            matches_coloring = batch_df["coloring"] == col_info["coloring"]
            matches_lines = batch_df["cell_line"] == col_info["cell_lines"]
            filtered_df = batch_df[matches_condition & matches_coloring & matches_lines]

            ax = axes[i]
            ax.set_title(col_info["title"], fontsize=32)
            ax.axis("off")

            if filtered_df.empty:
                continue  # Leave the panel blank

            first_match = filtered_df.iloc[0]
            img_path = os.path.join(base_dir, first_match["folder_path"], first_match["image_name"] + ".png")
            ax.imshow(mpimg.imread(img_path))

        pdf.savefig(fig)
        plt.close(fig)
        gc.collect()  # Free memory

    print(f"PDF report generated: {pdf_path}")


PDF report generated: stress_initiation_reports/stress_initiation_report_batch_4.pdf


In [21]:
# # Create output directory
# output_dir = "umap0_cell_lines_reports"
# os.makedirs(output_dir, exist_ok=True)

# # Define column structure
# column_definitions = [
#     ("all_conditions", "CONDITIONS"),
#     ("all_conditions", "CELL_LINES"),
#     ("all_conditions", "CELL_LINES_CONDITIONS"),
#     ("Untreated", "CELL_LINES"),
#     ("stress", "CELL_LINES")
# ]
# # Get unique batches and markers
# batches = dumap0["batch"].unique()
# markers = dumap0["markers"].unique()

# for batch in batches:
#     pdf_path = os.path.join(output_dir, f"umap0_report_cell_lines_batch_{batch}.pdf")
#     with PdfPages(pdf_path) as pdf:
#         for marker in markers:
#             # Filter data for the marker and batch
#             marker_batch_df = dumap0[(dumap0["markers"] == marker) & (dumap0["batch"] == batch) &\
#                                      (dumap0["cell_line"] == 'all_cell_lines') & (dumap0["rep"] == "all_reps") ]
#             if marker_batch_df.empty:
#                 continue  # Skip if no data
            
#             fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(50, 10))
#             fig.suptitle(f"Batch: {batch} | Marker: {marker}", fontsize=48)
#             plt.subplots_adjust(hspace=0.1, wspace=0.05, top=0.8, bottom=0.08, left=0.05, right=0.95)
#             for i, (condition, coloring) in enumerate(column_definitions):
#                 img_row = marker_batch_df[(marker_batch_df["condition"] == condition) & (marker_batch_df["coloring"] == coloring)]
#                 ax = axes[i]
#                 ax.set_title(f"{condition} | {coloring}", fontsize=32)
#                 ax.axis("off")
#                 if not img_row.empty:
#                     img_path = os.path.join(base_dir, img_row.iloc[0]["folder_path"], img_row.iloc[0]["image_name"] + ".png")
#                     img = mpimg.imread(img_path)
#                     ax.imshow(img)
            
#             pdf.savefig(fig)
#             plt.close(fig)
#             gc.collect()  # Free memory
    
#     print(f"PDF report generated: {pdf_path}")

<!-- ## UMAP1 -->

In [None]:
# Keep the rows with umap_type code 1, then print the distinct values of every
# column except the first one (folder_path).
dumap1 = df.loc[df["umap_type"] == 1]
for column_name, column_values in dumap1.iloc[:, 1:].items():
    print(column_name, np.unique(column_values))

image_name ['umap1']
umap_type [1]
batch ['1' '2' '3' '4']
rep ['all_reps']
cell_line ['C9orf72-HRE-1008566' 'C9orf72-HRE-981344' 'Control-1001733'
 'Control-1017118' 'Control-1025045' 'Control-1048087'
 'TDP--43-G348V-1057052' 'TDP--43-N390D-1005373' 'all_cell_lines']
condition ['Untreated' 'stress']
markers ['all_markers' 'without DAPI']
coloring ['CATEGORIES' 'MARKERS']


In [None]:
# UMAP1 reports: one PDF per batch, one page per condition.
# Each page is a 3 x len(cell_lines_order) grid of pre-rendered UMAP images; the rows
# are the (coloring, markers) combinations in `row_mappings`, the columns the cell lines.
dumap1 = df[df["umap_type"] == 1]

# Create output directory
output_dir = "umap1_reports"
os.makedirs(output_dir, exist_ok=True)

# (coloring, value of the "markers" column) for each grid row, top to bottom.
# Loop-invariant, so defined once instead of once per page.
row_mappings = [
    ("MARKERS", "all_markers"),
    ("MARKERS", "without DAPI"),
    ("CATEGORIES", "without DAPI"),
]

for batch in batches:
    print('Batch', batch)
    pdf_path = os.path.join(output_dir, f"umap1_report_batch_{batch}.pdf")
    with PdfPages(pdf_path) as pdf:
        for condition in conditions:
            batch_df = dumap1[(dumap1["batch"] == batch) & (dumap1["condition"] == condition) & (dumap1["rep"] == "all_reps")]
            if batch_df.empty:
                continue  # No page for this batch/condition pair

            fig, axes = plt.subplots(nrows=len(row_mappings), ncols=len(cell_lines_order), figsize=(len(cell_lines_order) * 10, 25))
            fig.suptitle(f"Batch: {batch} | Condition: {condition}", fontsize=16)
            fig.subplots_adjust(hspace=0.4, wspace=0.2, top=0.85, bottom=0.05, left=0.1, right=0.95)

            # The loop target is named `marker_set` rather than `markers`, so the
            # notebook-level `markers` array is not rebound to a string by this cell.
            for row_idx, (coloring, marker_set) in enumerate(row_mappings):
                # Vertical row label in the left margin of the figure
                fig.text(0.06, 0.8 - (row_idx * 0.3), f'{coloring} ({marker_set})', fontsize=14, fontweight='bold', ha='right', va='center', rotation=90)
                for col_idx, cell_line in enumerate(cell_lines_order):
                    img_row = batch_df[(batch_df["cell_line"] == cell_line) & (batch_df["coloring"] == coloring) & (batch_df["markers"] == marker_set)]
                    ax = axes[row_idx, col_idx]
                    ax.axis("off")
                    if row_idx == 0:
                        # Column headers live on the top row only
                        ax.set_title(cell_line, fontsize=10, fontweight='bold')
                    if not img_row.empty:
                        img_path = os.path.join(base_dir, img_row.iloc[0]["folder_path"], img_row.iloc[0]["image_name"] + ".png")
                        img = mpimg.imread(img_path)
                        ax.imshow(img, aspect='auto')

            pdf.savefig(fig)
            plt.close(fig)
            gc.collect()

    print(f"PDF report generated: {pdf_path}")

Batch 1
PDF report generated: umap1_reports/umap1_report_batch_1.pdf
Batch 2
PDF report generated: umap1_reports/umap1_report_batch_2.pdf
Batch 3
PDF report generated: umap1_reports/umap1_report_batch_3.pdf
Batch 4
PDF report generated: umap1_reports/umap1_report_batch_4.pdf


<!-- ## UMAP2 -->

In [None]:
# Keep the rows with umap_type code 2, then print the distinct values of every
# column except the first one (folder_path).
dumap2 = df.loc[df["umap_type"] == 2]
for column_name, column_values in dumap2.iloc[:, 1:].items():
    print(column_name, np.unique(column_values))

image_name ['umap2']
umap_type [2]
batch ['1' '2' '3' '4']
rep ['all_reps']
cell_line ['C9orf72-HRE-1008566,C9orf72-HRE-981344' 'Control-1001733'
 'Control-1001733,Control-1017118,Control-1025045,Control-1048087'
 'Control-1001733,Control-1017118,Control-1025045,Control-1048087,C9orf72-HRE-1008566,C9orf72-HRE-981344,TDP--43-G348V-1057052,TDP--43-N390D-1005373'
 'TDP--43-G348V-1057052,TDP--43-N390D-1005373' 'all_cell_lines']
condition ['Untreated' 'all_conditions' 'stress']
markers ['DNA_RNA_DEFECTS_MARKERS' 'NEURONAL_CELL_DEATH_SENESCENCE_MARKERS'
 'PROTEOSTASIS_MARKERS' 'SYNAPTIC_NEURONAL_FUNCTION_MARKERS' 'all_markers']
coloring ['MULTIPLEX_CELL_LINES' 'MULTIPLEX_CELL_LINES_CONDITIONS'
 'MULTIPLEX_CONDITIONS']


In [None]:
# UMAP2 report for a single batch: one PDF, one page per marker group.
# Each page is one row of five pre-rendered "all_cell_lines" UMAP images, one per
# (condition, coloring) pair listed in `condition_coloring_mapping`.
dumap2 = df[df["umap_type"] == 2]

# Create output directory
output_dir = "umap2_reports"
os.makedirs(output_dir, exist_ok=True)

# The report is labelled "Batch 4" in its file name and page titles, so the rows must
# be restricted to that batch. Without this filter the first matching row of ANY batch
# was plotted under the "Batch: 4" title. Batch ids are strings in `df`.
target_batch = "4"
dumap2_batch = dumap2[dumap2["batch"] == target_batch]

# Define conditions and respective colorings
condition_coloring_mapping = {
    "all_conditions": ["MULTIPLEX_CONDITIONS", "MULTIPLEX_CELL_LINES", "MULTIPLEX_CELL_LINES_CONDITIONS"],
    "Untreated": ["MULTIPLEX_CELL_LINES"],
    "stress": ["MULTIPLEX_CELL_LINES"]
}
# Flattened left-to-right panel order: (condition, coloring)
panels = [
    (condition, coloring)
    for condition, colorings in condition_coloring_mapping.items()
    for coloring in colorings
]

# Page order: the known marker groups first, then anything else alphabetically
priority_markers = [
    "all_markers", "DNA_RNA_DEFECTS_MARKERS", "PROTEOSTASIS_MARKERS", "NEURONAL_CELL_DEATH_SENESCENCE_MARKERS", "SYNAPTIC_NEURONAL_FUNCTION_MARKERS"
]
remaining_markers = sorted(set(dumap2_batch["markers"]) - set(priority_markers))
marker_order = priority_markers + remaining_markers

pdf_path = os.path.join(output_dir, f"umap2_report_batch_{target_batch}.pdf")
with PdfPages(pdf_path) as pdf:
    for marker in marker_order:
        batch_df = dumap2_batch[dumap2_batch["markers"] == marker]
        if batch_df.empty:
            continue  # No page for a marker group absent from this batch

        fig, axes = plt.subplots(nrows=1, ncols=len(panels), figsize=(30, 6), constrained_layout=True)
        fig.suptitle(f"Batch: {target_batch} | Marker: {marker}", fontsize=16)

        for ax, (condition, coloring) in zip(axes, panels):
            img_row = batch_df[(batch_df["condition"] == condition) & (batch_df["coloring"] == coloring) & (batch_df["cell_line"] == "all_cell_lines")]
            ax.axis("off")
            ax.set_title(f"{condition} | {coloring}", fontsize=12, fontweight="bold")

            if not img_row.empty:
                img_path = os.path.join(base_dir, img_row.iloc[0]["folder_path"], img_row.iloc[0]["image_name"] + ".png")
                img = mpimg.imread(img_path)
                ax.imshow(img, aspect='equal')

        pdf.savefig(fig)
        plt.close(fig)
        gc.collect()

print(f"PDF report generated: {pdf_path}")

PDF report generated: umap2_reports/umap2_report_batch_4.pdf


In [None]:
# UMAP2 across batches: a single-page PDF placing the Untreated / all_markers /
# all_cell_lines UMAP (coloring MULTIPLEX_CELL_LINES) of batches 1-4 side by side.
criteria = {
    "umap_type": 2,
    "condition": "Untreated",
    "coloring": "MULTIPLEX_CELL_LINES",
    "markers": "all_markers",
    "cell_line": "all_cell_lines",
}
# Narrow the table one criterion at a time; the result keeps the original row order.
dumap2_filtered = df
for column_name, wanted_value in criteria.items():
    dumap2_filtered = dumap2_filtered[dumap2_filtered[column_name] == wanted_value]

output_dir = "umap2_reports"
os.makedirs(output_dir, exist_ok=True)

pdf_path = os.path.join(output_dir, "umap2_untreated_across_batches.pdf")
with PdfPages(pdf_path) as pdf:
    # The page is drawn only when at least one matching image exists.
    if not dumap2_filtered.empty:
        fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(24, 6), constrained_layout=True)
        fig.suptitle("UMAP2 | Untreated | all_markers", fontsize=16)

        for ax, batch in zip(axes, ["1", "2", "3", "4"]):
            ax.axis("off")
            ax.set_title(f"Batch {batch}", fontsize=12, fontweight="bold")

            img_row = dumap2_filtered[dumap2_filtered["batch"] == batch]
            if img_row.empty:
                continue  # Leave the panel blank for a batch without this UMAP

            first_match = img_row.iloc[0]
            img_path = os.path.join(base_dir, first_match["folder_path"], first_match["image_name"] + ".png")
            ax.imshow(mpimg.imread(img_path), aspect='equal')

        pdf.savefig(fig)
        plt.close(fig)
        gc.collect()

print(f"PDF report generated: {pdf_path}")

PDF report generated: umap2_reports/umap2_untreated_across_batches.pdf


In [None]:
# 