##  GR DUSP1 Gating Notebook

The purpose of this notebook is to:
1) Load all analyses for final dataframe preparation (GR_Confirmation)
3) Filter GR data to remove partial cells
4) Estimate GR cytoplasmic area from DUSP1 data
5) Convert GR intensity to molecular counts
6) Concatenate the final GR and DUSP1 dataframes

In [None]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import dask.array as da
import os
import sys
import logging
import seaborn as sns
import datetime
import glob

# Date stamp (e.g. 'Jun03') used to tag the output file names written below.
today = datetime.date.today()
date_str = f"{today:%b%d}"

# Quiet noisy third-party loggers: raise numba and matplotlib to WARNING and
# switch the matplotlib font-manager logger off entirely.
numba_logger = logging.getLogger('numba')
matplotlib_logger = logging.getLogger('matplotlib')
for noisy_logger in (numba_logger, matplotlib_logger):
    noisy_logger.setLevel(logging.WARNING)
logging.getLogger('matplotlib.font_manager').disabled = True

# Put the directory two levels above the working directory on sys.path so
# that the `src` package can be imported.
src_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
print(src_path)
sys.path.append(src_path)

from src.Analysis_DUSP1 import DUSP1DisplayManager, PostProcessingDisplay, ExperimentPlotter

In [None]:
# Base directory containing the per-analysis CSV exports that are globbed below.
base_dir = "/Volumes/share/Users/Eric/GR_DUSP1_AllData/DUSP1_AllAnalysis_061225"
# Output directory passed to the save/plot calls later in the notebook.
save_dir = "/Volumes/share/Users/Eric/GR_DUSP1_AllData/DUSP1_AllAnalysis_061225/ConcatPlots"
# exist_ok=True replaces the exists()-then-create check: it is a no-op when
# the directory is already there and cannot race with another process.
os.makedirs(save_dir, exist_ok=True)

In [None]:
# Helper to load and concat by pattern
def load_and_concat(pattern, directory=None):
    """Load every CSV matching *pattern* and stack the rows into one DataFrame.

    Column names of each file are lower-cased before concatenation so that
    files differing only in header capitalisation line up.

    Args:
        pattern: Glob pattern for the file names, e.g. ``"*_FinalSpots_*.csv"``.
        directory: Folder to search. Defaults to the notebook-level
            ``base_dir`` when omitted.

    Returns:
        A single DataFrame with a fresh 0..n-1 index. Files are read in
        sorted path order.

    Raises:
        ValueError: If no file matches *pattern* in the searched folder.
    """
    search_dir = base_dir if directory is None else directory
    # glob returns paths in arbitrary (filesystem) order; sort so the row
    # order of the result is reproducible from run to run.
    paths = sorted(glob.glob(os.path.join(search_dir, pattern)))
    if not paths:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(
            f"No files matching {pattern!r} found in {search_dir!r}"
        )

    frames = []
    for path in paths:
        frame = pd.read_csv(path)
        # normalize columns to lowercase
        frame.columns = [col.lower() for col in frame.columns]
        frames.append(frame)
    return pd.concat(frames, ignore_index=True)

In [None]:
# Concatenate every CSV export of each table type found in base_dir.
ssit_all = load_and_concat("*_SSITcellresults_*.csv")
spots_all = load_and_concat("*_FinalSpots_*.csv")
clusters_all = load_and_concat("*_FinalClusters_*.csv")
cellprops_all = load_and_concat("*_FinalCellProps_*.csv")

# Sanity check: report the row count of each concatenated table.
n_ssit, n_spots, n_clusters, n_cellprops = map(
    len, (ssit_all, spots_all, clusters_all, cellprops_all)
)
print(f"SSIT cells: {n_ssit}")
print(f"Spots: {n_spots}")
print(f"Clusters: {n_clusters}")
print(f"Cell properties: {n_cellprops}")

In [None]:
# Gate on nuclear area: keep only the cells whose 'nuc_area' lies inside the
# 25th-75th percentile range (both bounds inclusive).
nuc_area = cellprops_all['nuc_area']
q_low, q_high = nuc_area.quantile(0.25), nuc_area.quantile(0.75)
gated_cells = cellprops_all[nuc_area.between(q_low, q_high)].copy()

# IDs of the cells that survived the gate
gated_ids = gated_cells['unique_cell_id'].unique()

# Restrict the SSIT, cluster and spot tables to those same cells
ssit_gated, clusters_gated, spots_gated = (
    table[table['unique_cell_id'].isin(gated_ids)].copy()
    for table in (ssit_all, clusters_all, spots_all)
)

display(gated_cells.shape, ssit_gated.shape, clusters_gated.shape, spots_gated.shape)


def count_cells(table):
    """Return the number of distinct unique_cell_id values in *table*."""
    return table['unique_cell_id'].nunique()


# Before/after report.
# NOTE(review): the "Spots" and "Clusters" lines count distinct cell IDs that
# appear in those tables, not spot/cluster rows - confirm this is intended.
print(f"Cells before gating: {count_cells(cellprops_all)}")
print(f"Cells after gating: {count_cells(gated_cells)}")
print(f"Spots before gating: {count_cells(spots_all)}")
print(f"Spots after gating: {count_cells(spots_gated)}")
print(f"Clusters before gating: {count_cells(clusters_all)}")
print(f"Clusters after gating: {count_cells(clusters_gated)}")

# Write each gated table to save_dir, tagged with today's date stamp
gated_outputs = {
    f'gated_cells_{date_str}.csv': gated_cells,
    f'ssit_gated_{date_str}.csv': ssit_gated,
    f'clusters_gated_{date_str}.csv': clusters_gated,
    f'spots_gated_{date_str}.csv': spots_gated,
}
for csv_name, table in gated_outputs.items():
    table.to_csv(os.path.join(save_dir, csv_name), index=False)

In [None]:
# Reload gated tables previously written to save_dir. The file names embed
# date_str, so this only finds files saved with the same date stamp.
# Only the SSIT table is reloaded; the other three are left disabled.
# gated_cells = pd.read_csv(os.path.join(save_dir, f'gated_cells_{date_str}.csv'))
ssit_gated_csv = os.path.join(save_dir, f'ssit_gated_{date_str}.csv')
ssit_gated = pd.read_csv(ssit_gated_csv)
# clusters_gated = pd.read_csv(os.path.join(save_dir, f'clusters_gated_{date_str}.csv'))
# spots_gated = pd.read_csv(os.path.join(save_dir, f'spots_gated_{date_str}.csv'))

In [None]:
ssit_gated.columns  # Show the column labels of the gated SSIT table

In [None]:
# Report how many distinct cells are present in the gated SSIT table
n_gated_cells = ssit_gated['unique_cell_id'].nunique()
print(f"Unique cell IDs in gated SSIT data: {n_gated_cells}")

In [None]:
plotter = ExperimentPlotter(ssit_gated)

# Each experiment is described by the replicas, time points (min) and Dex
# concentrations (nM) that are handed to plot_experiment.

# 1) 100 nM time sweep
time_sweep = dict(
    replicas=['D', 'E', 'F', 'M', 'N'],
    times=[10, 20, 30, 40, 50, 60, 75, 90, 120, 150, 180],
    concs=[100],
    save_dir=save_dir,
)

# 2) 75 min concentration sweep
conc_sweep = dict(
    replicas=['G', 'H', 'I'],
    times=[75],
    concs=[0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000],
    save_dir=save_dir,
)

# 3) Both varying: 0.3, 1 and 10 nM across several time points
time_conc_sweep = dict(
    replicas=['J', 'K', 'L'],
    times=[30, 50, 75, 90, 120, 180],
    concs=[0.3, 1, 10],
    save_dir=save_dir,
)

plotter.plot_experiment(**time_sweep)
plotter.plot_experiment(**conc_sweep)
plotter.plot_experiment(**time_conc_sweep)

# Use the log file to search for analyses

In [None]:
# ── 3) TS FRACTION CALCULATION ───────────────────────────────────────────────────

# Columns that together identify one cell, and one experimental condition.
cell_keys = ["replica", "dex_conc", "time", "h5_idx", "fov", "cell_label"]
condition_keys = ["dex_conc", "time"]

# Nuclear TS only (rows flagged is_nuc == 1)
ts_nuc = clusters_all.loc[clusters_all["is_nuc"] == 1]

# Number of nuclear TS rows per cell
ts_count = ts_nuc.groupby(cell_keys).size().reset_index(name="ts_count")

# One row per cell listed in the cell-properties table
all_cells = cellprops_all[cell_keys].drop_duplicates()

# Left-join the counts onto every cell; cells without any TS row get 0
merged = all_cells.merge(ts_count, on=cell_keys, how="left").fillna({"ts_count": 0})

# has_ts is 1 when the cell has at least one TS, else 0
merged["has_ts"] = merged["ts_count"].ge(1).astype(int)

# 3a) Fraction of TS-positive cells per replica × dex_conc × time
frac_per_rep = (
    merged
    .groupby(["replica"] + condition_keys)["has_ts"]
    .mean()
    .reset_index(name="fraction")
)

# 3b) Mean ± SD of the per-replica fractions for each condition
frac_stats = (
    frac_per_rep
    .groupby(condition_keys)["fraction"]
    .agg(mean_frac="mean", sd_frac="std")
    .reset_index()
)

# 3c) Mean ± SD of has_ts over all cells pooled, for each condition
overall_stats = (
    merged
    .groupby(condition_keys)["has_ts"]
    .agg(mean_overall="mean", sd_overall="std")
    .reset_index()
)

# Per-replica and pooled statistics side by side, one row per condition
combined_stats = frac_stats.merge(overall_stats, on=condition_keys, how="left")

# ── 4) PLOT TS FRACTION OVER TIME (include 0 min control on each curve) ─────────
sns.set_theme(style="whitegrid", context="paper")
concs = sorted(combined_stats["dex_conc"].unique())
treated_concs = [conc for conc in concs if conc != 0]

# The control row (dex_conc=0, time=0) is identical for every curve, so fetch
# it once instead of once per concentration, and fail with an explicit
# message (rather than a bare IndexError) when it is missing.
if treated_concs:
    control_rows = combined_stats[
        (combined_stats["dex_conc"] == 0) & (combined_stats["time"] == 0)
    ]
    if control_rows.empty:
        raise ValueError(
            "combined_stats has no control row (dex_conc == 0, time == 0); "
            "cannot anchor the TS-fraction curves at 0 min."
        )
    ctrl_row = control_rows.iloc[0]

for conc in treated_concs:
    # 1) Stats for this concentration
    subset = combined_stats[combined_stats["dex_conc"] == conc].copy()

    # 2) Build a "control" row labeled at this conc, time=0
    control_for_conc = {
        "dex_conc":      conc,
        "time":          0,
        "mean_frac":     ctrl_row["mean_frac"],
        "sd_frac":       ctrl_row["sd_frac"],
        "mean_overall":  ctrl_row["mean_overall"],
        "sd_overall":    ctrl_row["sd_overall"]
    }
    control_df = pd.DataFrame([control_for_conc])
    subset = pd.concat([subset, control_df], ignore_index=True)

    # 3) Sort by time so that 0 min is first
    subset = subset.sort_values("time")

    # 4) Plot shaded "overall ± SD" and error-bar "mean_frac ± sd_frac"
    fig, ax = plt.subplots(figsize=(8, 4))

    ax.fill_between(
        subset["time"],
        subset["mean_overall"] - subset["sd_overall"],
        subset["mean_overall"] + subset["sd_overall"],
        color="lightgray",
        alpha=0.5,
        label="Overall (all‐cells) ± SD"
    )
    ax.errorbar(
        subset["time"],
        subset["mean_frac"],
        yerr=subset["sd_frac"],
        fmt="-o",
        capsize=5,
        color="purple",
        label="Mean(replica) ± SD(replica)"
    )

    ax.set_title(f"{conc} nM Dex: Fraction of Cells with ≥1 TS Over Time")
    ax.set_xlabel("Time (min)")
    ax.set_ylabel("Fraction of Cells")
    ax.set_ylim(0, 1)
    ax.set_xticks(subset["time"].values)
    ax.legend()
    plt.tight_layout()

    # — Save this figure to base_dir —
    # round() instead of int(): a float product such as 0.57 * 1000 is
    # 569.999..., which int() would truncate to 569.
    conc_pm = int(round(float(conc) * 1000))
    filename = f"TS_fraction_{conc_pm}pM_{conc}_nM.png"
    # e.g., if conc=0.3 → "TS_fraction_300pM_0.3_nM.png"
    fig.savefig(os.path.join(base_dir, filename), dpi=150)
    plt.close(fig)


# ── 5) JOY (RIDGE) PLOTS FOR mRNA DISTRIBUTIONS (with CDF lines) ───────────────────
metrics = ["num_nuc_spots", "num_cyto_spots", "num_spots"]
# Fixed x-range (and histogram bin range) per metric
axis_limits = {
    "num_nuc_spots": (0, 500),
    "num_cyto_spots": (0, 500),
    "num_spots": (0, 1000),
}

# Control population (dex_conc=0, time=0). It does not change inside any of
# the loops below, so slice it once here instead of re-filtering ssit_all for
# every metric, concentration and ridge.
control_cells = ssit_all[(ssit_all["dex_conc"] == 0) & (ssit_all["time"] == 0)]

# Precompute CDF thresholds (median & 95th) on the control
cdf_thr = {}
for m in metrics:
    control_vals = control_cells[m].dropna().values

    # Optional sanity check for negatives:
    print(f"METRIC {m}: control min = {control_vals.min() if len(control_vals)>0 else 'N/A'}, "
          f"control max = {control_vals.max() if len(control_vals)>0 else 'N/A'}")

    if len(control_vals) > 0:
        # Empirical CDF of the control values, read off at 0.50 and 0.95
        sorted_vals = np.sort(control_vals)
        cdf = np.arange(1, len(sorted_vals) + 1) / len(sorted_vals)
        cdf_thr[m] = (
            np.interp(0.50, cdf, sorted_vals),   # median
            np.interp(0.95, cdf, sorted_vals)    # 95th percentile
        )
    else:
        cdf_thr[m] = (None, None)

for conc in sorted(ssit_all["dex_conc"].unique()):
    if conc == 0:
        continue

    df_conc = ssit_all[ssit_all["dex_conc"] == conc]
    # The control is always drawn as the first (0 min) ridge
    times = [0] + sorted(df_conc["time"].unique().tolist())

    # round() instead of int(): a float product such as 0.57 * 1000 is
    # 569.999..., which int() would truncate to 569.
    conc_pm = int(round(float(conc) * 1000))

    for m in metrics:
        # (Optional) Check data range at each conc:
        all_vals = np.concatenate([
            control_cells[m].dropna().values,
            df_conc[m].dropna().values
        ])
        print(f"For conc={conc} nM, metric={m}: data min={all_vals.min()}, data max={all_vals.max()}")

        fig, ax = plt.subplots(figsize=(8, len(times) * 1.2))
        fig.suptitle(f"{conc} nM Dex: {m.replace('_',' ').title()} (All Replicas)", fontsize=14)

        # Use rocket_r colormap for ridge plots
        bins = np.linspace(axis_limits[m][0], axis_limits[m][1], 30)
        cmap = sns.color_palette("rocket_r", n_colors=len(times))[::-1]

        for i, t in enumerate(times):
            if t == 0:
                data = control_cells[m]
            else:
                data = df_conc[df_conc["time"] == t][m]

            hist, edges = np.histogram(data.dropna(), bins=bins, density=True)
            xs = (edges[:-1] + edges[1:]) / 2
            # Scale each ridge to a peak height of 0.8 so rows do not overlap
            if hist.max() > 0:
                heights = hist / hist.max() * 0.8
            else:
                heights = np.zeros_like(xs)
            # First time point is drawn on the top row
            y_off = len(times) - 1 - i

            ax.fill_between(xs, y_off, y_off + heights, color=cmap[i], alpha=0.7)

        # Overlay CDF threshold lines
        lo, hi = cdf_thr[m]
        if lo is not None and hi is not None:
            ax.axvline(lo, color='red', linestyle='--', linewidth=1, label="Median (control)")
            ax.axvline(hi, color='red', linestyle='-', linewidth=1, label="95th pct (control)")

        # Force X-axis to the configured limits so nothing appears < 0
        ax.set_xlim(axis_limits[m][0], axis_limits[m][1])

        ax.set_yticks([len(times) - 1 - i for i in range(len(times))])
        ax.set_yticklabels([f"{t} min" for t in times])
        ax.set_xlabel("mRNA Count")
        # Only add a legend when threshold lines were drawn
        if lo is not None:
            ax.legend(loc="upper right")
        plt.tight_layout(rect=[0, 0, 1, 0.95])

        # — Save this ridge plot to base_dir —
        filename = f"joy_{conc_pm}pM_{conc}nM_{m}.png"
        # e.g., "joy_300pM_0.3nM_num_nuc_spots.png"
        fig.savefig(os.path.join(base_dir, filename), dpi=150)
        plt.close(fig)


# ── 6) LINE PLOTS FOR NUCLEAR & CYTOPLASMIC mRNA COUNTS ─────────────────────────────
# Every concentration's curve starts from the shared 0 min control.
# NOTE(review): unlike sections 4 and 5 of this cell, the figures created here
# are neither saved nor closed - confirm whether that is intentional.
df_control = ssit_all[(ssit_all["dex_conc"] == 0) & (ssit_all["time"] == 0)]
treated_concs = [c for c in sorted(ssit_all["dex_conc"].unique()) if c != 0]

for m in ["num_nuc_spots", "num_cyto_spots"]:
    for conc in treated_concs:
        # Cells at this concentration plus the control cells at time=0
        df_conc = ssit_all[ssit_all["dex_conc"] == conc]
        df_combined = pd.concat([df_conc, df_control], ignore_index=True)

        # 6a) Mean per replica and time, then mean ± SD of those replica means
        rep_means = (
            df_combined
            .groupby(["replica", "time"])[m]
            .mean()
            .reset_index(name="mean_val")
        )
        rep_stats = (
            rep_means
            .groupby("time")["mean_val"]
            .agg(mean_of_means="mean", sd_of_means="std")
            .reset_index()
        )

        # 6b) Mean ± SD over all cells pooled, per time
        overall = (
            df_combined
            .groupby("time")[m]
            .agg(mean_overall="mean", sd_overall="std")
            .reset_index()
        )

        merged_stats = (
            rep_stats
            .merge(overall, on="time", how="left")
            .sort_values("time")
        )

        time_axis = merged_stats["time"]
        band_low = merged_stats["mean_overall"] - merged_stats["sd_overall"]
        band_high = merged_stats["mean_overall"] + merged_stats["sd_overall"]

        fig, ax = plt.subplots(figsize=(8, 4))

        # Shaded band: pooled mean ± SD
        ax.fill_between(
            time_axis,
            band_low,
            band_high,
            color="lightgray",
            alpha=0.5,
            label="Overall (all‐cells) ± SD",
        )

        # Error bars: mean of replica means ± SD of replica means
        ax.errorbar(
            time_axis,
            merged_stats["mean_of_means"],
            yerr=merged_stats["sd_of_means"],
            fmt="-o",
            capsize=5,
            color="green",
            label="Mean(replica) ± SD(replica)",
        )

        ax.set_title(f"{conc} nM Dex: {m.replace('_', ' ').title()} Over Time")
        ax.set_xlabel("Time (min)")
        ax.set_ylabel("Count")
        ax.set_xticks(time_axis.values)
        ax.legend()
        plt.tight_layout()

In [None]:
# List all analyses that have been run.
# NOTE(review): `am` is not defined anywhere in the visible notebook, so this
# cell raises NameError as written - presumably a manager object left over
# from another notebook; confirm and either define it or remove this cell.
all_analysis_names = am.list_analysis_names()

## Gate DUSP1 cells on 25-75% range of nuclear area

## GR_ALL & DUSP1_All final dataframe preparation for SSIT

1) Fit a Polynomial (2nd-degree) using (nuc_area_px, cyto_area_px) from DUSP1_SSIT_Final.

2) Estimate Cytoplasm Area in GR_ALL:
3) Creates `CalcCytoArea` by evaluating the fitted polynomial at each row’s `nuc_area`.

4) Gate both data sets on the 25%–75% range of nuclear area.

5) Compute “Normalized” GR (`normGRnuc`, `normGRcyt`) in GR_ALL:
- Scales nuclear/cyt intensities (5%→95% range) into integer bins [0,30].

6) Plot Histograms for the normalized nuclear/cyt GR (using custom colors).

7) Save the updated, gated data sets to CSV.

In [None]:
# 1) READ INPUT DATA
# =========================
# DUSP1_ALL from above or load from disk.
# The membership test must name the same variable that is then used; the
# previous test looked for f'DUSP1_ALL_{current_date}_NoThreshold', which
# raised NameError because current_date is only defined in a later cell.
if 'DUSP1_ALL' in locals():
    df_dusp = DUSP1_ALL
else:
    df_dusp = pd.read_csv('DUSP1_ALL.csv')

# GR_ALL from above or load from disk:
if 'GR_ALL' in locals():
    df_gr = GR_ALL
else:
    df_gr = pd.read_csv('GR_ALL_pregate.csv')


# 2) FIT POLYNOMIAL TO (NUC, CYTO) FROM DUSP1_ALL
# =========================
# Only the rows that have valid nuc_area_px and cyto_area_px are kept.
# (No fit is performed in this cell; it only drops the incomplete rows.)
num_cells = df_dusp.shape[0]
df_dusp_nonmissing = df_dusp.dropna(subset=['nuc_area_px', 'cyto_area_px']).copy()
# rows before minus rows after, so the reported count is non-negative
print(f'Cells removed because of NaN areas in DUSP1: {num_cells - df_dusp_nonmissing.shape[0]}')


# 4) GATE DUSP1 DATAFRAME ON [25%, 75%] NUCLEAR AREA
# =========================
# We'll define a helper function for gating.
num_cells = df_dusp.shape[0]
def gate_on_nuc_area(df, nuc_col, lower_q=0.25, upper_q=0.75):
    """Return a copy of *df* gated to a quantile range of *nuc_col*.

    Args:
        df: DataFrame to filter.
        nuc_col: Name of the column to gate on.
        lower_q: Lower quantile of the gate, in [0, 1]. Defaults to 0.25.
        upper_q: Upper quantile of the gate, in [0, 1]. Defaults to 0.75.

    Returns:
        A new DataFrame (copy) holding the rows whose *nuc_col* value lies
        between the two quantile values, bounds included. Rows where
        *nuc_col* is NaN never satisfy the comparison and are dropped.

    Raises:
        ValueError: If the quantiles are not ordered within [0, 1].
    """
    if not 0.0 <= lower_q <= upper_q <= 1.0:
        raise ValueError(
            f"Expected 0 <= lower_q <= upper_q <= 1, got {lower_q} and {upper_q}"
        )
    values = df[nuc_col]
    lower = values.quantile(lower_q)
    upper = values.quantile(upper_q)
    return df[(values >= lower) & (values <= upper)].copy()

# Gate DUSP1_ALL on nuc_area_px
print('+++ Gating Nuc Area +++')
df_dusp_gated = gate_on_nuc_area(df_dusp_nonmissing, 'nuc_area_px')
# Count what the gate itself removed: rows entering the gate minus rows
# leaving it. (Previously computed as gated - total, which was negative and
# also included the rows already dropped for NaN areas.)
num_removed_by_gate = df_dusp_nonmissing.shape[0] - df_dusp_gated.shape[0]
print(f'Cells removed because of nuc_area_px gating: {num_removed_by_gate}')
print(f'Cells remaining after nuc_area_px gating: {df_dusp_gated.shape[0]}')

# SAVE THE GATED DATAFRAMES

In [None]:
# Import the module, not `from datetime import datetime`: that form rebinds
# the name `datetime` (imported as a module at the top of the notebook) to the
# class, which breaks `datetime.date.today()` if the first cell is re-run.
import datetime

# SAVE THE GATED DATAFRAMES
# =========================
# Gated DUSP1 (unchanged except row filtering)
# Date stamp for the file name, e.g. 'Feb2425'
current_date = datetime.datetime.now().strftime("%b%d%y")

# Save the gated DUSP1 dataframe with the current date in the filename
gated_dusp_csv = f"DUSP1_ALL_gated_{current_date}_NoThreshold.csv"
df_dusp_gated.to_csv(gated_dusp_csv, index=False)
# The closing quote around the file name was missing in the message before.
print(f"Saved gated DUSP1 to '{gated_dusp_csv}'")


# Load in gated data

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

# Load previously saved gated tables. The file names (and their date stamps)
# are fixed, so these are plain strings - the f-prefix had no placeholders.
df_dusp_gated = pd.read_csv('DUSP1_ALL_gated_Feb2425_NoThreshold.csv')
df_gr_gated = pd.read_csv('GR_ALL_gated_with_CytoArea_and_normGR_Feb2125.csv')

In [None]:
# Work on a copy of the gated DUSP1 table
DUSP1_data = df_dusp_gated.copy()

# Experiment 1: 100 nM Dex time sweep, 11 time points listed here (the shared
# 0 min baseline is added inside the plotting loop)
df_expt1 = DUSP1_data[DUSP1_data['replica'].isin(['D', 'E', 'F', 'M', 'N'])]
expt1_timepoints = [10, 20, 30, 40, 50, 60, 75, 90, 120, 150, 180]
expt1_concs = [100]

# Experiment 3: time and concentration sweep
df_expt3 = DUSP1_data[DUSP1_data['replica'].isin(['J', 'K', 'L'])]
expt3_concs = [0.3, 1, 10, 100]
expt3_timepoints = [30, 50, 75, 90, 120, 180]

# NOTE(review): df_expt1 / df_expt3 are never used below - all statistics are
# computed from DUSP1_data, i.e. every replica is pooled regardless of the
# experiment it belongs to. Confirm this is intended.

condition_keys = ['dex_conc', 'time']

# Mean spot counts per replica within each condition
replica_means = (
    DUSP1_data
    .groupby(condition_keys + ['replica'])[['num_nuc_spots', 'num_cyto_spots']]
    .mean()
    .reset_index()
)

# Mean and SD of those replica means per condition
summary_stats = (
    replica_means
    .groupby(condition_keys)
    .agg(
        mean_nuc_count=('num_nuc_spots', 'mean'),
        std_nuc_count=('num_nuc_spots', 'std'),
        mean_cyto_count=('num_cyto_spots', 'mean'),
        std_cyto_count=('num_cyto_spots', 'std'),
    )
    .reset_index()
)

# Mean and SD over all cells pooled per condition
overall_stats = (
    DUSP1_data
    .groupby(condition_keys)
    .agg(
        overall_mean_nuc=('num_nuc_spots', 'mean'),
        overall_std_nuc=('num_nuc_spots', 'std'),
        overall_mean_cyto=('num_cyto_spots', 'mean'),
        overall_std_cyto=('num_cyto_spots', 'std'),
    )
    .reset_index()
)

# Rows at time == 0, used as the shared baseline of every curve
zero_min_summary = summary_stats[summary_stats['time'] == 0]
zero_min_overall = overall_stats[overall_stats['time'] == 0]

# Set Style
sns.set_theme(style="ticks", palette="colorblind", context="poster", font='times new roman')

# Two colors: index 0 for nuclear, index 1 for cytoplasmic
colors_nuc_cyto = sns.color_palette("colorblind", 2)

# Experiments to plot: name -> (concentrations, time points)
experiments = {
    "Experiment 1: 100 nM Time Sweep": (expt1_concs, expt1_timepoints),
    # "Experiment 2: 75 min Concentration Sweep": (expt2_concs, expt2_timepoints),
    "Experiment 3: Time and Concentration Sweep": (expt3_concs, expt3_timepoints),
}

# Columns, color and legend labels of the two curves drawn on every figure:
# (replica mean, replica SD, pooled mean, pooled SD, color, line label, band label)
curve_specs = (
    ('mean_nuc_count', 'std_nuc_count', 'overall_mean_nuc', 'overall_std_nuc',
     colors_nuc_cyto[0], 'Nuclear mRNA Count Replicas', 'Total Data Spread - Nuclear'),
    ('mean_cyto_count', 'std_cyto_count', 'overall_mean_cyto', 'overall_std_cyto',
     colors_nuc_cyto[1], 'Cytoplasmic mRNA Count Replicas', 'Total Data Spread - Cytoplasmic'),
)


def _select_with_baseline(stats, baseline, conc, timepoints):
    """Rows of *stats* for one concentration, with the 0 min rows prepended if absent."""
    chosen = stats[(stats['dex_conc'] == conc) & (stats['time'].isin(timepoints))]
    if 0 not in chosen['time'].values:
        chosen = pd.concat([baseline, chosen], ignore_index=True)
    return chosen


for expt_name, (concs, timepoints) in experiments.items():
    for conc in concs:
        subset_summary = _select_with_baseline(summary_stats, zero_min_summary, conc, timepoints)
        subset_overall = _select_with_baseline(overall_stats, zero_min_overall, conc, timepoints)

        plt.figure(figsize=(10, 5))

        for mean_col, std_col, pooled_mean, pooled_std, color, line_label, band_label in curve_specs:
            # Replica mean with replica-SD error bars
            plt.errorbar(subset_summary['time'], subset_summary[mean_col],
                         yerr=subset_summary[std_col], fmt='-o', color=color, capsize=5,
                         label=line_label)

            # Shaded band: pooled mean ± pooled SD
            plt.fill_between(subset_overall['time'],
                             subset_overall[pooled_mean] - subset_overall[pooled_std],
                             subset_overall[pooled_mean] + subset_overall[pooled_std],
                             color=color, alpha=0.2, label=band_label)

        # Titles, axis labels, grid and a legend placed outside the axes
        plt.title(f"{expt_name} - {conc} nM Dex", fontsize=18, fontweight='bold')
        plt.xlabel('Time (min)', fontsize=14)
        plt.ylabel('mRNA Spot Count', fontsize=14)
        plt.grid(True)
        plt.legend(loc='upper left', fontsize=12, frameon=False, bbox_to_anchor=(1, 1))

        plt.show()


In [None]:
# Make a copy of the DUSP1 data
DUSP1_data = df_dusp_gated.copy()

# Experiment 2: 75 min concentration sweep (Replicas G, H, I)
df_expt2 = DUSP1_data[DUSP1_data['replica'].isin(['G', 'H', 'I'])].copy()

# Set Seaborn style
sns.set_theme(style="ticks", palette="colorblind", context="poster", font='times new roman')

# Melt to long form: one row per (cell, spot type) for the seaborn plots
melted_expt2_data = df_expt2.melt(id_vars=['dex_conc'], value_vars=['num_nuc_spots', 'num_cyto_spots'],
                                  var_name='Spot_Type', value_name='Spot_Count')

# Update labels for readability
melted_expt2_data['Spot_Type'] = melted_expt2_data['Spot_Type'].replace({
    'num_nuc_spots': 'Nuclear Spots',
    'num_cyto_spots': 'Cytoplasmic Spots'
})

# Create figure with two subplots.
# sharey must be False: the panels are given different y-limits below, and
# with a shared y-axis the second set_ylim call overwrote the first, leaving
# both panels at 0-800.
fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=False)

# Nuclear Spots Plot: notched box plot with the individual cells overlaid
ax1 = axes[0]
nuc_data = melted_expt2_data[melted_expt2_data['Spot_Type'] == 'Nuclear Spots']
sns.boxplot(x='dex_conc', y='Spot_Count', data=nuc_data, linewidth=2, width=0.6, showfliers=False, notch=True, ax=ax1)
sns.stripplot(x='dex_conc', y='Spot_Count', data=nuc_data, dodge=True, jitter=True, size=3, alpha=0.4, ax=ax1, marker='o', edgecolor='black', color='gray')

ax1.set_xlabel("Dexamethasone Concentration (nM)", fontsize=14, fontweight='bold')
ax1.set_ylabel("mRNA Spot Count", fontsize=14, fontweight='bold')
ax1.set_title("Nuclear DUSP1 Spot Count", fontsize=16, fontweight='bold')
ax1.grid(True, linestyle="--", linewidth=0.5)

# Cytoplasmic Spots Plot: same layout as the nuclear panel
ax2 = axes[1]
cyto_data = melted_expt2_data[melted_expt2_data['Spot_Type'] == 'Cytoplasmic Spots']
sns.boxplot(x='dex_conc', y='Spot_Count', data=cyto_data, linewidth=2, width=0.6, showfliers=False, notch=True, ax=ax2)
sns.stripplot(x='dex_conc', y='Spot_Count', data=cyto_data, dodge=True, jitter=True, size=2, alpha=0.4, ax=ax2, marker='o', edgecolor='black', color='gray')

ax2.set_xlabel("Dexamethasone Concentration (nM)", fontsize=14, fontweight='bold')
ax2.set_ylabel("")  # Same quantity as the left panel; label not repeated
ax2.set_title("Cytoplasmic DUSP1 Spot Count", fontsize=16, fontweight='bold')
ax2.grid(True, linestyle="--", linewidth=0.5)

# Set Y-limits (independent per panel)
ax1.set_ylim(0, 300)
ax2.set_ylim(0, 800)

# Adjust layout
plt.tight_layout()

# Show the plot
plt.show()


In [None]:
## Cytoplasmic and Nuclear mRNA Spot Counts at 0 min

# Work on a copy of the gated DUSP1 table
DUSP1_data = df_dusp_gated.copy()

# Keep only the 0 min rows
df_0min = DUSP1_data.loc[DUSP1_data['time'] == 0]

# One box plot per replica, nuclear counts on the left and cytoplasmic counts
# on the right, sharing the y-axis.
fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)

# (column to plot, panel title, y-axis label) for each panel, left to right
panel_specs = (
    ('num_nuc_spots', 'Nuclear mRNA Spot Counts at 0 min', 'Nuclear Spot Count'),
    ('num_cyto_spots', 'Cytoplasmic mRNA Spot Counts at 0 min', 'Cytoplasmic Spot Count'),
)
for panel_ax, (count_col, panel_title, y_label) in zip(axes, panel_specs):
    sns.boxplot(data=df_0min, x='replica', y=count_col, ax=panel_ax)
    panel_ax.set_title(panel_title)
    panel_ax.set_ylabel(y_label)
    panel_ax.set_xlabel('Replica')

plt.tight_layout()
plt.show()