In [8]:
# =============================================================================
# Cell 1: Import Libraries and Check CUDA Availability
# =============================================================================
import os
import re
import gc
import torch
import numpy as np
import tifffile
import pandas as pd
from nd2reader import ND2Reader
from tqdm import tqdm
from cellpose import models
from tqdm import tqdm

# Report whether PyTorch can use a CUDA device and, if so, which one.
cuda_available = torch.cuda.is_available()
print(f"CUDA Available: {cuda_available}")
if not cuda_available:
    print("No GPU detected or CUDA is not available.")
else:
    # Index 0 is the first device PyTorch enumerates.
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU Name: {gpu_name}")

CUDA Available: True
GPU Name: NVIDIA GeForce RTX 4090


In [2]:
# =============================================================================
# Function: Convert ND2 to TIFF for a Given Channel
# =============================================================================
def convert_nd2_to_tiff_channel(nd2_path, tiff_output_dir, channel_name):
    """
    Writes one 2D TIFF per time point for a single channel of an ND2 file.

    When the reader exposes a z axis, each frame is collapsed with a maximum
    intensity projection over z. When it does not (single focal plane), the
    2D frame is written as-is instead of failing on the missing axis.

    Parameters:
    - nd2_path: Path to the source ND2 file.
    - tiff_output_dir: Directory that receives the TIFF files (created if missing).
    - channel_name: Channel name exactly as listed in the ND2 'channels' metadata.

    Output files are named "<nd2 base name>.tiff", or "<nd2 base name>_t<idx>.tiff"
    when the file has more than one time point. Returns None.

    Any exception is caught and reported with print (nothing is re-raised), so
    a batch run continues with the next file.
    """
    try:
        with ND2Reader(nd2_path) as img:
            print(f"\nProcessing ND2 file: {os.path.basename(nd2_path)}")
            print("Detected Axes:", img.axes)
            print("Detected Shape:", img.shape)
            channels = img.metadata.get('channels')
            if channels is None:
                raise ValueError("No channel information found in ND2 metadata.")
            print("Channels in file:", channels)
            if channel_name not in channels:
                print(f"Channel '{channel_name}' not found in {nd2_path}. Skipping conversion for this channel.")
                return
            channel_index = channels.index(channel_name)
            print(f"Using channel '{channel_name}' at index {channel_index}.")
            img.default_coords['c'] = channel_index

            has_time = 't' in img.axes and img.sizes.get('t', 1) > 1
            # A file acquired at a single focal plane may expose no 'z' axis;
            # asking the reader to bundle 'zyx' would then raise.
            has_z = 'z' in img.axes

            # iter_axes is set before bundle_axes, as in the original code, so
            # that 'z' is no longer an iteration axis when it gets bundled.
            img.iter_axes = 't' if has_time else ''
            img.bundle_axes = 'zyx' if has_z else 'yx'

            # Loop invariants: output folder and file-name stem.
            if tiff_output_dir:
                os.makedirs(tiff_output_dir, exist_ok=True)
            base_name = os.path.splitext(os.path.basename(nd2_path))[0]

            # For each time frame (or the single image), project along z if present.
            for idx, frame in enumerate(img):
                max_proj = np.max(frame, axis=0) if has_z else np.asarray(frame)
                if has_time:
                    tiff_filename = f"{base_name}_t{idx}.tiff"
                else:
                    tiff_filename = f"{base_name}.tiff"
                tiff_path = os.path.join(tiff_output_dir, tiff_filename)
                tifffile.imwrite(tiff_path, max_proj)
                print(f"Saved TIFF: {tiff_path}")
    except Exception as e:
        # Deliberate best-effort: report and let the caller move on.
        print(f"Error converting {nd2_path} for channel {channel_name} to TIFF: {e}")

# =============================================================================
# Function: Generate Cell Mask Using Cellpose
# =============================================================================
def generate_cellmask(tiff_path, mask_path, model):
    """
    Segments one TIFF image with a Cellpose-style model and saves the label mask.

    Parameters:
    - tiff_path: Path of the image to segment. A 3D array is averaged over its
      first axis to obtain a 2D image; any other non-2D array is rejected.
    - mask_path: Destination path of the label image (0 = background).
    - model: Object whose eval(img, diameter=..., flow_threshold=...,
      cellprob_threshold=..., channels=...) returns a sequence with the label
      image as its first element.

    The image is min-max normalised to [0, 1] as float32 before segmentation.
    Images with zero dynamic range or non-finite values are skipped with a
    warning. Labels are stored as uint16, or as uint32 when more than 65535
    labels are present so that label ids never wrap around.

    Returns None. Exceptions are caught and printed; garbage collection and a
    CUDA cache flush always run afterwards.
    """
    try:
        img = tifffile.imread(tiff_path)
        # If the image is 3D (a stack), average over the first axis to obtain a 2D image.
        if img.ndim == 3:
            img = img.mean(axis=0)
        elif img.ndim != 2:
            raise ValueError(f"Unexpected number of dimensions ({img.ndim}) in image: {tiff_path}")

        # Convert to float before measuring the range: for signed integer
        # images "max - min" can overflow when computed in the native dtype.
        img = img.astype(np.float32)
        img_min, img_max = np.min(img), np.max(img)
        dynamic_range = img_max - img_min
        if dynamic_range == 0:
            print(f"Warning: Image {tiff_path} has zero dynamic range. Skipping mask generation.")
            return
        img = (img - img_min) / dynamic_range

        if not np.isfinite(img).all():
            print(f"Warning: Image {tiff_path} contains non-finite values after normalization. Skipping.")
            return

        # Run Cellpose (adjust parameters such as diameter if needed).
        # Only the first returned element (the label image) is used; indexing
        # instead of tuple-unpacking avoids depending on how many additional
        # values eval() returns.
        masks = np.asarray(model.eval(
            img,
            diameter=None,
            flow_threshold=0.4,
            cellprob_threshold=0.0,
            channels=[0, 0]
        )[0])

        # uint16 holds label ids up to 65535; widen only when that is not enough.
        if masks.max(initial=0) <= np.iinfo(np.uint16).max:
            mask_dtype = np.uint16
        else:
            mask_dtype = np.uint32
        tifffile.imwrite(mask_path, masks.astype(mask_dtype))
        print(f"Saved cell mask: {mask_path}")
    except Exception as e:
        print(f"Error generating mask for {tiff_path}: {e}")
    finally:
        # Release Python and GPU memory between images.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

# =============================================================================
# Function: Calculate Cell-Level Intensities
# =============================================================================
def calculate_intensities(tiff_files, tiff_dirs, mask_dir, output_csv):
    """
    Computes the mean GFP and DSred intensity of every labelled cell and writes
    the table to a CSV file.

    Parameters:
    - tiff_files: Iterable of TIFF file names; the same name is looked up in
      both channel directories.
    - tiff_dirs: Mapping with keys "GFP" and "DSred" giving the channel directories.
    - mask_dir: Directory holding the masks, named "<tiff base name>_mask.tiff".
    - output_csv: Path of the CSV file to write.

    A file is skipped (with a printed message) when any of its three images is
    missing, cannot be read, or when the image and mask shapes differ, so that
    one bad file does not abort the whole batch. Label 0 is treated as
    background. The CSV always contains the four column headers, even when no
    cell was measured. Returns None.
    """
    columns = ["File Name", "Mask Index", "Average GFP Intensity", "Average DSred Intensity"]
    data = []
    for tiff_file in tqdm(tiff_files, desc="Calculating Intensities"):
        gfp_path = os.path.join(tiff_dirs["GFP"], tiff_file)
        dsred_path = os.path.join(tiff_dirs["DSred"], tiff_file)
        mask_filename = os.path.splitext(tiff_file)[0] + "_mask.tiff"
        mask_path = os.path.join(mask_dir, mask_filename)

        if not os.path.exists(gfp_path):
            print(f"GFP image {gfp_path} not found. Skipping.")
            continue
        if not os.path.exists(dsred_path):
            print(f"DSred image {dsred_path} not found. Skipping.")
            continue
        if not os.path.exists(mask_path):
            print(f"Mask image {mask_path} not found. Skipping.")
            continue

        # Same read-error handling as create_excel_from_tiff_and_mask.
        try:
            gfp_img = tifffile.imread(gfp_path)
            dsred_img = tifffile.imread(dsred_path)
            mask_img = tifffile.imread(mask_path)
        except Exception as e:
            print(f"Error reading files for {tiff_file}: {e}")
            continue

        # Boolean indexing below requires the mask to match both images.
        if gfp_img.shape != mask_img.shape or dsred_img.shape != mask_img.shape:
            print(f"Shape mismatch between images and mask for {tiff_file}. Skipping.")
            continue

        mask_indices = np.unique(mask_img)
        mask_indices = mask_indices[mask_indices != 0]  # exclude background

        for idx in mask_indices:
            cell_mask = (mask_img == idx)
            data.append({
                "File Name": tiff_file,
                "Mask Index": idx,
                "Average GFP Intensity": np.mean(gfp_img[cell_mask]),
                "Average DSred Intensity": np.mean(dsred_img[cell_mask])
            })

    # Passing the columns explicitly keeps the header when `data` is empty.
    df = pd.DataFrame(data, columns=columns)
    df.to_csv(output_csv, index=False)
    print(f"Cell-level intensity data saved to {output_csv}")

# =============================================================================
# Helper Function: Extract Group, Sample, and Repeat Information
# =============================================================================
def extract_info_from_filename(filename):
    """
    Parses a TIFF filename into its (group, sample, repeat) fields.

    The extension and any "_t<digits>" time tag are stripped first. The rest
    must start with "Act" or "Rep" (case-insensitive), followed by a two-digit
    sample and a three-digit repeat separated by underscores, e.g. "Act_01_002".
    Returns three 'Unknown' strings when the name does not follow that layout.
    """
    stem, _extension = os.path.splitext(filename)
    stem = re.sub(r'_t\d+', '', stem)
    match = re.search(r'^(Act|Rep)_(\d{2})_(\d{3})', stem, flags=re.IGNORECASE)
    if match is None:
        return 'Unknown', 'Unknown', 'Unknown'
    return match.group(1), match.group(2), match.group(3)

# Running the analysis

In [None]:
# =============================================================================
# Runnable Step 1: ND2-to-TIFF Conversion
# =============================================================================
def run_conversion(nd2_dir, dsred_tiff_dir, gfp_tiff_dir,
                   dsred_channel='60x DSRED', gfp_channel='60x GFP'):
    """
    Converts every ND2 file found directly in `nd2_dir` to per-channel TIFFs.

    Parameters:
    - nd2_dir: Directory scanned (non-recursively) for files ending in ".nd2",
      matched case-insensitively.
    - dsred_tiff_dir: Output directory for the DSred channel TIFFs.
    - gfp_tiff_dir: Output directory for the GFP channel TIFFs.
    - dsred_channel: Name of the DSred channel in the ND2 metadata.
    - gfp_channel: Name of the GFP channel in the ND2 metadata.

    Files are processed in sorted name order so that runs are repeatable
    (os.listdir gives no ordering guarantee). Returns None.
    """
    nd2_files = sorted(f for f in os.listdir(nd2_dir) if f.lower().endswith('.nd2'))
    if not nd2_files:
        print("No ND2 files found in", nd2_dir)
        return
    print(f"Found {len(nd2_files)} ND2 file(s) for conversion.")
    for nd2_file in nd2_files:
        nd2_path = os.path.join(nd2_dir, nd2_file)
        print(f"\n--- Processing: {nd2_file} ---")
        convert_nd2_to_tiff_channel(nd2_path, dsred_tiff_dir, dsred_channel)
        convert_nd2_to_tiff_channel(nd2_path, gfp_tiff_dir, gfp_channel)

# Input folder holding the ND2 files; every output folder lives under <nd2_dir>/Output.
nd2_dir = r"A:\_Ongoing\20250128_ActRep_version2_24well"
output_dir = os.path.join(nd2_dir, "Output")
dsred_tiff_dir, gfp_tiff_dir, mask_dir = (
    os.path.join(output_dir, subfolder)
    for subfolder in ("60x_DSRED_TIFF", "60x_GFP_TIFF", "mask")
)

# Create the output tree up front (no error if it already exists).
for _out_dir in (output_dir, dsred_tiff_dir, gfp_tiff_dir, mask_dir):
    os.makedirs(_out_dir, exist_ok=True)

# Convert both channels of every ND2 file in nd2_dir.
run_conversion(nd2_dir, dsred_tiff_dir, gfp_tiff_dir)



In [None]:
# =============================================================================
# Runnable Step 2: Cell Mask Generation
# =============================================================================
def run_mask_generation(dsred_tiff_dir, mask_dir, model):
    """
    Runs generate_cellmask on every DSred TIFF in `dsred_tiff_dir`.

    A file qualifies when its name ends in ".tiff" or ".tif" (case-insensitive)
    and does not contain "_mask". Each mask is written to `mask_dir` as
    "<tiff base name>_mask.tiff". Prints a notice and returns early when no
    file qualifies. Returns None.
    """
    def _is_source_tiff(name):
        # Accept TIFF extensions but leave out anything that is already a mask.
        return name.lower().endswith(('.tiff', '.tif')) and '_mask' not in name

    dsred_tiff_files = list(filter(_is_source_tiff, os.listdir(dsred_tiff_dir)))
    if len(dsred_tiff_files) == 0:
        print("No DSred TIFF files found in", dsred_tiff_dir)
        return

    print(f"Generating cell masks for {len(dsred_tiff_files)} DSred TIFF file(s)...")
    for tiff_file in tqdm(dsred_tiff_files, desc="Generating Masks"):
        stem = os.path.splitext(tiff_file)[0]
        generate_cellmask(
            os.path.join(dsred_tiff_dir, tiff_file),
            os.path.join(mask_dir, stem + "_mask.tiff"),
            model,
        )
    print("Cell mask generation completed.")

print("\n=== Cell Mask Generation ===")
try:
    # First choice: model type "cyto3", on the GPU when PyTorch reports CUDA as available.
    model = models.Cellpose(gpu=torch.cuda.is_available(), model_type="cyto3")
    print("Cellpose model initialized.")
except Exception as e:
    # Best-effort fallback: report the error and retry on the CPU with model
    # type "cyto2". An exception raised by this second constructor call is not caught.
    print("Error initializing Cellpose model:", e)
    print("Falling back to CPU mode with cyto2.")
    model = models.Cellpose(gpu=False, model_type="cyto2")
# Segment every DSred TIFF with whichever model was created above.
run_mask_generation(dsred_tiff_dir, mask_dir, model)


In [10]:
# =============================================================================
# Runnable Step 3: Intensity Calculation and Excel Output
# =============================================================================
def extract_info_from_filename(filename):
    """
    Splits a filename such as "Act_01_001.tiff" into ("Act", "01", "001").

    The file extension and any "_t<digits>" time tag are removed before
    matching. The group is matched case-insensitively and returned as written
    in the filename. When the name does not begin with
    "<Act|Rep>_<2 digits>_<3 digits>", the result is
    ('Unknown', 'Unknown', 'Unknown').
    """
    name_without_ext = os.path.splitext(filename)[0]
    base = re.sub(r'_t\d+', '', name_without_ext)
    pattern = re.compile(r'^(Act|Rep)_(\d{2})_(\d{3})', re.IGNORECASE)
    found = pattern.search(base)
    # groups() yields exactly the three captured fields, in order.
    return found.groups() if found else ('Unknown', 'Unknown', 'Unknown')

def create_excel_from_tiff_and_mask(dsred_tiff_dir, gfp_tiff_dir, mask_dir, output_excel):
    """
    Builds a per-cell intensity table from DSred/GFP TIFFs and their masks and
    writes it to an Excel file.

    For every DSred TIFF, the GFP TIFF with the same file name and the mask
    "<base>_mask.tiff" are loaded. For each non-zero label in the mask, the
    mean GFP and mean DSred intensity over that label's pixels are recorded.
    Group, Sample and Repeat are taken from the file name via
    extract_info_from_filename.

    Output columns: Group, Sample, Repeat, Mask Index, GFP_Mean, DSred_Mean.

    Parameters:
      dsred_tiff_dir: Directory containing DSred TIFF files.
      gfp_tiff_dir: Directory containing GFP TIFF files (with matching filenames).
      mask_dir: Directory containing mask files (named as the TIFF base with '_mask.tiff').
      output_excel: Path for the output Excel file.

    A file is skipped with a printed message when its GFP image or mask is
    missing, when any image cannot be read, or when image and mask shapes
    differ. If no cell is measured at all, a header-only Excel file is written
    (this case previously raised a KeyError on the missing 'File Name' column).
    Returns None; returns early without writing when no DSred TIFF is found.
    """
    columns = ['Group', 'Sample', 'Repeat', 'Mask Index', 'GFP_Mean', 'DSred_Mean']

    # List DSred TIFF files (skip any mask files); sorted for a stable row order.
    dsred_files = sorted(
        f for f in os.listdir(dsred_tiff_dir)
        if f.lower().endswith(('.tiff', '.tif')) and '_mask' not in f
    )

    if not dsred_files:
        print("No DSred TIFF files found in", dsred_tiff_dir)
        return

    records = []

    for tiff_file in tqdm(dsred_files, desc="Processing TIFF files"):
        dsred_path = os.path.join(dsred_tiff_dir, tiff_file)
        gfp_path = os.path.join(gfp_tiff_dir, tiff_file)
        # Mask file is expected to be named "<base>_mask.tiff".
        mask_filename = os.path.splitext(tiff_file)[0] + "_mask.tiff"
        mask_path = os.path.join(mask_dir, mask_filename)

        if not os.path.exists(gfp_path):
            print(f"GFP file not found for {tiff_file}, skipping.")
            continue
        if not os.path.exists(mask_path):
            print(f"Mask file not found for {tiff_file}, skipping.")
            continue

        try:
            dsred_img = tifffile.imread(dsred_path)
            gfp_img = tifffile.imread(gfp_path)
            mask_img = tifffile.imread(mask_path)
        except Exception as e:
            print(f"Error reading files for {tiff_file}: {e}")
            continue

        # Boolean indexing below requires the mask to match both images.
        if gfp_img.shape != mask_img.shape or dsred_img.shape != mask_img.shape:
            print(f"Image/mask shape mismatch for {tiff_file}, skipping.")
            continue

        # Parse the file name once per file rather than once per output row.
        group, sample, repeat = extract_info_from_filename(tiff_file)

        # Unique cell labels; 0 is treated as background.
        cell_indices = np.unique(mask_img)
        cell_indices = cell_indices[cell_indices != 0]

        for cell in cell_indices:
            cell_mask = (mask_img == cell)
            records.append({
                'Group': group,
                'Sample': sample,
                'Repeat': repeat,
                'Mask Index': int(cell),
                'GFP_Mean': np.mean(gfp_img[cell_mask]),
                'DSred_Mean': np.mean(dsred_img[cell_mask]),
            })

    if not records:
        print("Warning: no cells were measured; the Excel file will contain only the header row.")

    # Explicit columns fix the column order and keep the header for an empty table.
    df = pd.DataFrame(records, columns=columns)
    df.to_excel(output_excel, index=False)
    print("Excel summary saved to:", output_excel)

# ----- Usage -----
# Adjust these paths as needed.
# NOTE: these assignments rebind dsred_tiff_dir, gfp_tiff_dir and mask_dir, which the
# conversion cell also defines (there via os.path.join from nd2_dir). Keep both in
# sync when the dataset folder changes.
dsred_tiff_dir = r"A:\_Ongoing\20250128_ActRep_version2_24well\Output\60x_DSRED_TIFF"
gfp_tiff_dir   = r"A:\_Ongoing\20250128_ActRep_version2_24well\Output\60x_GFP_TIFF"
mask_dir       = r"A:\_Ongoing\20250128_ActRep_version2_24well\Output\mask"
output_excel   = r"A:\_Ongoing\20250128_ActRep_version2_24well\Output\summary.xlsx"

# Build the per-cell summary table and write it to output_excel.
create_excel_from_tiff_and_mask(dsred_tiff_dir, gfp_tiff_dir, mask_dir, output_excel)


Processing TIFF files: 100%|██████████| 252/252 [00:02<00:00, 120.91it/s]


Excel summary saved to: A:\_Ongoing\20250128_ActRep_version2_24well\Output\summary.xlsx


# Cellpose check: visualize the masks

In [12]:
import random
import tifffile
import matplotlib.pyplot as plt
import numpy as np
import os

def visualize_random_samples(tiff_output_dir, cellmask_output_dir, num_samples=5, seed=None):
    """
    Randomly selects TIFF images and shows each next to its Cellpose mask overlay.

    Parameters:
    - tiff_output_dir: Directory containing the max projected TIFF images.
    - cellmask_output_dir: Directory containing the Cellpose mask TIFFs, named
      "<tiff base name>_mask.tiff".
    - num_samples: Number of random samples to visualize (capped at the number
      of available images).
    - seed: (Optional) Seed for reproducibility. A private random generator is
      used, so the module-level `random` state is left untouched.

    Candidate files are listed in sorted order, so a given seed selects the
    same images regardless of the order os.listdir returns. Files whose name
    contains "_mask" are left out, as in the other listing functions in this
    notebook. A selected image without a mask file is skipped with a message.
    Returns None.
    """
    # With a seed, draw from a private generator instead of reseeding the global one.
    rng = random.Random(seed) if seed is not None else random

    # Sorted list of source TIFF files (masks excluded).
    tiff_files = sorted(
        f for f in os.listdir(tiff_output_dir)
        if f.lower().endswith(('.tiff', '.tif')) and '_mask' not in f
    )

    if not tiff_files:
        print(f"No TIFF files found in {tiff_output_dir}.")
        return

    # Clamp the request to what is available; a negative request draws nothing.
    num_samples = max(0, min(num_samples, len(tiff_files)))

    samples = rng.sample(tiff_files, num_samples)

    for tiff_file in samples:
        tiff_path = os.path.join(tiff_output_dir, tiff_file)
        mask_filename = os.path.splitext(tiff_file)[0] + "_mask.tiff"
        mask_path = os.path.join(cellmask_output_dir, mask_filename)

        if not os.path.exists(mask_path):
            print(f"Mask file {mask_filename} does not exist. Skipping.")
            continue

        try:
            img = tifffile.imread(tiff_path)
            mask = tifffile.imread(mask_path)

            # A 3D array is reduced to 2D by averaging over its first axis.
            if img.ndim == 3:
                img_display = img.mean(axis=0)
            else:
                img_display = img

            # Normalize image to [0, 1] for display.
            img_display = img_display.astype(np.float32)
            img_display -= img_display.min()
            if img_display.max() != 0:
                img_display /= img_display.max()

            fig, (ax_image, ax_overlay) = plt.subplots(1, 2, figsize=(12, 6))

            # Left panel: original image.
            ax_image.imshow(img_display, cmap='gray')
            ax_image.set_title('Original Image')
            ax_image.axis('off')

            # Right panel: image with the label mask overlaid semi-transparently.
            ax_overlay.imshow(img_display, cmap='gray')
            ax_overlay.imshow(mask, cmap='jet', alpha=0.5)
            ax_overlay.set_title('Cellpose Mask Overlay')
            ax_overlay.axis('off')

            fig.tight_layout()
            plt.show()
            # Release the figure so repeated calls do not accumulate open figures.
            plt.close(fig)

        except Exception as e:
            print(f"Error visualizing {tiff_file}: {e}")


In [None]:
# Define the directories (use the same as in your main code)
# NOTE: nd2_dir is assigned here but not used by the call below.
nd2_dir = r"A:\_Ongoing\20250128_ActRep_version2_24well"
tiff_output_dir = r"A:\_Ongoing\20250128_ActRep_version2_24well\Output\60x_DSRED_TIFF"
cellmask_output_dir = r"A:\_Ongoing\20250128_ActRep_version2_24well\Output\mask"

# Visualize random samples: two images, fixed seed so the same pair is requested each run.
visualize_random_samples(tiff_output_dir, cellmask_output_dir, num_samples=2, seed=4)


# Plotting

In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set higher resolution for plots
%config InlineBackend.figure_format = 'retina'
# Define the input directory
# NOTE(review): this folder (20241204_ActRep) is not the 20250128 dataset processed by
# the cells above, and no cell visible in this notebook writes cell_intensities.csv
# into it. The cells below also expect 'Fragment' and 'Type' columns, which
# calculate_intensities does not produce. Confirm where this file comes from.
input_dir = r"A:\Data_A\20241204_ActRep-20241204T221840Z-001\20241204_ActRep"

# Construct the path to the CSV file
csv_file = os.path.join(input_dir, 'cell_intensities.csv')

# Load the CSV file
df = pd.read_csv(csv_file)

# Display the first few rows to understand the data
df.head()


In [None]:
# Keep only the control rows: 'Fragment' contains "Control" (case-insensitive).
control_df = df.loc[df['Fragment'].str.contains('Control', case=False)]

# Mean control intensity of both channels per 'Type', with the columns
# relabelled so they read as control baselines.
control_mean = (
    control_df
    .groupby('Type')[['Average GFP Intensity', 'Average DSred Intensity']]
    .mean()
    .reset_index()
    .rename(columns={
        'Average GFP Intensity': 'Control GFP Intensity',
        'Average DSred Intensity': 'Control DSred Intensity'
    })
)

# Display the control mean intensities
control_mean


In [None]:
# Select the control rows: 'Fragment' contains "Control" (case-insensitive).
# na=False treats a missing Fragment as "not a control" instead of producing a
# NaN entry that cannot be used as a boolean mask.
control_df = df[df['Fragment'].str.contains('Control', case=False, na=False)]

# Mean control intensity in both channels, one row per 'Type'.
control_mean = (
    control_df
    .groupby('Type')[['Average GFP Intensity', 'Average DSred Intensity']]
    .mean()
    .reset_index()
    .rename(columns={
        'Average GFP Intensity': 'Control GFP Intensity',
        'Average DSred Intensity': 'Control DSred Intensity'
    })
)

# Attach each row's control baseline by 'Type'. Control columns left over from
# a previous run of this cell are dropped first. Otherwise the merge would
# create '_x'/'_y' suffixed duplicates and the subtraction below would fail,
# so the cell could only ever be executed once per kernel.
df = (
    df
    .drop(columns=['Control GFP Intensity', 'Control DSred Intensity'], errors='ignore')
    .merge(control_mean, on='Type', how='left')
)

# Adjusted intensity = per-cell mean minus the control mean of the same Type.
df['Adjusted GFP Intensity'] = df['Average GFP Intensity'] - df['Control GFP Intensity']
df['Adjusted DSred Intensity'] = df['Average DSred Intensity'] - df['Control DSred Intensity']

# Preview the adjusted data
df.head()



In [None]:
# Lookup table from the AR labels to the names used on the plots.
# AR1-AR14 map to IDR1-IDR14; AR15-AR19 have individual names.
fragment_mapping = dict((f'AR{n}', f'IDR{n}') for n in range(1, 15))
fragment_mapping.update({
    'AR15': 'LAP*_IDR',
    'AR16': 'LAP_IDR',
    'AR17': 'LIP_IDR',
    'AR18': 'FL',
    'AR19': 'opt_LAP*IDR',
})

def rename_fragment(fragment):
    """Return the mapped name for `fragment`, or `fragment` unchanged if it has no mapping."""
    if fragment in fragment_mapping:
        return fragment_mapping[fragment]
    return fragment

# Apply the renaming to the 'Fragment' column.
# Values without an entry in fragment_mapping pass through unchanged, so
# running this cell again leaves already-renamed values as they are.
df['Fragment'] = df['Fragment'].apply(rename_fragment)

# Display the unique fragments after renaming
print("Fragments after renaming:")
print(df['Fragment'].unique())


In [None]:
# Left-to-right plotting order: the renamed AR1..AR19 labels, keeping only
# those that actually occur in df['Fragment'].
fragment_order = [
    name
    for name in map(rename_fragment, (f'AR{i}' for i in range(1, 20)))
    if name in df['Fragment'].unique()
]

print("Order of fragments for plotting:")
print(fragment_order)

# Write the adjusted table next to the input CSV.
adjusted_csv_file = os.path.join(input_dir, 'cell_intensities_adjusted.csv')
df.to_csv(adjusted_csv_file, index=False)

print(f"Adjusted data saved to {adjusted_csv_file}")


In [None]:
# Set plot style and context
sns.set_style("whitegrid")
sns.set_context("talk", font_scale=1.2)
# Exclude control and unknown rows from the plotting DataFrame.
# na=False makes the match a plain boolean Series even when 'Fragment' has
# missing values; without it the "~" negation fails on the NaN entries.
df_filtered = df[~df['Fragment'].str.contains('Control|Unknown', case=False, na=False)]

In [38]:
def plot_adjusted_intensity(data, group_type, intensity_column, ylabel, output_filename,
                            fragments=None, output_dir=None, save_dpi=1200):
    """
    Plots adjusted intensity box plots for a specific group and channel.

    A white box plot per fragment is drawn with the individual data points
    overlaid as a black strip plot. The figure is saved as an image and shown.

    Parameters:
    - data: DataFrame containing the data to plot; must have 'Type', 'Fragment'
      and `intensity_column` columns.
    - group_type: 'Act' or 'Rep' (value matched against the 'Type' column).
    - intensity_column: Column name for the intensity to plot.
    - ylabel: Label for the y-axis (prefixed with "Adjusted ").
    - output_filename: Filename to save the plot.
    - fragments: Optional x-axis order. Defaults to the notebook-level
      `fragment_order`.
    - output_dir: Optional directory for the saved figure. Defaults to the
      notebook-level `input_dir`.
    - save_dpi: Resolution of the saved file (default 1200, as before).

    The on-screen figure uses Matplotlib's default dpi; only the saved file is
    rendered at `save_dpi`. Creating the figure itself at 1200 dpi made the
    inline display render an extremely large image. The figure is closed
    after being shown. Returns None.
    """
    # Filter data for the specific group
    group_data = data[data['Type'] == group_type]

    # Fall back to the notebook-level settings when no override is given.
    if fragments is None:
        fragments = fragment_order
    if output_dir is None:
        output_dir = input_dir

    fig, ax = plt.subplots(figsize=(12, 8))

    # Box plot; fliers hidden because every point is drawn by the strip plot.
    sns.boxplot(
        x='Fragment',
        y=intensity_column,
        data=group_data,
        order=fragments,
        color='white',
        fliersize=0,
        linewidth=2,
        ax=ax
    )

    # Overlay individual data points
    sns.stripplot(
        x='Fragment',
        y=intensity_column,
        data=group_data,
        order=fragments,
        color='black',
        size=5,
        jitter=True,
        ax=ax
    )

    # Titles and axis labels
    ax.set_title(f'Adjusted {ylabel} for {group_type} Group', fontsize=16, fontweight='bold')
    ax.set_xlabel('Fragment', fontsize=14, fontweight='bold')
    ax.set_ylabel(f'Adjusted {ylabel}', fontsize=14, fontweight='bold')

    # Remove grid lines
    ax.grid(False)

    # Remove top and right spines and trim the remaining ones to the tick range.
    sns.despine(ax=ax, trim=True, left=False, bottom=False)
    ax.spines['left'].set_linewidth(1.5)
    ax.spines['bottom'].set_linewidth(1.5)

    # Tick label size, with vertical x labels so fragment names do not overlap.
    ax.tick_params(axis='x', labelrotation=90, labelsize=12)
    ax.tick_params(axis='y', labelsize=12)

    fig.tight_layout()

    # Save the figure at high resolution, then display it.
    output_path = os.path.join(output_dir, output_filename)
    fig.savefig(output_path, dpi=save_dpi)

    plt.show()
    # Release the figure so repeated calls do not keep every canvas in memory.
    plt.close(fig)

    print(f"Plot saved to {output_path}")


In [None]:
# Act group: control-subtracted GFP intensity per fragment.
plot_adjusted_intensity(
    data=df_filtered,
    group_type='Act',
    intensity_column='Adjusted GFP Intensity',
    ylabel='GFP Intensity',
    output_filename='Act_Group_Adjusted_GFP_Intensity.png'
)


In [None]:
# Rep group: control-subtracted GFP intensity per fragment.
plot_adjusted_intensity(
    data=df_filtered,
    group_type='Rep',
    intensity_column='Adjusted GFP Intensity',
    ylabel='GFP Intensity',
    output_filename='Rep_Group_Adjusted_GFP_Intensity.png'
)


In [None]:
# Act group: control-subtracted DSred intensity per fragment.
plot_adjusted_intensity(
    data=df_filtered,
    group_type='Act',
    intensity_column='Adjusted DSred Intensity',
    ylabel='DSred Intensity',
    output_filename='Act_Group_Adjusted_DSred_Intensity.png'
)


In [None]:
# Rep group: control-subtracted DSred intensity per fragment.
plot_adjusted_intensity(
    data=df_filtered,
    group_type='Rep',
    intensity_column='Adjusted DSred Intensity',
    ylabel='DSred Intensity',
    output_filename='Rep_Group_Adjusted_DSred_Intensity.png'
)


# Next: calculate the ratio of the GFP signal to the DSred signal (GFP/DSred)

In [43]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import numpy as np

# Set higher resolution for plots
%config InlineBackend.figure_format = 'retina'

# Set plot style and context.
# These are the same seaborn settings applied in the earlier plotting section.
sns.set_style("whitegrid")
sns.set_context("talk", font_scale=1.2)


In [None]:
# Define the input directory
input_dir = r"A:\Data_A\20241204_ActRep-20241204T221840Z-001\20241204_ActRep"

# Path to the adjusted CSV file (written by the earlier cell that saves
# 'cell_intensities_adjusted.csv' into input_dir)
adjusted_csv_file = os.path.join(input_dir, 'cell_intensities_adjusted.csv')

# Load the adjusted CSV file; this rebinds `df` to the table read from disk
df = pd.read_csv(adjusted_csv_file)

# Display the first few rows
df.head()


In [None]:
# Calculate the ratio of Adjusted GFP Intensity to Adjusted DSred Intensity.
# Zeros in the denominator are replaced with NaN first so the division gives
# NaN instead of +/-inf.
# NOTE(review): the denominator is control-subtracted, so it can be close to
# zero or negative; such cells give very large or negative ratios. Confirm
# this is acceptable for the analysis.
df['Adjusted DSred Intensity'] = df['Adjusted DSred Intensity'].replace(0, np.nan)

# Compute the ratio and map any remaining infinities to NaN in one assignment.
# The previous `df[col].replace(..., inplace=True)` modified a column selection
# in place, which pandas deprecates and which may not update `df`.
df['GFP_DSred_Ratio'] = (
    (df['Adjusted GFP Intensity'] / df['Adjusted DSred Intensity'])
    .replace([np.inf, -np.inf], np.nan)
)

# Drop rows with NaN in the ratio column (optional, if you want to exclude such cells)
df_clean = df.dropna(subset=['GFP_DSred_Ratio'])

# Display the first few rows with the new ratio column
df_clean.head()


In [46]:
# Exclude control and unknown data from the plotting DataFrame.
# na=False keeps the match a plain boolean Series when 'Fragment' has missing
# values; without it the "~" negation fails on the NaN entries.
df_filtered = df_clean[~df_clean['Fragment'].str.contains('Control|Unknown', case=False, na=False)]


In [47]:
def plot_ratio(data, group_type, ratio_column, ylabel, output_filename,
               fragments=None, output_dir=None, save_dpi=1200):
    """
    Plots the ratio of adjusted intensities for a specific group.

    A white box plot per fragment is drawn with the individual data points
    overlaid as a black strip plot. The figure is saved as an image and shown.

    Parameters:
    - data: DataFrame containing the data to plot; must have 'Type', 'Fragment'
      and `ratio_column` columns.
    - group_type: 'Act' or 'Rep' (value matched against the 'Type' column).
    - ratio_column: Column name for the ratio to plot.
    - ylabel: Label for the y-axis.
    - output_filename: Filename to save the plot.
    - fragments: Optional x-axis order. Defaults to the notebook-level
      `fragment_order`, which must then have been defined by an earlier cell.
    - output_dir: Optional directory for the saved figure. Defaults to the
      notebook-level `input_dir`.
    - save_dpi: Resolution of the saved file (default 1200, as before).

    The on-screen figure uses Matplotlib's default dpi; only the saved file is
    rendered at `save_dpi`. Creating the figure itself at 1200 dpi made the
    inline display render an extremely large image. The figure is closed
    after being shown. Returns None.
    """
    # Filter data for the specific group
    group_data = data[data['Type'] == group_type]

    # Fall back to the notebook-level settings when no override is given.
    if fragments is None:
        fragments = fragment_order
    if output_dir is None:
        output_dir = input_dir

    fig, ax = plt.subplots(figsize=(12, 8))

    # Box plot; fliers hidden because every point is drawn by the strip plot.
    sns.boxplot(
        x='Fragment',
        y=ratio_column,
        data=group_data,
        order=fragments,
        color='white',
        fliersize=0,
        linewidth=2,
        ax=ax
    )

    # Overlay individual data points
    sns.stripplot(
        x='Fragment',
        y=ratio_column,
        data=group_data,
        order=fragments,
        color='black',
        size=5,
        jitter=True,
        ax=ax
    )

    # Titles and axis labels
    ax.set_title(f'GFP/DSred Ratio for {group_type} Group', fontsize=16, fontweight='bold')
    ax.set_xlabel('Fragment', fontsize=14, fontweight='bold')
    ax.set_ylabel(ylabel, fontsize=14, fontweight='bold')

    # Remove grid lines
    ax.grid(False)

    # Remove top and right spines and trim the remaining ones to the tick range.
    sns.despine(ax=ax, trim=True, left=False, bottom=False)
    ax.spines['left'].set_linewidth(1.5)
    ax.spines['bottom'].set_linewidth(1.5)

    # Tick label size, with vertical x labels so fragment names do not overlap.
    ax.tick_params(axis='x', labelrotation=90, labelsize=12)
    ax.tick_params(axis='y', labelsize=12)

    fig.tight_layout()

    # Save the figure at high resolution, then display it.
    output_path = os.path.join(output_dir, output_filename)
    fig.savefig(output_path, dpi=save_dpi)

    plt.show()
    # Release the figure so repeated calls do not keep every canvas in memory.
    plt.close(fig)

    print(f"Plot saved to {output_path}")


In [None]:
# Act group: GFP/DSred ratio per fragment.
plot_ratio(
    data=df_filtered,
    group_type='Act',
    ratio_column='GFP_DSred_Ratio',
    ylabel='GFP / DSred Ratio',
    output_filename='Act_Group_GFP_DSred_Ratio.png'
)


In [None]:
# Rep group: GFP/DSred ratio per fragment.
plot_ratio(
    data=df_filtered,
    group_type='Rep',
    ratio_column='GFP_DSred_Ratio',
    ylabel='GFP / DSred Ratio',
    output_filename='Rep_Group_GFP_DSred_Ratio.png'
)
