In [None]:
import scanpy as sc
import seaborn as sns
import numpy as np
import pandas as pd
import random
import os
from matplotlib.pyplot import rc_context
# Render scanpy figures at 100 dpi.
sc.set_figure_params(dpi=100)

import warnings

# Silence the warning categories that would otherwise flood the cell outputs.
for _ignored_category in (FutureWarning, UserWarning, RuntimeWarning):
    warnings.simplefilter("ignore", _ignored_category)

# Every relative path used below is resolved against the project directory.
os.chdir("P:/Tolulope/Manuscript/Yuan Analysis")

# Load the integrated dataset; the trailing expression displays the raw matrix.
adata = sc.read_h5ad('integratedssssss.h5ad')
adata.raw.X


In [None]:
# ---- Pseudobulk construction: M2-like macrophages, pseudo-replicates per sample ----
N_PSEUDO_REPLICATES = 4   # number of pseudo-replicates each sample is split into
PSEUDO_REP_SEED = 0       # fixed seed so the random cell split (and the DE results) can be reproduced

cell_subset = adata[adata.obs['cell type'] == 'M2-like macrophages']

# Private generator: reproducible shuffling without touching the global `random` state.
_split_rng = random.Random(PSEUDO_REP_SEED)

pbs = []
for sample in cell_subset.obs.Sample.unique():
    # Work on an explicit copy. Assigning `.X` on a *view* is applied to the
    # parent object by anndata, which would overwrite `adata.X` with raw counts
    # for these cells (the warning about it is silenced by the filters above).
    samp_cell_subset = cell_subset[cell_subset.obs['Sample'] == sample].copy()
    samp_cell_subset.X = samp_cell_subset.layers['counts']  # make sure to use raw data

    # Randomly partition the sample's cells into the pseudo-replicates.
    indices = list(samp_cell_subset.obs_names)
    _split_rng.shuffle(indices)
    indices = np.array_split(np.array(indices), N_PSEUDO_REPLICATES)

    for i, pseudo_rep in enumerate(indices):
        if len(pseudo_rep) == 0:
            # Fewer cells than replicates: skip rather than emit an all-zero sample.
            continue

        rep_cells = samp_cell_subset[pseudo_rep]
        # Sum counts over cells; force a (1, n_genes) ndarray whether X is sparse or dense.
        summed_counts = np.asarray(rep_cells.X.sum(axis=0)).reshape(1, -1)

        rep_adata = sc.AnnData(X=summed_counts, var=rep_cells.var[[]])
        rep_adata.obs_names = [sample + '_' + str(i)]
        rep_adata.obs['condition'] = samp_cell_subset.obs['condition'].iloc[0]
        rep_adata.obs['replicate'] = i

        pbs.append(rep_adata)

# One row per (sample, pseudo-replicate).
pb = sc.concat(pbs)
from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats
# Count table for PyDESeq2: rows are pseudobulk samples, columns are genes.
# The row index is set explicitly so counts and metadata are aligned by name
# rather than by position.
counts = pd.DataFrame(pb.X, index=pb.obs_names, columns=pb.var_names)

# Create DeseqDataSet object with pseudo-replicates
dds = DeseqDataSet(
    counts=counts,
    metadata=pb.obs,
    design_factors=['condition', 'replicate']
)

# Drop genes that are not detected in any pseudobulk sample, then fit the model.
sc.pp.filter_genes(dds, min_cells=1)
dds.deseq2()

# Wald test: YOV-GFP versus YOV-noGFP.
stat_res = DeseqStats(dds, contrast=('condition', 'YOV-GFP', 'YOV-noGFP'))
stat_res.summary()

# `de` is the results table held by `stat_res`; the Symbol column is added to it in place.
de = stat_res.results_df
de['Symbol'] = de.index.str.upper()  # upper-cased gene symbols

# Save the results ordered by Wald statistic (largest first).
de_sorted = de.sort_values('stat', ascending=False)
de_sorted.to_csv('YOV-GFP_vs_YOV-noGFP_M2-like macrophages.csv')


In [None]:
# Results table of the contrast currently held in `stat_res`.
res = stat_res.results_df
res['Symbol'] = res.index  # keep gene symbols as a regular column

# Significant genes: adjusted p < 0.05 and |log2 fold change| > 0.5.
passes_thresholds = (res.padj < 0.05) & (res.log2FoldChange.abs() > 0.5)
sigs = res[passes_thresholds]

# log1p of the DESeq2-normalized counts, stored as an extra layer on `dds`.
dds.layers['log1p'] = np.log1p(dds.layers['normed_counts'])

# Restrict the dataset to the significant genes.
dds_sigs = dds[:, sigs.index]

# Genes as rows, pseudobulk samples as columns.
grapher = pd.DataFrame(
    dds_sigs.layers['log1p'].T,
    index=dds_sigs.var_names,
    columns=dds_sigs.obs_names,
)

# Pseudobulk columns to export: four pseudo-replicates for each of the two groups.
conditions_of_interest = [
    f'{group}_{replicate}'
    for group in ('YOV_GFP', 'YOV_noGFP')
    for replicate in range(4)
]
grapher = grapher[conditions_of_interest]

# Write the heatmap input, with the gene names in a column called "Gene".
grapher.to_csv('YOV_GFP_vs_YOV_noGFP_M2-like macrophages_for_heatmap.csv', index=True, index_label="Gene")

In [None]:
# Wald test on the already-fitted `dds`: AOV-GFP versus AOV-noGFP.
stat_res = DeseqStats(dds, contrast=('condition', 'AOV-GFP', 'AOV-noGFP'))
stat_res.summary()

# `de` is the results table held by `stat_res`; the Symbol column is added to it in place.
de = stat_res.results_df
de['Symbol'] = de.index.str.upper()  # upper-cased gene symbols

# Save the results ordered by Wald statistic (largest first).
de_sorted = de.sort_values('stat', ascending=False)
de_sorted.to_csv('AOV-GFP_vs_AOV-noGFP_M2-like macrophages.csv')


In [None]:
# Results table of the contrast currently held in `stat_res`.
res = stat_res.results_df
res['Symbol'] = res.index  # keep gene symbols as a regular column

# Significant genes: adjusted p < 0.05 and |log2 fold change| > 0.5.
passes_thresholds = (res.padj < 0.05) & (res.log2FoldChange.abs() > 0.5)
sigs = res[passes_thresholds]

# log1p of the DESeq2-normalized counts, stored as an extra layer on `dds`.
dds.layers['log1p'] = np.log1p(dds.layers['normed_counts'])

# Restrict the dataset to the significant genes.
dds_sigs = dds[:, sigs.index]

# Genes as rows, pseudobulk samples as columns.
grapher = pd.DataFrame(
    dds_sigs.layers['log1p'].T,
    index=dds_sigs.var_names,
    columns=dds_sigs.obs_names,
)

# Pseudobulk columns to export: four pseudo-replicates for each of the two groups.
conditions_of_interest = [
    f'{group}_{replicate}'
    for group in ('AOV_GFP', 'AOV_noGFP')
    for replicate in range(4)
]
grapher = grapher[conditions_of_interest]

# Write the heatmap input, with the gene names in a column called "Gene".
grapher.to_csv('AOV_GFP_vs_AOV_noGFP_M2-like macrophages_for_heatmap1_for_heatmap.csv', index=True, index_label="Gene")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
import os

# Resolve the CSV paths against the project directory.
os.chdir("P:/Tolulope/Manuscript/Yuan Analysis")

# DE tables written by the DESeq2 cells.
df_young = pd.read_csv('YOV-GFP_vs_YOV-noGFP_M2-like macrophages.csv')   # Young OV
df_aged = pd.read_csv('AOV-GFP_vs_AOV-noGFP_M2-like macrophages.csv')   # Aged OV

# Upper-case the symbols so both tables use the same spelling.
for _deg_table in (df_young, df_aged):
    _deg_table['Symbol'] = _deg_table['Symbol'].str.upper()


def _significant_symbols(table, upregulated):
    """Return the set of symbols with padj < 0.05 and log2FC beyond +/-0.5 in the given direction."""
    if upregulated:
        direction_ok = table['log2FoldChange'] > 0.5
    else:
        direction_ok = table['log2FoldChange'] < -0.5
    return set(table[(table['padj'] < 0.05) & direction_ok]['Symbol'])


up_genes_young = _significant_symbols(df_young, upregulated=True)
down_genes_young = _significant_symbols(df_young, upregulated=False)

up_genes_aged = _significant_symbols(df_aged, upregulated=True)
down_genes_aged = _significant_symbols(df_aged, upregulated=False)

# Two side-by-side Venn diagrams: upregulated (left) and downregulated (right).
plt.figure(figsize=(12, 6))

_panels = (
    (1, [up_genes_young, up_genes_aged], 'Upregulated Genes: Young vs Aged'),
    (2, [down_genes_young, down_genes_aged], 'Downregulated Genes: Young vs Aged'),
)
_diagrams = []
for _position, _gene_sets, _panel_title in _panels:
    plt.subplot(1, 2, _position)
    _diagram = venn2(_gene_sets, ('Young', 'Aged'))

    # Regions may be None when a subset is empty, hence the truthiness checks.
    for patch in _diagram.patches:
        if patch:
            patch.set_alpha(0.5)
            patch.set_edgecolor('black')
            patch.set_linewidth(1)
    for text in _diagram.set_labels:
        text.set_fontsize(12)
    for text in _diagram.subset_labels:
        if text:
            text.set_fontsize(10)

    plt.title(_panel_title, fontsize=14)
    _diagrams.append(_diagram)

v1, v2 = _diagrams

plt.tight_layout()
plt.savefig("venn_comparison_Young_vs_Aged.png", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
import scanpy as sc
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.sparse
import random
import os
import warnings
# Silence the warning categories that would otherwise flood the cell outputs.
for _ignored_category in (FutureWarning, UserWarning, RuntimeWarning):
    warnings.simplefilter("ignore", _ignored_category)

# Every relative path used below is resolved against the project directory.
os.chdir("P:/Tolulope/Manuscript/Yuan Analysis")

# Reload the integrated dataset from disk and display its summary.
adata = sc.read_h5ad('integratedssssss.h5ad')
adata

In [None]:
import pandas as pd
import numpy as np
import scanpy as sc
import random
import matplotlib.pyplot as plt
import seaborn as sns
from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats

# -------------------- Pseudobulk Preparation --------------------
N_PSEUDO_REPLICATES = 4   # number of pseudo-replicates each sample is split into
PSEUDO_REP_SEED = 0       # fixed seed so the random cell split (and the DE results) can be reproduced

# Subset to M2-like macrophages only
cell_subset = adata[adata.obs['cell type'] == 'M2-like macrophages']

# Private generator: reproducible shuffling without touching the global `random` state.
_split_rng = random.Random(PSEUDO_REP_SEED)

# Create pseudo-replicates
pbs = []
for sample in cell_subset.obs.Sample.unique():
    # Work on an explicit copy. Assigning `.X` on a *view* is applied to the
    # parent object by anndata, which would overwrite `adata.X` with raw counts
    # for these cells (the warning about it is silenced by the filters above).
    samp_cell_subset = cell_subset[cell_subset.obs['Sample'] == sample].copy()
    samp_cell_subset.X = samp_cell_subset.layers['counts']  # Use raw counts

    # Randomly partition the sample's cells into the pseudo-replicates.
    indices = list(samp_cell_subset.obs_names)
    _split_rng.shuffle(indices)
    indices = np.array_split(np.array(indices), N_PSEUDO_REPLICATES)

    for i, pseudo_rep in enumerate(indices):
        if len(pseudo_rep) == 0:
            # Fewer cells than replicates: skip rather than emit an all-zero sample.
            continue

        rep_cells = samp_cell_subset[pseudo_rep]
        # Sum counts over cells; force a (1, n_genes) ndarray whether X is sparse or dense.
        summed_counts = np.asarray(rep_cells.X.sum(axis=0)).reshape(1, -1)

        rep_adata = sc.AnnData(X=summed_counts, var=rep_cells.var[[]])
        rep_adata.obs_names = [sample + '_' + str(i)]
        rep_adata.obs['condition'] = samp_cell_subset.obs['condition'].iloc[0]
        rep_adata.obs['replicate'] = i
        pbs.append(rep_adata)

# Combine pseudobulk replicates (one row per sample / pseudo-replicate)
pb = sc.concat(pbs)
# Count table for PyDESeq2: rows are pseudobulk samples, columns are genes.
# The row index is set explicitly so counts and metadata are aligned by name
# rather than by position.
counts = pd.DataFrame(pb.X, index=pb.obs_names, columns=pb.var_names)

# Create DeseqDataSet object with pseudo-replicates
dds = DeseqDataSet(
    counts=counts,
    metadata=pb.obs,
    design_factors=['condition', 'replicate']
)

# Drop genes that are not detected in any pseudobulk sample
sc.pp.filter_genes(dds, min_cells=1)

# Perform DESeq2 analysis
dds.deseq2()

# Run the Wald test for the condition contrast. The condition labels are written
# with hyphens, matching how they are spelled everywhere else in this notebook.
stat_res = DeseqStats(dds, contrast=('condition', 'YOV-GFP', 'YOV-noGFP'))

# summary() prints the table and stores it on `stat_res.results_df`; it does not
# return it, so the DataFrame is read from the attribute.
stat_res.summary()
summary_output = stat_res.results_df
print(type(summary_output))
if isinstance(summary_output, pd.DataFrame):
    print(summary_output.head())  # Show the first few rows to inspect


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.sparse
import numpy as np
from matplotlib.patches import Patch

sc.settings.figdir = "FIGURE_4"

# Genes shown in this panel and the two conditions compared
# (order = left / right violin within each gene group).
genes_of_interest = ['Col1a1','Col1a2','Col3a1']
condition_order = ['YOV-noGFP', 'YOV-GFP']

# Keep only the genes that exist in .raw
raw_var_names = cell_subset.raw.var_names
available_genes = [gene for gene in genes_of_interest if gene in raw_var_names]
print(f"Available genes in the dataset: {available_genes}")

# Raise error if none are found
if not available_genes:
    raise ValueError("None of the genes of interest are present in the dataset.")

# Slice the requested gene columns BEFORE densifying, so the whole
# cells x genes matrix is never materialized just to read a few columns.
gene_columns = [raw_var_names.get_loc(gene) for gene in available_genes]
expr_matrix = cell_subset.raw.X[:, gene_columns]
if scipy.sparse.issparse(expr_matrix):
    expr_matrix = expr_matrix.toarray()

# NOTE(review): log1p is applied to whatever .raw holds; if .raw already stores
# log-normalized values this log-transforms twice - confirm what .raw contains.
normalized_expr_matrix = np.log1p(expr_matrix)

# Long-format table: one row per (cell, gene)
data = [
    pd.DataFrame({
        'Condition': cell_subset.obs['condition'].values,
        'Gene': gene,
        'Expression': normalized_expr_matrix[:, column],
    })
    for column, gene in enumerate(available_genes)
]
plot_data = pd.concat(data, ignore_index=True)

# Quick sanity output
print(plot_data.head())
print(f"Number of rows in plot_data: {len(plot_data)}")

# Raise error if DataFrame is empty
if plot_data.empty:
    raise ValueError("The plot_data DataFrame is empty. Check the gene extraction and subsetting steps.")

# DE results of the contrast currently held in `stat_res`, sorted by Wald statistic;
# only used below to look up the adjusted p-value of each gene.
de = stat_res.results_df
de = de.sort_values('stat', ascending=False)

# Both conditions share one colour; the hatch pattern tells them apart.
palette = {'YOV-noGFP': '#1f77b4', 'YOV-GFP': '#1f77b4'}

# Plot violin plot
plt.figure(figsize=(4, 6))
ax = sns.violinplot(
    x='Gene',
    y='Expression',
    hue='Condition',
    data=plot_data,
    palette=palette,
    inner='box',
    hue_order=condition_order
)

# Hatch every second collection, intended to mark the second hue level (YOV-GFP).
# NOTE(review): assumes ax.collections alternates noGFP/GFP - verify with the installed seaborn.
for i, artist in enumerate(ax.collections):
    if i % 2 == 1:
        artist.set_hatch('//')
        artist.set_edgecolor('black')  # make the hatch clearly visible

# Annotate each gene group with its significance symbol
for i, gene in enumerate(available_genes):
    gene_data = plot_data[plot_data['Gene'] == gene]

    # Adjusted p-value from the DE results; missing or NaN counts as not significant
    padj = de.loc[gene, 'padj'] if gene in de.index else 1.0
    padj = 1.0 if pd.isna(padj) else padj

    # Significance symbol
    symbol = '***' if padj < 0.001 else '**' if padj < 0.01 else '*' if padj < 0.05 else 'ns'

    # Highest expression value of this gene across conditions
    max_expr_per_gene = gene_data.groupby('Condition')['Expression'].max().max()

    # Centre the symbol on the gene group, slightly above its tallest value
    ax.text(i, max_expr_per_gene + 0.1, symbol, ha='center', va='bottom', fontsize=14)

# Axis settings
plt.ylim(0, plot_data['Expression'].max() * 1.25)
plt.title('Gene Expression Levels in M2-like macrophages')
plt.ylabel('Normalized Expression')
plt.xticks(rotation=45)

# Custom legend with hatch pattern
legend_elements = [
    Patch(facecolor='#1f77b4', edgecolor='black', label='YOV-noGFP'),
    Patch(facecolor='#1f77b4', edgecolor='black', hatch='//', label='YOV-GFP')
]
plt.legend(handles=legend_elements)

# Save and show plot
plt.tight_layout()
plt.savefig('Violin_YOV_GFP vs YOV_noGFP_M2_Down.png')
plt.show()


In [None]:
# Run the Wald test for the aged contrast on the already-fitted `dds`. The
# condition labels are written with hyphens, matching how they are spelled
# everywhere else in this notebook.
stat_res = DeseqStats(dds, contrast=('condition', 'AOV-GFP', 'AOV-noGFP'))

# summary() prints the table and stores it on `stat_res.results_df`; it does not
# return it, so the DataFrame is read from the attribute.
stat_res.summary()
summary_output = stat_res.results_df
print(type(summary_output))
if isinstance(summary_output, pd.DataFrame):
    print(summary_output.head())  # Show the first few rows to inspect


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.sparse
import numpy as np
from matplotlib.patches import Patch

sc.settings.figdir = "FIGURE_4"

# Genes shown in this panel and the two conditions compared
# (order = left / right violin within each gene group).
genes_of_interest = ['Gas6','Notch2','Igf1']
condition_order = ['AOV-noGFP', 'AOV-GFP']

# Keep only the genes that exist in .raw
raw_var_names = cell_subset.raw.var_names
available_genes = [gene for gene in genes_of_interest if gene in raw_var_names]
print(f"Available genes in the dataset: {available_genes}")

# Raise error if none are found
if not available_genes:
    raise ValueError("None of the genes of interest are present in the dataset.")

# Slice the requested gene columns BEFORE densifying, so the whole
# cells x genes matrix is never materialized just to read a few columns.
gene_columns = [raw_var_names.get_loc(gene) for gene in available_genes]
expr_matrix = cell_subset.raw.X[:, gene_columns]
if scipy.sparse.issparse(expr_matrix):
    expr_matrix = expr_matrix.toarray()

# NOTE(review): log1p is applied to whatever .raw holds; if .raw already stores
# log-normalized values this log-transforms twice - confirm what .raw contains.
normalized_expr_matrix = np.log1p(expr_matrix)

# Long-format table: one row per (cell, gene)
data = [
    pd.DataFrame({
        'Condition': cell_subset.obs['condition'].values,
        'Gene': gene,
        'Expression': normalized_expr_matrix[:, column],
    })
    for column, gene in enumerate(available_genes)
]
plot_data = pd.concat(data, ignore_index=True)

# Quick sanity output
print(plot_data.head())
print(f"Number of rows in plot_data: {len(plot_data)}")

# Raise error if DataFrame is empty
if plot_data.empty:
    raise ValueError("The plot_data DataFrame is empty. Check the gene extraction and subsetting steps.")

# DE results of the contrast currently held in `stat_res`, sorted by Wald statistic;
# only used below to look up the adjusted p-value of each gene.
de = stat_res.results_df
de = de.sort_values('stat', ascending=False)

# Both conditions share one colour; the hatch pattern tells them apart.
palette = {'AOV-noGFP': '#1f77b4', 'AOV-GFP': '#1f77b4'}

# Plot violin plot
plt.figure(figsize=(4, 6))
ax = sns.violinplot(
    x='Gene',
    y='Expression',
    hue='Condition',
    data=plot_data,
    palette=palette,
    inner='box',
    hue_order=condition_order
)

# Hatch every second collection, intended to mark the second hue level (AOV-GFP).
# NOTE(review): assumes ax.collections alternates noGFP/GFP - verify with the installed seaborn.
for i, artist in enumerate(ax.collections):
    if i % 2 == 1:
        artist.set_hatch('//')
        artist.set_edgecolor('black')  # make the hatch clearly visible

# Annotate each gene group with its significance symbol
for i, gene in enumerate(available_genes):
    gene_data = plot_data[plot_data['Gene'] == gene]

    # Adjusted p-value from the DE results; missing or NaN counts as not significant
    padj = de.loc[gene, 'padj'] if gene in de.index else 1.0
    padj = 1.0 if pd.isna(padj) else padj

    # Significance symbol
    symbol = '***' if padj < 0.001 else '**' if padj < 0.01 else '*' if padj < 0.05 else 'ns'

    # Highest expression value of this gene across conditions
    max_expr_per_gene = gene_data.groupby('Condition')['Expression'].max().max()

    # Centre the symbol on the gene group, slightly above its tallest value
    ax.text(i, max_expr_per_gene + 0.1, symbol, ha='center', va='bottom', fontsize=14)

# Axis settings
plt.ylim(0, plot_data['Expression'].max() * 1.25)
plt.title('Gene Expression Levels in M2-like macrophages')
plt.ylabel('Normalized Expression')
plt.xticks(rotation=45)

# Custom legend with hatch pattern
legend_elements = [
    Patch(facecolor='#1f77b4', edgecolor='black', label='AOV-noGFP'),
    Patch(facecolor='#1f77b4', edgecolor='black', hatch='//', label='AOV-GFP')
]
plt.legend(handles=legend_elements)

# Save and show plot
plt.tight_layout()
plt.savefig('Violin_AOV_GFP vs AOV_noGFP_M2_up.png')
plt.show()


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.sparse
import numpy as np
from matplotlib.patches import Patch

sc.settings.figdir = "FIGURE_4"

# Genes shown in this panel and the two conditions compared
# (order = left / right violin within each gene group).
genes_of_interest = ['Col1a1','Col1a2','Col3a1']
condition_order = ['AOV-noGFP', 'AOV-GFP']

# Keep only the genes that exist in .raw
raw_var_names = cell_subset.raw.var_names
available_genes = [gene for gene in genes_of_interest if gene in raw_var_names]
print(f"Available genes in the dataset: {available_genes}")

# Raise error if none are found
if not available_genes:
    raise ValueError("None of the genes of interest are present in the dataset.")

# Slice the requested gene columns BEFORE densifying, so the whole
# cells x genes matrix is never materialized just to read a few columns.
gene_columns = [raw_var_names.get_loc(gene) for gene in available_genes]
expr_matrix = cell_subset.raw.X[:, gene_columns]
if scipy.sparse.issparse(expr_matrix):
    expr_matrix = expr_matrix.toarray()

# NOTE(review): log1p is applied to whatever .raw holds; if .raw already stores
# log-normalized values this log-transforms twice - confirm what .raw contains.
normalized_expr_matrix = np.log1p(expr_matrix)

# Long-format table: one row per (cell, gene)
data = [
    pd.DataFrame({
        'Condition': cell_subset.obs['condition'].values,
        'Gene': gene,
        'Expression': normalized_expr_matrix[:, column],
    })
    for column, gene in enumerate(available_genes)
]
plot_data = pd.concat(data, ignore_index=True)

# Quick sanity output
print(plot_data.head())
print(f"Number of rows in plot_data: {len(plot_data)}")

# Raise error if DataFrame is empty
if plot_data.empty:
    raise ValueError("The plot_data DataFrame is empty. Check the gene extraction and subsetting steps.")

# DE results of the contrast currently held in `stat_res`, sorted by Wald statistic;
# only used below to look up the adjusted p-value of each gene.
de = stat_res.results_df
de = de.sort_values('stat', ascending=False)

# Both conditions share one colour; the hatch pattern tells them apart.
palette = {'AOV-noGFP': '#1f77b4', 'AOV-GFP': '#1f77b4'}

# Plot violin plot
plt.figure(figsize=(4, 6))
ax = sns.violinplot(
    x='Gene',
    y='Expression',
    hue='Condition',
    data=plot_data,
    palette=palette,
    inner='box',
    hue_order=condition_order
)

# Hatch every second collection, intended to mark the second hue level (AOV-GFP).
# NOTE(review): assumes ax.collections alternates noGFP/GFP - verify with the installed seaborn.
for i, artist in enumerate(ax.collections):
    if i % 2 == 1:
        artist.set_hatch('//')
        artist.set_edgecolor('black')  # make the hatch clearly visible

# Annotate each gene group with its significance symbol
for i, gene in enumerate(available_genes):
    gene_data = plot_data[plot_data['Gene'] == gene]

    # Adjusted p-value from the DE results; missing or NaN counts as not significant
    padj = de.loc[gene, 'padj'] if gene in de.index else 1.0
    padj = 1.0 if pd.isna(padj) else padj

    # Significance symbol
    symbol = '***' if padj < 0.001 else '**' if padj < 0.01 else '*' if padj < 0.05 else 'ns'

    # Highest expression value of this gene across conditions
    max_expr_per_gene = gene_data.groupby('Condition')['Expression'].max().max()

    # Centre the symbol on the gene group, slightly above its tallest value
    ax.text(i, max_expr_per_gene + 0.1, symbol, ha='center', va='bottom', fontsize=14)

# Axis settings
plt.ylim(0, plot_data['Expression'].max() * 1.25)
plt.title('Gene Expression Levels in M2-like macrophages')
plt.ylabel('Normalized Expression')
plt.xticks(rotation=45)

# Custom legend with hatch pattern
legend_elements = [
    Patch(facecolor='#1f77b4', edgecolor='black', label='AOV-noGFP'),
    Patch(facecolor='#1f77b4', edgecolor='black', hatch='//', label='AOV-GFP')
]
plt.legend(handles=legend_elements)

# Save and show plot
plt.tight_layout()
plt.savefig('Violin_AOV_GFP vs AOV_noGFP_M2_Down.png')
plt.show()


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.sparse
import numpy as np
from matplotlib.patches import Patch

sc.settings.figdir = "FIGURE_4"

# Genes shown in this panel and the two conditions compared
# (order = left / right violin within each gene group).
genes_of_interest = ['Eno1','Aldoa','Pkm']
condition_order = ['AOV-noGFP', 'AOV-GFP']

# Keep only the genes that exist in .raw
raw_var_names = cell_subset.raw.var_names
available_genes = [gene for gene in genes_of_interest if gene in raw_var_names]
print(f"Available genes in the dataset: {available_genes}")

# Raise error if none are found
if not available_genes:
    raise ValueError("None of the genes of interest are present in the dataset.")

# Slice the requested gene columns BEFORE densifying, so the whole
# cells x genes matrix is never materialized just to read a few columns.
gene_columns = [raw_var_names.get_loc(gene) for gene in available_genes]
expr_matrix = cell_subset.raw.X[:, gene_columns]
if scipy.sparse.issparse(expr_matrix):
    expr_matrix = expr_matrix.toarray()

# NOTE(review): log1p is applied to whatever .raw holds; if .raw already stores
# log-normalized values this log-transforms twice - confirm what .raw contains.
normalized_expr_matrix = np.log1p(expr_matrix)

# Long-format table: one row per (cell, gene)
data = [
    pd.DataFrame({
        'Condition': cell_subset.obs['condition'].values,
        'Gene': gene,
        'Expression': normalized_expr_matrix[:, column],
    })
    for column, gene in enumerate(available_genes)
]
plot_data = pd.concat(data, ignore_index=True)

# Quick sanity output
print(plot_data.head())
print(f"Number of rows in plot_data: {len(plot_data)}")

# Raise error if DataFrame is empty
if plot_data.empty:
    raise ValueError("The plot_data DataFrame is empty. Check the gene extraction and subsetting steps.")

# DE results of the contrast currently held in `stat_res`, sorted by Wald statistic;
# only used below to look up the adjusted p-value of each gene.
de = stat_res.results_df
de = de.sort_values('stat', ascending=False)

# Both conditions share one colour; the hatch pattern tells them apart.
palette = {'AOV-noGFP': '#1f77b4', 'AOV-GFP': '#1f77b4'}

# Plot violin plot
plt.figure(figsize=(4, 6))
ax = sns.violinplot(
    x='Gene',
    y='Expression',
    hue='Condition',
    data=plot_data,
    palette=palette,
    inner='box',
    hue_order=condition_order
)

# Hatch every second collection, intended to mark the second hue level (AOV-GFP).
# NOTE(review): assumes ax.collections alternates noGFP/GFP - verify with the installed seaborn.
for i, artist in enumerate(ax.collections):
    if i % 2 == 1:
        artist.set_hatch('//')
        artist.set_edgecolor('black')  # make the hatch clearly visible

# Annotate each gene group with its significance symbol
for i, gene in enumerate(available_genes):
    gene_data = plot_data[plot_data['Gene'] == gene]

    # Adjusted p-value from the DE results; missing or NaN counts as not significant
    padj = de.loc[gene, 'padj'] if gene in de.index else 1.0
    padj = 1.0 if pd.isna(padj) else padj

    # Significance symbol
    symbol = '***' if padj < 0.001 else '**' if padj < 0.01 else '*' if padj < 0.05 else 'ns'

    # Highest expression value of this gene across conditions
    max_expr_per_gene = gene_data.groupby('Condition')['Expression'].max().max()

    # Centre the symbol on the gene group, slightly above its tallest value
    ax.text(i, max_expr_per_gene + 0.1, symbol, ha='center', va='bottom', fontsize=14)

# Axis settings
plt.ylim(0, plot_data['Expression'].max() * 1.25)
plt.title('Gene Expression Levels in M2-like macrophages')
plt.ylabel('Normalized Expression')
plt.xticks(rotation=45)

# Custom legend with hatch pattern
legend_elements = [
    Patch(facecolor='#1f77b4', edgecolor='black', label='AOV-noGFP'),
    Patch(facecolor='#1f77b4', edgecolor='black', hatch='//', label='AOV-GFP')
]
plt.legend(handles=legend_elements)

# Save and show plot
plt.tight_layout()
plt.savefig('Violin_AOV_GFP vs AOV_noGFP_M2_Down1.png')
plt.show()


In [None]:
# Supplementary Fig. 4a,b

In [None]:
import os
import scanpy as sc
import pandas as pd

# Resolve all paths against the project directory.
os.chdir("P:/Tolulope/Manuscript/Yuan Analysis")

# Reload the integrated dataset.
adata = sc.read_h5ad("integratedssssss.h5ad")

# Keep M2-like macrophages from the two young-OV conditions (YOV-GFP / YOV-noGFP).
_is_m2_macrophage = adata.obs['cell type'] == 'M2-like macrophages'
_in_comparison = adata.obs['condition'].isin(['YOV-GFP', 'YOV-noGFP'])
subset = adata[_is_m2_macrophage & _in_comparison].copy()

# Export the 'counts' layer when it exists; otherwise whatever is in .X is exported.
if 'counts' in subset.layers:
    subset.X = subset.layers['counts']

# Make cell names unique across samples by appending the sample name to each barcode.
subset.obs_names = [
    f"{cell}-{sample}"
    for cell, sample in zip(subset.obs_names, subset.obs['Sample'])
]

# Expression matrix written as genes x cells (built cells x genes, then transposed).
_cell_by_gene = subset.X.toarray() if hasattr(subset.X, "toarray") else subset.X
expr_matrix = pd.DataFrame(
    _cell_by_gene,
    index=subset.obs_names,
    columns=subset.var_names,
).T
expr_matrix.to_csv("expr_matrix.csv")

# Per-cell metadata, indexed by the full cell names, with no index header.
cell_metadata = subset.obs.copy()
cell_metadata.index.name = None
cell_metadata.to_csv("cell_metadata.csv")

# Gene annotation: index column 'gene_id' plus a 'gene_short_name' column.
gene_annotation = pd.DataFrame(index=subset.var_names)
gene_annotation['gene_short_name'] = subset.var_names
gene_annotation.index.name = 'gene_id'
gene_annotation.to_csv("gene_annotation.csv")

print("✅ Saved: expr_matrix.csv, cell_metadata.csv, gene_annotation.csv")

# Number of exported cells per condition.
cell_counts = subset.obs['condition'].value_counts()
print(cell_counts)


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# ---------- ECDF Function ----------
def ECDF_standard(data: np.ndarray, ax=None, **kwargs):
    """Compute and plot the empirical cumulative distribution function (ECDF).

    Args:
        data (np.ndarray): Numerical values (any array-like). Input with more
            than one dimension is flattened, so every element counts as one
            observation.
        ax (matplotlib.axes._axes.Axes, optional): Axis to plot on. When None,
            the curve is drawn through ``plt.plot`` on the current axes.
        **kwargs: Additional plotting arguments forwarded to ``plot``
            (e.g., color, label, linestyle).

    Returns:
        x (np.ndarray): Sorted data.
        y (np.ndarray): ECDF values (proportion of samples <= x).
    """
    # Flatten first so that the sample count below is the number of
    # observations, not the length of the first axis.
    x = np.sort(np.asarray(data).ravel())
    y = np.arange(1, x.size + 1) / x.size

    if ax is None:
        plt.plot(x, y, **kwargs)
    else:
        ax.plot(x, y, **kwargs)

    return x, y

# ---------- ECDF of pseudotime per sample ----------
# Resolve all paths against the project directory.
os.chdir("P:/Tolulope/Manuscript/Yuan Analysis")

# Pseudotime table; columns read below: "Sample" and "Pseudotime".
df = pd.read_csv("pseudotime_metadata_for_python.csv", index_col=0)

# New figure for the two curves.
plt.figure(figsize=(5, 4))

# Samples and their parallel lists of line styles.
samples = ["YOV_noGFP", "YOV_GFP"]
colors = ["#1f77b4", "#2ca02c"]
linestyles = ["-", "-"]
linewidths = [2, 2]

# One ECDF curve per sample, drawn on the current axes.
for position, sample in enumerate(samples):
    data = df.loc[df["Sample"] == sample, "Pseudotime"].dropna().values
    ECDF_standard(
        data,
        color=colors[position],
        linestyle=linestyles[position],
        linewidth=linewidths[position],
        label=sample,
    )

# Labels and legend.
plt.xlabel("Pseudotime", fontsize=10)
plt.ylabel("Cumulative Probability", fontsize=10)
plt.legend(title="Condition", title_fontsize=10, fontsize=9, loc="best")
plt.tight_layout()

# Write the figure to disk, then display it.
plt.savefig("ECDF_Pseudotime_YOV_GFP vs YOV_noGFP_M2.png", dpi=300)
plt.show()


In [None]:
import numpy as np
import pandas as pd
from scipy import stats

# --- Pseudotime values of each condition (missing values dropped) ---
_pseudotime = df["Pseudotime"]
noGFP = _pseudotime[df["Sample"] == "YOV_noGFP"].dropna()
GFP = _pseudotime[df["Sample"] == "YOV_GFP"].dropna()

# --- Medians, their ratio, and the ratio expressed as a percent change ---
median_noGFP, median_GFP = np.median(noGFP), np.median(GFP)
delta_ratio = median_GFP / median_noGFP
percent_shift = (delta_ratio - 1) * 100

# --- Non-parametric comparison of the two distributions ---
stat, p_value = stats.ranksums(GFP, noGFP)

# --- Report ---
for _line in (
    f"Median YOV_noGFP Pseudotime: {median_noGFP:.2f}",
    f"Median YOV_GFP Pseudotime: {median_GFP:.2f}",
    f"Δ YOV_GFP / YOV_noGFP = {delta_ratio:.2f} (Ratio of Medians)",
    f"Percent Shift = {percent_shift:.1f}%",
    f"Wilcoxon rank-sum test: statistic = {stat:.2f}, p = {p_value:.4e}",
):
    print(_line)


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import gaussian_kde
import seaborn as sns
from matplotlib.patches import Patch

# Pseudotime table; columns read below: "Sample" and "Pseudotime".
df = pd.read_csv("pseudotime_metadata_for_python.csv", index_col=0)

# Plain white style with the small "paper" font scale.
sns.set(style="white", context="paper")

fig, ax = plt.subplots(figsize=(5, 4))

# Colour per sample; the dict order is also the drawing order.
colors = {"YOV_noGFP": "#1f77b4", "YOV_GFP": "#4daf4a"}

# One filled Gaussian KDE curve per sample, evaluated from 0 to that sample's maximum.
for sample in colors:
    data = df.loc[df["Sample"] == sample, "Pseudotime"].dropna()
    kde = gaussian_kde(data)
    x_grid = np.linspace(0, data.max(), 200)
    density = kde(x_grid)
    ax.fill_between(x_grid, density, alpha=0.4, color=colors[sample])
    ax.plot(x_grid, density, color=colors[sample], linewidth=1.5)

# Both axes start at zero.
ax.set_xlim(left=0)
ax.set_ylim(bottom=0)

# Labels, title and tick size.
ax.set_xlabel("Pseudotime", fontsize=10)
ax.set_ylabel("Density", fontsize=10)
ax.set_title("KDE of Pseudotime", fontsize=11)
ax.tick_params(axis='both', labelsize=9)

# Frameless legend placed to the right of the axes.
legend_elements = [
    Patch(facecolor=colors[name], label=name)
    for name in ("YOV_noGFP", "YOV_GFP")
]
ax.legend(
    handles=legend_elements,
    loc="center left",
    bbox_to_anchor=(1.02, 0.5),
    frameon=False,
    fontsize=9,
    borderpad=0.3
)

# Trim the spines, leave room for the legend, then save and display.
sns.despine(trim=True)
plt.tight_layout(rect=[0, 0, 0.85, 1])
plt.savefig("KDE_Pseudotime_AdjustedFonts_YOV_GFP vs YOV_noGFP_M2.png", dpi=600, bbox_inches='tight')
plt.show()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import ranksums
import pandas as pd

# --- Prepare data ---
# Keep only the two groups being compared and pin their left-to-right order.
group_order = ["YOV_noGFP", "YOV_GFP"]
plot_df = df[df["Sample"].isin(group_order)].copy()
plot_df["Sample"] = pd.Categorical(plot_df["Sample"], categories=group_order, ordered=True)

# --- Statistical test ---
# Wilcoxon rank-sum test on the non-missing pseudotime values (GFP vs noGFP).
is_noGFP = plot_df["Sample"] == "YOV_noGFP"
is_GFP = plot_df["Sample"] == "YOV_GFP"
noGFP = plot_df.loc[is_noGFP, "Pseudotime"].dropna()
GFP = plot_df.loc[is_GFP, "Pseudotime"].dropna()
stat, p_value = ranksums(GFP, noGFP)

# --- Significance marker ---
# The first cutoff the p-value falls under wins; otherwise the marker stays "n.s."
significance = "n.s."
for cutoff, marker in ((0.001, r'$\bf{***}$'), (0.01, r'$\bf{**}$'), (0.05, r'$\bf{*}$')):
    if p_value < cutoff:
        significance = marker
        break

# --- Plot ---
sns.set(style="white", context="talk")

# Narrow portrait panel
fig, ax = plt.subplots(figsize=(3, 5))

# Arguments shared by the box layer and the point layer
layer_kwargs = dict(data=plot_df, x="Sample", y="Pseudotime", order=group_order, ax=ax)

# Boxplot without outlier markers
sns.boxplot(
    palette={"YOV_noGFP": "#1f77b4", "YOV_GFP": "#2ca02c"},
    linewidth=1.5, width=0.5, showcaps=True, showfliers=False,
    **layer_kwargs,
)

# Individual cells as jittered, semi-transparent points
sns.stripplot(color="black", alpha=0.4, jitter=True, size=3, **layer_kwargs)

# Put the marker just above the largest plotted pseudotime value
y_max_star = plot_df["Pseudotime"].max() + 0.02

# x=1 is the position of the second box (YOV_GFP)
ax.text(1, y_max_star, significance, ha='center', va='bottom', fontsize=12, fontweight='bold')

# Axis labels: no x label, the category tick labels are enough
ax.set_xlabel("")
ax.set_ylabel("Pseudotime", fontsize=12)

# Full black frame around the axes
for spine in ax.spines.values():
    spine.set_color('black')
    spine.set_linewidth(1.5)
    spine.set_visible(True)

# Same tick-label size on both axes
ax.tick_params(axis='both', labelsize=10)

plt.tight_layout()
plt.savefig("Pseudotime_Boxplot_SignificanceOnly_YOV_GFP vs YOV_noGFP_M2.png", dpi=300)
plt.show()


In [None]:
#FIG 4J

In [None]:
import os
import scanpy as sc
import pandas as pd

# Set working directory
os.chdir("P:/Tolulope/Manuscript/Yuan Analysis")

# Load the integrated AnnData object
adata = sc.read_h5ad("integratedssssss.h5ad")

# Subset to M2-like macrophages from the AOV-GFP and AOV-noGFP conditions.
# .copy() turns the view into an independent object so it can be modified below.
subset = adata[
    (adata.obs['cell type'] == 'M2-like macrophages') &
    (adata.obs['condition'].isin(['AOV-GFP', 'AOV-noGFP']))
].copy()

# Export raw counts when a 'counts' layer exists. Otherwise the values that are
# currently in .X are exported, and that fallback is reported instead of
# happening silently.
if 'counts' in subset.layers:
    subset.X = subset.layers['counts']
else:
    print("No 'counts' layer found; exporting the values currently stored in .X")

# Append the sample name to each cell barcode ("<barcode>-<Sample>")
subset.obs_names = [f"{cell}-{sample}" for cell, sample in zip(subset.obs_names, subset.obs['Sample'])]

# Build the expression matrix; densify first if .X is a sparse matrix
dense_x = subset.X.toarray() if hasattr(subset.X, "toarray") else subset.X
expr_matrix = pd.DataFrame(
    dense_x,
    index=subset.obs_names,
    columns=subset.var_names
).T  # Transpose: genes as rows, cells as columns

expr_matrix.to_csv("expr_matrix.csv")

# Save cell metadata (row index = full cell names, written without an index header)
cell_metadata = subset.obs.copy()
cell_metadata.index.name = None
cell_metadata.to_csv("cell_metadata.csv")

# ---- Save Gene Annotation ----
# One row per gene of `subset`, indexed by gene_id with a matching gene_short_name column
gene_annotation = pd.DataFrame(index=subset.var_names)
gene_annotation['gene_short_name'] = subset.var_names
gene_annotation.index.name = 'gene_id'
gene_annotation.to_csv("gene_annotation.csv")

print("✅ Saved: expr_matrix.csv, cell_metadata.csv, gene_annotation.csv")
# Count the number of exported M2-like macrophages per condition
cell_counts = subset.obs['condition'].value_counts()
print(cell_counts)


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# ---------- ECDF Function ----------
def ECDF_standard(data: np.ndarray, ax=None, **kwargs):
    """Compute and plot the empirical cumulative distribution function (ECDF).

    Missing values (NaN) in floating-point input are dropped before the ECDF
    is computed, so the curve always ends at 1.

    Args:
        data (np.ndarray): 1D array-like of numerical values.
        ax (matplotlib.axes._axes.Axes, optional): Axis to plot on. When None,
            the curve is drawn with ``plt.plot``. Defaults to None.
        **kwargs: Additional plotting arguments (e.g., color, label, linestyle),
            forwarded unchanged to the ``plot`` call.

    Returns:
        x (np.array): Sorted data (NaNs removed).
        y (np.array): ECDF values (proportion of samples ≤ x).
    """
    values = np.asarray(data)
    # NaNs would sort to the end and inflate the denominator; only floating
    # dtypes can contain them, so other dtypes are left untouched.
    if np.issubdtype(values.dtype, np.floating):
        values = values[~np.isnan(values)]

    x = np.sort(values)
    y = np.arange(1, len(x) + 1) / len(x)

    if ax is None:
        plt.plot(x, y, **kwargs)
    else:
        ax.plot(x, y, **kwargs)

    return x, y

# ---------- Main Plotting Code ----------
# Switch to the project folder so the relative file names below resolve
os.chdir("P:/Tolulope/Manuscript/Yuan Analysis")

# Per-cell pseudotime table ("Sample" and "Pseudotime" columns are used)
df = pd.read_csv("pseudotime_metadata_for_python.csv", index_col=0)

# New 5x4 inch figure; the curves below are drawn on it through pyplot
plt.figure(figsize=(5, 4))

# Parallel lists: one entry per sample, in plotting order
samples = ["AOV_noGFP", "AOV_GFP"]
colors = ["#1f77b4", "#2ca02c"]
linestyles = ["-", "-"]
linewidths = [2, 2]

# One ECDF curve per sample, using the style at the same list position
for idx, sample in enumerate(samples):
    in_sample = df["Sample"] == sample
    data = df.loc[in_sample, "Pseudotime"].dropna().values
    ECDF_standard(
        data,
        color=colors[idx],
        linestyle=linestyles[idx],
        linewidth=linewidths[idx],
        label=sample,
    )

# Axis labels and legend
plt.xlabel("Pseudotime", fontsize=10)
plt.ylabel("Cumulative Probability", fontsize=10)
plt.legend(title="Condition", title_fontsize=10, fontsize=9, loc="best")
plt.tight_layout()

# Write the figure to disk, then display it
plt.savefig("ECDF_Pseudotime_AOV_GFP vs AOV_noGFP_M2.png", dpi=300)
plt.show()


In [None]:
import numpy as np
import pandas as pd
from scipy import stats

# --- Non-missing pseudotime values of each condition ---
is_noGFP = df["Sample"] == "AOV_noGFP"
is_GFP = df["Sample"] == "AOV_GFP"
noGFP = df.loc[is_noGFP, "Pseudotime"].dropna()
GFP = df.loc[is_GFP, "Pseudotime"].dropna()

# --- Medians per condition ---
median_noGFP = np.median(noGFP)
median_GFP = np.median(GFP)

# --- GFP median relative to noGFP median, as a ratio and as a percent change ---
delta_ratio = median_GFP / median_noGFP
percent_shift = (delta_ratio - 1) * 100

# --- Non-parametric comparison of the two distributions ---
stat, p_value = stats.ranksums(GFP, noGFP)

# --- Report ---
summary_lines = [
    f"Median AOV_noGFP Pseudotime: {median_noGFP:.2f}",
    f"Median AOV_GFP Pseudotime: {median_GFP:.2f}",
    f"Δ AOV_GFP / AOV_noGFP = {delta_ratio:.2f} (Ratio of Medians)",
    f"Percent Shift = {percent_shift:.1f}%",
    f"Wilcoxon rank-sum test: statistic = {stat:.2f}, p = {p_value:.4e}",
]
for line in summary_lines:
    print(line)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import ranksums
import pandas as pd

# --- Prepare data ---
# Keep only the two groups being compared and pin their left-to-right order.
group_order = ["AOV_noGFP", "AOV_GFP"]
plot_df = df[df["Sample"].isin(group_order)].copy()
plot_df["Sample"] = pd.Categorical(plot_df["Sample"], categories=group_order, ordered=True)

# --- Statistical test ---
# Wilcoxon rank-sum test on the non-missing pseudotime values (GFP vs noGFP).
is_noGFP = plot_df["Sample"] == "AOV_noGFP"
is_GFP = plot_df["Sample"] == "AOV_GFP"
noGFP = plot_df.loc[is_noGFP, "Pseudotime"].dropna()
GFP = plot_df.loc[is_GFP, "Pseudotime"].dropna()
stat, p_value = ranksums(GFP, noGFP)

# --- Significance marker ---
# The first cutoff the p-value falls under wins; otherwise the marker stays "n.s."
significance = "n.s."
for cutoff, marker in ((0.001, r'$\bf{***}$'), (0.01, r'$\bf{**}$'), (0.05, r'$\bf{*}$')):
    if p_value < cutoff:
        significance = marker
        break

# --- Plot ---
sns.set(style="white", context="talk")

# Narrow portrait panel
fig, ax = plt.subplots(figsize=(3, 5))

# Arguments shared by the box layer and the point layer
layer_kwargs = dict(data=plot_df, x="Sample", y="Pseudotime", order=group_order, ax=ax)

# Boxplot without outlier markers
sns.boxplot(
    palette={"AOV_noGFP": "#1f77b4", "AOV_GFP": "#2ca02c"},
    linewidth=1.5, width=0.5, showcaps=True, showfliers=False,
    **layer_kwargs,
)

# Individual cells as jittered, semi-transparent points
sns.stripplot(color="black", alpha=0.4, jitter=True, size=3, **layer_kwargs)

# Put the marker just above the largest plotted pseudotime value
y_max_star = plot_df["Pseudotime"].max() + 0.02

# x=1 is the position of the second box (AOV_GFP)
ax.text(1, y_max_star, significance, ha='center', va='bottom', fontsize=12, fontweight='bold')

# Axis labels: no x label, the category tick labels are enough
ax.set_xlabel("")
ax.set_ylabel("Pseudotime", fontsize=12)

# Full black frame around the axes
for spine in ax.spines.values():
    spine.set_color('black')
    spine.set_linewidth(1.5)
    spine.set_visible(True)

# Same tick-label size on both axes
ax.tick_params(axis='both', labelsize=10)

plt.tight_layout()
plt.savefig("Pseudotime_Boxplot_SignificanceOnly_AOV_GFP vs AOV_noGFP_M2.png", dpi=300)
plt.show()


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.nonparametric.smoothers_lowess import lowess
from scipy.stats import ttest_ind
import os
# ---------- Load inputs ----------
# Switch to the project folder so the relative CSV names below resolve
os.chdir("P:/Tolulope/Manuscript/Yuan Analysis")

# Genes whose expression is followed along pseudotime in this cell
genes = ["Folr2", "Mrc1", "Il10"]

# Per-cell metadata; the "Pseudotime" and "Sample" columns are used below
umap_df = pd.read_csv(
    "pseudotime_metadata_for_python.csv",
    index_col=0,
)

# Expression table; rows are looked up by gene name further down
expr_matrix = pd.read_csv(
    "normalized_expr_matrix_for_python.csv",
    index_col=0,
)

# Look up the expression values of a single gene
def get_expr(gene):
    """Return the values of the ``expr_matrix`` row labelled ``gene``.

    Args:
        gene: Row label to look up in ``expr_matrix.index``.

    Returns:
        The ``.values`` of ``expr_matrix.loc[gene]``.

    Raises:
        ValueError: If ``gene`` is not present in ``expr_matrix.index``.
    """
    if gene not in expr_matrix.index:
        raise ValueError(f"Gene {gene} not found in the expression matrix.")
    return expr_matrix.loc[gene].values

pseudotime = umap_df["Pseudotime"]
# Tertile cut points of pseudotime: upper bounds of the "early" and "mid" bins
early_max, mid_max = np.quantile(pseudotime, [1/3, 2/3])

# Map a pseudotime value to its tertile label
def assign_bin(pt):
    """Return "early", "mid" or "late" for a single pseudotime value.

    Values up to ``early_max`` are "early", values up to ``mid_max`` are
    "mid", and everything else is "late".
    """
    if pt <= early_max:
        return "early"
    if pt <= mid_max:
        return "mid"
    return "late"

# Store the bin label of every cell alongside the metadata
umap_df["bin"] = pseudotime.apply(assign_bin)

# Build dataframe for plotting and testing: one row per (gene, cell) with the
# cell's expression, pseudotime, condition and pseudotime bin.
# NOTE(review): expression values are matched to cells by position, so this
# assumes the columns of expr_matrix are in the same order as the rows of
# umap_df — confirm against the export step.
records = []
for gene in genes:
    expr = get_expr(gene)
    for cond in ["AOV_noGFP", "AOV_GFP"]:
        mask = umap_df["Sample"] == cond
        # Slice the expression vector once per gene/condition rather than
        # once per cell (the per-cell re-slicing was quadratic in cell count).
        expr_cond = expr[mask.values]
        for value, pt in zip(expr_cond, pseudotime[mask]):
            records.append({
                "gene": gene,
                "expression": value,
                "pseudotime": pt,
                "condition": cond,
                "bin": assign_bin(pt)
            })
df = pd.DataFrame(records)

# Compare GFP vs noGFP expression for every gene within every pseudotime bin
results = []
for gene in genes:
    gene_rows = df[df["gene"] == gene]
    for b in ["early", "mid", "late"]:
        group = gene_rows[gene_rows["bin"] == b]
        gfp = group.loc[group["condition"] == "AOV_GFP", "expression"]
        nogfp = group.loc[group["condition"] == "AOV_noGFP", "expression"]
        # Default to NaN; only test when both groups have at least two cells
        pval = np.nan
        if len(gfp) > 1 and len(nogfp) > 1:
            # equal_var=False selects Welch's t-test
            stat, pval = ttest_ind(gfp, nogfp, equal_var=False)
        results.append({"gene": gene, "bin": b, "p_value": pval})

# One row per gene/bin combination, written next to the figures
pval_df = pd.DataFrame(results)
pval_df.to_csv("pseudotime_binwise_pvalues3.csv", index=False)

# Plotting: LOWESS-smoothed expression of each gene along pseudotime.
# Colour encodes the gene; line style encodes the condition
# (solid = AOV_noGFP, dashed = AOV_GFP).
import matplotlib.ticker as ticker

plt.figure(figsize=(5, 2.5))
ax = plt.gca()

palette = sns.color_palette("Set1", len(genes))
gene_colors = dict(zip(genes, palette))

for gene in genes:
    df_gene = df[df["gene"] == gene]
    color = gene_colors[gene]
    for cond in ["AOV_noGFP", "AOV_GFP"]:
        sub = df_gene[df_gene["condition"] == cond]
        sm = lowess(sub["expression"], sub["pseudotime"], frac=0.3)
        linestyle = "-" if cond == "AOV_noGFP" else "--"
        # The label has to test the condition name used in this loop. Comparing
        # against a name that never occurs labels every curve "GFP", and the
        # de-duplication by label below then drops the noGFP curves.
        label = f"{gene} ({'noGFP' if cond == 'AOV_noGFP' else 'GFP'})"
        ax.plot(sm[:, 0], sm[:, 1], color=color, linestyle=linestyle, linewidth=1, label=label)

# Dotted vertical lines at the early/mid and mid/late bin boundaries
ax.axvline(early_max, color="gray", linestyle=":", linewidth=0.5)
ax.axvline(mid_max, color="gray", linestyle=":", linewidth=0.5)

# Axis settings
ax.set_xlabel("Pseudotime", fontsize=9)
ax.set_ylabel("Normalized expression", fontsize=9)

# Move spines to (0,0) and hide the top/right ones
ax.spines['left'].set_position(('data', 0))
ax.spines['bottom'].set_position(('data', 0))
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')

# Thinner spine lines
ax.spines['left'].set_linewidth(0.5)
ax.spines['bottom'].set_linewidth(0.5)

ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')

# Thinner tick marks (major and minor)
ax.tick_params(axis='both', which='major', width=0.5)
ax.tick_params(axis='both', which='minor', width=0.5)

# Axis limits: small padding around the pseudotime range, never below zero
x_min, x_max = pseudotime.min(), pseudotime.max()
ax.set_xlim(left=max(0, x_min - 0.1), right=x_max + 0.1)
y_min = df["expression"].min()
ax.set_ylim(bottom=max(0, y_min - 0.1))

# Set x-axis ticks to whole numbers
ax.xaxis.set_major_locator(ticker.MaxNLocator(integer=True))

# Legend: one line sample per gene/condition, de-duplicated by label and
# placed outside the axes on the right
handles, labels = ax.get_legend_handles_labels()
by_label = dict(zip(labels, handles))
legend = ax.legend(
    by_label.values(),
    by_label.keys(),
    fontsize=7,
    title="Gene (Condition)",
    title_fontsize=8,
    bbox_to_anchor=(1.02, 1),
    loc='upper left',
    frameon=True,
    borderaxespad=0.
)
legend.get_frame().set_edgecolor('black')
legend.get_frame().set_linewidth(0.5)

# Smaller tick-label font
ax.tick_params(axis='both', which='major', labelsize=8)

# Trim spines, leave room on the right for the legend, then save and show
sns.despine(trim=True)
plt.tight_layout(rect=[0, 0, 0.78, 1])
plt.savefig("Pseudotime_gene_exp_with_pvals.png", dpi=300)
plt.show()
