In [None]:
import pandas as pd
import numpy as np
import scanpy as sc
import anndata as ad
import os
import ast
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

In [None]:
# Locations used by the rest of the notebook (all relative to the notebook folder):
#   analysis_dir - root of the analysis tree
#   save_dir     - output folder for the dot plots
#   stats_dir    - Stats folder of the DEG / regulon results; note the trailing '/'
analysis_dir = '../..'
save_dir = analysis_dir + "/Results/Results_no_ptprc_adgre1/Dot_Plots"
stats_dir = analysis_dir + (
    "/Results/Results_no_ptprc_adgre1"
    "/Cluster_Analysis_avg_link_gene_cell_corr"
    "/DEG_p005"
    "/old_AUC_diff_25_cutoff"
    "/DEG_TargetGenes_Sig_Regulons_fc15_p005"
    "/Stats/"
)


In [None]:
# Read the filtered basal AnnData object from Processed_Data and show its summary.
adata_basal = sc.read_h5ad(
    analysis_dir + '/Processed_Data/combined_data_basal_5ht6ho_without_PTPRC_Adgre1_filtered.h5ad'
)
adata_basal

In [None]:
# Attach the cluster-annotated metadata to the AnnData object.
#
# Assigning a DataFrame to `.obs` also replaces `obs_names` with the frame's index,
# so the rows must first be put in the AnnData's own cell order. Filtering with
# `isin` alone keeps the CSV row order, which would silently pair annotations with
# the wrong rows of the expression matrix whenever the two orders differ.
metadata_with_clust = pd.read_csv(f'{analysis_dir}/Processed_Data/meta_data_with_clust.csv', index_col=0)

# Fail loudly, naming the problem, if any cell of the AnnData has no metadata row.
missing_cells = adata_basal.obs_names.difference(metadata_with_clust.index)
if len(missing_cells) > 0:
    raise ValueError(
        f"{len(missing_cells)} cells of adata_basal are missing from meta_data_with_clust.csv "
        f"(first few: {list(missing_cells[:5])})"
    )

# Keep only the cells present in adata_basal, in adata_basal's order.
metadata_with_clust = metadata_with_clust.loc[adata_basal.obs_names]
adata_basal.obs = metadata_with_clust
adata_basal

In [None]:
# Load the U1 / 5Ht statistics table (stats_dir already ends with '/') and display it.
u1_5ht_stats = pd.read_csv(stats_dir + '5ht/U1_5Ht.csv')
u1_5ht_stats

In [None]:
# # DEGs

# # U1_wt

# Fig3B
# Dot plots of the stem-cell DEGs across the clusters of the 5Ht and 6Ho samples.
# One PDF (plus a PNG copy) is written per gene ordering listed in `sorts`:
#   'Fraction' - genes ordered by the fraction of U1_wt (5Ht) cells with expression > 0
#   'Mean'     - genes ordered by the U1_wt mean expression after min-max scaling each
#                gene across the 5Ht clusters S1, S2, S3 and U1_wt
sorts = ['Fraction', 'Mean']

genes = ['Zeb2', 'Nrp1', 'Zfp36', 'Ednrb', 'Cited2', 'Vsir', 'Gsk3b', 'Rbpj', 'Lama5', 'Setd2',
         'Ufl1', 'Coro1c', 'Tcof1', 'Fgfr2', 'Ap2a2', 'Ncoa3', 'Mapk3', 'Smo']

# Upper limit of the colour scale. Fixed by hand (instead of the data-driven
# global_max computed below) to match the scale of the collagen DEG figure.
deg_vmax = 2.3157187

# Create the output directory BEFORE any PdfPages object is opened inside it;
# depending on the matplotlib version, PdfPages may open the file immediately.
os.makedirs(save_dir, exist_ok=True)

# Apply scanpy's figure defaults once, before any figure is created, so that every
# figure produced by this cell is built under the same rcParams.
sc.set_figure_params(scanpy=True, fontsize=20)

# Short cluster label: second piece of `sub_info` when split on 'l_', with 'U1'
# renamed to 'U1_wt'. This does not depend on the sort mode, so it is done once.
curr_metadata = adata_basal.obs.copy()
curr_sub_info = [clust.split('l_')[1] for clust in curr_metadata['sub_info']]
curr_sub_info = ['U1_wt' if c == 'U1' else c for c in curr_sub_info]
curr_metadata['curr_sub_info'] = curr_sub_info
adata_basal.obs = curr_metadata

# Filter for 5ht and 6ho
adata_5ht_basal = adata_basal[adata_basal.obs['orig.ident'] == '5Ht']
adata_6ho_basal = adata_basal[adata_basal.obs['orig.ident'] == '6Ho']

# Per-cluster views of the 5Ht sample; U1_wt is deliberately the last entry.
adata_s1 = adata_5ht_basal[adata_5ht_basal.obs['curr_sub_info'] == 'S1']
adata_s2 = adata_5ht_basal[adata_5ht_basal.obs['curr_sub_info'] == 'S2']
adata_s3 = adata_5ht_basal[adata_5ht_basal.obs['curr_sub_info'] == 'S3']
adata_u1 = adata_5ht_basal[adata_5ht_basal.obs['curr_sub_info'] == 'U1_wt']
adatas = [adata_s1, adata_s2, adata_s3, adata_u1]

# Per-cluster mean expression in each sample. The overall min/max do not depend on
# the gene order, so they are computed once from `genes`.
# 1) 5Ht means
df5 = adata_5ht_basal[:, genes].to_df()
df5['curr_sub_info'] = adata_5ht_basal.obs['curr_sub_info'].values
m5 = df5.groupby('curr_sub_info').mean()

# 2) 6Ho means
df6 = adata_6ho_basal[:, genes].to_df()
df6['curr_sub_info'] = adata_6ho_basal.obs['curr_sub_info'].values
m6 = df6.groupby('curr_sub_info').mean()

# 3) global min/max (global_max is kept for reference; the plots use deg_vmax)
global_min = min(m5.values.min(), m6.values.min())
global_max = max(m5.values.max(), m6.values.max())

# Figure height grows with the number of genes shown.
if len(genes) <= 8:
    size = 6
elif len(genes) <= 10:
    size = 8
else:
    size = 10

# Settings shared by the 5Ht and 6Ho panels (unscaled means, all cells included).
dotplot_kwargs = dict(
    groupby='curr_sub_info',
    standard_scale=None,
    vmin=global_min,
    vmax=deg_vmax,
    dot_max=0.8,
    dot_min=0,
    color_map='Reds',
    size_title='Fraction of cells %',
    mean_only_expressed=False,
    swap_axes=True,
    show=False,
)

for sort in sorts:
    if sort == 'Fraction':
        # Fraction of U1_wt cells with a non-zero value per gene. For a sparse X,
        # `.mean(axis=0)` returns a 2-D matrix, so flatten it to 1-D for pd.Series.
        fraction_expressed = np.asarray((adata_u1[:, genes].X > 0).mean(axis=0)).ravel()
        fraction_expressed_series = pd.Series(fraction_expressed, index=genes)
        # Stable sort: tied genes keep their order from `genes`.
        sorted_genes = (
            fraction_expressed_series
            .sort_values(ascending=False, kind='stable')
            .index.tolist()
        )
    else:
        # genes x clusters table of mean expression; the last column is U1_wt.
        cluster_means = pd.concat(
            [data[:, genes].to_df().mean(axis=0) for data in adatas],
            axis=1,
        )
        gene_min = cluster_means.min(axis=1)
        gene_max = cluster_means.max(axis=1)
        # Min-max scale each gene across clusters and keep the U1_wt value.
        # A gene with identical means in every cluster gives 0/0 = NaN here;
        # sort_values places NaN last instead of producing an undefined order.
        gene_exp = (cluster_means.iloc[:, -1] - gene_min) / (gene_max - gene_min)
        sorted_genes = gene_exp.sort_values(ascending=False, kind='stable').index.tolist()

    pdf_filename = os.path.join(save_dir, f"StemCell_DEG_U1_wt_dp_sort_{sort}_v2.pdf")
    with PdfPages(pdf_filename) as pdf:
        fig, axs = plt.subplots(1, 2, figsize=(10, size))

        # Left panel: 5Ht clusters, right panel: 6Ho clusters, same colour scale.
        for sample_adata, ax, panel_title in (
            (adata_5ht_basal, axs[0], '5ht'),
            (adata_6ho_basal, axs[1], '6ho'),
        ):
            sc.pl.dotplot(
                sample_adata,
                var_names=sorted_genes,  # Use sorted genes
                ax=ax,
                title=panel_title,
                **dotplot_kwargs,
            )

        # Adjust layout
        fig.tight_layout()
        fig.savefig(
            os.path.join(save_dir, f"{sort}_StemCellDEG.png"),
            dpi=300,
            bbox_inches='tight'
        )
        # Save the figure to the PDF file
        pdf.savefig(fig)
        plt.close(fig)  # Close the figure to free up memory

In [None]:
# # U1_wt
# Fig 4C
# Dot plot of the Mitf regulon: the transcription factor itself followed by selected
# target genes, shown across the clusters of the 5Ht (left) and 6Ho (right) samples.
# Target genes are ordered as they appear in the regulon's
# 'Significant Target Genes (SameCell)' entry of the U1 / 5Ht stats table.
regulon = 'Mitf(+)'
tf = regulon.split('(')[0]
genes = ['Zeb2', 'Ednrb', 'Nrp1', 'Rbpj', 'Vsir']

# Upper limit of the colour scale. Fixed by hand (instead of the data-driven
# global_max computed below) to match the scale of the collagen regulon figure.
regulon_vmax = 1.663693

# Create the output directory BEFORE the PdfPages object is opened inside it;
# depending on the matplotlib version, PdfPages may open the file immediately.
os.makedirs(save_dir, exist_ok=True)

# Apply scanpy's figure defaults before the figure is created so the result does
# not depend on whether another plotting cell has already been run.
sc.set_figure_params(scanpy=True, fontsize=20)

# Short cluster label: second piece of `sub_info` when split on 'l_', with 'U1'
# renamed to 'U1_wt'.
curr_metadata = adata_basal.obs.copy()
curr_sub_info = [clust.split('l_')[1] for clust in curr_metadata['sub_info']]
curr_sub_info = ['U1_wt' if c == 'U1' else c for c in curr_sub_info]
curr_metadata['curr_sub_info'] = curr_sub_info
adata_basal.obs = curr_metadata

# Filter for 5ht and 6ho
adata_5ht_basal = adata_basal[adata_basal.obs['orig.ident'] == '5Ht']
adata_6ho_basal = adata_basal[adata_basal.obs['orig.ident'] == '6Ho']

# Per-cluster views of the 5Ht sample. Not used by the plot below; kept because
# other cells may rely on these names -- TODO confirm and remove if unused.
adata_s1 = adata_5ht_basal[adata_5ht_basal.obs['curr_sub_info'] == 'S1']
adata_s2 = adata_5ht_basal[adata_5ht_basal.obs['curr_sub_info'] == 'S2']
adata_s3 = adata_5ht_basal[adata_5ht_basal.obs['curr_sub_info'] == 'S3']
adata_u1 = adata_5ht_basal[adata_5ht_basal.obs['curr_sub_info'] == 'U1_wt']
adatas = [adata_s1, adata_s2, adata_s3, adata_u1]

# Sort by order from stats file.
# The cell holds a string representation of a Python list of gene names.
regulon_rows = u1_5ht_stats.loc[
    u1_5ht_stats['Significant_Regulons'] == regulon,
    'Significant Target Genes (SameCell)',
]
if regulon_rows.empty:
    # Without this check the lookup below would fail with an uninformative IndexError.
    raise ValueError(f"Regulon {regulon!r} not found in the 'Significant_Regulons' column of u1_5ht_stats")
target_genes_string = regulon_rows.iloc[0]

# literal_eval parses the list safely (no arbitrary code execution).
target_genes_list = ast.literal_eval(target_genes_string)

# Rank of every target gene in the stats-file order; genes that are absent from
# the list get rank +inf and therefore end up last (in their original order).
index_map = {value: index for index, value in enumerate(target_genes_list)}
sorted_genes = sorted(genes, key=lambda x: index_map.get(x, float('inf')))

# The transcription factor is always shown first.
sorted_genes = [tf] + sorted_genes

# Figure height grows with the number of genes shown.
if len(sorted_genes) <= 8:
    size = 6
elif len(sorted_genes) <= 10:
    size = 8
else:
    size = 10

# 1) 5Ht means
df5 = adata_5ht_basal[:, sorted_genes].to_df()
df5['curr_sub_info'] = adata_5ht_basal.obs['curr_sub_info'].values
m5 = df5.groupby('curr_sub_info').mean()

# 2) 6Ho means
df6 = adata_6ho_basal[:, sorted_genes].to_df()
df6['curr_sub_info'] = adata_6ho_basal.obs['curr_sub_info'].values
m6 = df6.groupby('curr_sub_info').mean()

# 3) global min/max (global_max is kept for reference; the plots use regulon_vmax)
global_min = min(m5.values.min(), m6.values.min())
global_max = max(m5.values.max(), m6.values.max())

# Settings shared by the 5Ht and 6Ho panels (unscaled means, all cells included).
dotplot_kwargs = dict(
    groupby='curr_sub_info',
    standard_scale=None,
    vmin=global_min,
    vmax=regulon_vmax,
    dot_max=0.8,
    dot_min=0,
    color_map='Reds',
    size_title='Fraction of cells %',
    mean_only_expressed=False,
    swap_axes=True,
    show=False,
)

pdf_filename = os.path.join(save_dir, "StemCell_U1_wt_dp_sort_statsv2.pdf")
with PdfPages(pdf_filename) as pdf:
    fig, axs = plt.subplots(1, 2, figsize=(10, size))

    # Left panel: 5Ht clusters, right panel: 6Ho clusters, same colour scale.
    for sample_adata, ax, panel_title in (
        (adata_5ht_basal, axs[0], '5ht'),
        (adata_6ho_basal, axs[1], '6ho'),
    ):
        sc.pl.dotplot(
            sample_adata,
            var_names=sorted_genes,  # Use sorted genes
            ax=ax,
            title=panel_title,
            **dotplot_kwargs,
        )

    # Adjust layout
    fig.tight_layout()
    fig.savefig(
        os.path.join(save_dir, f"{regulon}_StemCellDEG.png"),
        dpi=300,
        bbox_inches='tight'
    )
    # Save the figure to the PDF file
    pdf.savefig(fig)
    plt.close(fig)  # Close the figure to free up memory