In [20]:
import pandas as pd
import numpy as np
import scanpy as sc
import anndata as ad
import os
import ast
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

In [None]:
# Base directory of the analysis; every input and output path below is built from it.
analysis_dir = "MMG_Analysis"

# Directory that receives the generated plots.
save_dir = "/".join((analysis_dir, "Results", "Plots"))


In [22]:
### Load the original exports and build the AnnData object ###

# Per-cell metadata (first CSV column is used as the index) and the reclassified annotations.
metadata = pd.read_csv(f'{analysis_dir}/OriginalData/seurat.integrated.5Ht_6Ho.metadata.csv', index_col=0)
metadata_reclassified = pd.read_excel(f'{analysis_dir}/Processed_Data/meta_data_unified_v6_AG.xlsx')

# The counts table is transposed right after reading because AnnData expects
# cells as rows and genes as columns.
counts = pd.read_csv(
    f'{analysis_dir}/OriginalData/seurat.integrated.5Ht_6Ho.counts.csv',
    index_col=0,
).T

# Expression matrix plus per-cell annotations in a single object.
adata = ad.AnnData(X=counts, obs=metadata)

In [None]:
# Minimum number of cells a gene must be detected in to be kept.
min_cells_Ip = 5

# Everything below mutates `adata` in place. Re-running this cell on an object
# that has already been processed would normalize and log-transform the data a
# second time, so the whole step is skipped when the 'log1p' marker is present.
if 'log1p' in adata.uns:
    print("adata is already normalized and log-transformed; skipping preprocessing.")
else:
    ## Keep cells that express at least 300 genes
    sc.pp.filter_cells(adata, min_genes=300)
    ## Keep cells that express at most 5000 genes
    sc.pp.filter_cells(adata, max_genes=5000)
    ## Keep genes that are expressed in at least `min_cells_Ip` cells
    sc.pp.filter_genes(adata, min_cells=min_cells_Ip)

    ## Mitochondrial filter (percentage <= 10) is intentionally left disabled
    #adata = adata[adata.obs['percent.mt'] <= 10]

    ## Normalize every cell to a total of 1e4 counts
    sc.pp.normalize_total(adata, target_sum=1e4)

    ## Log transform the data - natural log of (1 + x) unless a base is specified
    sc.pp.log1p(adata)

    # Recent scanpy versions record this entry themselves; setting it when it is
    # absent keeps the guard above working on versions that do not.
    adata.uns.setdefault('log1p', {'base': None})

In [24]:
# Load the AnnData object used for the basal network analysis and keep the
# identifiers of the cells it contains.
basal_data = sc.read_h5ad(
    f'{analysis_dir}/Processed_Data/NetworkData_HVGs_basal_5ht6ho_without_PTPRC_Adgre1.h5ad'
)
basal_cells = basal_data.obs.index

In [None]:
# Build the per-cell annotation table: cluster assignments restricted to the
# cells that survived filtering, joined with the reclassified cell types.
metadata_with_clust = pd.read_csv(f'{analysis_dir}/Processed_Data/meta_data_with_clust.csv')
metadata_with_clust = metadata_with_clust[metadata_with_clust['cell_id'].isin(adata.obs_names)]
metadata_with_clust = metadata_with_clust.merge(metadata_reclassified, left_on='cell_id', right_on='cell')
metadata_with_clust.index = metadata_with_clust['cell']

# Assigning a DataFrame to `adata.obs` does not align it by cell id, so the
# table must be put in exactly the row order of `adata` first; otherwise the
# expression rows would silently be labelled with another cell's metadata.
missing_cells = adata.obs_names.difference(metadata_with_clust.index)
if len(missing_cells) > 0:
    raise ValueError(
        f"{len(missing_cells)} cells in adata have no row in the merged metadata"
    )
metadata_with_clust = metadata_with_clust.loc[adata.obs_names.tolist()]

# Plotting group per cell. Conditions are evaluated in priority order (first
# match wins): the reclassified luminal and macrophage labels take precedence,
# then membership in the basal network cell set; everything else is 'Other'.
reclassified = metadata_with_clust['Reclassified_cluster']
group = np.select(
    [
        (reclassified == 'Epithelial-Luminal').to_numpy(),
        (reclassified == 'Macrophages').to_numpy(),
        metadata_with_clust['cell_id'].isin(basal_cells).to_numpy(),
    ],
    ['Epithelial-Luminal', 'Macrophages', 'Epithelial-Basal'],
    default='Other',
).tolist()
metadata_with_clust['Group_dp'] = group

adata.obs = metadata_with_clust

# Cells belonging to one of the three groups of interest (this is a view of `adata`).
adata_3_CellTypes = adata[adata.obs['Group_dp'] != 'Other']
adata_3_CellTypes

  elif metadata_with_clust['cell_id'][i] in basal_cells:


View of AnnData object with n_obs × n_vars = 4376 × 18075
    obs: 'cell_id', 'orig.ident_x', 'nCount_RNA_x', 'nFeature_RNA_x', 'sample_x', 'percent.mt_x', 'percent.rb_x', 'percent.hb_x', 'umi_per_gene_x', 'RNA_snn_res.0.8_x', 'seurat_clusters_x', 'mt_filter_x', 'feature_filter_x', 'doublet_filter_x', 'qualtiy_filters_x', 'RNA_snn_res.0.5_x', 'nCount_SCT_x', 'nFeature_SCT_x', 'integrated_snn_res.0.5_x', 'integrated_snn_res.1_x', 'integrated_snn_res.1.5_x', 'integrated_snn_res.2_x', 'S.Score_x', 'G2M.Score_x', 'cell_cycle_seurat_x', 'cluster1_x', 'sub_info', 'cell_type', 'cell', 'orig.ident_y', 'nCount_RNA_y', 'nFeature_RNA_y', 'sample_y', 'percent.mt_y', 'percent.rb_y', 'percent.hb_y', 'umi_per_gene_y', 'RNA_snn_res.0.8_y', 'seurat_clusters_y', 'mt_filter_y', 'feature_filter_y', 'doublet_filter_y', 'qualtiy_filters_y', 'RNA_snn_res.0.5_y', 'nCount_SCT_y', 'nFeature_SCT_y', 'integrated_snn_res.0.5_y', 'integrated_snn_res.1_y', 'integrated_snn_res.1.5_y', 'integrated_snn_res.2_y', 'S.Sco

In [26]:
# Number of cells in each of the three plotted groups.
group_counts = adata_3_CellTypes.obs['Group_dp'].value_counts()
group_counts

Group_dp
Macrophages           1963
Epithelial-Basal      1488
Epithelial-Luminal     925
Name: count, dtype: int64

In [27]:
# Restrict to the cells used in the basal network analysis.
# An explicit copy is taken because `.obs` of this object is reassigned later;
# doing that on a view triggers an implicit copy and an ImplicitModificationWarning.
adata_basal = adata[adata.obs_names.isin(basal_cells)].copy()

In [None]:
# Genes shown in every dot plot.
genes_to_plot = ['Epcam', 'Krt18', 'Krt19', 'Krt14', 'Krt5', 'Krt8', 'Ptprc', 'Cd14', 'Adgre1']

# Output: one multi-page PDF plus one PNG per figure, all inside `save_dir`.
pdf_filename = os.path.join(save_dir, "Plot_markers_basal_and_macrophage_dotplots_no_PTPRC_Adgre1.pdf")
os.makedirs(save_dir, exist_ok=True)

# Apply scanpy's figure defaults once, before any figure exists, so the first
# figure does not depend on whether this cell has been run before in the kernel.
sc.set_figure_params(scanpy=True, fontsize=20)


def _paired_dotplot(adata_5ht, adata_6ho, groupby, titles, var_names, **color_limits):
    """Draw two dot plots side by side, one per sample, on a single figure.

    Parameters
    ----------
    adata_5ht, adata_6ho
        AnnData objects plotted on the left and right panel respectively.
    groupby
        Name of the `.obs` column used to group cells.
    titles
        Pair of panel titles (left, right).
    var_names
        Genes to show.
    **color_limits
        Optional `vmin` / `vmax` forwarded to `sc.pl.dotplot`; pass the same
        values to both panels to put them on a common color scale.

    Returns
    -------
    (fig, axs)
        The matplotlib figure and its two host axes.
    """
    fig, axs = plt.subplots(1, 2, figsize=(10, 7))
    for ax, panel_adata, panel_title in zip(axs, (adata_5ht, adata_6ho), titles):
        sc.pl.dotplot(
            panel_adata,
            var_names=var_names,
            groupby=groupby,
            standard_scale=None,
            dot_max=0.8,
            dot_min=0,
            color_map='Reds',
            size_title='Fraction of cells %',
            mean_only_expressed=False,
            swap_axes=True,
            ax=ax,
            show=False,
            title=panel_title,
            **color_limits,
        )
    fig.tight_layout()
    return fig, axs


def _save_and_close(fig, png_name, pdf):
    """Write `fig` as a 300-dpi PNG in `save_dir`, append it to `pdf`, then close it."""
    fig.savefig(os.path.join(save_dir, png_name), dpi=300, bbox_inches='tight')
    pdf.savefig(fig)
    plt.close(fig)


# ---- Data for the basal-cell figure ----
# Short subcluster label: the part of 'sub_info' after 'l_'
# (presumably labels of the form '<prefix>l_<subcluster>' - TODO confirm).
# The label 'U1' is renamed to 'U1_wt'.
basal_metadata = adata_basal.obs.copy()
curr_sub_info = [clust.split('l_')[1] for clust in basal_metadata['sub_info']]
curr_sub_info = ['U1_wt' if c == 'U1' else c for c in curr_sub_info]
basal_metadata['curr_sub_info'] = curr_sub_info
adata_basal.obs = basal_metadata

# Split basal cells by sample. Copies (not views) are used because plotting
# sanitizes `.obs`, which on a view triggers an implicit copy plus a warning
# for every modified column.
adata_5ht_basal = adata_basal[adata_basal.obs['orig.ident_x'] == '5Ht'].copy()
adata_6ho_basal = adata_basal[adata_basal.obs['orig.ident_x'] == '6Ho'].copy()

# 1) 5Ht: mean expression of each plotted gene per subcluster
df5 = adata_5ht_basal[:, genes_to_plot].to_df()
df5['curr_sub_info'] = adata_5ht_basal.obs['curr_sub_info'].values
m5 = df5.groupby('curr_sub_info', observed=True).mean()

# 2) 6Ho: same per-subcluster means
df6 = adata_6ho_basal[:, genes_to_plot].to_df()
df6['curr_sub_info'] = adata_6ho_basal.obs['curr_sub_info'].values
m6 = df6.groupby('curr_sub_info', observed=True).mean()

# 3) Global min/max across both samples, used as a shared color scale
global_min = min(m5.values.min(), m6.values.min())
global_max = max(m5.values.max(), m6.values.max())
print(global_max)

# ---- Data for the three-cell-type figure ----
# Split the basal / luminal / macrophage cells by sample.
adata_5ht_3celltypes = adata_3_CellTypes[adata_3_CellTypes.obs['orig.ident_x'] == '5Ht'].copy()
adata_6ho_3celltypes = adata_3_CellTypes[adata_3_CellTypes.obs['orig.ident_x'] == '6Ho'].copy()

with PdfPages(pdf_filename) as pdf:
    # S5 Fig: basal cells grouped by subcluster, both panels on the shared color scale.
    fig, axs = _paired_dotplot(
        adata_5ht_basal,
        adata_6ho_basal,
        groupby='curr_sub_info',
        titles=('5Ht Basal Cells', '6Ho Basal Cells'),
        var_names=genes_to_plot,
        vmin=global_min,
        vmax=global_max,
    )
    _save_and_close(fig, "Basal_markers.png", pdf)

    # S3 Fig: cells grouped by 'Group_dp'.
    # NOTE(review): no vmin/vmax is passed here, so the two panels are not
    # forced onto a common color scale - confirm this is intended.
    fig, axs = _paired_dotplot(
        adata_5ht_3celltypes,
        adata_6ho_3celltypes,
        groupby='Group_dp',
        titles=('5Ht', '6Ho'),
        var_names=genes_to_plot,
    )
    _save_and_close(fig, "Basal_Luminal_Macrophage_markers.png", pdf)

print(f"Dot plots saved to {pdf_filename}")

  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c


2.6639822


  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  obs_bool.groupby(level=0).sum() / obs_bool.groupby(level=0).count()
  dot_color_df = self.obs_tidy.groupby(level=0).mean()
  dot_ax.scatter(x, y, **kwds)
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  obs_bool.groupby(level=0).sum() / obs_bool.groupby(level=0).count()
  dot_color_df = self.obs_tidy.groupby(level=0).mean()
  dot_ax.scatter(x, y, **kwds)
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  df[key] = c
  

Dot plots saved to /Users/weiwu2/Library/CloudStorage/Box-Box/Akshat/MMG_Analysis/Results/Plots/Plot_markers_basal_and_macrophage_dotplots_no_PTPRC_Adgre1.pdf
