In [None]:
import numpy as np
import pandas as pd
import os
import scanpy as sc
import sys
import muon as mu
import muon.atac as ac
import matplotlib.pyplot as plt
import seaborn as sns


# Output directory for every figure and spreadsheet written by this notebook.
# NOTE(review): both paths below are absolute and machine-specific; adjust
# them before running on another machine.
figures = '/home/carsten/alvira_bioinformatics/postnatal_lung_multiome/data/pilot/230710_pan_senescence_presentation'
# Root directory from which the processed single-cell file is read.
sc_file = '/home/carsten/alvira_bioinformatics/postnatal_lung_multiome/data/single_cell_files'
os.makedirs(figures, exist_ok=True)
# scanpy-wide figure defaults: 300 dpi, PNG output.
sc.set_figure_params(dpi=300, format="png")
# Point scanpy's figure directory (used by plots called with `save=`) at the
# same output folder.
sc.settings.figdir = figures



In [None]:
# Load the processed multiome object and keep handles on its two modalities;
# `rna` and `atac` are used by every later cell.
mudata = mu.read(f'{sc_file}/share/P7_multiome_processed.h5mu')
rna = mudata.mod['rna']
atac = mudata.mod['atac']

In [None]:
# Display the MuData summary to inspect what was loaded.
mudata


In [None]:
# Show the categories of the `celltype_abv` obs column of the RNA modality.
rna.obs['celltype_abv'].cat.categories

### Make UMAPs for all lineages

In [None]:

# 2x2 UMAP grid: rows are the obs column used for colouring, columns are the
# modality (RNA left, ATAC right). Panel settings are derived from the grid
# position instead of a hand-maintained counter with an if/elif chain, so
# adding a row or column cannot leave `legend_loc` / `title` undefined.
fig, axs = plt.subplots(2, 2, figsize=(4, 4))
modalities = [('RNA', rna), ('ATAC', atac)]
for row, color in enumerate(['lineage', 'treatment']):
    for col, (modality, adata) in enumerate(modalities):
        ax = sc.pl.embedding(
            adata,
            basis='X_umap',
            color=color,
            # Only the right-hand panel of each row carries the legend.
            legend_loc='right margin' if col == 1 else None,
            size=10,
            legend_fontoutline=1,
            legend_fontsize=10,
            frameon=False,
            # Modality names are shown once, on the top row.
            title=modality if row == 0 else '',
            ax=axs[row, col],
            show=False,
        )
        ax.title.set_size(10)

fig.savefig(os.path.join(figures, 'umap_all.png'), dpi=300, bbox_inches='tight')

In [None]:
# Dot plots of one marker gene list per lineage grouping, drawn for the ATAC
# modality (via muon) and the RNA modality (via scanpy); both are saved
# through scanpy's `save=` mechanism.
lineage_markers = ['Cdh5', 'Epcam', 'Ptprc', 'Col1a1']
ac.pl.dotplot(
    atac,
    lineage_markers,
    groupby='lineage',
    save='lineage_markers_atac.png',
)
sc.pl.dotplot(
    rna,
    lineage_markers,
    groupby='lineage',
    save='lineage_markers_rna.png',
)

### Make UMAPs of the ATAC/RNA data for each lineage

In [None]:
# One 2x2 UMAP grid per lineage: rows are the obs column used for colouring,
# columns are the modality (RNA left, ATAC right). Each panel uses the
# lineage-specific embedding stored under `X_umap_<lineage>`.
# Panel settings are derived from the grid position; the original used two
# nested `enumerate` loops that both bound `ind` (the inner one shadowing the
# outer) plus a manual counter with an if/elif chain.
modalities = [('RNA', rna), ('ATAC', atac)]
for lin in rna.obs['lineage'].cat.categories:
    fig, axs = plt.subplots(2, 2, figsize=(4, 4))
    for row, color in enumerate(['celltype', 'treatment']):
        for col, (modality, adata) in enumerate(modalities):
            ax = sc.pl.embedding(
                # Restrict to the cells of the current lineage.
                adata[adata.obs['lineage'] == lin],
                basis=f'X_umap_{lin}',
                color=color,
                # Only the right-hand panel of each row carries the legend.
                legend_loc='right margin' if col == 1 else None,
                size=10,
                legend_fontoutline=1,
                legend_fontsize=10,
                frameon=False,
                # Modality names are shown once, on the top row.
                title=modality if row == 0 else '',
                ax=axs[row, col],
                show=False,
            )
            ax.title.set_size(10)

    fig.savefig(os.path.join(figures, f'umap_{lin}.png'), dpi=300, bbox_inches='tight')

In [None]:
# Acta1 signal on the mesenchymal-specific UMAP of the ATAC modality, using
# muon's ATAC embedding plot with values averaged by `peak_type`.
mesenchymal_atac = atac[atac.obs['lineage'] == 'mesenchymal']
ac.pl.embedding(
    mesenchymal_atac,
    basis='X_umap_mesenchymal',
    color='Acta1',
    use_raw=False,
    average='peak_type',
    size=10,
    legend_fontoutline=1,
    legend_fontsize=10,
)

In [None]:
# Display the ATAC subset whose `lineage` is 'endothelial' (quick inspection).
atac[atac.obs['lineage'] == 'endothelial']

### Dot plot of Cdkn1a expression change across all cell types

In [None]:
# Dot plot of Cdkn1a per cell type and treatment: colour encodes the mean
# value of `rna.X`, dot size the fraction of cells with a non-zero value.
genes = ['Cdkn1a']

output_dict = {}
for treatment in ['Normoxia', 'Hyperoxia']:
    treat_adata = rna[rna.obs['treatment'] == treatment, genes]
    # Densify only when X is a sparse matrix; a dense X has no `.toarray()`.
    expr = treat_adata.X
    expr = expr.toarray() if hasattr(expr, 'toarray') else np.asarray(expr)
    obs = pd.DataFrame(expr, columns=genes, index=treat_adata.obs['celltype_abv'])
    output_dict[treatment] = {
        'average': obs.groupby(level=0).mean(),
        # Mean of the boolean "detected" mask == detected / total per group.
        'fraction': obs.astype(bool).groupby(level=0).mean(),
    }
    for summary in output_dict[treatment].values():
        summary['ct'] = summary.index
        summary['treatment'] = treatment
        # Make row labels unique across treatments so the two summaries can
        # be concatenated and later aligned on their index.
        summary.index = [f'{x}_{treatment}' for x in summary.index]

final_average = pd.concat([output_dict['Hyperoxia']['average'],
                           output_dict['Normoxia']['average']]).rename(columns={'Cdkn1a': 'Cdkn1a average'})
final_fraction = pd.concat([output_dict['Hyperoxia']['fraction'],
                            output_dict['Normoxia']['fraction']]).rename(columns={'Cdkn1a': 'Cdkn1a fraction'})
# Aligned on the shared '<celltype>_<treatment>' index.
final_average['Cdkn1a fraction'] = final_fraction['Cdkn1a fraction']
# Reverse-sorted categoricals fix the order in which seaborn lays out the axes.
final_average['ct'] = pd.Categorical(final_average['ct'], sorted(final_average['ct'].unique(), reverse=True))
final_average['treatment'] = pd.Categorical(final_average['treatment'], sorted(final_average['treatment'].unique(), reverse=True))

# `row_order` was dropped: no `row=` facet variable is used, so it had no effect.
sns.relplot(
    data=final_average,
    x="ct",
    y="treatment",
    hue="Cdkn1a average",
    size="Cdkn1a fraction",
    palette="Reds",
    edgecolor=".7",
    height=1.5,
    aspect=4,
    sizes=(10, 200),
    size_norm=(0, 1),
    legend=False,
)
# Turn the grid off explicitly. The former `plt.grid(b=None)` relied on the
# `b` keyword, which newer matplotlib releases renamed to `visible`.
plt.grid(False)
plt.xticks(rotation=90)
plt.savefig(f'{figures}/dotplot_Cdkn1a.png', dpi=300, bbox_inches='tight')



In [None]:
# Per-cell table of Cdkn1a together with the `treatment` and `celltype` columns.
sc.get.obs_df(rna, ['Cdkn1a','treatment','celltype'])

In [None]:
# Heatmap of mean Cdkn1a per (treatment, cell type) in the RNA modality.
treatments = rna.obs['treatment'].cat.categories
cts = rna.obs['celltype_abv'].cat.categories

df = sc.get.obs_df(rna, ['Cdkn1a', 'treatment', 'celltype_abv'])
# Grouped mean instead of a cell-by-cell Python loop: this yields a float
# frame directly rather than an object-dtype frame that depends on `fillna`
# to downcast it. `observed=False` keeps empty category combinations, which
# become NaN and are then set to 0, as before.
plot_df = (
    df.groupby(['celltype_abv', 'treatment'], observed=False)['Cdkn1a']
    .mean()
    .unstack('treatment')
    .reindex(index=cts, columns=treatments)
    .fillna(0.0)
    .astype(float)
    # Drop axis names so seaborn does not add axis labels the original lacked.
    .rename_axis(index=None, columns=None)
)

fig, ax = plt.subplots(1, 1, figsize=(8, 4))
# Small horizontal colour bar placed in figure coordinates above the heatmap.
cbar_ax = fig.add_axes([.32, .7, .25, .01])
sns.heatmap(
    plot_df.T,
    cmap='Reds',
    square=True,
    xticklabels=True,
    yticklabels=True,
    linewidth=.5,
    cbar_ax=cbar_ax,
    cbar_kws={'orientation': 'horizontal',
              'label': 'Log10(CPM)'},
    ax=ax,
)
for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
             ax.get_xticklabels() + ax.get_yticklabels()):
    item.set_fontsize(10)
cbar_ax.tick_params(labelsize=10)
cbar_ax.xaxis.label.set_size(10)
fig.suptitle('Cdkn1a expression', fontsize=12, y=0.8, x=0.45)
fig.savefig(f'{figures}/rna_cdkn1a_heatmap.png')

In [None]:
# List the ATAC features whose `annotated_gene` is Cdkn1a.
atac.var.loc[atac.var['annotated_gene']=='Cdkn1a']

In [None]:
# Heatmap of the mean scaled signal of one Cdkn1a-associated peak per
# (treatment, cell type) in the ATAC modality.
#
# NOTE: this cell rewrites `atac.X` in place (counts -> per-cell normalisation
# to 1e6 -> log10(x + 1) -> per-feature scaling). It restarts from the
# `counts` layer, so re-running the cell is safe, but every later cell sees
# the scaled matrix.
atac.X = atac.layers['counts'].copy()
sc.pp.normalize_total(atac, target_sum=1e6)
sc.pp.log1p(atac, base=10)
sc.pp.scale(atac)
treatments = atac.obs['treatment'].cat.categories
cts = atac.obs['celltype_abv'].cat.categories

# Peak plotted below; kept in one place instead of repeating the literal.
peak = 'chr17:29095049-29095880'
df = sc.get.obs_df(atac, [peak, 'treatment', 'celltype_abv'])
# Grouped mean instead of a cell-by-cell Python loop: this yields a float
# frame directly rather than an object-dtype frame that depends on `fillna`
# to downcast it. `observed=False` keeps empty category combinations, which
# become NaN and are then set to 0, as before.
plot_df = (
    df.groupby(['celltype_abv', 'treatment'], observed=False)[peak]
    .mean()
    .unstack('treatment')
    .reindex(index=cts, columns=treatments)
    .fillna(0.0)
    .astype(float)
    # Drop axis names so seaborn does not add axis labels the original lacked.
    .rename_axis(index=None, columns=None)
)

fig, ax = plt.subplots(1, 1, figsize=(8, 4))
# Small horizontal colour bar placed in figure coordinates above the heatmap.
cbar_ax = fig.add_axes([.32, .7, .25, .01])
sns.heatmap(
    plot_df.T,
    cmap='RdBu_r',
    square=True,
    center=0,
    xticklabels=True,
    yticklabels=True,
    linewidth=.5,
    cbar_ax=cbar_ax,
    cbar_kws={'orientation': 'horizontal'},
    ax=ax,
)
for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
             ax.get_xticklabels() + ax.get_yticklabels()):
    item.set_fontsize(10)
cbar_ax.tick_params(labelsize=10)
cbar_ax.xaxis.label.set_size(10)
fig.suptitle('Cdkn1a_distal_1269', fontsize=12, y=0.8, x=0.45)
fig.savefig(f'{figures}/atac_cdkn1a_distal_1269_heatmap.png')

In [None]:
# Dot plot of the peak across cell type x treatment groups in the ATAC
# modality. The bare `atac` expression that preceded this call was removed:
# it was not the last expression of the cell, so it displayed nothing.
sc.pl.dotplot(atac, ['chr17:29095049-29095880'], groupby=['celltype','treatment'])

In [None]:
# List the categories of the `celltype` obs column of the RNA modality.
rna.obs['celltype'].cat.categories.tolist()

In [None]:
# Build an AnnData of the TF-motif matrix from the cellranger output and
# restrict it to the barcodes present in `atac`.
# NOTE(review): the path is absolute and machine-specific.
matrix_dir = "/home/carsten/alvira_bioinformatics/postnatal_lung_multiome/data/cellranger_output/230609_aggregate/outs/analysis/tf_analysis/filtered_tf_bc_matrix"
adata_tf = sc.read_mtx(os.path.join(matrix_dir, "matrix.mtx"))

motifs_path = os.path.join(matrix_dir, "motifs.tsv")
var = pd.read_csv(motifs_path, sep='\t', index_col=0, header=None)
barcodes_path = os.path.join(matrix_dir, "barcodes.tsv")
obs = pd.read_csv(barcodes_path, sep='\t', index_col=0, header=None)
# The matrix is transposed so that barcodes become observations and motifs
# become variables, matching the names assigned below.
adata_tf = adata_tf.T
# Keep an untouched copy of the values before any normalisation.
adata_tf.layers['raw'] = adata_tf.X.copy()
adata_tf.obs_names = obs.index.values
adata_tf.var_names = var.index.values
# Subsetting returns a view; take an explicit copy so the in-place
# preprocessing below operates on a real AnnData rather than relying on an
# implicit view-to-copy conversion.
adata_tf = adata_tf[atac.obs_names, :].copy()
sc.pp.normalize_total(adata_tf, target_sum=1e6)
sc.pp.log1p(adata_tf, base=10)
sc.pp.scale(adata_tf)

In [None]:
# Carry the cell annotations over from the ATAC modality onto the TF object.
metadata_columns = ['lineage', 'celltype', 'celltype_abv', 'treatment']
adata_tf.obs[metadata_columns] = atac.obs[metadata_columns]


In [None]:
# Write one heatmap per TF motif: mean scaled value per (treatment, cell type).
figures_tf = f'{figures}/tf-heatmaps'
os.makedirs(figures_tf, exist_ok=True)

# Loop-invariant category orders, looked up once instead of once per motif.
treatments = adata_tf.obs['treatment'].cat.categories
cts = adata_tf.obs['celltype_abv'].cat.categories

for tf in adata_tf.var_names:
    df = sc.get.obs_df(adata_tf, [f'{tf}', 'treatment', 'celltype_abv'])
    # Grouped mean instead of a cell-by-cell Python loop: this yields a float
    # frame directly rather than an object-dtype frame that depends on
    # `fillna` to downcast it. `observed=False` keeps empty category
    # combinations, which become NaN and are then set to 0, as before.
    plot_df = (
        df.groupby(['celltype_abv', 'treatment'], observed=False)[f'{tf}']
        .mean()
        .unstack('treatment')
        .reindex(index=cts, columns=treatments)
        .fillna(0.0)
        .astype(float)
        # Drop axis names so seaborn does not add axis labels.
        .rename_axis(index=None, columns=None)
    )

    fig, ax = plt.subplots(1, 1, figsize=(8, 4))
    # Small horizontal colour bar placed in figure coordinates above the heatmap.
    cbar_ax = fig.add_axes([.32, .7, .25, .01])
    sns.heatmap(
        plot_df.T,
        cmap='RdBu_r',
        center=0,
        # Fixed colour range so all motif heatmaps are directly comparable.
        vmin=-1,
        vmax=1,
        square=True,
        xticklabels=True,
        yticklabels=True,
        linewidth=.5,
        cbar_ax=cbar_ax,
        cbar_kws={'orientation': 'horizontal',
                  'label': 'Z-score'},
        ax=ax,
    )
    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                 ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(10)
    cbar_ax.tick_params(labelsize=10)
    cbar_ax.xaxis.label.set_size(10)
    fig.suptitle(f'{tf} accessibility', fontsize=12, y=0.8, x=0.45)
    fig.savefig(f'{figures_tf}/heatmap_{tf}.png')
    # Close this specific figure so the loop does not accumulate open figures.
    plt.close(fig)

In [None]:
# Heatmap for selected motif(s), left open so it renders inline.
# Relies on `figures_tf` having been defined by an earlier cell.
treatments = adata_tf.obs['treatment'].cat.categories
cts = adata_tf.obs['celltype_abv'].cat.categories

for tf in ['SPIB_MA0081.2']:
    df = sc.get.obs_df(adata_tf, [f'{tf}', 'treatment', 'celltype_abv'])
    # Grouped mean instead of a cell-by-cell Python loop: this yields a float
    # frame directly rather than an object-dtype frame that depends on
    # `fillna` to downcast it. `observed=False` keeps empty category
    # combinations, which become NaN and are then set to 0, as before.
    plot_df = (
        df.groupby(['celltype_abv', 'treatment'], observed=False)[f'{tf}']
        .mean()
        .unstack('treatment')
        .reindex(index=cts, columns=treatments)
        .fillna(0.0)
        .astype(float)
        # Drop axis names so seaborn does not add axis labels.
        .rename_axis(index=None, columns=None)
    )

    fig, ax = plt.subplots(1, 1, figsize=(8, 4))
    # Small horizontal colour bar placed in figure coordinates above the heatmap.
    cbar_ax = fig.add_axes([.32, .7, .25, .01])
    sns.heatmap(
        plot_df.T,
        cmap='RdBu_r',
        center=0,
        vmin=-1,
        vmax=1,
        square=True,
        xticklabels=True,
        yticklabels=True,
        linewidth=.5,
        cbar_ax=cbar_ax,
        cbar_kws={'orientation': 'horizontal',
                  'label': 'Z-score'},
        ax=ax,
    )
    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                 ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(10)
    cbar_ax.tick_params(labelsize=10)
    cbar_ax.xaxis.label.set_size(10)
    fig.suptitle(f'{tf} accessibility', fontsize=12, y=0.8, x=0.45)
    fig.savefig(f'{figures_tf}/heatmap_{tf}.png')


In [None]:
# Tabulate, for each direction of change, which sheets (one per key of the
# workbook) report each row label with pvals_adj < 0.05, and in how many
# sheets it appears; write one Excel sheet per direction.
hyper_deg_cur_fn = '/home/carsten/alvira_bioinformatics/postnatal_lung_multiome/data/figures/atac/datf/hyperoxia_datf_all.xlsx'
dfs = pd.read_excel(hyper_deg_cur_fn, sheet_name=None, index_col=0, header=0)

gene_dict = {}
for direction in ['up', 'down']:
    sheets_by_gene = {}
    for ct, table in dfs.items():
        significant = table.loc[table['pvals_adj'] < 0.05]
        # 'up' keeps positive log fold changes, 'down' negative ones.
        if direction == 'up':
            selected = significant.loc[significant['logfoldchanges'] > 0]
        else:
            selected = significant.loc[significant['logfoldchanges'] < 0]
        for gene in selected.index.tolist():
            sheets_by_gene.setdefault(gene, []).append(ct)

    gene_dict[direction] = {
        'celltypes': sheets_by_gene,
        'number_cts': {gene: len(hits) for gene, hits in sheets_by_gene.items()},
    }

with pd.ExcelWriter(
        f"{figures}/shared_tf_change_hyperoxia.xlsx", engine="xlsxwriter"
) as writer:
    for sheet in ['up', 'down']:
        ranked = pd.DataFrame(gene_dict[sheet]).sort_values('number_cts', ascending=False)
        ranked.to_excel(writer, sheet_name=sheet)

In [None]:
# Build a peak -> "gene1,gene2,..." lookup from the stored peak annotation.
# The annotation's index is copied into a `gene` column.
# NOTE(review): presumably that index holds gene names — confirm.
df = atac.uns['atac']['peak_annotation'].copy()
df['gene'] = df.index.copy()
df2 = (
    df.groupby('peak')['gene']
    .apply(lambda genes: ','.join(genes.astype(str)))
    .reset_index()
)
peak_gene_dt = pd.Series(df2['gene'].values, index=df2['peak'])

In [None]:
# Show the annotation rows whose `gene` is Peg3 (uses `df` from the previous cell).
df.loc[df['gene']=='Peg3']

In [None]:
# Tabulate, for each direction of change, which sheets (one per key of each
# lineage workbook) report each peak and in how many sheets it appears, then
# annotate the peaks from `atac.var` and write one Excel sheet per direction.

# Read every lineage workbook once; the original re-read each file for both
# directions.
dap_tables = {}
for lineage in rna.obs['lineage'].cat.categories:
    hyper_deg_cur_fn = f'/home/carsten/alvira_bioinformatics/postnatal_lung_multiome/data/figures/atac/dap/{lineage}/hyperoxia_dap.xlsx'
    dap_tables[lineage] = pd.read_excel(hyper_deg_cur_fn, sheet_name=None, index_col=0, header=0)

gene_dict = {}
for direction in ['up', 'down']:
    sheets_by_peak = {}
    for lineage, dfs in dap_tables.items():
        for ct, df in dfs.items():
            # NOTE(review): this keeps rows with |score| < 0.5, i.e. the
            # scores closest to zero. If the intent was to keep the strongest
            # changes the comparison is inverted — confirm before relying on
            # the output. Behaviour is left unchanged here.
            df = df.loc[df['scores'].abs() < 0.5]
            if direction == 'up':
                df = df.loc[df['logfoldchanges'] > 0]
            else:
                df = df.loc[df['logfoldchanges'] < 0]
            for gene in df.index.tolist():
                sheets_by_peak.setdefault(gene, []).append(ct)

    # Counted once per direction, after all lineages have been accumulated
    # (the original rebuilt this dict inside the lineage loop).
    gene_dict[direction] = {
        'celltypes': sheets_by_peak,
        'number_cts': {gene: len(hits) for gene, hits in sheets_by_peak.items()},
    }

annotation_source = ['annotated_gene', 'peak_type', 'distance', 'tfs']
annotation_target = ['gene', 'peak_type', 'distance', 'tfs']
with pd.ExcelWriter(
        f"{figures}/shared_peak_change_hyperoxia.xlsx", engine="xlsxwriter"
) as writer:
    up = pd.DataFrame(gene_dict['up']).sort_values('number_cts', ascending=False)
    # Fixed: the original read `adata.var`, a loop variable leaked from an
    # earlier plotting cell; the peak annotations live on `atac.var`, which
    # is also what the 'down' sheet uses.
    up[annotation_target] = atac.var.loc[up.index][annotation_source]
    up.to_excel(writer, sheet_name="up")
    down = pd.DataFrame(gene_dict['down']).sort_values('number_cts', ascending=False)
    down[annotation_target] = atac.var.loc[down.index][annotation_source]
    down.to_excel(writer, sheet_name="down")