# 5. scATAC profiles of fetal tissues from the fetal cell atlas dataset
Data source: Domcke S, Hill AJ, Daza RM, Cao J, O’Day DR, Pliner HA, et al. A human cell atlas of fetal chromatin accessibility. Science. 2020;370:eaba7612.

In [1]:
import os
import sys
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Scanpro functions
from scanpro import scanpro
from scanpro.utils import convert_counts_to_df
from scanpro.get_transformed_props import get_transformed_props

In [2]:
# Set up the path to R for propeller (used through rpy2).
# R is expected under "<environment root>/lib/R", where the environment root is
# the parent of the directory holding the running Python interpreter
# (NOTE(review): assumes a conda-style layout -- confirm for other setups).
# The root is derived with dirname() instead of slicing off a fixed number of
# characters, so it no longer relies on that directory being named exactly "bin".
env_root = os.path.dirname(os.path.dirname(sys.executable))
R_home = os.path.join(env_root, "lib", "R")
os.environ["R_HOME"] = R_home
%load_ext rpy2.ipython

In [3]:
# Output directory for the figures saved by the plotting cells below
FIG_PATH = "plots"

------------------

## Read data

In [4]:
# Load the tab-separated scATAC count table
counts_path = "data/scATAC_counts.tsv"
fetal_scatac_counts = pd.read_csv(counts_path, sep="\t")

In [5]:
# Turn the count table into the DataFrame used by all later cells, with the
# cell type stored in the "cell_type" column.
# NOTE(review): presumably this expands counts to one row per cell -- confirm
# against scanpro.utils.convert_counts_to_df.
fetal_scatac = convert_counts_to_df(fetal_scatac_counts, column_name="cell_type")

In [6]:
# keep only the rows whose tissue is heart, lung or muscle
tissues_of_interest = ['heart', 'lung', 'muscle']
in_selected_tissue = fetal_scatac['tissue'].isin(tissues_of_interest)
fetal_scatac_sub = fetal_scatac[in_selected_tissue]

In [7]:
# Collect the cell types present in heart, lung and muscle, then drop the
# unannotated ones: names starting with 'Unknown' or ending with '?'.
tissue_mask = fetal_scatac['tissue'].isin(['heart', 'lung', 'muscle'])
candidate_types = np.asarray(fetal_scatac.loc[tissue_mask, 'cell_type'].unique())
is_known = np.array(
    [not (name.startswith('Unknown') or name.endswith('?')) for name in candidate_types],
    dtype=bool,
)
celltypes = candidate_types[is_known]

----------

## Run Scanpro with replicates

### Logit

In [9]:
# Run Scanpro with sample replicates, using the logit transformation
out_logit = scanpro.scanpro(
    fetal_scatac_sub,
    clusters_col='cell_type',
    conds_col='tissue',
    samples_col='sample_name',
    transform='logit',
)

# keep only the known cell types ...
known_logit = out_logit.results.loc[celltypes, :]
# ... and order the rows by their index (sort_index, not by p value)
out_logit.results = known_logit.sort_index()

out_logit.results

In [10]:
# Plot the logit results for four selected cell types and save the figure as PDF
highlighted_clusters = [
    'Cardiomyocytes',
    'Bronchiolar and alveolar epithelial cells',
    'Skeletal muscle cells',
    'Vascular endothelial cells',
]
out_logit.plot(clusters=highlighted_clusters, n_columns=4,
               save=f"{FIG_PATH}/scatac_logit.pdf")

In [11]:
# Same four cell types as a boxplot, saved to its own PDF
boxplot_clusters = [
    'Cardiomyocytes',
    'Bronchiolar and alveolar epithelial cells',
    'Skeletal muscle cells',
    'Vascular endothelial cells',
]
out_logit.plot(kind='boxplot', clusters=boxplot_clusters, n_columns=4,
               save=f"{FIG_PATH}/scatac_logit_boxplot.pdf")

In [12]:
# Write the filtered logit results (run with sample replicates) as a tab-separated file
out_logit.results.to_csv("results/scatac_scanpro_rep_logit.tsv", sep="\t")

### Arcsin

In [13]:
# Run Scanpro with sample replicates, using the arcsin transformation
out_arcsin = scanpro.scanpro(
    fetal_scatac_sub,
    clusters_col='cell_type',
    conds_col='tissue',
    samples_col='sample_name',
    transform='arcsin',
)

# keep only the known cell types ...
known_arcsin = out_arcsin.results.loc[celltypes, :]
# ... and order the rows by their index (sort_index, not by p value)
out_arcsin.results = known_arcsin.sort_index()

out_arcsin.results

In [14]:
# Write the filtered arcsin results (run with sample replicates) as a tab-separated file
out_arcsin.results.to_csv("results/scatac_scanpro_rep_arcsin.tsv", sep="\t")

----------

## Without replicates

### Logit

In [15]:
# Run Scanpro without a samples column (no replicates), logit transformation
out_boot_logit = scanpro.scanpro(
    fetal_scatac_sub,
    clusters_col='cell_type',
    conds_col='tissue',
    transform='logit',
)

# keep only the known cell types ...
known_boot_logit = out_boot_logit.results.loc[celltypes, :]
# ... and order the rows by their index (sort_index, not by p value)
out_boot_logit.results = known_boot_logit.sort_index()

out_boot_logit.results

In [16]:
# Write the filtered logit results (run without replicates) as a tab-separated file
out_boot_logit.results.to_csv("results/scatac_scanpro_norep_logit.tsv", sep="\t")

### Arcsin

In [17]:
# Run Scanpro without a samples column (no replicates), arcsin transformation
out_boot_arcsin = scanpro.scanpro(
    fetal_scatac_sub,
    clusters_col='cell_type',
    conds_col='tissue',
    transform='arcsin',
)

# keep only the known cell types ...
known_boot_arcsin = out_boot_arcsin.results.loc[celltypes, :]
# ... and order the rows by their index (sort_index, not by p value)
out_boot_arcsin.results = known_boot_arcsin.sort_index()

out_boot_arcsin.results

In [18]:
# Write the filtered arcsin results (run without replicates) as a tab-separated file
out_boot_arcsin.results.to_csv("results/scatac_scanpro_norep_arcsin.tsv", sep="\t")

-----------------