## Setup

In [None]:
# General
import scipy as sci
import numpy as np
import pandas as pd
import logging
import time
import pickle
from itertools import chain
import h5py
import scipy.sparse as sparse
import anndata as ad
import scipy.stats as stats
import gc
import os

# Plotting
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import rcParams
from matplotlib import cm
from matplotlib import colors
from matplotlib.pyplot import rc_context
import seaborn as sb
#from plotnine import *
from adjustText import adjust_text
#import pegasus as pg

# Analysis
import scanpy as sc
import muon as mu
from muon import atac as ac
#import snapatac2 as snap
import pysam
# Preprocessing
import scrublet as scr

#R
import rpy2
import rpy2.robjects as ro
import rpy2.rinterface_lib.callbacks
from rpy2.robjects import pandas2ri
import anndata2ri


# Warnings
import warnings
# Suppress every Python warning for the rest of the session; the commented
# alternative would show each distinct warning once instead.
warnings.filterwarnings('ignore') #(action='once') 
# Report the session's package versions so the run can be reproduced.
import session_info
session_info.show()

#sc.logging.print_versions()

In [None]:
# Colormap
# Custom palette: 20 shades sampled from a narrow band (0.7-0.8) of the
# reversed "Greys" map are placed in front of 128 shades spanning the full
# "Reds" map, and the stacked RGBA rows are turned into one colormap.
colors3 = cm.Greys_r(np.linspace(0.7, 0.8, 20))
colors2 = cm.Reds(np.linspace(0, 1, 128))
colorsComb = np.concatenate((colors3, colors2), axis=0)
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

In [None]:
# Execute utils.ipynb inside this kernel. The helpers called below
# (setup_R, read_h5mu_to_mudata, signac_qc_metrics, qc_filter_mdata,
# sparsify_mdata) are not defined in this notebook and presumably come from
# there -- TODO confirm.
%run utils.ipynb

In [None]:
# Hand the path of the R library directory of the conda environment to the
# setup_R helper, then (re)load the rpy2 IPython extension that provides the
# %%R cell magic used below.
# NOTE(review): absolute, machine-specific path -- adjust per environment.
setup_R('/home/scanalysis/mnt/envs/scUV/lib/R')
#setup_R('/home/scanalysis/mnt/miniforge3/envs/LN/lib/R')
%reload_ext rpy2.ipython

In [None]:
%%R
# Show the library search paths of the embedded R session (sanity check that
# the intended R installation is in use).
.libPaths()

In [None]:
%%R

# Parallelization
# Three independent parallel back-ends are configured, each with 64 workers.
# NOTE(review): the worker count is hard-coded; lower it on machines with
# fewer cores.

# BiocParallel: default back-end for Bioconductor packages.
library(BiocParallel)
register(MulticoreParam(64, progressbar = TRUE))

# future: forked multicore plan; the bare plan() call prints the active plan.
library(future)
plan(multicore, workers = 64)
# Optional cap on exported globals; 100 * 1024^3 bytes is 100 GiB, not 50 Gb.
#options(future.globals.maxSize = 100 * 1024 ^ 3) # for 50 Gb RAM
plan()

# foreach/doParallel back-end.
library(doParallel)
registerDoParallel(64)

# Record R and package versions.
sessionInfo()

## read data

In [None]:
# Data root (one sub-folder per sample) and the per-sample sub-directory
# holding the h5mu files; both are concatenated verbatim into file paths, so
# the trailing / leading slashes matter.
base_path = "/mnt/hdd/data/Multiome/"
outs_path = "/outs"

In [None]:
## Scanpy settings
# Figures and cache live directly under the data root; verbosity level 3.
sc.settings.figdir = f'{base_path}Figures'
sc.settings.cachedir = f'{base_path}Cache'
sc.settings.verbosity = 3

In [None]:
# Every sub-directory of base_path, in alphabetical order, is treated as a
# sample folder and mapped to a running label "sample46", "sample47", ...
# (folder name -> label).
folder_names = sorted(entry.name for entry in os.scandir(base_path) if entry.is_dir())
folder_variables = {
    name: f"sample{number}" for number, name in enumerate(folder_names, start=46)
}
folder_variables

In [None]:
# Expose each sample as a notebook-level variable: sample46 = '<folder name>', ...
# folder_variables maps folder name -> label, so the key is the folder and the
# value is the variable name. (The previous unpacking order was swapped, which
# created globals named after the folders holding the strings "sampleN".)
for folder_name, variable_name in folder_variables.items():
    globals()[variable_name] = folder_name

#### samples definition

In [None]:
# Sample folders processed by the loops below; any other folder found under
# base_path is skipped.
samples = [
    '597_NVF_Crypts_Rep1',
    '598_FVF_Crypts_Rep1',
    '599_FVF_Crypts_Rep2',
    '604_NVF_Crypts_Rep2',
    'FVF-low',
    'FVF-high',
]
# Alternative subset: samples = ['FVF-low', 'FVF-high']


In [None]:
%%R
# Attach the R packages needed in the embedded R session.
library(scran)
library(RColorBrewer)
library(DropletUtils)

## Preprocessing ATAC

### load samples, keep only intersect and run signac QC

In [None]:
# Per sample: load the GEX-processed MuData, keep only barcodes shared by all
# modalities, copy the RNA UMAP and Leiden labels onto the ATAC modality, run
# the signac_qc_metrics helper and save everything as ATAC_signac.h5mu.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    sample_dir = f'{base_path}{folder_name}{outs_path}'
    mdata = read_h5mu_to_mudata(f'{sample_dir}/GEX_1_done.h5mu')
    # retain only cells passing GEX QC
    mu.pp.intersect_obs(mdata)

    gex = mdata.mod['rna']
    atac = mdata.mod['atac']
    atac.var_names_make_unique()

    # carry the RNA embedding and clustering over to ATAC
    atac.obsm['X_umap'] = gex.obsm['X_umap']
    atac.obs['leiden'] = gex.obs['leiden']

    atac = signac_qc_metrics(
        atac,
        aggregated=False,
        species='Mmusculus',
        genome="mm10",
        sample=folder_name,
        ensembl_release="v102",
        cr_path=base_path,
    )
    # NOTE(review): mdata (not the returned `atac`) is written, so this relies
    # on signac_qc_metrics updating mdata.mod['atac'] in place -- confirm.
    mu.write(f'{sample_dir}/ATAC_signac.h5mu', mdata)

    # Clear memory
    del mdata
    gc.collect()
    

### counts

In [None]:
# Per-sample overview of ATAC depth: a joint KDE of log counts vs. log
# features, followed by a histogram of raw counts used to choose the count
# thresholds.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    mdata = read_h5mu_to_mudata(f'{base_path}{folder_name}{outs_path}/ATAC_signac.h5mu')
    atac = mdata.mod['atac']

    # Arguments shared by the first attempt and the clipped retry.
    kde_kwargs = dict(
        x=atac.obs['log_nCount_atac'],
        y=atac.obs['log_nFeature_atac'],
        n_levels=30,
        thresh=0.05,
        kind="kde",
        space=0,
        fill=True,
        cmap="rocket_r",
        color="#f69c73",
    )
    try:
        sb.jointplot(**kde_kwargs).plot_joint(sb.scatterplot, alpha=0)
    except ValueError:
        # Drop the half-drawn figure of the failed attempt so it is neither
        # displayed nor leaked, then retry with the KDE clipped to a fixed window.
        plt.close()
        sb.jointplot(**kde_kwargs, clip=((6, 12), (5, 11))).plot_joint(sb.scatterplot, alpha=0)
    plt.suptitle(folder_name)
    plt.show()
    plt.close()

    #Thresholding decision: counts
    with rc_context({'figure.figsize': (8, 3)}):
        sb.histplot(atac.obs['nCount_atac'], kde=True)
        plt.title(folder_name)
        plt.show()
        # Was `plt.close` without parentheses, a no-op that left one open
        # figure per sample.
        plt.close()

    # Clear memory
    del mdata
    gc.collect()

##### set min count

In [None]:
# Hand-picked ATAC count thresholds per sample: [min nCount_atac, max nCount_atac].
# Both bounds are applied as strict inequalities when the filter is built.
count_dict = {
    '597_NVF_Crypts_Rep1': [3500, 120000],
    '598_FVF_Crypts_Rep1': [6500, 120000],
    '599_FVF_Crypts_Rep2': [4500, 120000],
    '604_NVF_Crypts_Rep2': [6000, 130000],
    'FVF-high': [5000, 140000],
    'FVF-low': [7500, 120000],
}

In [None]:
# Plot window for the low end of the count histogram (only the upper bound is used).
x_lim = [0, 20_000]

In [None]:
# Zoom on the low end of the count distribution and mark the chosen minimum.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    h5mu_path = f'{base_path}{folder_name}{outs_path}/ATAC_signac.h5mu'
    mdata = read_h5mu_to_mudata(h5mu_path)
    atac = mdata.mod['atac']
    n_counts = atac.obs['nCount_atac']
    below_upper = n_counts < x_lim[1]
    with rc_context({'figure.figsize': (8, 3)}):
        sb.histplot(n_counts[below_upper], kde=True, bins=60)
        # vertical line at the sample's minimum-count threshold
        plt.axvline(count_dict[folder_name][0], 0, 1)
        plt.title(folder_name)
        plt.show()
        plt.close()
    # Clear memory
    del mdata
    gc.collect()

In [None]:
# Plot window for the high end of the count histogram.
x_lim = [40_000, 200_000]

In [None]:
# Zoom on the high end of the count distribution and mark the chosen maximum.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    h5mu_path = f'{base_path}{folder_name}{outs_path}/ATAC_signac.h5mu'
    mdata = read_h5mu_to_mudata(h5mu_path)
    atac = mdata.mod['atac']
    n_counts = atac.obs['nCount_atac']
    in_window = (n_counts > x_lim[0]) & (n_counts < x_lim[1])
    with rc_context({'figure.figsize': (8, 3)}):
        sb.histplot(n_counts[in_window], kde=True, bins=60)
        # vertical line at the sample's maximum-count threshold
        plt.axvline(count_dict[folder_name][1], 0, 1)
        plt.title(folder_name)
        plt.show()
        plt.close()
    # Clear memory
    del mdata
    gc.collect()

In [None]:
# Build the per-sample boolean count filter (strictly between min and max) and
# show counts vs. features, on raw and log scale, with the thresholds drawn as
# dashed vertical lines.
counts_filters = {}
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    mdata = read_h5mu_to_mudata(f'{base_path}{folder_name}{outs_path}/ATAC_signac.h5mu')
    atac = mdata.mod['atac']
    min_counts_atac, max_counts_atac = count_dict[folder_name][0], count_dict[folder_name][1]
    n_counts = atac.obs['nCount_atac']
    counts_filters[folder_name] = (n_counts > min_counts_atac) & (n_counts < max_counts_atac)

    n_features_min = min(atac.obs['nFeature_atac'])
    n_features_max = max(atac.obs['nFeature_atac'])

    # raw scale
    scatter_ax = sc.pl.scatter(atac, 'nCount_atac', 'nFeature_atac', color='nucleosome_signal', show=False)
    threshold_lines = scatter_ax.vlines(
        x=[min_counts_atac, max_counts_atac],
        ymin=[0, 0],
        ymax=[n_features_max, n_features_max],
        color="black",
        lw=0.5,
    )
    threshold_lines.set_linestyle("--")
    plt.suptitle(folder_name)
    plt.show()
    plt.close()

    # log scale: thresholds and y-extent are log-transformed to match the axes
    scatter_ax = sc.pl.scatter(atac, 'log_nCount_atac', 'log_nFeature_atac', color='nucleosome_signal', show=False)
    threshold_lines = scatter_ax.vlines(
        x=[np.log(min_counts_atac), np.log(max_counts_atac)],
        ymin=[np.log(n_features_min), np.log(n_features_min)],
        ymax=[np.log(n_features_max), np.log(n_features_max)],
        color="black",
        lw=0.5,
    )
    threshold_lines.set_linestyle("--")
    plt.suptitle(folder_name)
    plt.show()
    plt.close()

    # Clear memory
    del mdata
    gc.collect()


### TSS Enrichment

In [None]:
# TSS-enrichment distribution per sample: cells passing the count filter are
# drawn first, then all cells are overlaid on the same axes.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    h5mu_path = f'{base_path}{folder_name}{outs_path}/ATAC_signac.h5mu'
    mdata = read_h5mu_to_mudata(h5mu_path)
    atac = mdata.mod['atac']
    tss = atac.obs['TSS.enrichment']
    #Thresholding decision: tss enrichment
    with rc_context({'figure.figsize': (8, 3)}):
        sb.histplot(tss[counts_filters[folder_name]], kde=True, bins=60)
        sb.histplot(tss, kde=True, bins=60)
        plt.title(folder_name)
        plt.show()
        plt.close()
    # Clear memory
    del mdata
    gc.collect()

In [None]:
# Hand-picked TSS-enrichment thresholds per sample: [min, max], both applied
# as strict inequalities when the filter is built.
tss_dict = {
    '597_NVF_Crypts_Rep1': [3.8, 8.5],
    '598_FVF_Crypts_Rep1': [4, 8],
    '599_FVF_Crypts_Rep2': [4, 8],
    '604_NVF_Crypts_Rep2': [3.8, 8.5],
    'FVF-high': [4, 8.5],
    'FVF-low': [4, 8],
}

In [None]:
# Plot window around the lower TSS-enrichment threshold.
x_lim = [2, 5.2]

In [None]:
# Zoom on the lower TSS-enrichment range and mark the chosen minimum.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    h5mu_path = f'{base_path}{folder_name}{outs_path}/ATAC_signac.h5mu'
    mdata = read_h5mu_to_mudata(h5mu_path)
    atac = mdata.mod['atac']
    tss = atac.obs['TSS.enrichment']
    in_window = (tss > x_lim[0]) & (tss < x_lim[1])
    with rc_context({'figure.figsize': (8, 3)}):
        # count-passing cells first, then all cells in the window
        sb.histplot(tss[in_window & counts_filters[folder_name]], kde=True, bins=60)
        sb.histplot(tss[in_window], kde=True, bins=60)
        plt.axvline(tss_dict[folder_name][0], 0, 1)
        plt.title(folder_name)
        plt.show()
        plt.close()
    # Clear memory
    del mdata
    gc.collect()

In [None]:
# Plot window around the upper TSS-enrichment threshold.
x_lim = [6, 12]

In [None]:
# Zoom on the upper TSS-enrichment range and mark the chosen maximum.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    h5mu_path = f'{base_path}{folder_name}{outs_path}/ATAC_signac.h5mu'
    mdata = read_h5mu_to_mudata(h5mu_path)
    atac = mdata.mod['atac']
    tss = atac.obs['TSS.enrichment']
    in_window = (tss > x_lim[0]) & (tss < x_lim[1])
    with rc_context({'figure.figsize': (8, 3)}):
        # count-passing cells first, then all cells in the window
        sb.histplot(tss[in_window & counts_filters[folder_name]], kde=True, bins=60)
        sb.histplot(tss[in_window], kde=True, bins=60)
        plt.axvline(tss_dict[folder_name][1], 0, 1)
        plt.title(folder_name)
        plt.show()
        plt.close()
    # Clear memory
    del mdata
    gc.collect()


#### save tss filter and plot again

In [None]:
# Build the per-sample boolean TSS-enrichment filter (strictly between min and
# max) and re-plot the distribution over 0-15 with both thresholds marked.
tss_filters_atac = {}
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    mdata = read_h5mu_to_mudata(f'{base_path}{folder_name}{outs_path}/ATAC_signac.h5mu')
    atac = mdata.mod['atac']
    x_lim = [0,15]
    min_tss, max_tss = tss_dict[folder_name][0], tss_dict[folder_name][1]
    tss = atac.obs['TSS.enrichment']
    tss_filters_atac[folder_name] = (tss > min_tss) & (tss < max_tss)

    in_window = (tss > x_lim[0]) & (tss < x_lim[1])
    with rc_context({'figure.figsize': (8, 3)}):
        # count-passing cells first, then all cells in the window
        sb.histplot(tss[in_window & counts_filters[folder_name]], kde=True, bins=60)
        sb.histplot(tss[in_window], kde=True, bins=60)
        for threshold in (min_tss, max_tss):
            plt.axvline(threshold, 0, 1)
        plt.title(folder_name)
        plt.show()
        plt.close()
    # Clear memory
    del mdata
    gc.collect()


### Nucleosome signal

In [None]:
# Nucleosome-signal distribution per sample: cells passing the count and TSS
# filters are drawn first, then all cells are overlaid.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    h5mu_path = f'{base_path}{folder_name}{outs_path}/ATAC_signac.h5mu'
    mdata = read_h5mu_to_mudata(h5mu_path)
    atac = mdata.mod['atac']
    nuc_signal = atac.obs['nucleosome_signal']
    passed_so_far = counts_filters[folder_name] & tss_filters_atac[folder_name]
    #Thresholding decision: nucleosome signal
    with rc_context({'figure.figsize': (8, 3)}):
        sb.histplot(nuc_signal[passed_so_far], kde=True, bins=60)
        sb.histplot(nuc_signal, kde=True, bins=60)
        plt.title(folder_name)
        plt.show()
        plt.close()
    # Clear memory
    del mdata
    gc.collect()

In [None]:
# Hand-picked nucleosome-signal thresholds per sample: [min, max], both
# applied as strict inequalities when the filter is built.
nuc_dict = {
    '597_NVF_Crypts_Rep1': [0.35, 0.8],
    '598_FVF_Crypts_Rep1': [0.35, 0.8],
    '599_FVF_Crypts_Rep2': [0.4, 0.8],
    '604_NVF_Crypts_Rep2': [0.35, 0.78],
    'FVF-high': [0.45, 1],
    'FVF-low': [0.45, 1],
}

In [None]:
# Plot window around the lower nucleosome-signal threshold.
x_lim = [0, 0.6]

In [None]:
# Zoom on the lower nucleosome-signal range and mark the chosen minimum.
# Three overlaid histograms, from most to least filtered.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    h5mu_path = f'{base_path}{folder_name}{outs_path}/ATAC_signac.h5mu'
    mdata = read_h5mu_to_mudata(h5mu_path)
    atac = mdata.mod['atac']
    nuc_signal = atac.obs['nucleosome_signal']
    in_window = (nuc_signal > x_lim[0]) & (nuc_signal < x_lim[1])
    pass_counts = in_window & counts_filters[folder_name]
    with rc_context({'figure.figsize': (8, 3)}):
        # count + TSS filtered, count filtered, unfiltered
        sb.histplot(nuc_signal[pass_counts & tss_filters_atac[folder_name]], kde=True, bins=60)
        sb.histplot(nuc_signal[pass_counts], kde=True, bins=60)
        sb.histplot(nuc_signal[in_window], kde=True, bins=60)
        plt.axvline(nuc_dict[folder_name][0], 0, 1)
        plt.title(folder_name)
        plt.show()
        plt.close()
    # Clear memory
    del mdata
    gc.collect()

In [None]:
# Plot window around the upper nucleosome-signal threshold.
x_lim = [0.7, 1.2]

#### set nucl filter and plot

In [None]:
# Build the per-sample boolean nucleosome-signal filter (strictly between min
# and max) and plot the current x_lim window with the chosen maximum marked.
nuc_filters_atac = {}
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    mdata = read_h5mu_to_mudata(f'{base_path}{folder_name}{outs_path}/ATAC_signac.h5mu')
    atac = mdata.mod['atac']
    min_nuc, max_nuc = nuc_dict[folder_name][0], nuc_dict[folder_name][1]
    nuc_signal = atac.obs['nucleosome_signal']
    nuc_filters_atac[folder_name] = (nuc_signal > min_nuc) & (nuc_signal < max_nuc)

    in_window = (nuc_signal > x_lim[0]) & (nuc_signal < x_lim[1])
    pass_counts = in_window & counts_filters[folder_name]
    with rc_context({'figure.figsize': (8, 3)}):
        # count + TSS filtered, count filtered, unfiltered
        sb.histplot(nuc_signal[pass_counts & tss_filters_atac[folder_name]], kde=True, bins=60)
        sb.histplot(nuc_signal[pass_counts], kde=True, bins=60)
        sb.histplot(nuc_signal[in_window], kde=True, bins=60)
        plt.axvline(nuc_dict[folder_name][1], 0, 1)
        plt.title(folder_name)
        plt.show()
        plt.close()
    # Clear memory
    del mdata
    gc.collect()

### FRiP, blacklist and mitochondrial read fractions

In [None]:
# Per-sample FRiP settings. Only the first element (minimum fraction of
# fragments in peaks) is read by the cells below.
# NOTE(review): the second element repeats the maximum-count values of
# count_dict and is not used in the visible code -- possibly a copy-paste
# leftover; confirm before relying on it.
frip_dict = {
    '597_NVF_Crypts_Rep1': [0.55, 120000],
    '598_FVF_Crypts_Rep1': [0.55, 120000],
    '599_FVF_Crypts_Rep2': [0.55, 120000],
    '604_NVF_Crypts_Rep2': [0.55, 130000],
    'FVF-high': [0.55, 140000],
    'FVF-low': [0.55, 120000],
}

In [None]:
# Plot window for the fraction of fragments in peaks.
x_lim = [0, 1]

In [None]:
# Fraction of fragments in peaks per sample: cells passing the count, TSS and
# nucleosome filters first, then all cells in the window; the chosen minimum
# is marked.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    h5mu_path = f'{base_path}{folder_name}{outs_path}/ATAC_signac.h5mu'
    mdata = read_h5mu_to_mudata(h5mu_path)
    atac = mdata.mod['atac']
    frip = atac.obs['cr_fraction_fragments_in_peaks']
    in_window = (frip > x_lim[0]) & (frip < x_lim[1])
    passed_so_far = counts_filters[folder_name] & tss_filters_atac[folder_name] & nuc_filters_atac[folder_name]
    with rc_context({'figure.figsize': (8, 3)}):
        sb.histplot(frip[in_window & passed_so_far], kde=True, bins=60)
        sb.histplot(frip[in_window], kde=True, bins=60)
        plt.axvline(frip_dict[folder_name][0], 0, 1)
        plt.title(folder_name)
        plt.show()
        plt.close()
    # Clear memory
    del mdata
    gc.collect()

In [None]:
# Plot window for the blacklist fraction.
x_lim = [0, 0.1]

In [None]:
# Hand-picked per-sample upper bound (strict) on the fraction of counts in
# blacklisted regions.
max_black_dict = {
    '597_NVF_Crypts_Rep1': 0.045,
    '598_FVF_Crypts_Rep1': 0.04,
    '599_FVF_Crypts_Rep2': 0.04,
    '604_NVF_Crypts_Rep2': 0.05,
    'FVF-high': 0.04,
    'FVF-low': 0.04,
}

In [None]:
# Blacklist fraction per sample: cells passing the count, TSS and nucleosome
# filters first, then all cells in the window; the chosen maximum is marked.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    h5mu_path = f'{base_path}{folder_name}{outs_path}/ATAC_signac.h5mu'
    mdata = read_h5mu_to_mudata(h5mu_path)
    atac = mdata.mod['atac']
    blacklist_frac = atac.obs['fraction_counts_in_blacklist']
    in_window = (blacklist_frac > x_lim[0]) & (blacklist_frac < x_lim[1])
    passed_so_far = counts_filters[folder_name] & tss_filters_atac[folder_name] & nuc_filters_atac[folder_name]
    with rc_context({'figure.figsize': (8, 3)}):
        sb.histplot(blacklist_frac[in_window & passed_so_far], kde=True, bins=60)
        sb.histplot(blacklist_frac[in_window], kde=True, bins=60)
        plt.axvline(max_black_dict[folder_name], 0, 1)
        plt.title(folder_name)
        plt.show()
        plt.close()
    # Clear memory
    del mdata
    gc.collect()

In [None]:
# Plot window for the mitochondrial read fraction.
x_lim = [0, 0.1]

In [None]:
# Per-sample upper bound (strict) on the fraction of reads in mitochondria;
# currently the same value for every sample.
max_mito_dict = {
    '597_NVF_Crypts_Rep1': 0.0075,
    '598_FVF_Crypts_Rep1': 0.0075,
    '599_FVF_Crypts_Rep2': 0.0075,
    '604_NVF_Crypts_Rep2': 0.0075,
    'FVF-high': 0.0075,
    'FVF-low': 0.0075,
}

In [None]:
# Mitochondrial read fraction per sample: cells passing the count, TSS and
# nucleosome filters first, then all cells in the window; the chosen maximum
# is marked.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    h5mu_path = f'{base_path}{folder_name}{outs_path}/ATAC_signac.h5mu'
    mdata = read_h5mu_to_mudata(h5mu_path)
    atac = mdata.mod['atac']
    mito_frac = atac.obs['cr_fraction_reads_in_mito']
    in_window = (mito_frac > x_lim[0]) & (mito_frac < x_lim[1])
    passed_so_far = counts_filters[folder_name] & tss_filters_atac[folder_name] & nuc_filters_atac[folder_name]
    with rc_context({'figure.figsize': (8, 3)}):
        sb.histplot(mito_frac[in_window & passed_so_far], kde=True, bins=60)
        sb.histplot(mito_frac[in_window], kde=True, bins=60)
        plt.axvline(max_mito_dict[folder_name], 0, 1)
        plt.title(folder_name)
        plt.show()
        plt.close()
    # Clear memory
    del mdata
    gc.collect()

#### define filter

In [None]:
# Build the combined per-sample FRiP / blacklist / mito filter: a cell passes
# when FRiP is above its minimum and both the blacklist and mitochondrial
# fractions are below their maxima (all strict).
fbm_filters_atac = {}
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    mdata = read_h5mu_to_mudata(f'{base_path}{folder_name}{outs_path}/ATAC_signac.h5mu')
    atac = mdata.mod['atac']
    min_frip = frip_dict[folder_name][0]
    max_black = max_black_dict[folder_name]
    max_mito = max_mito_dict[folder_name]

    passes_frip = atac.obs['cr_fraction_fragments_in_peaks'] > min_frip
    passes_blacklist = atac.obs['fraction_counts_in_blacklist'] < max_black
    passes_mito = atac.obs['cr_fraction_reads_in_mito'] < max_mito
    fbm_filters_atac[folder_name] = passes_frip & passes_blacklist & passes_mito

    # Clear memory
    del mdata
    gc.collect()

### Filtering

In [None]:
# Store the combined QC decision (all four filters ANDed) on the ATAC modality
# as a categorical 'filtered_cells' column of 'True' / 'False' strings and save
# the annotated, still unfiltered object as ATAC_signac_filtered.h5mu.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    sample_dir = f'{base_path}{folder_name}{outs_path}'
    mdata = read_h5mu_to_mudata(f'{sample_dir}/ATAC_signac.h5mu')
    atac = mdata.mod['atac']

    passes_all = (
        counts_filters[folder_name]
        & tss_filters_atac[folder_name]
        & nuc_filters_atac[folder_name]
        & fbm_filters_atac[folder_name]
    )
    atac.obs['filtered_cells'] = pd.Categorical([str(flag) for flag in passes_all])

    # Thresholds of the current sample, kept as notebook-level variables
    # (only the commented-out alternative filter referenced them).
    min_frip = frip_dict[folder_name][0]
    max_black = max_black_dict[folder_name]
    max_mito = max_mito_dict[folder_name]
    min_counts_atac, max_counts_atac = count_dict[folder_name][0], count_dict[folder_name][1]
    min_tss, max_tss = tss_dict[folder_name][0], tss_dict[folder_name][1]
    min_nuc, max_nuc = nuc_dict[folder_name][0], nuc_dict[folder_name][1]

    mu.write(f'{sample_dir}/ATAC_signac_filtered.h5mu', mdata)
    # Clear memory
    del mdata
    gc.collect()

#### run this individually:

In [None]:
# Per sample and per QC metric: violin plots of the metric split by Leiden
# cluster (left) and by the keep / filter decision (right).
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    mdata = read_h5mu_to_mudata(f'{base_path}{folder_name}{outs_path}/ATAC_signac_filtered.h5mu')
    atac = mdata.mod['atac']
    for col in ['log_nCount_atac', 'nCount_atac', 'log_nFeature_atac', 'TSS.enrichment', 'nucleosome_signal']:
        fig, axes = plt.subplots(1, 2, figsize=(10, 4), gridspec_kw=dict(width_ratios=[2, 1], wspace=0.3))
        # Colour by the grouping variable on the x axis. Passing the continuous
        # y column as `hue` (as before) makes every distinct value its own hue
        # level instead of colouring one violin per group.
        sb.violinplot(x='leiden', y=col, data=atac.obs, ax=axes[0], hue='leiden', palette="bright", legend=False)
        sb.violinplot(x='filtered_cells', y=col, data=atac.obs, ax=axes[1], hue='filtered_cells', palette="colorblind", legend=False)
        axes[1].set_ylabel(None)
        fig.suptitle(col)
        plt.show()
        plt.close()
    # Clear memory
    del mdata
    gc.collect()

In [None]:
# Per-sample visual summary of the QC decision: UMAPs coloured by the QC
# metrics and by keep / filter status, a stacked bar chart with the percentage
# of kept vs. removed cells per Leiden cluster, and two scatter plots of the
# metrics coloured by the decision.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    mdata = read_h5mu_to_mudata(f'{base_path}{folder_name}{outs_path}/ATAC_signac_filtered.h5mu')
    atac = mdata.mod['atac']
    sc.pl.umap(atac, color=['log_nCount_atac', 'log_nFeature_atac', 'TSS.enrichment', 'nucleosome_signal', 'cr_fraction_fragments_in_peaks', 'cr_fraction_reads_in_mito', 'fraction_counts_in_blacklist'], size=20, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, title = folder_name)
    sc.pl.umap(atac, color=['filtered_cells','leiden'], size=20, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=6)

    ###############################################################################
    ###############################################################################

    with rc_context({'figure.figsize': (6, 4)}): #rcParams['figure.figsize']=(6,4)
        key = 'leiden'
        labels = list(atac.obs[key].cat.categories)
        width = 0.85       # the width of the bars: can also be len(x) sequence

        # Fraction of kept cells per cluster. Computed from a boolean mask so
        # that a missing 'True' or 'False' category (all cells pass, or the
        # file has already been filtered) no longer raises a KeyError.
        # Clusters without cells yield NaN, i.e. no bar.
        is_kept = atac.obs['filtered_cells'].astype(str) == 'True'
        keep_frac = is_kept.groupby(atac.obs[key], observed=False).mean().reindex(labels)
        keep_pct = (100 * keep_frac).tolist()
        filter_pct = (100 * (1 - keep_frac)).tolist()

        fig, ax = plt.subplots()

        # removed cells at the bottom, kept cells stacked on top
        ax.bar(labels, filter_pct, width, label='Filter Out', edgecolor='0', linewidth=0.5)
        ax.bar(labels, keep_pct, width, bottom=filter_pct, label='Keep', edgecolor='0', linewidth=0.5)

        ax.set_ylabel('%')
        ax.set_title(f'Percentage of Filtered Cells in {folder_name}')
        ax.axes.set_xticklabels(labels=labels, rotation=90)
        ax.legend(bbox_to_anchor=(1, .5),loc='center left', edgecolor='1')

        plt.ylim([-2.5,100+2.5])
        plt.xlim([-1+0.25,len(labels)-0.25])

        plt.show()
        plt.close()

    #################################################################################
    #################################################################################

    sc.pl.scatter(atac, 'log_nCount_atac', 'nucleosome_signal', color='filtered_cells', title=folder_name)
    plt.show()
    plt.close()
    sc.pl.scatter(atac, 'log_nCount_atac', 'TSS.enrichment', color='filtered_cells', title=folder_name)
    plt.show()
    plt.close()
    # Clear memory
    del mdata
    gc.collect()


In [None]:
# Apply the combined QC mask to each sample through the qc_filter_mdata helper
# and write the result back to ATAC_signac_filtered.h5mu.
# NOTE(review): the cell overwrites the file it has just read and uses the
# in-kernel masks built from ATAC_signac.h5mu, so it is presumably meant to be
# run exactly once per sample after the annotation cell -- confirm.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    filtered_path = f'{base_path}{folder_name}{outs_path}/ATAC_signac_filtered.h5mu'
    mdata = read_h5mu_to_mudata(filtered_path)
    atac = mdata.mod['atac']
    #Filter cells according to identified QC thresholds:
    passes_all = (
        counts_filters[folder_name]
        & tss_filters_atac[folder_name]
        & nuc_filters_atac[folder_name]
        & fbm_filters_atac[folder_name]
    )
    atac = qc_filter_mdata(mdata, atac, modality='atac', qc_filter=passes_all)
    mu.write(filtered_path, mdata)
    # Clear memory
    del mdata
    gc.collect()

## Filter ATAC and save results

In [None]:
# Re-run the signac_qc_metrics helper on the filtered ATAC modality of every
# sample.
# NOTE(review): nothing is written in this cell and mdata is deleted right
# afterwards, so the recomputed metrics only persist if the helper has side
# effects of its own -- confirm whether a mu.write is missing.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    filtered_path = f'{base_path}{folder_name}{outs_path}/ATAC_signac_filtered.h5mu'
    mdata = read_h5mu_to_mudata(filtered_path)
    atac = mdata.mod['atac']
    atac = signac_qc_metrics(
        atac,
        aggregated=False,
        species='Mmusculus',
        genome="mm10",
        sample=folder_name,
        ensembl_release="v102",
        cr_path=base_path,
    )
    # Clear memory
    del mdata
    gc.collect()

In [None]:
# Final per-sample export: restrict the MuData object to barcodes present in
# every modality, make the matrices sparse again, normalise a few fields that
# cannot be serialised as they are, drop the temporary 'filtered_cells' column
# and write multiome_1_done.h5mu.
for folder_name, sample_name in folder_variables.items():
    if folder_name not in samples:
        print(f'skipping {folder_name}...')
        continue
    mdata = read_h5mu_to_mudata(f'{base_path}{folder_name}{outs_path}/ATAC_signac_filtered.h5mu')
    # retain only cells passing GEX & ATAC QC
    try:
        mu.pp.intersect_obs(mdata)
        mdata_f = mdata.copy()
    except ValueError: # workaround if current mdata is view
        mdata_f = mdata.copy()
        mu.pp.intersect_obs(mdata_f)
    atac = mdata_f.mod['atac']
    # Make matrices sparse again
    sparsify_mdata(mdata_f)
    # store the ambient-gene flag as a string category before writing
    mdata_f.mod['rna'].var['is_ambient'] = mdata_f.mod['rna'].var['is_ambient'].astype(str).astype('category').copy()
    # convert to plain dicts before writing, see
    # https://github.com/scverse/muon/issues/65
    mdata_f.mod['atac'].uns['files'] = dict(mdata_f.mod['atac'].uns['files'])
    mdata_f.mod['atac'].uns['atac'] = dict(mdata_f.mod['atac'].uns['atac'])
    # Remove the helper column from the object that is actually saved. It was
    # previously deleted from `mdata`, which is discarded below, so the column
    # still ended up in the output file.
    # NOTE(review): a copy of the column may also exist in the global
    # mdata_f.obs -- confirm whether that needs dropping too.
    if 'filtered_cells' in mdata_f.mod['atac'].obs.columns:
        del mdata_f.mod['atac'].obs['filtered_cells']
    mu.write(f'{base_path}{folder_name}{outs_path}/multiome_1_done.h5mu',mdata_f)
    # Clear memory
    del mdata
    gc.collect()