# UMAP plot of the ATAC-seq data

In [1]:
import pandas as pd
import numpy as np
import hicstraw 
from multiprocessing import Pool
from functools import partial
import glob
import os
import plotly.express as px
import math
import matplotlib.pyplot as plt
from matplotlib import colors
from pandarallel import pandarallel
import cooler
import cooltools
import pybedtools as pbed
# Initialise pandarallel with its default settings; this call is what emits
# the "INFO: Pandarallel will run on ..." lines in the cell output.
pandarallel.initialize()
from scipy import stats, special
from statsmodels.stats import multitest
import statsmodels.api as sm
import statsmodels.formula.api as smf
import plotly.io as pio
import seaborn as sns
# Scratch directory for pybedtools intermediate files. It is created up front
# (a no-op if it already exists) and then registered as pybedtools' temp dir.
pybedtools_scratch_dir = "/mnt/iusers01/jw01/mdefscs4/scratch/temp_pybedtools/"
os.makedirs(pybedtools_scratch_dir, exist_ok=True)
pbed.helpers.set_tempdir(pybedtools_scratch_dir)

# Genome file for bedtools-style operations
# (presumably hg38 chromosome sizes -- TODO confirm).
bed_genome_file = "/mnt/iusers01/jw01/mdefscs4/hg38.genome"

# Root of the analysis tree; every input and output path below hangs off it.
base_dir = "/mnt/jw01-aruk-home01/projects/psa_functional_genomics/PsA_cleaned_analysis"

# Keep text as real text (not outlined paths) in matplotlib SVG output so
# labels stay editable in vector-graphics software.
plt.rcParams['svg.fonttype'] = 'none'

INFO: Pandarallel will run on 28 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [3]:
# Input tables for the UMAP; both use their first CSV column as the index.
atac_counts_csv = f"{base_dir}/ATAC_seq_analysis/ATAC_DESeq2_quantile_normalized_counts.csv"
atac_metadata_csv = f"{base_dir}/metadata/cleaned_ATAC_metadata.csv"

# Normalised ATAC-seq count matrix.
ATAC_normalised_counts = pd.read_csv(atac_counts_csv, index_col=0)
# Per-sample metadata (cell type, sex, condition, ... as used by the plots).
metadata_ATAC = pd.read_csv(atac_metadata_csv, index_col=0)

In [4]:
import umap

# Embed the count matrix in two dimensions with UMAP.
#
# * The first three columns are dropped before embedding (presumably peak
#   coordinate/annotation columns -- TODO confirm against the CSV header).
# * The matrix is transposed so that each remaining column of the table
#   becomes one row (one embedded point) in the UMAP input.
# * UMAP is stochastic: without a seed the layout -- and therefore every
#   figure saved from it -- changes on each run. A fixed `random_state`
#   makes the embedding reproducible (at the cost of UMAP running
#   single-threaded).
reducer = umap.UMAP(n_components=2, random_state=42)
latent_rep = reducer.fit_transform(ATAC_normalised_counts.iloc[:, 3:].T)

In [10]:
# Build the plotting table: a copy of the metadata with the two UMAP
# coordinates appended as columns "X" and "Y" (the original metadata frame is
# left untouched).
# NOTE(review): the coordinates are attached positionally, so this assumes the
# metadata rows are in the same order as the count-matrix columns that were fed
# to UMAP -- confirm.
rep = metadata_ATAC.assign(
    X=latent_rep[:, 0],
    Y=latent_rep[:, 1],
)

In [13]:
# UMAP scatter coloured by cell type, with the marker symbol encoding sex.
# Rows are ordered by cell type before plotting.
rep = rep.sort_values("cell_type")

sex_figure_path = f"{base_dir}/ATAC_seq_analysis/figures/ATAC_UMAP_sex.svg"

fig = px.scatter(
    rep,
    x="X",
    y="Y",
    color="cell_type",
    symbol="female_sex",
    hover_name="proper_name",
    opacity=0.8,
    template="plotly_white",
    width=700,
    height=600,
)
fig.update_traces(marker=dict(size=10))
fig.show()
# Save the same figure as SVG alongside the other ATAC-seq figures.
fig.write_image(sex_figure_path)

In [14]:
# UMAP scatter coloured by cell type, with the marker symbol encoding the
# sample condition.
condition_figure_path = f"{base_dir}/ATAC_seq_analysis/figures/ATAC_UMAP.svg"

fig = px.scatter(
    rep,
    x="X",
    y="Y",
    color="cell_type",
    symbol="condition",
    hover_name="proper_name",
    opacity=0.8,
    template="plotly_white",
    width=700,
    height=600,
)
fig.update_traces(marker=dict(size=10))
fig.show()
# Save the same figure as SVG alongside the other ATAC-seq figures.
fig.write_image(condition_figure_path)