# EPIC-ATAC deconvolution of healthy cfDNA samples using the built-in reference profile

## Import required libraries

In [None]:
library(EPICATAC)
library(tidyr)
library(ggplot2)
library(data.table)
library(dplyr)
library(reshape2)  
library(ggthemes)
library(patchwork)

## Load healthy cfDNA transformed marker regions

In [None]:
# Location of the tab-separated marker-region table for the healthy cfDNA samples
marker_regions_path <- "/mnt/DATA3/daniel/project/02_cfDNA_preprocessing/data/03_intersect_mapped/cfDNA_healthy_original/healthy_cfDNA_transformed_marker_regions.bed"

# Read the table with fread(); the first line of the file supplies the column names
df_for_epic <- fread(
  input = marker_regions_path,
  sep = "\t",
  header = TRUE
)

# Inspect the column types and the first rows of what was loaded
str(df_for_epic)
head(df_for_epic)

## Preprocess data for EPIC-ATAC (format regions, samples)

In [None]:
# Reshape the loaded table into a region-by-sample data.frame:
# one row per region (row name "chrom:start-end"), one column per sample.

# Sample columns to keep; every other column is dropped further down
sample_cols <- c("EE87922", "EE87925", "EE87927", "EE87932", "EE87933")

# Fail early with a readable message if any column used below is absent.
# Without this check a missing coordinate column makes `$` return NULL, which
# paste0() silently turns into a malformed region label (":-") instead of
# raising an error at the point of the mistake.
required_cols <- c("chrom", "start", "end", sample_cols)
missing_cols <- setdiff(required_cols, colnames(df_for_epic))
if (length(missing_cols) > 0) {
  stop(
    "Input table is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Create region column
df_for_epic$region <- paste0(df_for_epic$chrom, ":", df_for_epic$start, "-", df_for_epic$end)

# Convert to a plain data.frame so that row names can be assigned
df_for_epic <- as.data.frame(df_for_epic)

# Make region the rownames
rownames(df_for_epic) <- df_for_epic$region

# Drop any non-sample columns (drop = FALSE keeps a data.frame even for one sample)
df_for_epic <- df_for_epic[, sample_cols, drop = FALSE]

# Verify
str(df_for_epic)
head(df_for_epic)


## Convert counts to TPM-like values

In [None]:
# get_TPMlike_counts is fetched with `:::`, i.e. looked up directly in the
# EPICATAC namespace rather than through the package's exported interface.
to_tpm_like <- EPICATAC:::get_TPMlike_counts

# Transform the region-by-sample table and preview the first rows
tpm_counts <- to_tpm_like(df_for_epic)
head(tpm_counts)

## Run EPIC-ATAC deconvolution on healthy cfDNA samples

In [None]:
# Deconvolve the TPM-like bulk profiles against the atacRef_TME reference.
# Every argument is passed by name, so the grouping below is purely for reading.
# NOTE(review): the meaning of each option is defined by EPICATAC::EPIC_ATAC —
# confirm against the package documentation before changing any value.
results <- EPIC_ATAC(
  # Inputs
  bulk = tpm_counts,
  reference = atacRef_TME,
  genome_version = "hg38",
  ATAC = TRUE,

  # Model / optimisation switches
  withOtherCells = FALSE,
  constrainedSum = TRUE,
  rangeBasedOptim = TRUE,
  scaleExprs = TRUE,
  nb_iter = 1000,

  # No mRNA-per-cell values supplied explicitly
  mRNA_cell = NULL,
  mRNA_cell_sub = NULL
)


## View deconvolution results

In [None]:
# Display the estimated cell fractions, then the goodness-of-fit table
results[["cellFractions"]]
results[["fit.gof"]]

## Prepare data for plotting

In [None]:
# Coerce the cell-fraction result into a data.frame
df_cellFrac <- as.data.frame(results$cellFractions)

# Copy the row names (sample IDs) into an ordinary column so they survive reshaping
df_cellFrac[["sample"]] <- row.names(df_cellFrac)
head(df_cellFrac)

# Reshape to long format: one row per (sample, cellType) pair with its fraction
df_long <- pivot_longer(
  data = df_cellFrac,
  cols = -sample,
  names_to = "cellType",
  values_to = "fraction"
)

## Plot cell-type proportions in healthy cfDNA samples

In [None]:
# Plot size used by the notebook front end (repr.* options)
options(repr.plot.width = 10, repr.plot.height = 10)

# Constant label column; it exists only so facet_wrap() renders a strip title
df_long$Patient.Type <- "Healthy"

# Boxplot layer: one box per cell type, light fill with black outline and outliers
# NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for line
# thickness — confirm the installed version before switching.
box_layer <- geom_boxplot(
  width = 0.6,
  fill = "aliceblue",
  color = "black",
  size = 0.3,
  outlier.shape = 16,
  outlier.size = 1.0,
  outlier.color = "black"
)

# Overrides applied on top of theme_minimal(): strip title, margins, axis text
theme_tweaks <- theme(
  strip.placement = "outside",
  strip.text = element_text(size = 24, face = "plain", margin = margin(b = 8)),
  strip.background = element_blank(),
  plot.margin = margin(t = 10, r = 10, b = 10, l = 10),
  axis.text.x = element_text(angle = 45, hjust = 1, size = 20),
  axis.text.y = element_text(size = 20),
  axis.title.x = element_blank(),
  axis.title.y = element_text(size = 24, margin = margin(r = 15)),
  legend.position = "none"
)

# Assemble the figure; layers are added in the same order as before
p_healthy <- ggplot(df_long, aes(x = cellType, y = fraction)) +
  box_layer +
  labs(x = "", y = "Proportion") +
  # Anchor the y axis at zero with no padding; upper limit follows the data
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA)) +
  facet_wrap(~Patient.Type, strip.position = "top") +
  theme_minimal(base_size = 20) +
  theme_tweaks

# Render the figure in the notebook
p_healthy

# Write the figure to a PNG in the current working directory
ggsave(
  filename = "new_healthy_cfDNA_proportions.png",
  plot = p_healthy,
  width = 10,
  height = 10,
  dpi = 300
)
