# EPIC-ATAC deconvolution of cancer cfDNA samples using the built-in reference profile

## Import required libraries

In [None]:
library(EPICATAC)
library(tidyr)
library(ggplot2)
library(data.table)
library(dplyr)
library(reshape2)  
library(ggthemes)
library(patchwork)

## Load cancer cfDNA transformed marker regions

In [None]:
# Read the tab-separated .bed table of cancer cfDNA samples; the first row holds the column names
df_for_epic <- fread(
  input = "/mnt/DATA3/daniel/project/02_cfDNA_preprocessing/data/03_intersect_mapped/cfDNA_cancer_samples_original/cfDNA_cancer_samples_original.bed",
  sep = "\t",
  header = TRUE
)

# Inspect the structure and the first rows of the loaded table
str(df_for_epic)
head(df_for_epic)

## Preprocess data for EPIC-ATAC (format regions, samples)

In [None]:
# Build a "chrom:start-end" identifier for every region
df_for_epic$region <- paste0(df_for_epic$chrom, ":", df_for_epic$start, "-", df_for_epic$end)

# Convert the data.table to a plain data.frame so the identifiers can be stored as row names
df_for_epic <- as.data.frame(df_for_epic)

# Use the region identifier as row names
rownames(df_for_epic) <- df_for_epic$region

# Keep only the non-annotation columns and convert them to a matrix.
# drop = FALSE keeps the subset two-dimensional even when a single column remains;
# without it the subset collapses to a bare vector and the matrix loses both the
# region row names and the column name.
df_for_epic <- as.matrix(
  df_for_epic[
    ,
    !(colnames(df_for_epic) %in% c("chrom", "start", "end", "region", "marker_start", "marker_end")),
    drop = FALSE
  ]
)

# Verify the resulting matrix
str(df_for_epic)
head(df_for_epic)


## Convert counts to TPM-like values

In [None]:
# Pass the region-by-column count matrix through EPICATAC's get_TPMlike_counts helper
# and preview the first rows of its output.
# NOTE(review): the helper is reached with `:::`, i.e. through the package's internal
# namespace; if it is not exported it is not part of the public API and its behavior
# or signature may change between package versions - confirm against the installed version.
tpm_counts <- EPICATAC:::get_TPMlike_counts(df_for_epic)
head(tpm_counts)

## Run EPIC-ATAC deconvolution on cancer cfDNA samples

In [None]:
# Run the deconvolution on the TPM-like matrix against atacRef_TME
# (atacRef_TME is not defined in this notebook; presumably provided by EPICATAC).
# Every argument is passed by name, so the grouping below is purely for readability.
results <- EPIC_ATAC(
  # Inputs
  bulk = tpm_counts,
  reference = atacRef_TME,
  genome_version = "hg38",
  ATAC = TRUE,
  # Fitting options
  withOtherCells = FALSE,
  constrainedSum = TRUE,
  rangeBasedOptim = TRUE,
  scaleExprs = TRUE,
  nb_iter = 1000,
  # No mRNA-per-cell values are supplied
  mRNA_cell = NULL,
  mRNA_cell_sub = NULL
)


## View the results

In [None]:
# Display the `cellFractions` and `fit.gof` elements of the deconvolution result
results[["cellFractions"]]
results[["fit.gof"]]

## Prepare data for plotting

In [None]:
# Turn the cell-fraction matrix into a data.frame and copy its row names
# (used as sample IDs) into a regular column
df_cellFrac <- as.data.frame(results$cellFractions)
df_cellFrac[["sample"]] <- rownames(df_cellFrac)
head(df_cellFrac)

# Reshape to long format: one row per sample / cell type pair
df_long <- df_cellFrac %>%
  pivot_longer(cols = -sample, names_to = "cellType", values_to = "fraction")

## Import metadata for cancer cfDNA samples

In [None]:
# Location of the Cristiano sample map (tab-separated, with a header row)
file_path <- "/mnt/DATA2/cfDNA_finaledb/Cristiano_samplemap.tsv"

# Load the sample metadata, keeping text columns as character vectors
cristiano_samplemap <- read.delim(
  file = file_path,
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

# Preview the metadata
head(cristiano_samplemap)


## Merge cell fraction data with the sample metadata

In [None]:
# The join below is keyed on 'sample', so stop early if the metadata lacks that column
if (!is.element("sample", colnames(cristiano_samplemap))) {
  stop("Column 'sample' not found in Cristiano sample map.")
}

# Attach the sample metadata to every row of the long cell-fraction table
df_merged <- left_join(df_long, cristiano_samplemap, by = "sample")

# Inspect the merged table
head(df_merged)


## Exclude Bile Duct Cancer

In [None]:
# Drop the Bile Duct Cancer rows, then freeze the cell types as a factor whose
# levels follow their order of first appearance in the filtered table
df_filtered <- df_merged %>%
  dplyr::filter(Patient.Type != "Bile Duct Cancer") %>%
  dplyr::mutate(cellType = factor(cellType, levels = unique(cellType)))


## Plot cell-type proportions in cancer cfDNA samples

In [None]:
# Size of the inline plot in the notebook
options(repr.plot.width = 14, repr.plot.height = 14)

# Fixed x-axis order of the cell types
cell_order <- c(
  "Bcells", "CD4_Tcells", "CD8_Tcells",
  "DCs", "Endothelial", "Fibroblasts",
  "Macrophages", "Neutrophils", "NK"
)
df_filtered$cellType <- factor(df_filtered$cellType, levels = cell_order)

# One fill colour per cancer type; the names must match the Patient.Type values exactly
cancer_fill_palette <- c(
  "Breast Cancer" = "lavender",
  "Colorectal Cancer" = "honeydew",
  "Gastric cancer" = "mistyrose",
  "Lung Cancer" = "azure",
  "Ovarian Cancer" = "wheat",
  "Pancreatic Cancer" = "lightgoldenrodyellow"
)

# Box plots of the estimated fractions per cell type, one facet per cancer type
p_cancer <- ggplot(
  df_filtered,
  aes(x = cellType, y = fraction, fill = Patient.Type)
) +
  geom_boxplot(
    width = 0.6,
    size = 0.2,
    color = "black",
    outlier.color = "black",
    outlier.shape = 16,
    outlier.size = 1.0
  ) +
  facet_wrap(~Patient.Type, scales = "fixed") +
  scale_fill_manual(values = cancer_fill_palette) +
  # Start the y axis at zero with no padding; the upper limit follows the data
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA)) +
  labs(x = "", y = "Proportion") +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    strip.text = element_text(size = 16),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 16, margin = margin(r = 15)),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 14),
    axis.text.y = element_text(size = 14)
  )

# Render the figure in the notebook
p_cancer

# Write the figure to a PNG in the working directory
ggsave(
  filename = "original_cancer_cfDNA_proportions.png",
  plot = p_cancer,
  width = 14,
  height = 14,
  dpi = 300
)
