# Diffbind analysis to identify differential peaks between active and nonactive patients and healthy

requires precomputed diffbind dataset

In [3]:
library("tidyverse")
library("DiffBind")
library("readxl")
library("pheatmap")
options(bitmapType="cairo")

setwd("/mnt/jw01-aruk-home01/projects/psa_functional_genomics/PsA_cleaned_analysis/ATAC_seq_analysis")


In [15]:
dataset_info_file = "../metadata/cleaned_ATAC_metadata.csv"
dataset_info = read.csv(dataset_info_file)
dataset_peaks_location = "/mnt/jw01-aruk-home01/projects/psa_functional_genomics/master_ATAC_ChIP_analyzer/macs2"
dataset_alignment_location = "/mnt/jw01-aruk-home01/projects/psa_functional_genomics/master_ATAC_ChIP_analyzer/clean_alignments"

dataset_info <- dataset_info %>% filter(condition %in% c("healthy", "patient", "synovium"))

data_for_diffbind <- dataset_info %>% select(id, patient, cell_type, condition, active_disease, female_sex, on_bDMARD_tsDMARD,
on_csDMARD, on_steroid, On_MTX, group, age, disease_duration, cell_type) %>% mutate(folder = paste0(id, "_ATAC"))
data_for_diffbind <- data_for_diffbind %>% mutate(Peaks = paste0(dataset_peaks_location, "/", folder, "/", folder, "_peaks_nosex.narrowPeak"), 
bamReads = paste0(dataset_alignment_location, "/", folder, "/", folder, "_align_filtered_macs2.bam"), PeakCaller = "narrow")

load(".local/diffbind_object.Rdata")

# set the parameters right for diffbind as it wants specific names for the variables
data_object$class[DBA_TISSUE, ] = data_for_diffbind$active_disease
data_object$class[DBA_FACTOR, ] = data_for_diffbind$female_sex
data_object$class[DBA_TREATMENT, ] = data_for_diffbind$condition

# using option B 
norm_dba_object <- dba.normalize(data_object, normalize=DBA_NORM_NATIVE,
library=DBA_LIBSIZE_PEAKREADS,background=FALSE)

## running individual contrasts for the results we want

In [20]:
# CD4
# active vs non-active
# include the healthys

res_condition <- dba(norm_dba_object, data_for_diffbind$cell_type == "CD4" & data_for_diffbind$condition != "synovium")
res_condition = dba.contrast(res_condition, design = "~ Tissue + Factor", contrast = c("Tissue", "1", "0") )
res_condition = dba.analyze(res_condition)
differential_peaks <- dba.report(res_condition, th = 1)
write.csv(differential_peaks, file=".local/diffbind_result/DE_CD4_active_vs_nonactive_with_sex_with_healthy.csv")

Computing results names...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

Normalize DESeq2 with defaults...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

Analyzing...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

gene-wi

In [21]:
# active vs healthy

res_condition <- dba(norm_dba_object, data_for_diffbind$cell_type == "CD4" & data_for_diffbind$condition != "synovium")
res_condition = dba.contrast(res_condition, design = "~ Tissue + Factor", contrast = c("Tissue", "1", "-1") )
res_condition = dba.analyze(res_condition)
differential_peaks <- dba.report(res_condition, th = 1)
write.csv(differential_peaks, file=".local/diffbind_result/DE_CD4_active_vs_healthy_with_sex.csv")

Computing results names...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

Normalize DESeq2 with defaults...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

Analyzing...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

gene-wi

In [22]:
# non active vs healthy

res_condition <- dba(norm_dba_object, data_for_diffbind$cell_type == "CD4" & data_for_diffbind$condition != "synovium")
res_condition = dba.contrast(res_condition, design = "~ Tissue + Factor", contrast = c("Tissue", "0", "-1") )
res_condition = dba.analyze(res_condition)
differential_peaks <- dba.report(res_condition, th = 1)
write.csv(differential_peaks, file=".local/diffbind_result/DE_CD4_nonactive_vs_healthy_with_sex.csv")

Computing results names...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

Normalize DESeq2 with defaults...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

Analyzing...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

gene-wi

In [23]:
# CD8 cells
# active vs non-active
# include the healthys

res_condition <- dba(norm_dba_object, data_for_diffbind$cell_type == "CD8" & data_for_diffbind$condition != "synovium")
res_condition = dba.contrast(res_condition, design = "~ Tissue + Factor", contrast = c("Tissue", "1", "0") )
res_condition = dba.analyze(res_condition)
differential_peaks <- dba.report(res_condition, th = 1)
write.csv(differential_peaks, file=".local/diffbind_result/DE_CD8_active_vs_nonactive_with_sex_with_healthy.csv")

Computing results names...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

Normalize DESeq2 with defaults...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

Analyzing...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

gene-wi

In [24]:
# active vs healthy

res_condition <- dba(norm_dba_object, data_for_diffbind$cell_type == "CD8" & data_for_diffbind$condition != "synovium")
res_condition = dba.contrast(res_condition, design = "~ Tissue + Factor", contrast = c("Tissue", "1", "-1") )
res_condition = dba.analyze(res_condition)
differential_peaks <- dba.report(res_condition, th = 1)
write.csv(differential_peaks, file=".local/diffbind_result/DE_CD8_active_vs_healthy_with_sex.csv")

Computing results names...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

Normalize DESeq2 with defaults...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

Analyzing...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

gene-wi

In [25]:
# non active vs healthy

res_condition <- dba(norm_dba_object, data_for_diffbind$cell_type == "CD8" & data_for_diffbind$condition != "synovium")
res_condition = dba.contrast(res_condition, design = "~ Tissue + Factor", contrast = c("Tissue", "0", "-1") )
res_condition = dba.analyze(res_condition)
differential_peaks <- dba.report(res_condition, th = 1)
write.csv(differential_peaks, file=".local/diffbind_result/DE_CD8_nonactive_vs_healthy_with_sex.csv")

Computing results names...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

Normalize DESeq2 with defaults...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

Analyzing...

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

gene-wi

In [26]:
sessionInfo()

R version 4.1.2 (2021-11-01)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Scientific Linux 7.4 (Nitrogen)

Matrix products: default
BLAS:   /opt/gridware/el7/apps/gcc/R/4.1.2/lib64/R/lib/libRblas.so
LAPACK: /opt/gridware/el7/apps/gcc/R/4.1.2/lib64/R/lib/libRlapack.so

locale:
 [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
 [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
 [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] pheatmap_1.0.12             readxl_1.3.1               
 [3] DiffBind_3.4.7              SummarizedExperiment_1.24.0
 [5] Biobase_2.54.0              MatrixGenerics_1.6.0       
 [7] matrixStats_0.61.0          Genom