**Set environment**

In [1]:
### load the project configuration; any messages and warnings raised while
### sourcing the file are silenced
suppressWarnings(
    suppressMessages(
        source("../run_config_project_sing.R")
    )
)

### print the directory layout set up by the configuration
show_env()

You are working on        Singularity 
BASE DIRECTORY (FD_BASE): /mount 
REPO DIRECTORY (FD_REPO): /mount/repo 
WORK DIRECTORY (FD_WORK): /mount/work 
DATA DIRECTORY (FD_DATA): /mount/data 

You are working with      ENCODE FCC 
PATH OF PROJECT (FD_PRJ): /mount/repo/Proj_ENCODE_FCC 
PROJECT RESULTS (FD_RES): /mount/repo/Proj_ENCODE_FCC/results 
PROJECT SCRIPTS (FD_EXE): /mount/repo/Proj_ENCODE_FCC/scripts 
PROJECT DATA    (FD_DAT): /mount/repo/Proj_ENCODE_FCC/data 
PROJECT NOTE    (FD_NBK): /mount/repo/Proj_ENCODE_FCC/notebooks 
PROJECT DOCS    (FD_DOC): /mount/repo/Proj_ENCODE_FCC/docs 
PROJECT LOG     (FD_LOG): /mount/repo/Proj_ENCODE_FCC/log 
PROJECT APP     (FD_APP): /mount/repo/Proj_ENCODE_FCC/app 
PROJECT REF     (FD_REF): /mount/repo/Proj_ENCODE_FCC/references 



## Import metadata from reference file

In [2]:
### input: metadata table located in the project reference folder
txt_fname = "ENCODE_K562_hg38_ATAC_DNase.tsv"
txt_fdiry = file.path(FD_REF, "encode_open_chromatin")
txt_fpath = file.path(txt_fdiry, txt_fname)

### import the table without printing the column specification
dat_metadata = read_tsv(txt_fpath, show_col_types = FALSE)

### keep a working copy and display it
dat = dat_metadata
fun_display_table(dat)

Assay,Biosample,Index_Experiment,Index_Process,Index_File,File_Type,Output_Type,isogenic_replicate,Genome,File_Summary,Lab
ATAC,K562,ENCSR868FGK,ENCODE4 v1.9.1 GRCh38 (ENCAN824UKX),ENCFF357GNC,bigWig,signal p-value,"1, 2, 3",hg38,call-macs2_signal_track_pooled/rep.pooled.pval.signal.bigwig,"Michael Snyder, Stanford"
ATAC,K562,ENCSR868FGK,ENCODE4 v1.9.1 GRCh38 (ENCAN824UKX),ENCFF333TAT,bed narrowPeak,pseudoreplicated peaks,"1, 2, 3",hg38,call-overlap_ppr/pooled-pr1_vs_pooled-pr2.overlap.bfilt.narrowPeak.gz,"Michael Snyder, Stanford"
ATAC,K562,ENCSR868FGK,ENCODE4 v1.9.1 GRCh38 (ENCAN824UKX),ENCFF948AFM,bed narrowPeak,IDR thresholded peaks,"1, 2, 3",hg38,call-idr_ppr/pooled-pr1_vs_pooled-pr2.idr0.05.bfilt.narrowPeak.gz,"Michael Snyder, Stanford"
ATAC,K562,ENCSR483RKN,ENCODE4 v1.9.1 GRCh38 (ENCAN217QUL),ENCFF600FDO,bigWig,signal p-value,"1, 2",hg38,call-macs2_signal_track_pooled/rep.pooled.pval.signal.bigwig,"Michael Snyder, Stanford"
ATAC,K562,ENCSR483RKN,ENCODE4 v1.9.1 GRCh38 (ENCAN217QUL),ENCFF558BLC,bed narrowPeak,pseudoreplicated peaks,"1, 2",hg38,call-overlap_ppr/pooled-pr1_vs_pooled-pr2.overlap.bfilt.narrowPeak.gz,"Michael Snyder, Stanford"
ATAC,K562,ENCSR483RKN,ENCODE4 v1.9.1 GRCh38 (ENCAN217QUL),ENCFF925CYR,bed narrowPeak,IDR thresholded peaks,"1, 2",hg38,call-idr_ppr/pooled-pr1_vs_pooled-pr2.idr0.05.bfilt.narrowPeak.gz,"Michael Snyder, Stanford"
DNase,K562,ENCSR000EKS,ENCODE4 v3.0.0 GRCh38 (ENCAN694OCK),ENCFF972GVB,bigWig,read-depth normalized signal,1,hg38,call-starch_to_bigwig/normalized.nuclear.0.05.density.bw,"Gregory Crawford, Duke"
DNase,K562,ENCSR000EKS,ENCODE4 v3.0.0 GRCh38 (ENCAN694OCK),ENCFF274YGF,bed narrowPeak,peaks,1,hg38,call-compress/nuclear.0.001.peaks.narrowpeaks.bed.gz,"Gregory Crawford, Duke"
DNase,K562,ENCSR000EOT,ENCODE4 v3.0.0-alpha.2 GRCh38 (ENCAN780RWD),ENCFF414OGC,bigWig,read-depth normalized signal,1,hg38,call-starch_to_bigwig/normalized.nuclear.0.05.density.bw,"John Stamatoyannopoulos, UW"
DNase,K562,ENCSR000EOT,ENCODE4 v3.0.0-alpha.2 GRCh38 (ENCAN780RWD),ENCFF185XRG,bed narrowPeak,peaks,1,hg38,call-compress/nuclear.0.001.peaks.narrowpeaks.bed.gz,"John Stamatoyannopoulos, UW"


## Generate download commands
```
wget -O FILE URL
```

In [3]:
### start from the imported metadata table
dat = dat_metadata

### build one wget command per file in a single pass:
###   File_Ext1 -> extension of the local file name
###   File_Ext2 -> extension used in the ENCODE download URL
dat = dat %>%
    dplyr::mutate(
        ### file types containing "bed" get "bed.gz"; all others are bigWig
        Is_Bed    = str_detect(File_Type, "bed"),
        File_Ext1 = ifelse(Is_Bed, "bed.gz", "bw"),
        File_Ext2 = ifelse(Is_Bed, "bed.gz", "bigWig"),
        File_Name = paste(
            Biosample, "hg38", Index_Experiment, Index_File, Assay, File_Ext1,
            sep = "."
        ),
        File_URL_Download = file.path(
            "https://www.encodeproject.org/files",
            Index_File,
            "@@download",
            paste(Index_File, File_Ext2, sep = ".")
        ),
        CMD = paste(
            "wget",
            "--append-output=run_download.log.txt",
            "-O", File_Name,
            File_URL_Download
        )
    ) %>%
    dplyr::select(CMD)

### prepend the command that empties the log file; the single column is then
### named after the shebang so that a header row written from this table
### reads as the first line of a bash script
dat = rbind('echo -n "" > run_download.log.txt', dat)
names(dat) = "#!/bin/bash"

### keep the command table and display it
dat_cmd = dat
fun_display_table(dat)

#!/bin/bash
"echo -n """" > run_download.log.txt"
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR868FGK.ENCFF357GNC.ATAC.bw https://www.encodeproject.org/files/ENCFF357GNC/@@download/ENCFF357GNC.bigWig
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR868FGK.ENCFF333TAT.ATAC.bed.gz https://www.encodeproject.org/files/ENCFF333TAT/@@download/ENCFF333TAT.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR868FGK.ENCFF948AFM.ATAC.bed.gz https://www.encodeproject.org/files/ENCFF948AFM/@@download/ENCFF948AFM.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR483RKN.ENCFF600FDO.ATAC.bw https://www.encodeproject.org/files/ENCFF600FDO/@@download/ENCFF600FDO.bigWig
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR483RKN.ENCFF558BLC.ATAC.bed.gz https://www.encodeproject.org/files/ENCFF558BLC/@@download/ENCFF558BLC.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR483RKN.ENCFF925CYR.ATAC.bed.gz https://www.encodeproject.org/files/ENCFF925CYR/@@download/ENCFF925CYR.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR000EKS.ENCFF972GVB.DNase.bw https://www.encodeproject.org/files/ENCFF972GVB/@@download/ENCFF972GVB.bigWig
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR000EKS.ENCFF274YGF.DNase.bed.gz https://www.encodeproject.org/files/ENCFF274YGF/@@download/ENCFF274YGF.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR000EOT.ENCFF414OGC.DNase.bw https://www.encodeproject.org/files/ENCFF414OGC/@@download/ENCFF414OGC.bigWig


## Save to script
Write the generated commands, one per line, to a bash script.

In [4]:
### set output path
txt_fdiry = file.path(FD_DAT, "external", "encode_open_chromatin")
txt_fname = "run_download.sh"
txt_fpath = file.path(txt_fdiry, txt_fname)

### create the output folder; recursive = TRUE also creates missing parent
### folders (e.g. "external"), which a non-recursive call would silently fail
### to do, leaving the write below to error out
dir.create(txt_fdiry, showWarnings = FALSE, recursive = TRUE)

### save table: the column name ("#!/bin/bash") is written as the header line,
### followed by one command per row
dat = dat_cmd
write_tsv(dat, txt_fpath)