**Set environment**

In [1]:
### load the project configuration quietly (messages and warnings muted),
### then print the folders it resolved
txt_fpath_config = "../run_config_project_sing.R"
suppressMessages(suppressWarnings(source(txt_fpath_config)))
show_env()

You are working on        Singularity 
BASE DIRECTORY (FD_BASE): /mount 
REPO DIRECTORY (FD_REPO): /mount/repo 
WORK DIRECTORY (FD_WORK): /mount/work 
DATA DIRECTORY (FD_DATA): /mount/data 

You are working with      ENCODE FCC 
PATH OF PROJECT (FD_PRJ): /mount/repo/Proj_CombEffect_ENCODE_FCC 
PROJECT RESULTS (FD_RES): /mount/repo/Proj_CombEffect_ENCODE_FCC/results 
PROJECT SCRIPTS (FD_EXE): /mount/repo/Proj_CombEffect_ENCODE_FCC/scripts 
PROJECT DATA    (FD_DAT): /mount/repo/Proj_CombEffect_ENCODE_FCC/data 
PROJECT NOTE    (FD_NBK): /mount/repo/Proj_CombEffect_ENCODE_FCC/notebooks 
PROJECT DOCS    (FD_DOC): /mount/repo/Proj_CombEffect_ENCODE_FCC/docs 
PROJECT LOG     (FD_LOG): /mount/repo/Proj_CombEffect_ENCODE_FCC/log 
PROJECT APP     (FD_APP): /mount/repo/Proj_CombEffect_ENCODE_FCC/app 
PROJECT REF     (FD_REF): /mount/repo/Proj_CombEffect_ENCODE_FCC/references 



## Import metadata from reference file
Read the table with the file accession numbers of the ENCODE CRISPRi HCR-FlowFISH (CASA) element quantification files

In [2]:
### locate the metadata table under the project reference folder
txt_fname = "ENCODE_K562_hg38_CRISPRi_HCRFF_CASA.tsv"
txt_fdiry = file.path(FD_REF, "encode_crispri_hcrff")
txt_fpath = file.path(txt_fdiry, txt_fname)

### import the table (column type message muted) and keep an untouched copy
### that the later cells start from
dat_metadata = read_tsv(txt_fpath, show_col_types = FALSE)
dat = dat_metadata

### show
fun_display_table(dat_metadata)

Assay,Biosample,Index_Experiment,Index_File,File_Type,Output_Type,Genome,Target
Flow-FISH CRISPR screen,K562,ENCSR793WTM,ENCFF863AVQ,bed CRISPR element quantifications,element quantifications,hg38,CAPRIN1
Flow-FISH CRISPR screen,K562,ENCSR794SPV,ENCFF619FXH,bed CRISPR element quantifications,element quantifications,hg38,CAT
Flow-FISH CRISPR screen,K562,ENCSR157WAN,ENCFF270LYK,bed CRISPR element quantifications,element quantifications,hg38,CD164
Flow-FISH CRISPR screen,K562,ENCSR979QQN,ENCFF813GCK,bed CRISPR element quantifications,element quantifications,hg38,ERP29
Flow-FISH CRISPR screen,K562,ENCSR382ZJS,ENCFF786ZPA,bed CRISPR element quantifications,element quantifications,hg38,FADS1
Flow-FISH CRISPR screen,K562,ENCSR968CIN,ENCFF149IDL,bed CRISPR element quantifications,element quantifications,hg38,FADS2
Flow-FISH CRISPR screen,K562,ENCSR630WLB,ENCFF227DUX,bed CRISPR element quantifications,element quantifications,hg38,FADS3
Flow-FISH CRISPR screen,K562,ENCSR278YTB,ENCFF151MNC,bed CRISPR element quantifications,element quantifications,hg38,FEN1
Flow-FISH CRISPR screen,K562,ENCSR917XEU,ENCFF413WYU,bed CRISPR element quantifications,element quantifications,hg38,GATA1
Flow-FISH CRISPR screen,K562,ENCSR564EPW,ENCFF845YHV,bed CRISPR element quantifications,element quantifications,hg38,HBE1


In [5]:
### start from the untouched metadata
dat = dat_metadata

### download link layout: <base>/<file accession>/@@download/<file accession>.bed.gz
txt_url_base = "https://www.encodeproject.org/files"
dat = dplyr::mutate(
    dat,
    File_URL_Download = file.path(
        txt_url_base,
        Index_File,
        "@@download",
        paste0(Index_File, ".bed.gz")
    )
)

### preview the first rows
head(dat)

Assay,Biosample,Index_Experiment,Index_File,File_Type,Output_Type,Genome,Target,File_URL_Download
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
Flow-FISH CRISPR screen,K562,ENCSR793WTM,ENCFF863AVQ,bed CRISPR element quantifications,element quantifications,hg38,CAPRIN1,https://www.encodeproject.org/files/ENCFF863AVQ/@@download/ENCFF863AVQ.bed.gz
Flow-FISH CRISPR screen,K562,ENCSR794SPV,ENCFF619FXH,bed CRISPR element quantifications,element quantifications,hg38,CAT,https://www.encodeproject.org/files/ENCFF619FXH/@@download/ENCFF619FXH.bed.gz
Flow-FISH CRISPR screen,K562,ENCSR157WAN,ENCFF270LYK,bed CRISPR element quantifications,element quantifications,hg38,CD164,https://www.encodeproject.org/files/ENCFF270LYK/@@download/ENCFF270LYK.bed.gz
Flow-FISH CRISPR screen,K562,ENCSR979QQN,ENCFF813GCK,bed CRISPR element quantifications,element quantifications,hg38,ERP29,https://www.encodeproject.org/files/ENCFF813GCK/@@download/ENCFF813GCK.bed.gz
Flow-FISH CRISPR screen,K562,ENCSR382ZJS,ENCFF786ZPA,bed CRISPR element quantifications,element quantifications,hg38,FADS1,https://www.encodeproject.org/files/ENCFF786ZPA/@@download/ENCFF786ZPA.bed.gz
Flow-FISH CRISPR screen,K562,ENCSR968CIN,ENCFF149IDL,bed CRISPR element quantifications,element quantifications,hg38,FADS2,https://www.encodeproject.org/files/ENCFF149IDL/@@download/ENCFF149IDL.bed.gz


## Generate download commands
```
wget -O FILE URL
```

In [10]:
### start from the untouched metadata
dat = dat_metadata

### one pass adds three columns; later columns reuse the earlier ones
###   File_URL_Download: <base>/<file accession>/@@download/<file accession>.bed.gz
###   File_Name:         dot-joined sample / genome / accessions / assay tags / target
###   CMD:               wget call that logs to run_download.log.txt and saves to File_Name
txt_url_base = "https://www.encodeproject.org/files"
dat = dplyr::mutate(
    dat,
    File_URL_Download = file.path(
        txt_url_base,
        Index_File,
        "@@download",
        paste0(Index_File, ".bed.gz")
    ),
    File_Name = paste(
        Biosample, Genome, Index_Experiment, Index_File,
        "CRISPRi_HCRFF", "CASA", Target, "bed.gz",
        sep = "."
    ),
    CMD = paste(
        "wget", "--append-output=run_download.log.txt",
        "-O", File_Name, File_URL_Download
    )
)

### check the generated file names
dplyr::select(dat, File_Name)

File_Name
<chr>
K562.hg38.ENCSR793WTM.ENCFF863AVQ.CRISPRi_HCRFF.CASA.CAPRIN1.bed.gz
K562.hg38.ENCSR794SPV.ENCFF619FXH.CRISPRi_HCRFF.CASA.CAT.bed.gz
K562.hg38.ENCSR157WAN.ENCFF270LYK.CRISPRi_HCRFF.CASA.CD164.bed.gz
K562.hg38.ENCSR979QQN.ENCFF813GCK.CRISPRi_HCRFF.CASA.ERP29.bed.gz
K562.hg38.ENCSR382ZJS.ENCFF786ZPA.CRISPRi_HCRFF.CASA.FADS1.bed.gz
K562.hg38.ENCSR968CIN.ENCFF149IDL.CRISPRi_HCRFF.CASA.FADS2.bed.gz
K562.hg38.ENCSR630WLB.ENCFF227DUX.CRISPRi_HCRFF.CASA.FADS3.bed.gz
K562.hg38.ENCSR278YTB.ENCFF151MNC.CRISPRi_HCRFF.CASA.FEN1.bed.gz
K562.hg38.ENCSR917XEU.ENCFF413WYU.CRISPRi_HCRFF.CASA.GATA1.bed.gz
K562.hg38.ENCSR564EPW.ENCFF845YHV.CRISPRi_HCRFF.CASA.HBE1.bed.gz


In [9]:
### start from the untouched metadata
dat = dat_metadata

### one pass adds the download link, the local file name and the wget call;
### later columns reuse the earlier ones
txt_url_base = "https://www.encodeproject.org/files"
dat = dplyr::mutate(
    dat,
    File_URL_Download = file.path(
        txt_url_base,
        Index_File,
        "@@download",
        paste0(Index_File, ".bed.gz")
    ),
    File_Name = paste(
        Biosample, Genome, Index_Experiment, Index_File,
        "CRISPRi_HCRFF", "CASA", Target, "bed.gz",
        sep = "."
    ),
    CMD = paste(
        "wget", "--append-output=run_download.log.txt",
        "-O", File_Name, File_URL_Download
    )
)

### keep only the commands and put a line that empties the log file on top
dat = dplyr::select(dat, CMD)
dat = rbind('echo -n "" > run_download.log.txt', dat)

### name the single column after the Shebang so it heads the command list
colnames(dat) = "#!/bin/bash"

### assign and show
dat_cmd = dat
fun_display_table(dat_cmd)

#!/bin/bash
"echo -n """" > run_download.log.txt"
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR793WTM.ENCFF863AVQ.CRISPRi_HCRFF.CASA.CAPRIN1.bed.gz https://www.encodeproject.org/files/ENCFF863AVQ/@@download/ENCFF863AVQ.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR794SPV.ENCFF619FXH.CRISPRi_HCRFF.CASA.CAT.bed.gz https://www.encodeproject.org/files/ENCFF619FXH/@@download/ENCFF619FXH.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR157WAN.ENCFF270LYK.CRISPRi_HCRFF.CASA.CD164.bed.gz https://www.encodeproject.org/files/ENCFF270LYK/@@download/ENCFF270LYK.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR979QQN.ENCFF813GCK.CRISPRi_HCRFF.CASA.ERP29.bed.gz https://www.encodeproject.org/files/ENCFF813GCK/@@download/ENCFF813GCK.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR382ZJS.ENCFF786ZPA.CRISPRi_HCRFF.CASA.FADS1.bed.gz https://www.encodeproject.org/files/ENCFF786ZPA/@@download/ENCFF786ZPA.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR968CIN.ENCFF149IDL.CRISPRi_HCRFF.CASA.FADS2.bed.gz https://www.encodeproject.org/files/ENCFF149IDL/@@download/ENCFF149IDL.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR630WLB.ENCFF227DUX.CRISPRi_HCRFF.CASA.FADS3.bed.gz https://www.encodeproject.org/files/ENCFF227DUX/@@download/ENCFF227DUX.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR278YTB.ENCFF151MNC.CRISPRi_HCRFF.CASA.FEN1.bed.gz https://www.encodeproject.org/files/ENCFF151MNC/@@download/ENCFF151MNC.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR917XEU.ENCFF413WYU.CRISPRi_HCRFF.CASA.GATA1.bed.gz https://www.encodeproject.org/files/ENCFF413WYU/@@download/ENCFF413WYU.bed.gz


## Save to script
Save the command lines for each row into a bash script

In [13]:
### set output path
### NOTE(review): this notebook builds commands for CRISPRi HCR-FlowFISH (CASA)
### files, yet the script is written under "encode_open_chromatin" -- confirm
### this folder is intended and not carried over from another notebook
txt_fdiry = file.path(FD_DAT, "external", "encode_open_chromatin")
txt_fname = "run_download.sh"
txt_fpath = file.path(txt_fdiry, txt_fname)

### arrange table
### the single column is named after the Shebang so that the header row
### written by write_tsv becomes the first line of the script
dat = dat_cmd
colnames(dat) = "#!/bin/bash"

### save table
### recursive = TRUE also creates missing parent folders (e.g. "external");
### without it dir.create fails silently here (warnings are muted) and
### write_tsv then errors on the missing folder
dir.create(txt_fdiry, showWarnings = FALSE, recursive = TRUE)
write_tsv(dat, txt_fpath)