**Set environment**

In [1]:
# Source the project configuration script, silencing any messages and
# warnings emitted while it loads.
# NOTE(review): presumably this script attaches the packages used in later
# cells (readr/dplyr/stringr) and defines the FD_* path variables,
# show_env() and fun_display_table() -- confirm against the config script.
suppressMessages(suppressWarnings(source("../run_config_project_sing.R")))
# Print the working environment and the project directory paths.
show_env()

You are working on        Singularity 
BASE DIRECTORY (FD_BASE): /mount 
REPO DIRECTORY (FD_REPO): /mount/repo 
WORK DIRECTORY (FD_WORK): /mount/work 
DATA DIRECTORY (FD_DATA): /mount/data 

You are working with      ENCODE FCC 
PATH OF PROJECT (FD_PRJ): /mount/repo/Proj_ENCODE_FCC 
PROJECT RESULTS (FD_RES): /mount/repo/Proj_ENCODE_FCC/results 
PROJECT SCRIPTS (FD_EXE): /mount/repo/Proj_ENCODE_FCC/scripts 
PROJECT DATA    (FD_DAT): /mount/repo/Proj_ENCODE_FCC/data 
PROJECT NOTE    (FD_NBK): /mount/repo/Proj_ENCODE_FCC/notebooks 
PROJECT DOCS    (FD_DOC): /mount/repo/Proj_ENCODE_FCC/docs 
PROJECT LOG     (FD_LOG): /mount/repo/Proj_ENCODE_FCC/log 
PROJECT APP     (FD_APP): /mount/repo/Proj_ENCODE_FCC/app 
PROJECT REF     (FD_REF): /mount/repo/Proj_ENCODE_FCC/references 



## Import metadata from reference file
Read the table with the file accession numbers of the ENCODE CRISPRi HCR-FlowFISH (HCRFF) files (element quantifications and perturbation signal tracks)

In [2]:
### locate the metadata table under the project reference directory
txt_fdiry = file.path(FD_REF, "encode_crispri_hcrff")
txt_fname = "ENCODE_K562_hg38_CRISPRi_HCRFF.tsv"
txt_fpath = file.path(txt_fdiry, txt_fname)

### import the table (column-type message silenced) and keep a named copy
dat_metadata = read_tsv(txt_fpath, show_col_types = FALSE)
dat = dat_metadata

### preview the first rows
fun_display_table(head(dat_metadata))

Assay,Biosample,Index_Experiment,Index_Process,Index_File,File_Type,Output_Type,Genome,Target,File_Summary,Lab
Flow-FISH CRISPR screen,K562,ENCSR793WTM,Lab custom GRCh38 (ENCAN194INC),ENCFF863AVQ,bed CRISPR element quantifications,element quantifications,hg38,CAPRIN1,CAPRIN1_peakwise_file,"Pardis Sabeti, Broad"
Flow-FISH CRISPR screen,K562,ENCSR793WTM,Lab custom GRCh38 (ENCAN194INC),ENCFF444UXP,bigWig,perturbation signal,hg38,CAPRIN1,CAPRIN1_perturb_signal_R1,"Pardis Sabeti, Broad"
Flow-FISH CRISPR screen,K562,ENCSR793WTM,Lab custom GRCh38 (ENCAN194INC),ENCFF994KKV,bigWig,perturbation signal,hg38,CAPRIN1,CAPRIN1_perturb_signal_R2,"Pardis Sabeti, Broad"
Flow-FISH CRISPR screen,K562,ENCSR794SPV,Lab custom GRCh38 (ENCAN095VUQ),ENCFF619FXH,bed CRISPR element quantifications,element quantifications,hg38,CAT,CAT_peakwise_file,"Pardis Sabeti, Broad"
Flow-FISH CRISPR screen,K562,ENCSR794SPV,Lab custom GRCh38 (ENCAN095VUQ),ENCFF918VCM,bigWig,perturbation signal,hg38,CAT,CAT_perturb_signal_R1,"Pardis Sabeti, Broad"
Flow-FISH CRISPR screen,K562,ENCSR794SPV,Lab custom GRCh38 (ENCAN095VUQ),ENCFF965PMF,bigWig,perturbation signal,hg38,CAT,CAT_perturb_signal_R2,"Pardis Sabeti, Broad"


## Generate download commands
```
wget -O FILE URL
```

In [3]:
### define helper function
fun_inner = function(txt){
    # Map one File_Summary string to its sample-type label.
    #
    # Args:
    #   txt: a single character string, e.g. "CAPRIN1_peakwise_file".
    # Returns:
    #   A length-one character vector: "CASA" when txt contains "peak",
    #   "signal_R1" / "signal_R2" when it contains that replicate tag, and
    #   NA_character_ when txt is missing or contains none of the keywords.
    #   If several keywords occur, the first one in keyword order wins.
    vec1 = c("peak", "signal_R1", "signal_R2")
    vec2 = c("CASA", "signal_R1", "signal_R2")

    # a missing or non-scalar input cannot be classified
    if (length(txt) != 1 || is.na(txt)) {
        return(NA_character_)
    }

    # the keywords are literal substrings, so match them as fixed text
    idx = vapply(vec1, function(pat) grepl(pat, txt, fixed = TRUE), logical(1))
    vec = vec2[idx]

    # always hand back exactly one value so callers get a plain character
    # vector rather than a list when a string matches zero or many keywords
    if (length(vec) == 0) {
        return(NA_character_)
    }
    return(vec[1])
}
fun_get_info = function(strings){
    # Vectorised wrapper around fun_inner.
    #
    # Args:
    #   strings: character vector of File_Summary values.
    # Returns:
    #   A character vector of the same length as `strings` (named by the
    #   input strings), with NA for entries that could not be classified.
    # vapply enforces one string per element, so the result is type-stable
    # even for empty input.
    res = vapply(strings, fun_inner, character(1))
    return(res)
}

### test function
# check the helper on one example of each File_Summary pattern; the cell
# fails loudly if the mapping ever changes
vec = c("CAPRIN1_peakwise_file", "CAPRIN1_perturb_signal_R1", "CAPRIN1_perturb_signal_R2")
stopifnot(identical(unname(fun_get_info(vec)), c("CASA", "signal_R1", "signal_R2")))
fun_get_info(vec)

In [4]:
### start from the imported metadata
dat = dat_metadata

### derive the local file name, the download URL, and the wget command
# File_Ext1 is the extension of the local file name;
# File_Ext2 is the extension used in the download URL.
# Later columns in the same mutate() call build on the earlier ones.
dat = dplyr::mutate(
    dat,
    File_Ext1   = ifelse(str_detect(File_Type, "bed"), "bed.gz", "bw"),
    File_Ext2   = ifelse(str_detect(File_Type, "bed"), "bed.gz", "bigWig"),
    Sample_Type = fun_get_info(File_Summary),
    File_Name   = paste(
        Biosample, Genome, Index_Experiment, Index_File,
        "CRISPRi_HCRFF",
        Sample_Type, Target, File_Ext1,
        sep = "."
    ),
    File_URL_Download = file.path(
        "https://www.encodeproject.org/files",
        Index_File,
        "@@download",
        paste(Index_File, File_Ext2, sep = ".")
    ),
    CMD = paste(
        "wget",
        "--append-output=run_download.log.txt",
        "-O",
        File_Name,
        File_URL_Download
    )
)

### turn the commands into a script-like one-column table
# keep only the command column, prepend the line that empties the log file,
# and use the shebang as the column header so it ends up as the first line
dat = dplyr::select(dat, CMD)
dat = rbind('echo -n "" > run_download.log.txt', dat)
colnames(dat) = "#!/bin/bash"

### assign and show
dat_cmd = dat
fun_display_table(dat)

#!/bin/bash
"echo -n """" > run_download.log.txt"
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR793WTM.ENCFF863AVQ.CRISPRi_HCRFF.CASA.CAPRIN1.bed.gz https://www.encodeproject.org/files/ENCFF863AVQ/@@download/ENCFF863AVQ.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR793WTM.ENCFF444UXP.CRISPRi_HCRFF.signal_R1.CAPRIN1.bw https://www.encodeproject.org/files/ENCFF444UXP/@@download/ENCFF444UXP.bigWig
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR793WTM.ENCFF994KKV.CRISPRi_HCRFF.signal_R2.CAPRIN1.bw https://www.encodeproject.org/files/ENCFF994KKV/@@download/ENCFF994KKV.bigWig
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR794SPV.ENCFF619FXH.CRISPRi_HCRFF.CASA.CAT.bed.gz https://www.encodeproject.org/files/ENCFF619FXH/@@download/ENCFF619FXH.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR794SPV.ENCFF918VCM.CRISPRi_HCRFF.signal_R1.CAT.bw https://www.encodeproject.org/files/ENCFF918VCM/@@download/ENCFF918VCM.bigWig
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR794SPV.ENCFF965PMF.CRISPRi_HCRFF.signal_R2.CAT.bw https://www.encodeproject.org/files/ENCFF965PMF/@@download/ENCFF965PMF.bigWig
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR157WAN.ENCFF270LYK.CRISPRi_HCRFF.CASA.CD164.bed.gz https://www.encodeproject.org/files/ENCFF270LYK/@@download/ENCFF270LYK.bed.gz
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR157WAN.ENCFF186XBW.CRISPRi_HCRFF.signal_R1.CD164.bw https://www.encodeproject.org/files/ENCFF186XBW/@@download/ENCFF186XBW.bigWig
wget --append-output=run_download.log.txt -O K562.hg38.ENCSR157WAN.ENCFF786CHF.CRISPRi_HCRFF.signal_R2.CD164.bw https://www.encodeproject.org/files/ENCFF786CHF/@@download/ENCFF786CHF.bigWig


## Save to script
Save the command lines for each row into a bash script

In [5]:
### set output path
txt_fdiry = file.path(FD_DAT, "external", "encode_crispri_hcrff")
txt_fname = "run_download.sh"
txt_fpath = file.path(txt_fdiry, txt_fname)

### save script
# recursive = TRUE: the output folder is nested two levels below FD_DAT, and
# a non-recursive dir.create() fails (silently, with warnings off) when the
# intermediate "external" folder does not exist yet
dir.create(txt_fdiry, showWarnings = FALSE, recursive = TRUE)
dat = dat_cmd

# The table has a single column whose header is the shebang line and whose
# rows are the shell commands. Write them as plain text lines rather than as
# a TSV: a delimited-file writer may quote or escape fields containing
# double quotes (depending on the readr version), which would break the
# `echo -n "" > ...` line of the script.
writeLines(c(colnames(dat), dat[[1]]), txt_fpath)