**Set environment**

In [1]:
### load the project configuration script (path is relative to the notebook's working directory);
### any messages and warnings raised while sourcing are suppressed
suppressMessages(suppressWarnings(source("../run_config_project_sing.R")))
### print the directory settings currently in use (FD_BASE, FD_RES, FD_REF, ...);
### show_env() is not defined in this notebook -- presumably provided by the sourced script
show_env()

You are working on        Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
REPO DIRECTORY (FD_REPO): /data/reddylab/Kuei/repo 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/work 
DATA DIRECTORY (FD_DATA): /data/reddylab/Kuei/data 

You are working with      ENCODE FCC 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC 
PROJECT RESULTS (FD_RES): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results 
PROJECT SCRIPTS (FD_EXE): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts 
PROJECT DATA    (FD_DAT): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data 
PROJECT NOTE    (FD_NBK): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/notebooks 
PROJECT DOCS    (FD_DOC): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/docs 
PROJECT LOG     (FD_LOG): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/log 
PROJECT REF     (FD_REF): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/references 



## Prepare

**Set global variable**

In [2]:
### region-set folders to process; every element is named after itself so that
### lapply() over this vector returns a list keyed by folder name
VEC_TXT_FOLDER = setNames(nm = c(
    "fcc_astarr_macs_input_overlap",
    "fcc_astarr_macs_input_union"
))

### keep the scratch copy and list the folders, one per line
vec = VEC_TXT_FOLDER
for (txt in VEC_TXT_FOLDER) cat(txt, "\n")

fcc_astarr_macs_input_overlap 
fcc_astarr_macs_input_union 


In [3]:
### file name of the input UMAP table; used below both to glob for the
### existing files and to read the table from each region-set folder
TXT_FNAME_INP = "umap.metric_euclidean.neighbors_070.coverage.encode_chipseq_full_log2p.tsv"

**View files**

In [4]:
### glob pattern: "*" stands in for the region-set folder so that the
### input table is matched under every folder at once
txt_fname = TXT_FNAME_INP
txt_fdiry = file.path(FD_RES, "analysis_umap_cluster_by_coverage", "*", "fcc_starrmpracrispri_vote2", "run_umap")
txt_fglob = file.path(txt_fdiry, txt_fname)

### list the matched files, one per line
vec = Sys.glob(txt_fglob)
for (txt in vec) cat(txt, "\n")

/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/analysis_umap_cluster_by_coverage/fcc_astarr_macs_input_overlap/fcc_starrmpracrispri_vote2/run_umap/umap.metric_euclidean.neighbors_070.coverage.encode_chipseq_full_log2p.tsv 
/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/analysis_umap_cluster_by_coverage/fcc_astarr_macs_input_union/fcc_starrmpracrispri_vote2/run_umap/umap.metric_euclidean.neighbors_070.coverage.encode_chipseq_full_log2p.tsv 


## Import data

**Import UMAP coordinates of regions**

In [6]:
### read the UMAP table of every region set; the result is a list
### named by folder because VEC_TXT_FOLDER carries its values as names
lst_dat_region_umap_import = lapply(VEC_TXT_FOLDER, function(txt_folder){
    ### <FD_RES>/analysis_umap_cluster_by_coverage/<folder>/fcc_starrmpracrispri_vote2/run_umap/<file>
    txt_fpath = file.path(
        FD_RES, 
        "analysis_umap_cluster_by_coverage", 
        txt_folder,
        "fcc_starrmpracrispri_vote2",
        "run_umap",
        TXT_FNAME_INP
    )
    read_tsv(txt_fpath, show_col_types = FALSE)
})

### show the dimensions of each table
lst = lst_dat_region_umap_import
res = lapply(lst, dim)
print(res)

### preview the first table
dat = lst[[1]]
fun_display_table(head(dat, 3))

$fcc_astarr_macs_input_overlap
[1] 33953     3

$fcc_astarr_macs_input_union
[1] 39788     3



UMAP1,UMAP2,Region
-1.680812,5.204691,chr1:605104-605675
8.620607,5.645398,chr1:778233-779389
-1.475871,3.743281,chr1:818708-819335


## Summarize

In [8]:
### split the Region string (chrom:start-end) into BED-style columns
lst = lst_dat_region_umap_import
lst = lapply(lst, function(dat){
    ### convert into bed file format
    ### - sep is given explicitly so that only ":" and "-" delimit the fields
    ###   (the default splits on every non-alphanumeric character, e.g. "_")
    ### - convert = TRUE stores the coordinates as integers, so that sorting by
    ###   ChromStart / ChromEnd is numeric rather than alphabetical
    dat = dat %>% 
        tidyr::separate(
            Region, 
            into    = c("Chrom", "ChromStart", "ChromEnd"), 
            sep     = "[:-]",
            remove  = FALSE,
            convert = TRUE
        ) %>%
        dplyr::select(Chrom, ChromStart, ChromEnd, Region, UMAP1, UMAP2) %>%
        dplyr::distinct()
    return(dat)
})

### assign and show
lst_dat_region_umap_arrange = lst

res = lapply(lst, dim)
print(res)

dat = lst[[1]]
fun_display_table(head(dat, 3))

$fcc_astarr_macs_input_overlap
[1] 33953     6

$fcc_astarr_macs_input_union
[1] 39788     6



Chrom,ChromStart,ChromEnd,Region,UMAP1,UMAP2
chr1,605104,605675,chr1:605104-605675,-1.680812,5.204691
chr1,778233,779389,chr1:778233-779389,8.620607,5.645398
chr1,818708,819335,chr1:818708-819335,-1.475871,3.743281


## Export

In [10]:
### create the summary folder of every region set
for (txt_folder in VEC_TXT_FOLDER){
    ### set file directory
    txt_fdiry = file.path(
        FD_RES, 
        "analysis_umap_cluster_by_coverage", 
        txt_folder,
        "fcc_starrmpracrispri_vote2",
        "summary"
    )
    
    ### create the directory (and missing parents) without going through a shell,
    ### so paths with spaces or shell metacharacters are handled correctly;
    ### existing directories are skipped, a failed creation still raises a warning
    if (!dir.exists(txt_fdiry)){
        dir.create(txt_fdiry, recursive = TRUE)
    }
}

In [11]:
### write the arranged UMAP table of every region set into its summary folder
### (the output file name is the same for all folders)
txt_fname = "region.umap.metric_euclidean.neighbors_070.coverage.encode_chipseq_full_log2p.tsv"

for (txt_folder in VEC_TXT_FOLDER){

    ### get tables
    dat_region_result = lst_dat_region_umap_arrange[[txt_folder]]
    
    ### set file directory
    txt_fdiry = file.path(
        FD_RES, 
        "analysis_umap_cluster_by_coverage", 
        txt_folder,
        "fcc_starrmpracrispri_vote2",
        "summary"
    )
    txt_fpath = file.path(txt_fdiry, txt_fname)
    
    ### sort by genomic position; the coordinates are coerced to numeric for
    ### ordering only, since they may be stored as character strings, in which
    ### case a plain arrange() would sort them alphabetically ("1000" < "605")
    dat = dat_region_result
    dat = dat %>% dplyr::arrange(Chrom, as.numeric(ChromStart), as.numeric(ChromEnd))
    
    ### write table
    write_tsv(dat, txt_fpath)
}

**Save a copy to reference folder**

In [14]:
### save the table of the "overlap" region set into the shared reference folder
### NOTE(review): the file name says "fcc_starrmpra_vote2" while the table is read from
### the "fcc_starrmpracrispri_vote2" folder -- confirm that the name is intended
txt_fdiry = file.path(FD_REF, "fcc_region_results")
txt_fname = "region.fcc_starrmpra_vote2.umap.metric_euclidean.neighbors_070.coverage.encode_chipseq_full_log2p.tsv"
txt_fpath = file.path(txt_fdiry, txt_fname)

### make sure the reference folder exists before writing
if (!dir.exists(txt_fdiry)){
    dir.create(txt_fdiry, recursive = TRUE)
}

idx = "fcc_astarr_macs_input_overlap"
dat = lst_dat_region_umap_arrange[[idx]]

### sort by genomic position; the coordinates are coerced to numeric for
### ordering only, since they may be stored as character strings, in which
### case a plain arrange() would sort them alphabetically ("1000" < "605")
dat = dat %>% dplyr::arrange(Chrom, as.numeric(ChromStart), as.numeric(ChromEnd)) 
write_tsv(dat, txt_fpath)