**Set environment**

In [1]:
### Source the project configuration script; messages and warnings raised
### while sourcing are silenced so they do not clutter the notebook output.
### NOTE(review): read_tsv / write_tsv / %>% and the FD_* path variables used
### in later cells are presumably provided by this script -- confirm.
suppressMessages(suppressWarnings(source("../config/config_sing.R")))
### Report the container name and the FD_* directory settings in use.
show_env()

You are in Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/out 
CODE DIRECTORY (FD_CODE): /data/reddylab/Kuei/code 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC 
PATH OF RESULTS (FD_RES): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc 
PATH OF LOG     (FD_LOG): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/log 


## Import data

In [2]:
### locate the peak annotation table (file name first, then its folder)
fname = "peak.annotation.screen.tsv"
fdiry = file.path(FD_RES, "results", "region", "KS91_K562_ASTARRseq_peak_macs_input")
fpath = file.path(fdiry, fname)

### load the table straight into its named variable;
### `dat` is kept as the generic working alias used across cells
dat_peak_screen = read_tsv(fpath, show_col_types = FALSE)
dat = dat_peak_screen

### report dimensions and preview the first rows
print(dim(dat_peak_screen))
head(dat_peak_screen)

[1] 247520      7


Chrom,Start,End,Peak,TMPRA_Gene,TMPRA_Dataset,CRISPR_HCFF
<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
chr1,10015,10442,chr1:10015-10442,,,
chr1,14253,14645,chr1:14253-14645,,,
chr1,16015,16477,chr1:16015-16477,,,
chr1,17237,17772,chr1:17237-17772,,,
chr1,28903,29613,chr1:28903-29613,,,
chr1,30803,31072,chr1:30803-31072,,,


In [3]:
### locate the per-peak Log2FC table (file name first, then its folder)
fname = "result.Log2FC.raw.deseq.starrmpra.tsv"
fdiry = file.path(FD_RES, "results", "comparison")
fpath = file.path(fdiry, fname)

### load the table straight into its named variable;
### `dat` is kept as the generic working alias used across cells
dat_peak_log2fc = read_tsv(fpath, show_col_types = FALSE)
dat = dat_peak_log2fc

### report dimensions and preview the first rows
print(dim(dat_peak_log2fc))
head(dat_peak_log2fc)

[1] 1722    4


Peak,ASTARR,TMPRA,WSTARR
<chr>,<dbl>,<dbl>,<dbl>
chr11:32870601-32871324,-0.56726545,-0.7592992,-0.45442671
chr11:32874343-32875070,-0.54920178,-0.4159155,-0.2399468
chr11:32884749-32885822,-0.66011484,0.2420861,-0.17415901
chr11:32892099-32894437,-0.04192326,0.9418955,0.42636977
chr11:32901572-32902485,-0.38438377,-0.854115,-0.09841904
chr11:32903491-32904506,-0.05446687,-0.7215339,-0.68078185


## Helper function

In [4]:
### Rank-normalise a numeric vector.
###
### Each value is replaced by its rank (ties share their average rank)
### divided by the number of non-missing values, so the largest observed
### value maps to 1 and all results lie in (0, 1].
###
### x       : vector to normalise; may contain NA
### returns : numeric vector of the same length as x; NA stays NA
fun_ranknorm = function(x){
    ### rank() leaves NA entries unranked (na.last = "keep"), so they must not
    ### be counted in the denominator either; dividing by length(x) would
    ### shrink every score whenever x contains missing values.
    n_obs = sum(!is.na(x))
    return(rank(x, na.last = "keep", ties.method = "average") / n_obs)
}

## Arrange

In [5]:
### keep only the peaks present in both the annotation table and the
### Log2FC table, matching rows on the shared "Peak" identifier
dat_peak_merge = dat_peak_screen %>%
    inner_join(dat_peak_log2fc, by = "Peak")

### `dat` is kept as the generic working alias used across cells
dat = dat_peak_merge
print(dim(dat_peak_merge))
head(dat_peak_merge)

[1] 1722   10


Chrom,Start,End,Peak,TMPRA_Gene,TMPRA_Dataset,CRISPR_HCFF,ASTARR,TMPRA,WSTARR
<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
chr11,4539569,4540043,chr11:4539569-4540043,HBE1,OL45,"HBE1,HBG1,HBG2,HBS1L,MYB",0.14501695,-0.9324732,-0.68945769
chr11,4551336,4552012,chr11:4551336-4552012,HBE1,OL45,,0.32125162,0.1172148,-0.01719732
chr11,4553969,4555012,chr11:4553969-4555012,HBE1,OL45,,0.14529397,2.1228188,0.32524277
chr11,4569016,4569992,chr11:4569016-4569992,HBE1,OL45,,0.01136464,-1.1864232,0.10228636
chr11,4577444,4578031,chr11:4577444-4578031,HBE1,OL45,,0.24524353,-0.569603,-0.06870693
chr11,4601808,4602487,chr11:4601808-4602487,HBE1,OL45,,-0.57215284,-1.03932,-0.53855046


In [6]:
### Reshape to long format and score in a single pipeline:
###   1. stack every column except the seven annotation columns into
###      (Assay, Log2FC) pairs, one row per peak and assay
###   2. rank-normalise Log2FC separately within each assay
dat_peak_score_full = dat_peak_merge %>%
    tidyr::gather(
        key = Assay, value = Log2FC,
        -Chrom, -Start, -End, -Peak,
        -TMPRA_Gene, -TMPRA_Dataset, -CRISPR_HCFF) %>%
    dplyr::group_by(Assay) %>%
    dplyr::mutate(RankNorm_Total = fun_ranknorm(Log2FC)) %>%
    dplyr::ungroup()

### `dat` is kept as the generic working alias used across cells
dat = dat_peak_score_full
print(dim(dat_peak_score_full))
head(dat_peak_score_full)

[1] 5166   10


Chrom,Start,End,Peak,TMPRA_Gene,TMPRA_Dataset,CRISPR_HCFF,Assay,Log2FC,RankNorm_Total
<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>
chr11,4539569,4540043,chr11:4539569-4540043,HBE1,OL45,"HBE1,HBG1,HBG2,HBS1L,MYB",ASTARR,0.14501695,0.6550523
chr11,4551336,4552012,chr11:4551336-4552012,HBE1,OL45,,ASTARR,0.32125162,0.7560976
chr11,4553969,4555012,chr11:4553969-4555012,HBE1,OL45,,ASTARR,0.14529397,0.655633
chr11,4569016,4569992,chr11:4569016-4569992,HBE1,OL45,,ASTARR,0.01136464,0.5673635
chr11,4577444,4578031,chr11:4577444-4578031,HBE1,OL45,,ASTARR,0.24524353,0.7154472
chr11,4601808,4602487,chr11:4601808-4602487,HBE1,OL45,,ASTARR,-0.57215284,0.1277584


In [7]:
### collapse the long table to one row per peak: the score of a peak is the
### mean of its rank-normalised values over all of its rows (one per assay)
dat_peak_score_mean = dat_peak_score_full %>%
    dplyr::group_by(Peak) %>%
    dplyr::summarise(RankNorm = mean(RankNorm_Total), .groups = "drop")

### `dat` is kept as the generic working alias used across cells
dat = dat_peak_score_mean
print(dim(dat_peak_score_mean))
head(dat_peak_score_mean)

[1] 1722    2


Peak,RankNorm
<chr>,<dbl>
chr11:32870601-32871324,0.1445993
chr11:32874343-32875070,0.2466125
chr11:32884749-32885822,0.3211382
chr11:32892099-32894437,0.7084785
chr11:32901572-32902485,0.2684863
chr11:32903491-32904506,0.258614


## Save results

In [8]:
### destination of the long-format (per peak and assay) score table
fname = "result.ranknorm.starrmpra.full.tsv"
fdiry = file.path(FD_RES, "results", "comparison")
fpath = file.path(fdiry, fname)

### show where the file goes, then write it as tab-separated text
dat = dat_peak_score_full
print(fpath)
write_tsv(dat_peak_score_full, fpath)

[1] "/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/comparison/result.ranknorm.starrmpra.full.tsv"


In [11]:
### destination of the per-peak mean score table
fname = "result.ranknorm.starrmpra.mean.tsv"
fdiry = file.path(FD_RES, "results", "comparison")
fpath = file.path(fdiry, fname)

### show where the file goes, then write it as tab-separated text
dat = dat_peak_score_mean
print(fpath)
write_tsv(dat_peak_score_mean, fpath)

[1] "/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/comparison/result.ranknorm.starrmpra.mean.tsv"
