# Estimate motif marginal effect

**Set environment**

In [1]:
# Load the project configuration for the Duke HARDAC server.
# With -v the script prints the directory layout shown in the cell output.
# NOTE(review): FD_RES, FD_LOG, FD_PRJ and NODE used by later cells are
# presumably exported here -- confirm against config_duke.sh.
source ../config_duke.sh -v

You are on Duke Server: HARDAC
BASE DIRECTORY:     /gpfs/fs1/data/reddylab/Kuei
PATH OF SOURCE:     /gpfs/fs1/data/reddylab/Kuei/source
PATH OF EXECUTABLE: /gpfs/fs1/data/reddylab/Kuei/exe
PATH OF ANNOTATION: /gpfs/fs1/data/reddylab/Kuei/annotation
PATH OF PROJECT:    /gpfs/fs1/data/reddylab/Kuei/GitRepo/Proj_CombEffect_STARRseq/notebooks
PATH OF RESULTS:    /gpfs/fs1/data/reddylab/Kuei/out/proj_combeffect



**R script**

In [33]:
cat > motif_marginal.R << 'EOF'

###################################################
# Set environment
###################################################
cat("\n++++++++++ Set environment  ++++++++++\n")

### The config is resolved relative to the working directory of the Rscript call
#source("/home/mount/project/config_sing.R")
source("config_sing.R")

###################################################
# Set global variables
###################################################
cat("\n++++++++++ Set global variables ++++++++++\n")

### Get arguments (positional, all seven are required)
ARGS = commandArgs(trailingOnly=TRUE)

### Fail early with a usage message: a missing argument would otherwise become
### NA and only surface later as "argument is not interpretable as logical"
if (length(ARGS) < 7) {
    stop(
        "Expected 7 arguments but got ", length(ARGS), ".\n",
        "Usage: Rscript motif_marginal.R TARGET IS_INPUT20X IS_LOG FDIRY ",
        "N_CORE THRESHOLD_COVER THRESHOLD_MOTIF",
        call. = FALSE)
}

TARGET          = as.character(ARGS[1])  # which chromosome or region to run
IS_INPUT20X     = as.logical(ARGS[2])    # is the new input being used
IS_LOG          = as.logical(ARGS[3])    # fit the model on log10 normalized counts
FDIRY           = as.character(ARGS[4])  # the name of the output folder
N_CORE          = as.integer(ARGS[5])    # number of cores to register during the parallelization
THRESHOLD_COVER = as.integer(ARGS[6])    # threshold for the low coverage filtration
THRESHOLD_MOTIF = as.numeric(ARGS[7])    # threshold for the motif score filtration

### as.logical() / as.integer() / as.numeric() return NA for unparsable text
if (anyNA(c(IS_INPUT20X, IS_LOG))) {
    stop("IS_INPUT20X and IS_LOG must be TRUE or FALSE", call. = FALSE)
}
if (anyNA(c(N_CORE, THRESHOLD_COVER, THRESHOLD_MOTIF))) {
    stop("N_CORE, THRESHOLD_COVER and THRESHOLD_MOTIF must be numeric", call. = FALSE)
}

### Sample identifiers
### Both designs share the same eight TFX libraries (replicates 2-5, DMSO and Dex)
### and differ only in which Input libraries are used.
SAMPLES_TFX = c(
    paste0("TFX", 2:5, "_DMSO"),
    paste0("TFX", 2:5, "_Dex"))

SAMPLES_INP    = c(paste0("Input", 1:5),         SAMPLES_TFX)
SAMPLES_INP20X = c(paste0("Input", 1:5, "_20x"), SAMPLES_TFX)

### Choose the sample set; each active option adds a suffix to the folder name
SAMPLES = if (IS_INPUT20X) SAMPLES_INP20X else SAMPLES_INP

if (IS_INPUT20X) FDIRY = paste0(FDIRY, "_", "input20x")
if (IS_LOG)      FDIRY = paste0(FDIRY, "_", "log")

### Output directory: <FD_RES>/model_linear/<FDIRY>/<TARGET>
FD_OUT = file.path(FD_RES, "model_linear", FDIRY, TARGET)
dir.create(FD_OUT, recursive = TRUE, showWarnings = FALSE)

### set motifs
### Motif files are discovered under the first sample only; the same file
### names are later looked up under every sample directory.
fdiry  = file.path(FD_RES, "annotation_fragment", SAMPLES[1], TARGET)
fname  = "*_merge.bed.gz"
fglob  = file.path(fdiry, fname)
fpaths = Sys.glob(fglob)
MOTIFS = basename(fpaths)

### set column names and types
### NOTE: the types must be wrapped in cols(). Each col_*() collector is an
### empty classed list, so c(col_character(), col_integer(), ...) collapses to
### list() and readr falls back to guessing every column.
CNAMES = c("Chrom_Frag", "Start_Frag", "End_Frag", "Count_Frag",
           "Chrom_MTF",  "Start_MTF",  "End_MTF",
           "Motif", "Score", "Overlap")
CTYPES = cols(
    Chrom_Frag = col_character(),
    Start_Frag = col_integer(),
    End_Frag   = col_integer(),
    # counts are summed over many fragments; double avoids integer overflow
    Count_Frag = col_double(),
    Chrom_MTF  = col_character(),
    Start_MTF  = col_integer(),
    End_MTF    = col_integer(),
    Motif      = col_character(),
    Score      = col_double(),
    Overlap    = col_integer())

### print start message
cat("Target:           ", TARGET,          "\n")
cat("Is Input20x used? ", IS_INPUT20X,     "\n")
cat("Is Log transform? ", IS_LOG,          "\n")
cat("Output Directory: ", FD_OUT,          "\n")
cat("#Cores Resgister: ", N_CORE,          "\n")
cat("Threshold (Cover):", THRESHOLD_COVER, "\n")
cat("Threshold (Motif):", THRESHOLD_MOTIF, "\n")

###################################################
# Import library size
###################################################
cat("\n++++++++++ Import library size ++++++++++\n")

### Helper function: map a sample id to its experimental group by stripping
### the replicate digit and the "_20x" tag, e.g.
###   Input3_20x -> Input,  TFX2_DMSO -> TFX_DMSO,  TFX5_Dex -> TFX_Dex
### Vectorised over idn_sample; only the first match of each pattern is edited.
get_group = function(idn_sample){
    idn_sample %>%
        str_replace(pattern = "Input[0-9]", replacement = "Input") %>%
        str_remove(pattern = "_20x") %>%
        str_replace(pattern = "TFX[0-9]_", replacement = "TFX_")
}

### set path
fdiry = file.path(FD_RES, "source")
fname = "library_size.txt"
fpath = file.path(fdiry, fname)

### import library size (two columns without header: size, file path)
### NOTE: the types must be wrapped in cols(); c(col_integer(), col_character())
### collapses to an empty list, so readr would guess both columns.
### Size is read as double: library sizes may exceed the 32-bit integer range.
cnames = c("Size", "FPath")
ctypes = cols(Size = col_double(), FPath = col_character())
dat_lib = read_tsv(fpath, col_types=ctypes, col_names = cnames)

### remove the total size, then derive the sample name from the folder that
### directly contains each file and the group from the sample name
dat_lib = dat_lib %>%
    dplyr::filter(FPath != "total") %>%
    mutate(Sample = basename(dirname(FPath))) %>%
    mutate(Group = get_group(Sample)) %>%
    dplyr::select(Size, Sample, Group)

###################################################
# Run motif analysis
###################################################
cat("\n++++++++++ Run motif analysis ++++++++++\n")

### start
### NOTE(review): the loop still runs sequentially (%do%) and N_CORE is unused.
### Re-enable registerDoParallel(N_CORE) together with %dopar% once it is
### confirmed that config_sing.R loads doParallel.
#registerDoParallel(N_CORE)
timer_start = Sys.time()

### loop through each motif to get the marginal effect
### Every motif file is processed. The former MOTIFS[1:10] debugging subset
### dropped all motifs after the tenth and produced NA file names whenever
### fewer than ten motif files were found.
### Each iteration returns a status message; the fit itself is saved as
### <FD_OUT>/<motif>.RDS.
lst_res = foreach(fname = MOTIFS) %do% {
    
    ### start message and get the name of motif
    mtf = str_remove_all(fname, pattern = "_merge.bed.gz")
    msg = paste(mtf, "Start")
    cat(msg, "\n"); flush.console()
    
    ### import fragment annotation of this motif for every sample
    fd_anno = file.path(FD_RES, "annotation_fragment")
    lst_dat = lapply(SAMPLES, function(sam){
        ### set path
        fpath = file.path(fd_anno, sam, TARGET, fname)    
        msg = paste(mtf, "Import", fpath)
        cat(msg, "\n"); flush.console()
        
        ### import data; samples without any row contribute nothing
        dat = read_tsv(fpath, col_types=CTYPES, col_names=CNAMES)
        if (nrow(dat) == 0){
            return(NULL)
        }
        
        ### Filter: keep rows where the fragment fully covers the motif
        ### (overlap equals motif length) and the score passes the threshold
        num1 = nrow(dat)    
        dat = dat %>% 
            mutate(Sample = sam) %>%
            mutate(Length_MTF = End_MTF - Start_MTF)  %>%
            mutate(Length_Dif = Length_MTF - Overlap) %>% 
            dplyr::filter(Length_Dif == 0) %>%
            dplyr::filter(Score >= THRESHOLD_MOTIF)
        num2 = nrow(dat)
        
        ### report the number of rows before-after filtering
        msg = paste(num1, num2, sep="-")
        msg = paste(mtf, "Filter", sam, msg)
        cat(msg, "\n"); flush.console()
        return(dat)
    })
    
    ### arrange data (NULL entries are dropped by bind_rows)
    dat = bind_rows(lst_dat)
    msg = paste(mtf, "Total", nrow(dat))
    cat(msg, "\n"); flush.console()
    
    ###################################################
    # Preprocess
    ###################################################
    
    ### Filter out empty data
    if(nrow(dat) == 0){
        msg = paste(mtf, "Skip Empty")
        cat(msg, "\n"); flush.console()
        return(msg)
    }
    
    ### Filter: No/Low coverage
    ### (total is taken over all rows, before duplicated fragments are removed)
    cnt = sum(dat$Count_Frag)
    if(cnt <= THRESHOLD_COVER){
        msg = paste(mtf, "Filter Low_Coverage")
        cat(msg, "\n"); flush.console()
        return(msg)
    }
    
    ###################################################
    # Create Count Table
    ###################################################
    
    ### get fragments; distinct() counts a fragment once per sample even if
    ### it covers several instances of the motif
    dat = dat %>% 
        dplyr::select(Chrom_Frag, Start_Frag, End_Frag, Count_Frag, Sample) %>%
        distinct()

    ### get count for each sample
    dat = dat %>% group_by(Sample) %>% summarise(Value = sum(Count_Frag))
    
    ### normalize counts by library size
    dat = dat %>% left_join(dat_lib, by="Sample")
    dat = dat %>%
        mutate(Norm_Value    = Value / Size) %>% 
        mutate(Lognorm_Value = log10(Value) - log10(Size)) %>%
        mutate(X = Group)
    
    ###################################################
    # Analyze w/ Linear Model
    ###################################################
    
    ### create design matrix with Input as the reference level
    idxs  = c("Input", "TFX_DMSO", "TFX_Dex")
    dat$X = factor(dat$X, levels=idxs)
    X = model.matrix(~X, dat)
    if (IS_LOG){
        y = dat$Lognorm_Value
    } else {
        y = dat$Norm_Value
    }

    ### setup design matrix
    ### The DMSO column becomes an indicator for any TFX sample, so the
    ### coefficients read: intercept = Input, XTFX_DMSO = DMSO vs Input,
    ### XTFX_Dex = Dex vs DMSO
    X[,"XTFX_DMSO"] = X[,"XTFX_DMSO"] + X[,"XTFX_Dex"]
    
    ### fit model; "+ 0" because X already carries the intercept column
    fit = lm(y ~ X + 0)
    
    ### arrange
    lst = list()
    lst$fit = fit
    lst$cnt = dat
    lst$X   = X
    lst$y   = y
    
    ### store the results
    fp_out = file.path(FD_OUT, paste0(mtf, ".RDS"))
    saveRDS(lst, fp_out)
    
    ### end message
    msg = paste(mtf, "Done")
    cat(msg, "\n"); flush.console()
    return(msg)
}

### print end message
timer = Sys.time()
cat("Timer of the loop:\n")
print(timer - timer_start)

EOF

## Test: upstream of PER1 region

**Remove the folder**

`rm -rf ${FD_RES}/model_linear/marginal_filter*/target_PER1`

**Run all the possible combinations**

In [9]:
### Submit: PER1 test region, motif-score filter off (0), raw normalized counts.
### Fix: a stray "a" after the continuation backslash of --cpus-per-task ended
### the sbatch command early, so the job name, log file and script were never
### passed to sbatch.
sbatch -p ${NODE} \
    --mem=8G \
    --tasks-per-node=1 \
    --cpus-per-task=21 \
    --job-name='Marginal PER1 f00 logF' \
    -o ${FD_LOG}/linear_model_marginal_input20x_per1_filter00.txt \
    <<'EOF'
#!/bin/bash
### set directories & global variables
source ../config_duke.sh -v

### positional arguments of motif_marginal.R
TARGET=target_PER1
IS_INPUT20X=TRUE
IS_LOG=FALSE
FDIRY=marginal_filter00
NCORE=20
THRESHOLD_COVER=10
THRESHOLD_MOTIF=0

### print start message (the array index is empty outside of array jobs)
timer_start=$(date +%s)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " $(date +"%m-%d-%y+%T")
echo

### run the model
srun ${FD_PRJ}/sing_proj_combeffect.sh Rscript motif_marginal.R \
    ${TARGET} ${IS_INPUT20X} ${IS_LOG} ${FDIRY} ${NCORE} ${THRESHOLD_COVER} ${THRESHOLD_MOTIF}
echo

### print end message
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo 'Done!'
echo "Run Time: $(displaytime ${runtime})"

EOF

Submitted batch job 26604334


In [10]:
### Submit: PER1 test region, motif-score filter off (0), log-transformed response
sbatch \
    --partition=${NODE} \
    --job-name='Marginal PER1 f00 logT' \
    --output=${FD_LOG}/linear_model_marginal_input20x_per1_filter00_log.txt \
    --mem=8G \
    --tasks-per-node=1 \
    --cpus-per-task=21 \
    <<'EOF'
#!/bin/bash
### load project paths and helpers
source ../config_duke.sh -v

### positional arguments of motif_marginal.R
TARGET=target_PER1
IS_INPUT20X=TRUE
IS_LOG=TRUE
FDIRY=marginal_filter00
NCORE=20
THRESHOLD_COVER=10
THRESHOLD_MOTIF=0

### start banner (the array index is empty outside of array jobs)
timer_start=$(date +%s)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " $(date +"%m-%d-%y+%T")
echo

### fit the marginal model
srun ${FD_PRJ}/sing_proj_combeffect.sh Rscript motif_marginal.R \
    ${TARGET} ${IS_INPUT20X} ${IS_LOG} ${FDIRY} ${NCORE} ${THRESHOLD_COVER} ${THRESHOLD_MOTIF}
echo

### end banner with elapsed seconds
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo 'Done!'
echo "Run Time: $(displaytime ${runtime})"

EOF

Submitted batch job 26604335


In [11]:
### Submit: PER1 test region, motif score >= 10.81 (filter95), raw normalized counts
sbatch \
    --partition=${NODE} \
    --job-name='Marginal PER1 f95 logF' \
    --output=${FD_LOG}/linear_model_marginal_input20x_per1_filter95.txt \
    --mem=8G \
    --tasks-per-node=1 \
    --cpus-per-task=21 \
    <<'EOF'
#!/bin/bash
### load project paths and helpers
source ../config_duke.sh -v

### positional arguments of motif_marginal.R
TARGET=target_PER1
IS_INPUT20X=TRUE
IS_LOG=FALSE
FDIRY=marginal_filter95
NCORE=20
THRESHOLD_COVER=10
THRESHOLD_MOTIF=10.81

### start banner (the array index is empty outside of array jobs)
timer_start=$(date +%s)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " $(date +"%m-%d-%y+%T")
echo

### fit the marginal model
srun ${FD_PRJ}/sing_proj_combeffect.sh Rscript motif_marginal.R \
    ${TARGET} ${IS_INPUT20X} ${IS_LOG} ${FDIRY} ${NCORE} ${THRESHOLD_COVER} ${THRESHOLD_MOTIF}
echo

### end banner with elapsed seconds
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo 'Done!'
echo "Run Time: $(displaytime ${runtime})"

EOF

Submitted batch job 26604336


In [12]:
### Submit: PER1 test region, motif score >= 10.81 (filter95), log-transformed response
sbatch \
    --partition=${NODE} \
    --job-name='Marginal PER1 f95 logT' \
    --output=${FD_LOG}/linear_model_marginal_input20x_per1_filter95_log.txt \
    --mem=8G \
    --tasks-per-node=1 \
    --cpus-per-task=21 \
    <<'EOF'
#!/bin/bash
### load project paths and helpers
source ../config_duke.sh -v

### positional arguments of motif_marginal.R
TARGET=target_PER1
IS_INPUT20X=TRUE
IS_LOG=TRUE
FDIRY=marginal_filter95
NCORE=20
THRESHOLD_COVER=10
THRESHOLD_MOTIF=10.81

### start banner (the array index is empty outside of array jobs)
timer_start=$(date +%s)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " $(date +"%m-%d-%y+%T")
echo

### fit the marginal model
srun ${FD_PRJ}/sing_proj_combeffect.sh Rscript motif_marginal.R \
    ${TARGET} ${IS_INPUT20X} ${IS_LOG} ${FDIRY} ${NCORE} ${THRESHOLD_COVER} ${THRESHOLD_MOTIF}
echo

### end banner with elapsed seconds
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo 'Done!'
echo "Run Time: $(displaytime ${runtime})"

EOF

Submitted batch job 26604337


**Check**

In [13]:
# Print the Slurm log of the PER1 run with filter00 and no log transform.
cat ${FD_LOG}/linear_model_marginal_input20x_per1_filter00.txt

You are on Duke Server: HARDAC
BASE DIRECTORY:     /gpfs/fs1/data/reddylab/Kuei
PATH OF SOURCE:     /gpfs/fs1/data/reddylab/Kuei/source
PATH OF EXECUTABLE: /gpfs/fs1/data/reddylab/Kuei/exe
PATH OF ANNOTATION: /gpfs/fs1/data/reddylab/Kuei/annotation
PATH OF PROJECT:    /gpfs/fs1/data/reddylab/Kuei/GitRepo/Proj_CombEffect_STARRseq/notebooks
PATH OF RESULTS:    /gpfs/fs1/data/reddylab/Kuei/out/proj_combeffect

Slurm Array Index: 
Time Stamp:         10-15-21+20:38:03


++++++++++ Set environment  ++++++++++
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.5     ✔ purrr   0.3.4
✔ tibble  3.1.4     ✔ dplyr   1.0.7
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   2.0.1     ✔ forcats 0.5.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Attaching package: ‘pryr’

The following objects are masked from ‘package:purrr’:

    compose, partial




In [65]:
# Print the Slurm log of the PER1 run with filter95 and no log transform.
cat ${FD_LOG}/linear_model_marginal_input20x_per1_filter95.txt

You are on Duke Server: HARDAC
BASE DIRECTORY:     /gpfs/fs1/data/reddylab/Kuei
PATH OF SOURCE:     /gpfs/fs1/data/reddylab/Kuei/source
PATH OF EXECUTABLE: /gpfs/fs1/data/reddylab/Kuei/exe
PATH OF ANNOTATION: /gpfs/fs1/data/reddylab/Kuei/annotation
PATH OF PROJECT:    /gpfs/fs1/data/reddylab/Kuei/GitRepo/Proj_CombEffect_STARRseq/notebooks
PATH OF RESULTS:    /gpfs/fs1/data/reddylab/Kuei/out/proj_combeffect

Slurm Array Index: 
Time Stamp:         10-15-21+20:18:32


++++++++++ Set environment  ++++++++++
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.5     ✔ purrr   0.3.4
✔ tibble  3.1.4     ✔ dplyr   1.0.7
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   2.0.1     ✔ forcats 0.5.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Attaching package: ‘pryr’

The following objects are masked from ‘package:purrr’:

    compose, partial




## Test: chromosome Y

In [48]:
### Submit: chromosome Y test, motif-score filter off (0), raw normalized counts.
### Fix: this cell was a copy of the PER1 submission (target, job name and log
### file all pointed at PER1), so it re-ran PER1 and overwrote the PER1 log.
### It now targets chrY and writes the log that the check cell reads.
sbatch -p ${NODE} \
    --mem=8G \
    --tasks-per-node=1 \
    --cpus-per-task=21 \
    --job-name='Marginal chrY f00 logF' \
    -o ${FD_LOG}/linear_model_marginal_input20x_chrY.txt \
    <<'EOF'
#!/bin/bash
### set directories & global variables
source ../config_duke.sh -v

CHROM=chrY

### positional arguments of motif_marginal.R
TARGET=${CHROM}
IS_INPUT20X=TRUE
IS_LOG=FALSE
FDIRY=marginal_filter00
NCORE=20
THRESHOLD_COVER=10
THRESHOLD_MOTIF=0

### print start message (the array index is empty outside of array jobs)
timer_start=$(date +%s)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " $(date +"%m-%d-%y+%T")
echo

### run the model
srun ${FD_PRJ}/sing_proj_combeffect.sh Rscript motif_marginal.R \
    ${TARGET} ${IS_INPUT20X} ${IS_LOG} ${FDIRY} ${NCORE} ${THRESHOLD_COVER} ${THRESHOLD_MOTIF}
echo

### print end message
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo 'Done!'
echo "Run Time: $(displaytime ${runtime})"

EOF

Submitted batch job 26597506


**Check**

In [27]:
# Print the Slurm log of the chromosome Y test run.
cat ${FD_LOG}/linear_model_marginal_input20x_chrY.txt

You are on Duke Server: HARDAC
BASE DIRECTORY:     /gpfs/fs1/data/reddylab/Kuei
PATH OF SOURCE:     /gpfs/fs1/data/reddylab/Kuei/source
PATH OF EXECUTABLE: /gpfs/fs1/data/reddylab/Kuei/exe
PATH OF ANNOTATION: /gpfs/fs1/data/reddylab/Kuei/annotation
PATH OF PROJECT:    /gpfs/fs1/data/reddylab/Kuei/GitRepo/Proj_CombEffect_STARRseq/notebooks
PATH OF RESULTS:    /gpfs/fs1/data/reddylab/Kuei/out/proj_combeffect

Slurm Array Index: 
Time Stamp:         10-14-21+14:40:14


++++++++++ Set environment  ++++++++++
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.5     ✔ purrr   0.3.4
✔ tibble  3.1.4     ✔ dplyr   1.0.7
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   2.0.1     ✔ forcats 0.5.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Attaching package: ‘pryr’

The following objects are masked from ‘package:purrr’:

    compose, partial




## Test: chromosome X

In [36]:
# Show the project directory that the batch scripts use to locate sing_proj_combeffect.sh.
echo ${FD_PRJ}

/gpfs/fs1/data/reddylab/Kuei/GitRepo/Proj_CombEffect_STARRseq/notebooks


In [45]:
### Smoke test: run a one-line R command through the wrapper on node x1-02-3
sbatch \
    --partition=all \
    --nodelist=x1-02-3 \
    --job-name='Test' \
    --output=./test.txt \
    --mem=20G \
    <<'EOF'
#!/bin/bash
### load project paths and helpers
source ../config_duke.sh -v

### start banner (the array index is empty outside of array jobs)
timer_start=$(date +%s)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " $(date +"%m-%d-%y+%T")
echo

### call the wrapper by its absolute path
srun /data/reddylab/Kuei/GitRepo/Proj_CombEffect_STARRseq/notebooks/sing_proj_combeffect.sh Rscript -e "print('hello')"
echo

### end banner with elapsed seconds
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo 'Done!'
echo "Run Time: $(displaytime ${runtime})"

EOF

Submitted batch job 26607225


In [47]:
# Print the log of the smoke-test job.
cat ./test.txt

You are on Duke Server: HARDAC
BASE DIRECTORY:     /gpfs/fs1/data/reddylab/Kuei
PATH OF SOURCE:     /gpfs/fs1/data/reddylab/Kuei/source
PATH OF EXECUTABLE: /gpfs/fs1/data/reddylab/Kuei/exe
PATH OF ANNOTATION: /gpfs/fs1/data/reddylab/Kuei/annotation
PATH OF PROJECT:    /gpfs/fs1/data/reddylab/Kuei/GitRepo/Proj_CombEffect_STARRseq/notebooks
PATH OF RESULTS:    /gpfs/fs1/data/reddylab/Kuei/out/proj_combeffect

Slurm Array Index: 
Time Stamp:         10-18-21+12:55:58

[1] "hello"

Done!
Run Time: 3 seconds


In [35]:
### Submit: chromosome X, motif score >= 10.81 (filter95), raw normalized counts.
### Fix: the job name was copied from the PER1/filter00 cell and mislabeled
### this job in the Slurm queue.
sbatch -p ${NODE} \
    --mem=20G \
    --job-name='Marginal chrX f95 logF' \
    -o ${FD_LOG}/linear_model_marginal_input20x_chrX_filter95.txt \
    <<'EOF'
#!/bin/bash
### set directories & global variables
source ../config_duke.sh -v

CHROM=chrX

### positional arguments of motif_marginal.R
TARGET=${CHROM}
IS_INPUT20X=TRUE
IS_LOG=FALSE
FDIRY=marginal_filter95
NCORE=20
THRESHOLD_COVER=10
THRESHOLD_MOTIF=10.81

### print start message (the array index is empty outside of array jobs)
timer_start=$(date +%s)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " $(date +"%m-%d-%y+%T")
echo

### run the model
srun ${FD_PRJ}/sing_proj_combeffect.sh Rscript motif_marginal.R \
    ${TARGET} ${IS_INPUT20X} ${IS_LOG} ${FDIRY} ${NCORE} ${THRESHOLD_COVER} ${THRESHOLD_MOTIF}
echo

### print end message
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo 'Done!'
echo "Run Time: $(displaytime ${runtime})"

EOF

Submitted batch job 26606924


In [9]:
### Submit: chromosome X, motif score >= 10.81 (filter95), log-transformed response.
### Fix: the job name was copied from the PER1/filter00/logF cell and
### mislabeled this job in the Slurm queue.
sbatch -p ${NODE} \
    --mem=20G \
    --job-name='Marginal chrX f95 logT' \
    -o ${FD_LOG}/linear_model_marginal_input20x_chrX_filter95_log.txt \
    <<'EOF'
#!/bin/bash
### set directories & global variables
source ../config_duke.sh -v

CHROM=chrX

### positional arguments of motif_marginal.R
TARGET=${CHROM}
IS_INPUT20X=TRUE
IS_LOG=TRUE
FDIRY=marginal_filter95
NCORE=20
THRESHOLD_COVER=10
THRESHOLD_MOTIF=10.81

### print start message (the array index is empty outside of array jobs)
timer_start=$(date +%s)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " $(date +"%m-%d-%y+%T")
echo

### run the model
srun ${FD_PRJ}/sing_proj_combeffect.sh Rscript motif_marginal.R \
    ${TARGET} ${IS_INPUT20X} ${IS_LOG} ${FDIRY} ${NCORE} ${THRESHOLD_COVER} ${THRESHOLD_MOTIF}
echo

### print end message
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo 'Done!'
echo "Run Time: $(displaytime ${runtime})"

EOF

Submitted batch job 26606640


In [None]:
### Submit: chromosome 1, motif score >= 10.81 (filter95), raw normalized counts
sbatch \
    --partition=${NODE} \
    --job-name='Marginal chr1 f95 logF' \
    --output=${FD_LOG}/linear_model_marginal_input20x_chr1_filter95.txt \
    --mem=20G \
    <<'EOF'
#!/bin/bash
### load project paths and helpers
source ../config_duke.sh -v

CHROM=chr1

### positional arguments of motif_marginal.R
TARGET=${CHROM}
IS_INPUT20X=TRUE
IS_LOG=FALSE
FDIRY=marginal_filter95
NCORE=20
THRESHOLD_COVER=10
THRESHOLD_MOTIF=10.81

### start banner (the array index is empty outside of array jobs)
timer_start=$(date +%s)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " $(date +"%m-%d-%y+%T")
echo

### fit the marginal model
srun ${FD_PRJ}/sing_proj_combeffect.sh Rscript motif_marginal.R \
    ${TARGET} ${IS_INPUT20X} ${IS_LOG} ${FDIRY} ${NCORE} ${THRESHOLD_COVER} ${THRESHOLD_MOTIF}
echo

### end banner with elapsed seconds
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo 'Done!'
echo "Run Time: $(displaytime ${runtime})"

EOF

Submitted batch job 26606638


In [29]:
### Submit: chromosome 1, motif score >= 10.81 (filter95), log-transformed response.
### Fix: FDIRY must be the base folder name only. motif_marginal.R appends
### "_input20x" and "_log" itself, so passing "marginal_filter95_log" wrote the
### results to "marginal_filter95_log_input20x_log" instead of the folder used
### by the other log-transformed runs.
sbatch -p ${NODE} \
    --mem=20G \
    --job-name='Marginal chr1 f95 logT' \
    -o ${FD_LOG}/linear_model_marginal_input20x_chr1_filter95_log.txt \
    <<'EOF'
#!/bin/bash
### set directories & global variables
source ../config_duke.sh -v

CHROM=chr1

### positional arguments of motif_marginal.R
TARGET=${CHROM}
IS_INPUT20X=TRUE
IS_LOG=TRUE
FDIRY=marginal_filter95
NCORE=20
THRESHOLD_COVER=10
THRESHOLD_MOTIF=10.81

### print start message (the array index is empty outside of array jobs)
timer_start=$(date +%s)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " $(date +"%m-%d-%y+%T")
echo

### run the model
srun ${FD_PRJ}/sing_proj_combeffect.sh Rscript motif_marginal.R \
    ${TARGET} ${IS_INPUT20X} ${IS_LOG} ${FDIRY} ${NCORE} ${THRESHOLD_COVER} ${THRESHOLD_MOTIF}
echo

### print end message
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo 'Done!'
echo "Run Time: $(displaytime ${runtime})"

EOF

Submitted batch job 26606658


In [32]:
# Print the Slurm log of the chromosome X run with filter95 and no log transform.
cat ${FD_LOG}/linear_model_marginal_input20x_chrX_filter95.txt

You are on Duke Server: HARDAC
BASE DIRECTORY:     /gpfs/fs1/data/reddylab/Kuei
PATH OF SOURCE:     /gpfs/fs1/data/reddylab/Kuei/source
PATH OF EXECUTABLE: /gpfs/fs1/data/reddylab/Kuei/exe
PATH OF ANNOTATION: /gpfs/fs1/data/reddylab/Kuei/annotation
PATH OF PROJECT:    /gpfs/fs1/data/reddylab/Kuei/GitRepo/Proj_CombEffect_STARRseq/notebooks
PATH OF RESULTS:    /gpfs/fs1/data/reddylab/Kuei/out/proj_combeffect

Slurm Array Index: 
Time Stamp:         10-18-21+01:44:24


++++++++++ Set environment  ++++++++++
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.5     ✔ purrr   0.3.4
✔ tibble  3.1.4     ✔ dplyr   1.0.7
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   2.0.1     ✔ forcats 0.5.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Attaching package: ‘pryr’

The following objects are masked from ‘package:purrr’:

    compose, partial




## RUN

In [18]:
### Submit the production run: one array task per chromosome (chr1-22, X, Y),
### motif score >= 10.81 (filter95), raw normalized counts.
### Each task writes its own log file (%a is the array index).
### Fix: the job name was copied from the PER1/filter00 test cell and
### mislabeled all 24 array tasks in the Slurm queue.
sbatch -p ${NODE} \
    --array=0-23 \
    --mem=16G \
    --tasks-per-node=1 \
    --cpus-per-task=21 \
    --job-name='Marginal chrom f95 logF' \
    -o ${FD_LOG}/linear_model_marginal_input20x_chrom_filter95.%a.txt \
    <<'EOF'
#!/bin/bash
### set directories & global variables
source ../config_duke.sh -v

### map the array index (0-23) to a chromosome name
CHROMS=($(seq 1 22) X Y)
CHROM=chr${CHROMS[${SLURM_ARRAY_TASK_ID}]}

### positional arguments of motif_marginal.R
TARGET=${CHROM}
IS_INPUT20X=TRUE
IS_LOG=FALSE
FDIRY=marginal_filter95
NCORE=20
THRESHOLD_COVER=10
THRESHOLD_MOTIF=10.81

### print start message
timer_start=$(date +%s)
echo "Slurm Array Index: " ${SLURM_ARRAY_TASK_ID}
echo "Time Stamp:        " $(date +"%m-%d-%y+%T")
echo

### run the model
srun ${FD_PRJ}/sing_proj_combeffect.sh Rscript motif_marginal.R \
    ${TARGET} ${IS_INPUT20X} ${IS_LOG} ${FDIRY} ${NCORE} ${THRESHOLD_COVER} ${THRESHOLD_MOTIF}
echo

### print end message
timer=$(date +%s)
runtime=$(( timer - timer_start ))
echo 'Done!'
echo "Run Time: $(displaytime ${runtime})"

EOF

Submitted batch job 26604348
