In [1]:
%%bash 
eu_register.py --help

usage: eu_register.py [-h] [-m DCC_MODE] [-d] [--no-aliases] -p PROFILE_ID -i
                      INFILE [--patch] [-w]

Given a tab-delimited or JSON input file containing one or more records belonging to one of the profiles
listed on the ENCODE Portal (such as https://www.encodeproject.org/profiles/biosample.json),
either POSTS or PATCHES the records. The default is to POST each record; to PATCH instead, see
the ``--patch`` option.

When POSTING file records, the md5sum of each file will be calculated for you if you haven't
already provided the `md5sum` property. Then, after the POST operation completes, the actual file
will be uploaded to AWS S3. In order for this to work, you must set the `submitted_file_name`
property to the full, local path to your file to upload. Alternatively, you can set
`submitted_file_name` to and existing S3 object, i.e. s3://mybucket/reads.fastq.

Note that there is a special 'trick' defined in the ``encode_utils.connection.Connection()``
class that can 

In [3]:
%%writefile /data/reddylab/Revathy/scripts/excel_to_text_for_ENCODE.py

import pandas as pd
import numpy
import xlrd
import argparse
parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter, \
description="""

excel_to_text_for_ENCODE.py

This script enables the conversion of a metadata objects spreadsheet required for 
data submission into ENCODE portal from the excel form into .txt files for each of 
the sheet present in the spreadsheet. Each sheet represents each metadata object. 
The default sheet options are : [genetic_modification, biosample, library, 
functional_characterization_exp, replicate, file]. The script is WIP as it 
incorporates only for the above mentioned 6 metadata objects for now.

""")

###############################
# required arguments:

parser.add_argument("-i", "--excel-file", help="""required, file path to the metadata objects excel spreadsheet for ENCODE data submission.""", 
                        required=True)
parser.add_argument("-o", "--out-directory", help="""required, path to the directory where the output .txt files should be located.""")
###############################
# optional arguments:

parser.add_argument("--sheet-names", type=str, nargs='*', default=['genetic_modification','biosample','library','functional_characterization_exp','replicate','file'],
                    help="""required, names of the sheets present in the metadata objects excel spreadsheet.""")

args = parser.parse_args()

for sheet in args.sheet_names:
    df = pd.read_excel(args.excel_file, sheet_name = sheet)
    df.fillna('', inplace=True)
    if (sheet=='functional_characterization_exp'):
        sheet = 'functional_characterization_experiment'
    df.to_csv('%s/%s.txt' % (args.out_directory, sheet), sep='\t', index=False)



Overwriting /data/reddylab/Revathy/scripts/excel_to_text_for_ENCODE.py


In [4]:
%%bash
source /data/reddylab/software/miniconda3/bin/activate revathy_py3
python /data/reddylab/Revathy/scripts/excel_to_text_for_ENCODE.py \
-i /data/reddylab/Revathy/collabs/Kari/ENCODE/Reddy-DCC-data-wgSTARRseq-A001_final_v2.xlsx \
-o /data/reddylab/Revathy/collabs/Kari/ENCODE/metadata \
--sheet-names biosample library replicate file 

#### Script for submission into production server 


In [3]:
%%bash
source /data/reddylab/software/miniconda3/bin/activate revathy_py3
mkdir -p /data/reddylab/Revathy/collabs/Kari/ENCODE/logs
cd /data/reddylab/Revathy/collabs/Kari/ENCODE/metadata
sbatch -p all \
    --mem 16G \
    -o /data/reddylab/Revathy/collabs/Kari/ENCODE/logs/submission_script.out \
    <<'EOF'
#!/bin/bash
NAMES=( \
     file \
)
NAME=${NAMES[${SLURM_ARRAY_TASK_ID}]}
eu_register.py -m prod -p ${NAME} -i Reddy-DCC-data-wgSTARRseq-A001_file_narrowPeak.txt \
EOF

Submitted batch job 25960842


In [1]:
import encode_utils as eu
from encode_utils.connection import Connection

conn = Connection("prod")

2021-09-07 15:10:28,445:eu_debug:	Connecting to www.encodeproject.org
2021-09-07 15:10:28,489:eu_debug:	submission=False: In non-submission mode.


In [11]:
conn.get("ENCFF814CNR")

2021-03-02 12:08:35,256:eu_debug:	>>>>>>GET ENCFF814CNR From DCC with URL https://www.encodeproject.org/ENCFF814CNR/?format=json


{'@context': '/terms/',
 '@id': '/files/ENCFF814CNR/',
 '@type': ['File', 'Item'],
 'accession': 'ENCFF814CNR',
 'actions': [{'href': '/files/ENCFF814CNR/#!edit',
   'name': 'edit',
   'profile': '/profiles/File.json',
   'title': 'Edit'},
  {'href': '/files/ENCFF814CNR/#!edit-json',
   'name': 'edit-json',
   'profile': '/profiles/File.json',
   'title': 'Edit JSON'}],
 'aliases': ['tim-reddy:k562_wgstarrseq_input_rep1_bam'],
 'alternate_accessions': [],
 'assay_term_name': 'pooled clone sequencing',
 'assembly': 'GRCh38',
 'audit': {},
 'award': {'@id': '/awards/UM1HG009428/',
  '@type': ['Award', 'Item'],
  'component': 'functional characterization',
  'description': 'There is a fundamental gap in understanding how the millions of known regulatory elements functionally contribute to gene regulation and phenotypes. Continued existence of that gap is an important problem because, until it is filled, it will remain extremely difficult to identify the genetic mechanisms underlying the t

In [2]:
conn.upload_file(file_id='ENCFF814CNR', file_path='/data/reddylab/kstrouse/god/input_libs/A001_v2/combined_reads/processing/starr_seq/A001_combined-pe/A001.f3q10.sorted.dedup.rpkm.bw')


2021-03-02 16:30:26,487:eu_debug:	
IN upload_file()

2021-03-02 16:30:26,489:eu_debug:	Attempting to generate new file upload credentials
2021-03-02 16:30:27,286:eu_debug:	Success: upload credentials for 'ENCFF814CNR' regenerated.
2021-03-02 16:30:27,287:eu_debug:	>>>>>>GET ENCFF814CNR From DCC with URL https://www.encodeproject.org/ENCFF814CNR/?format=json
2021-03-02 16:30:28,000:eu_debug:	Running command 'aws s3 cp /data/reddylab/kstrouse/god/input_libs/A001_v2/combined_reads/processing/starr_seq/A001_combined-pe/A001.f3q10.sorted.dedup.rpkm.bw s3://encode-files/2021/02/24/20474e49-7c73-4a7b-a48f-795d6969bcda/ENCFF814CNR.bam'.
2021-03-02 16:31:32,745:eu_debug:	AWS upload successful.


In [12]:
conn.get('ENCFF778LRW')

2021-03-02 12:09:27,715:eu_debug:	>>>>>>GET ENCFF778LRW From DCC with URL https://www.encodeproject.org/ENCFF778LRW/?format=json


{'@context': '/terms/',
 '@id': '/files/ENCFF778LRW/',
 '@type': ['File', 'Item'],
 'accession': 'ENCFF778LRW',
 'actions': [{'href': '/files/ENCFF778LRW/#!edit',
   'name': 'edit',
   'profile': '/profiles/File.json',
   'title': 'Edit'},
  {'href': '/files/ENCFF778LRW/#!edit-json',
   'name': 'edit-json',
   'profile': '/profiles/File.json',
   'title': 'Edit JSON'}],
 'aliases': ['tim-reddy:k562_wgstarrseq_input_rep1_bw'],
 'alternate_accessions': [],
 'assay_term_name': 'pooled clone sequencing',
 'assembly': 'GRCh38',
 'audit': {},
 'award': {'@id': '/awards/UM1HG009428/',
  '@type': ['Award', 'Item'],
  'component': 'functional characterization',
  'description': 'There is a fundamental gap in understanding how the millions of known regulatory elements functionally contribute to gene regulation and phenotypes. Continued existence of that gap is an important problem because, until it is filled, it will remain extremely difficult to identify the genetic mechanisms underlying the th

In [3]:
conn.upload_file(file_id='ENCFF778LRW', file_path='/data/reddylab/kstrouse/god/input_libs/A001_v2/combined_reads/processing/starr_seq/A001_combined-pe/A001.f3q10.sorted.dups_marked.bam')

2021-03-02 16:32:28,736:eu_debug:	
IN upload_file()

2021-03-02 16:32:28,737:eu_debug:	Attempting to generate new file upload credentials
2021-03-02 16:32:29,581:eu_debug:	Success: upload credentials for 'ENCFF778LRW' regenerated.
2021-03-02 16:32:29,583:eu_debug:	>>>>>>GET ENCFF778LRW From DCC with URL https://www.encodeproject.org/ENCFF778LRW/?format=json
2021-03-02 16:32:30,387:eu_debug:	Running command 'aws s3 cp /data/reddylab/kstrouse/god/input_libs/A001_v2/combined_reads/processing/starr_seq/A001_combined-pe/A001.f3q10.sorted.dups_marked.bam s3://encode-files/2021/02/24/c4e5f893-ff09-4653-891e-bc7d70823658/ENCFF778LRW.bigWig'.
2021-03-02 16:38:32,611:eu_debug:	AWS upload successful.


In [21]:
%%bash
cd /data/reddylab/Revathy/collabs/Kari/ENCODE/metadata
eu_register.py -m prod -p file -i Reddy-DCC-data-wgSTARRseq-A001_file_CRADLE.txt

2023-04-21 18:54:49,655:eu_debug:	Connecting to www.encodeproject.org
2023-04-21 18:54:49,699:eu_debug:	submission=False: In non-submission mode.
2023-04-21 18:54:50,369:eu_debug:	submission=True: In submission mode.
2023-04-21 18:54:50,374:eu_debug:	
IN post().
2023-04-21 18:54:50,377:eu_debug:	<<<<<< POST file record tim-reddy:k562_wgstarrseq_k562_CRADLE_peak-bed To DCC with URL https://www.encodeproject.org/file and this payload:

{
  "aliases": [
    "tim-reddy:k562_wgstarrseq_k562_CRADLE_peak-bed"
  ],
  "assembly": "GRCh38",
  "award": "/awards/UM1HG009428/",
  "dataset": "tim-reddy:wg-starrseq-NA18517-k562",
  "derived_from": [
    "tim-reddy:k562_wgstarrseq_input_rep1_bw",
    "tim-reddy:k562_wgstarrseq_k562_rep1_bw",
    "tim-reddy:k562_wgstarrseq_k562_rep2_bw",
    "tim-reddy:k562_wgstarrseq_k562_rep3_bw"
  ],
  "file_format": "bed",
  "file_format_type": "element enrichments",
  "file_size": 842680,
  "lab": "/labs/tim-reddy/",
  "md5sum": "3b234d133a61576915a8fb1cbaabc852",

In [20]:
!cat /data/reddylab/Revathy/collabs/Kari/ENCODE/metadata/Reddy-DCC-data-wgSTARRseq-A001_file_CRADLE.txt

dataset	file_format	file_format_type	output_type	submitted_file_name	award	lab	aliases	assembly	derived_from
tim-reddy:wg-starrseq-NA18517-k562	bed	element enrichments	element quantifications	/data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05.CRADLE_peaks.bed.gz	/awards/UM1HG009428/	/labs/tim-reddy/	tim-reddy:k562_wgstarrseq_k562_CRADLE_peak-bed	GRCh38	tim-reddy:k562_wgstarrseq_input_rep1_bw, tim-reddy:k562_wgstarrseq_k562_rep1_bw, tim-reddy:k562_wgstarrseq_k562_rep2_bw, tim-reddy:k562_wgstarrseq_k562_rep3_bw

In [9]:
%%bash
cat /data/reddylab/kstrouse/superstarr/output_libs/A001_K562/CRADLE/CRADLEcorr_kmer36/peaks/fdr0.05/CRADLE_peaks \
| tail -n+2 \
| awk -vOFS="\t" '{print $1, $2, $3, $4, '100',$6, $7, $8, $9, $10, $11}' \
> /data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05.CRADLE_peaks


In [19]:
%%bash
gzip -c /data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05.CRADLE_peaks \
> /data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05.CRADLE_peaks.bed.gz

In [10]:
!head /data/reddylab/kstrouse/superstarr/output_libs/A001_K562/CRADLE/CRADLEcorr_kmer36/peaks/fdr0.05/CRADLE_peaks

chr	start	end	name	score	strand	effectSize	inputCount	outputCount	-log(pvalue)	-log(qvalue)	cohen's_d
chr1	17320	17470	chr1:17320-17470	.	.	-65	-59	-125	2.92	2.0	nan
chr1	492302	492378	chr1:492302-492378	.	.	-54	-42	-96	2.81	2.33	nan
chr1	844088	844138	chr1:844088-844138	.	.	113	-35	77	3.07	2.29	nan
chr1	869788	870038	chr1:869788-870038	.	.	-107	-17	-124	3.89	3.11	nan
chr1	902870	902920	chr1:902870-902920	.	.	80	-24	56	3.55	2.31	nan
chr1	913038	913238	chr1:913038-913238	.	.	105	-36	69	3.03	2.25	nan
chr1	925588	925888	chr1:925588-925888	.	.	-100	-35	-136	2.59	1.97	nan
chr1	930538	930588	chr1:930538-930588	.	.	-34	-26	-60	3.89	2.63	nan
chr1	931138	931188	chr1:931138-931188	.	.	-59	-74	-134	3.07	2.12	nan


In [11]:
!head /data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05.CRADLE_peaks

chr1	17320	17470	chr1:17320-17470	100	.	-65	-59	-125	2.92	2.0
chr1	492302	492378	chr1:492302-492378	100	.	-54	-42	-96	2.81	2.33
chr1	844088	844138	chr1:844088-844138	100	.	113	-35	77	3.07	2.29
chr1	869788	870038	chr1:869788-870038	100	.	-107	-17	-124	3.89	3.11
chr1	902870	902920	chr1:902870-902920	100	.	80	-24	56	3.55	2.31
chr1	913038	913238	chr1:913038-913238	100	.	105	-36	69	3.03	2.25
chr1	925588	925888	chr1:925588-925888	100	.	-100	-35	-136	2.59	1.97
chr1	930538	930588	chr1:930538-930588	100	.	-34	-26	-60	3.89	2.63
chr1	931138	931188	chr1:931138-931188	100	.	-59	-74	-134	3.07	2.12
chr1	936838	936938	chr1:936838-936938	100	.	117	-21	96	3.18	2.1


In [13]:
%%bash
/data/common/shared_conda_envs/ucsc/bin/bedToBigBed \
/data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05.CRADLE_peaks \
/data/reddylab/Reference_Data/Genomes/hg38/hg38.fa.chrom.sizes \
-as=/home/rv103/reddylab_data/Revathy/software/mpra_starr.as \
-type=bed6+5 \
/data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05.CRADLE_peaks.bigBed 

pass1 - making usageList (23 chroms): 7 millis
pass2 - checking and writing primary data (38671 records, 11 fields): 174 millis


In [15]:
!ls -lah /data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05.CRADLE_peaks.bigBed

�� 
 �      �      /       0      (      ��  P      0      J     �s     �0      �     �9      �      �S     ��                Jt      0     �     ],      �0     �D     c|       �     ��     �           G�     P�       0    ��     ��       �0    ��     ��     table mpra_starr
"BED6+5 MPRA/STARR-seq common file format"
(
string  chrom;		"Reference sequence chromosome or scaffold"
uint    chromStart;	"Start position in chromosome"
uint    chromEnd;	"End position in chromosome"
string  name;		"Name of tested element or region"
uint    score;		"Indicates how dark the peak will be displayed in the browser (0-1000)"
char[1] strand;		"+ or - for strand, . for unknown"


In [14]:
!md5sum /data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05.CRADLE_peaks.bigBed

5ad8b343085b18b8c1960e7daafa4b27  /data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05.CRADLE_peaks.bigBed


In [17]:
import encode_utils as eu
from encode_utils.connection import Connection

conn = Connection("prod")
conn.upload_file(file_id='ENCFF181MIF', file_path='/data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05.CRADLE_peaks.bigBed')

2023-04-21 14:55:20,306:eu_debug:	Connecting to www.encodeproject.org
2023-04-21 14:55:20,351:eu_debug:	submission=False: In non-submission mode.
2023-04-21 14:55:21,042:eu_debug:	
IN upload_file()

2023-04-21 14:55:21,044:eu_debug:	Attempting to generate new file upload credentials
2023-04-21 14:55:22,070:eu_debug:	Success: upload credentials for 'ENCFF181MIF' regenerated.
2023-04-21 14:55:22,071:eu_debug:	>>>>>>GET ENCFF181MIF From DCC with URL https://www.encodeproject.org/ENCFF181MIF/?format=json
2023-04-21 14:55:22,631:eu_debug:	Running command 'aws s3 cp /data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05.CRADLE_peaks.bigBed s3://encode-files/2022/10/19/d9236427-6339-48ee-b2b3-5531ae4cea06/ENCFF181MIF.bed.gz'.
2023-04-21 14:55:24,856:eu_debug:	AWS upload successful.


In [1]:
!ls /data/reddylab/Revathy/collabs/Kari/ENCODE/data/

A001-K562-rep1.f3q10.sorted.dups_marked.macs_keep_dups_norm_peaks.narrowPeak.gz
A001-K562-rep2.f3q10.sorted.dups_marked.macs_norm_peaks.narrowPeak.gz
A001-K562-rep3.f3q10.sorted.dups_marked.macs_norm_peaks.narrowPeak.gz
corrected_peaks
wgSTARRseq_k562_kmer36_fdr0.05.CRADLE_peaks
wgSTARRseq_k562_kmer36_fdr0.05_CRADLE_peaks.bed.gz
wgSTARRseq_k562_kmer36_fdr0.05.CRADLE_peaks.bigBed
wgSTARRseq_k562_kmer36_fdr0.05.CRADLE_peaks.gz
wgSTARRseq_k562_kmer36_fdr0.05_CRADLE_peaks.narrowPeak.gz


#### correct the score column in narrowPeak files

In [3]:
%%bash 
mkdir -p /data/reddylab/Revathy/collabs/Kari/ENCODE/data/corrected_peaks
cd /data/reddylab/Revathy/collabs/Kari/ENCODE/data/
files=$(ls *.gz)
for file in ${files[@]};
do 
zcat ${file} | awk 'BEGIN{OFS=FS}$5>1000{$5=1000}{print}' > /data/reddylab/Revathy/collabs/Kari/ENCODE/data/corrected_peaks/${file/.gz}
done

In [2]:
conn.upload_file(file_id='ENCFF454ZKK', file_path='/data/reddylab/Revathy/collabs/Kari/ENCODE/data/corrected_peaks/A001-K562-rep1.f3q10.sorted.dups_marked.macs_keep_dups_norm_peaks.narrowPeak.gz')
conn.upload_file(file_id='ENCFF919UHO', file_path='/data/reddylab/Revathy/collabs/Kari/ENCODE/data/corrected_peaks/A001-K562-rep2.f3q10.sorted.dups_marked.macs_norm_peaks.narrowPeak.gz')
conn.upload_file(file_id='ENCFF549QUG', file_path='/data/reddylab/Revathy/collabs/Kari/ENCODE/data/corrected_peaks/A001-K562-rep3.f3q10.sorted.dups_marked.macs_norm_peaks.narrowPeak.gz')




2021-09-07 15:12:03,929:eu_debug:	
IN upload_file()

2021-09-07 15:12:03,932:eu_debug:	Attempting to generate new file upload credentials
2021-09-07 15:12:04,809:eu_debug:	Success: upload credentials for 'ENCFF454ZKK' regenerated.
2021-09-07 15:12:04,810:eu_debug:	>>>>>>GET ENCFF454ZKK From DCC with URL https://www.encodeproject.org/ENCFF454ZKK/?format=json
2021-09-07 15:12:05,402:eu_debug:	Running command 'aws s3 cp /data/reddylab/Revathy/collabs/Kari/ENCODE/data/corrected_peaks/A001-K562-rep1.f3q10.sorted.dups_marked.macs_keep_dups_norm_peaks.narrowPeak.gz s3://encode-files/2021/09/02/e3276af7-d53b-44ca-994f-87d249d96617/ENCFF454ZKK.bed.gz'.
2021-09-07 15:12:14,085:eu_debug:	AWS upload successful.
2021-09-07 15:12:14,088:eu_debug:	
IN upload_file()

2021-09-07 15:12:14,090:eu_debug:	Attempting to generate new file upload credentials
2021-09-07 15:12:15,023:eu_debug:	Success: upload credentials for 'ENCFF919UHO' regenerated.
2021-09-07 15:12:15,023:eu_debug:	>>>>>>GET ENCFF919UHO From

In [4]:
%%bash
source /data/reddylab/software/miniconda3/bin/activate revathy_py3
cd /data/reddylab/Revathy/collabs/Kari/ENCODE/metadata
eu_register.py -m prod -p file -i kari_analysis_step_run.txt --patch 

2021-09-29 15:29:45,736:eu_debug:	Connecting to www.encodeproject.org
2021-09-29 15:29:45,781:eu_debug:	submission=False: In non-submission mode.
2021-09-29 15:29:46,701:eu_debug:	submission=True: In submission mode.
2021-09-29 15:29:46,750:eu_debug:	
IN patch()
2021-09-29 15:29:46,751:eu_debug:	>>>>>>GET files/ENCFF058NAC From DCC with URL https://www.encodeproject.org/files/ENCFF058NAC/?format=json&datastore=database&frame=edit
2021-09-29 15:29:47,104:eu_debug:	<<<<<< PATCHING /files/ENCFF058NAC/ To DCC with URL https://www.encodeproject.org/files/ENCFF058NAC and this payload:

{
  "step_run": "tim-reddy:starrseq_fastq_to_bam_with_umi_step_run"
}


2021-09-29 15:29:48,077:eu_debug:	Success.
2021-09-29 15:29:48,078:eu_debug:	
IN patch()
2021-09-29 15:29:48,078:eu_debug:	>>>>>>GET files/ENCFF294XNE From DCC with URL https://www.encodeproject.org/files/ENCFF294XNE/?format=json&datastore=database&frame=edit
2021-09-29 15:29:48,424:eu_debug:	<<<<<< PATCHING /files/ENCFF294XNE/ To DCC with

In [6]:
%%bash
zcat /data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05_CRADLE_peaks.narrowPeak.gz \
| tail -n+2 \
| awk -vOFS="\t" '{print $1, $2, $3, $4, '100', $6, ".", $7, $10, $11}' \
| gzip -c \
> /data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05_CRADLE_peaks.bed.gz

In [2]:
!ls -lah /data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05_CRADLE_peaks.bed.gz

-rw-rw-r-- 1 rv103 reddylab 627K Nov 21 10:29 /data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05_CRADLE_peaks.bed.gz


In [8]:
!zcat /data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05_CRADLE_peaks.bed.gz | head

chr1	492302	492378	chr1:492302-492378	100	.	.	-54	nan	
chr1	844088	844138	chr1:844088-844138	100	.	.	113	nan	
chr1	869788	870038	chr1:869788-870038	100	.	.	-107	nan	
chr1	902870	902920	chr1:902870-902920	100	.	.	80	nan	
chr1	913038	913238	chr1:913038-913238	100	.	.	105	nan	
chr1	925588	925888	chr1:925588-925888	100	.	.	-100	nan	
chr1	930538	930588	chr1:930538-930588	100	.	.	-34	nan	
chr1	931138	931188	chr1:931138-931188	100	.	.	-59	nan	
chr1	936838	936938	chr1:936838-936938	100	.	.	117	nan	
chr1	941058	941108	chr1:941058-941108	100	.	.	92	nan	

gzip: stdout: Broken pipe


In [1]:
%%bash
zcat /data/reddylab/Revathy/collabs/Kari/ENCODE/data/wgSTARRseq_k562_kmer36_fdr0.05_CRADLE_peaks.narrowPeak.gz \
| head

chr1	17320	17470	chr1:17320-17470	.	.	-65	2.92	2.0	nan
chr1	492302	492378	chr1:492302-492378	.	.	-54	2.81	2.33	nan
chr1	844088	844138	chr1:844088-844138	.	.	113	3.07	2.29	nan
chr1	869788	870038	chr1:869788-870038	.	.	-107	3.89	3.11	nan
chr1	902870	902920	chr1:902870-902920	.	.	80	3.55	2.31	nan
chr1	913038	913238	chr1:913038-913238	.	.	105	3.03	2.25	nan
chr1	925588	925888	chr1:925588-925888	.	.	-100	2.59	1.97	nan
chr1	930538	930588	chr1:930538-930588	.	.	-34	3.89	2.63	nan
chr1	931138	931188	chr1:931138-931188	.	.	-59	3.07	2.12	nan
chr1	936838	936938	chr1:936838-936938	.	.	117	3.18	2.1	nan


In [2]:
!head /data/reddylab/kstrouse/superstarr/output_libs/A001_K562/CRADLE/CRADLEcorr_kmer36/peaks/fdr0.05/CRADLE_peaks

chr	start	end	name	score	strand	effectSize	inputCount	outputCount	-log(pvalue)	-log(qvalue)	cohen's_d
chr1	17320	17470	chr1:17320-17470	.	.	-65	-59	-125	2.92	2.0	nan
chr1	492302	492378	chr1:492302-492378	.	.	-54	-42	-96	2.81	2.33	nan
chr1	844088	844138	chr1:844088-844138	.	.	113	-35	77	3.07	2.29	nan
chr1	869788	870038	chr1:869788-870038	.	.	-107	-17	-124	3.89	3.11	nan
chr1	902870	902920	chr1:902870-902920	.	.	80	-24	56	3.55	2.31	nan
chr1	913038	913238	chr1:913038-913238	.	.	105	-36	69	3.03	2.25	nan
chr1	925588	925888	chr1:925588-925888	.	.	-100	-35	-136	2.59	1.97	nan
chr1	930538	930588	chr1:930538-930588	.	.	-34	-26	-60	3.89	2.63	nan
chr1	931138	931188	chr1:931138-931188	.	.	-59	-74	-134	3.07	2.12	nan


In [1]:
%%bash
ls /data/reddylab/kstrouse/superstarr/output_libs/A001_K562/CRADLE/CRADLEcorr_kmer36/cradle_corrected_1/A001.f3q10.sorted.dedup.raw_corrected.bw \
/data/reddylab/kstrouse/superstarr/output_libs/A001_K562/CRADLE/CRADLEcorr_kmer36/cradle_corrected_1/A001-K562-rep1.f3q10.sorted.dedup.raw_corrected.bw \
/data/reddylab/kstrouse/superstarr/output_libs/A001_K562/CRADLE/CRADLEcorr_kmer36/cradle_corrected_2/A001-K562-rep2.f3q10.sorted.dedup.raw_corrected.bw \
/data/reddylab/kstrouse/superstarr/output_libs/A001_K562/CRADLE/CRADLEcorr_kmer36/cradle_corrected_3/A001-K562-rep3.f3q10.sorted.dedup.raw_corrected.bw \

/data/reddylab/kstrouse/superstarr/output_libs/A001_K562/CRADLE/CRADLEcorr_kmer36/cradle_corrected_1/A001.f3q10.sorted.dedup.raw_corrected.bw
/data/reddylab/kstrouse/superstarr/output_libs/A001_K562/CRADLE/CRADLEcorr_kmer36/cradle_corrected_1/A001-K562-rep1.f3q10.sorted.dedup.raw_corrected.bw
/data/reddylab/kstrouse/superstarr/output_libs/A001_K562/CRADLE/CRADLEcorr_kmer36/cradle_corrected_2/A001-K562-rep2.f3q10.sorted.dedup.raw_corrected.bw
/data/reddylab/kstrouse/superstarr/output_libs/A001_K562/CRADLE/CRADLEcorr_kmer36/cradle_corrected_3/A001-K562-rep3.f3q10.sorted.dedup.raw_corrected.bw
