Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions bin/chipdb_data_upload
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import argparse
from chipdb_upload.data_upload import run


def main():
    """Parse command-line arguments and launch the ChIP-seq QC data upload.

    Entry point for the ``chipdb_data_upload`` script: collects the input
    directories, MongoDB connection details and log file name, then hands
    the parsed namespace to ``chipdb_upload.data_upload.run``.
    """
    # NOTE: ArgumentParser's first positional argument is ``prog`` (the name
    # shown in usage lines), so the summary text must go in ``description``.
    parser = argparse.ArgumentParser(
        description='Generates QC metric summary file for available ChIP-seq samples')
    parser.add_argument('-i', '--in_dirs', required=True, nargs='+',
                        help='Directory(ies) for fingerprint data')
    parser.add_argument('-u', '--uri', required=True,
                        help='URI for database upload')
    parser.add_argument('-d', '--database', required=True,
                        help='Database name for upload')
    parser.add_argument('-c', '--collection', required=True,
                        help='Collection name for database')
    parser.add_argument('-o', '--output', required=True,
                        help='Filename for output log')
    args = parser.parse_args()
    run(args)


if __name__ == '__main__':
    # Without this guard the script defines main() but never executes it;
    # the sibling script bin/ggr_cwl_ipynb_gen guards its entry point the
    # same way.
    main()
35 changes: 35 additions & 0 deletions bin/ggr_cwl_ipynb_gen
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env python3
import argparse
from ipynb_gen.main import run
import ipynb_gen.consts as consts

if __name__ == '__main__':
    # Command-line entry point: build the CLI for the Jupyter-notebook
    # generator and delegate to ipynb_gen.main.run.
    # NOTE: ArgumentParser's first positional argument is ``prog`` (the name
    # shown in usage lines), so the summary text must go in ``description``.
    parser = argparse.ArgumentParser(
        description='Generator of Jupyter notebooks to execute CWL pre-processing pipelines')
    parser.add_argument('-o', '--out', required=True, type=str,
                        help='Jupyter notebook output file name')
    parser.add_argument('-m', '--metadata', required=True, type=argparse.FileType('r'),
                        help='Metadata file with samples information')
    parser.add_argument('-f', '--force', action='store_true',
                        help='Force to overwrite output file')
    # store_false: args.no_upload defaults to True and flips to False when
    # the flag is supplied; downstream code must interpret it accordingly.
    parser.add_argument('-n', '--no-upload', action='store_false',
                        help='Avoids uploading generated data to database when specified')
    parser.add_argument('--metadata-sep', dest='sep', required=False, type=str, default='\t',
                        help='Separator for metadata file (when different than Excel spread sheet)')
    parser.add_argument('--project-name', required=False, type=str,
                        help='Project name (by default, basename of metadata file name)')
    parser.add_argument('--data-from', required=False, choices=consts.data_sources,
                        default=consts.data_sources[0],
                        help='Choices: %s' % (', '.join(consts.data_sources)))
    parser.add_argument('-c', '--conf-file', required=False, type=argparse.FileType('r'),
                        help='YAML configuration file (see examples)')
    parser.add_argument('-u', '--user', required=False,
                        help='HARDAC User used in SLURM (default: ${USER})')
    parser.add_argument('-e', '--user-duke-email', required=False,
                        help='Email(s) notified when execution is finished (default: ${USER}@duke.edu)')
    parser.add_argument('-r', '--root-dir', required=False,
                        help='Root directory where all subfolders and files will be created '
                             '(semi-required: either defined here or in conf-file)')
    # NOTE(review): this option currently consumes a value and is merely
    # forwarded to run(); if the intent is "print version and exit",
    # action='version' with a version string is needed — confirm against
    # ipynb_gen.main.run before changing.
    parser.add_argument('-v', '--version', required=False,
                        help='Print version of the program and exit')

    args = parser.parse_args()
    run(args)

32 changes: 8 additions & 24 deletions chipdb_upload/data_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,16 +107,16 @@ def process_directory(in_dir):
# Append the file path
file_path = os.path.join(in_dir, filename)
if os.stat(file_path).st_size != 0:
if filename.lower().endswith('_metadata.txt'): # Find metadata
if filename.lower().endswith('_metadata.txt'): # Find metadata
metadata_files.append(file_path)
elif filename.endswith('_QCmetrics.txt'): # If fingerprint QC file, add to array
elif filename.endswith('_QCmetrics.txt'): # If fingerprint QC file, add to array
fingerprint_qc_arr.append(file_path)
elif filename.lower() == 'qc.csv' or filename.lower() == 'qc.txt' \
or filename.lower() == 'chip_seq_summary_iter0.tsv': # If lab-computed QC file, set var
or filename.lower() == 'chip_seq_summary_iter0.tsv': # If lab-computed QC file, set var
qc_file = file_path
elif filename.endswith(".png") or filename.endswith(".pdf"):
images.append(file_path)
elif filename.endswith('.cross_corr.txt'): # If cross corr data, add to array
elif filename.endswith('.cross_corr.txt'): # If cross corr data, add to array
spp_data_arr.append(file_path)

# Raise error if QC file was not found.
Expand Down Expand Up @@ -167,7 +167,7 @@ def process_directory(in_dir):
df = df.merge(fp_df, left_index=True, right_index=True, how='outer')

# Add fingerprint images and metadata information
for sample in df.index.values: # Index is sample name
for sample in df.index.values: # Index is sample name
fp_image = ''
spp_image = ''
metadata_file = ''
Expand All @@ -193,30 +193,18 @@ def process_directory(in_dir):
return df


def main():
parser = argparse.ArgumentParser('Generates QC metric summary file for available ChIP-seq samples')
parser.add_argument('-i', '--in_dirs', required=True, nargs='+',
help='Directory(ies)for fingerprint data')
parser.add_argument('-u', '--uri', required=True,
help='URI for database upload')
parser.add_argument('-d', '--database', required=True,
help='Database name for upload')
parser.add_argument('-c', '--collection', required=True,
help='Collection name for database')
parser.add_argument('-o', '--output', required=True, help="Filename for output log")
args = parser.parse_args()
def run(args):

logging.basicConfig(filename=args.output, level=logging.DEBUG)


# Process each given data directory
df = pd.DataFrame()
for i in range(len(args.in_dirs)):
if os.path.isdir(args.in_dirs[i]):
new_df = process_directory(args.in_dirs[i])
df = df.append(new_df)

df.rename(columns={'diff._enrichment':'diff_enrichment'}, inplace=True)
df.rename(columns={'diff._enrichment': 'diff_enrichment'}, inplace=True)

# Convert Pandas dataframe into list of dictionaries
data = df.to_dict(orient='index')
Expand All @@ -236,7 +224,7 @@ def main():
# Set sample data
sample = data[sample_name]
sample['sample'] = sample_name
sample['last_modified'] = datetime.datetime.utcnow()
sample['last_modified'] = datetime.datetime.utcnow()
logging.info("Uploading sample: %s", sample_name)
sample_coll.replace_one({'sample': sample_name}, sample, upsert=True)

Expand All @@ -251,7 +239,3 @@ def main():
flowcell_coll.replace_one({'name': flowcell_name}, flowcell_data, upsert=True)

logging.info("Data upload terminated successfully")


if __name__ == '__main__':
main()
7 changes: 0 additions & 7 deletions ggr_cwl_ipynb_gen/templates/data_upload.j2

This file was deleted.

File renamed without changes.
16 changes: 8 additions & 8 deletions ggr_cwl_ipynb_gen/consts.py → ipynb_gen/consts.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# ipynb Generator configuration
DATA_SOURCES_SFTP = 'sftp'
DATA_SOURCES_MISEQ = 'miseq'
DATA_SOURCES_SFTP = 'sftp'
DATA_SOURCES_MISEQ = 'miseq'
DATA_SOURCES_DUKEDS = 'dukeds'
DATA_SOURCES_OTHER = 'other'
DATA_SOURCES_LOCAL = 'local'
DATA_SOURCES_OTHER = 'other'
DATA_SOURCES_LOCAL = 'local'
data_sources = [
DATA_SOURCES_SFTP,
DATA_SOURCES_MISEQ,
Expand Down Expand Up @@ -47,12 +47,12 @@

# Environment configuration
CONDA_ACTIVATE = '/data/reddylab/software/miniconda2/bin/activate'
CONTAMINATION_SCRIPT = '/data/reddylab/Darryl/GitHub/reddylab/contamination_check' # not used
CONTAMINATION_SCRIPT = '/data/reddylab/Darryl/GitHub/reddylab/contamination_check' # not used
CONDA_ENVIRONMENT = 'cwl10'
PLOT_SCRIPT = '/data/reddylab/Darryl/GitHub/reddylab/countFactors_metadata.sh' # not used
PLOT_SCRIPT = '/data/reddylab/Darryl/GitHub/reddylab/countFactors_metadata.sh' # not used
QC_SCRIPT_DIR = '/data/reddylab/software/cwl/bin'
DATA_UPLOAD_SCRIPT = '/data/reddylab/Darryl/GitHub/reddylab/csv_to_mongo.py' # not used
DATA_UPLOAD_SCRIPT = '/data/reddylab/Darryl/GitHub/reddylab/csv_to_mongo.py'
HOST_FOR_TUNNELED_DOWNLOAD = "Hardac-xfer.genome.duke.edu"

# Package constants
PACKAGE_NAME = "ggr_cwl_ipynb_gen"
PACKAGE_NAME = "ipynb_gen"
Loading