Skip to content

Commit

Permalink
Merge efe8229 into c5ab8db
Browse files Browse the repository at this point in the history
  • Loading branch information
MalinAhlberg committed May 22, 2019
2 parents c5ab8db + efe8229 commit 8840c49
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 13 deletions.
28 changes: 18 additions & 10 deletions scripts/importer/data_importer/raw_data_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,30 +199,38 @@ def _parse_manta(self):
continue

if base["chrom"].startswith('GL') or base["chrom"].startswith('MT'):
# TODO keep this?
# A BND from GL or MT.
continue

if 'NSAMPLES' in info:
# save this unless we already know the sample size
samples = int(info['NSAMPLES'])

alt_alleles = base['alt'].split(",")
# TODO: the OCC field below is a candidate source for the allele count or call count:
# OCC,Number=1,Type=Integer,Description="The number of occurences of the event in the database"
for i, alt in enumerate(alt_alleles):
data = dict(base)
data['allele_freq'] = float(info.get('FRQ'))
data['alt'], data['mate_chrom'], data['mate_start'] = re.search('(.+)[[\]](.*?):(\d+)[[\]]', alt).groups()
if data['mate_chrom'].startswith('GL') or data['mate_chrom'].startswith('MT'):
# A BND from a chromosome to GL or MT.
# TODO ask a bioinformatician if these cases should be included or not
continue
if 'MATEID' in info:
data['mate_id'] = info['MATEID']
data['mate_id'] = info.get('MATEID', '')
data['variant_id'] = '{}-{}-{}-{}'.format(data['chrom'], data['pos'], data['ref'], alt)

data['allele_count'] = data.get('allele_count', 0)
data['allele_num'] = data.get('allele_num', 0)
batch += [data]
if self.settings.count_calls:
self.get_callcount(data) # count calls (one per reference)
self.counter['beaconvariants'] += 1 # count variants (one per alternate)
if self.settings.add_reversed_mates:
# If the vcf only contains one line per breakend, add the reversed version to the database here.
reversed = dict(data)
# TODO: in general, ref and alt cannot be assumed to be the same in the reversed direction,
# but our data (so far) only contains N, so we keep them as they are for now.
reversed.update({'mate_chrom': data['chrom'], 'chrom': data['mate_chrom'],
'mate_start': data['pos'], 'pos': data['mate_start'],
'chrom_id': data['mate_id'], 'mate_id': data['chrom_id']})
reversed['variant_id'] = '{}-{}-{}-{}'.format(reversed['chrom'], reversed['pos'], reversed['ref'], alt)
# TODO should the `counter` be increased here?
batch += [reversed]

counter += 1 # count variants (one per vcf row)

Expand Down Expand Up @@ -491,7 +499,7 @@ def start_import(self):
if self.settings.add_mates:
self._parse_manta()
if self.settings.count_calls:
self._create_beacon_counts()
logging.warning('Do not know how to count calls in the manta file. Skipping this...')
elif self.settings.variant_file:
self._insert_variants()
if self.settings.count_calls:
Expand Down
2 changes: 2 additions & 0 deletions scripts/importer/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@
" the requirements"))
PARSER.add_argument("--add_mates", action="store_true",
help=("Parse MANTA file and add the breakends to the db"))
PARSER.add_argument("--add_reversed_mates", action="store_true",
help=("Assume input data only contain one line per BND, covering both directions"))

ARGS = PARSER.parse_args()

Expand Down
6 changes: 3 additions & 3 deletions sql/beacon_schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -91,17 +91,17 @@ CREATE OR REPLACE VIEW beacon.beacon_mate_table AS
d.short_name,
av.dataset_version) AS datasetId,
substr(dm.chrom, 1, 2) AS chromosome,
dm.pos - 1 AS "chromosomeStart",
dm.pos - 1 AS chromosomeStart,
dm.chrom_id as chromosomePos,
dm.mate_chrom as mate,
dm.mate_start as mateStart,
dm.mate_start - 1 as mateStart,
dm.mate_id as matePos,
dm.ref as reference,
dm.alt as alternate,
dm.allele_count as alleleCount,
dm.allele_num as callCount,
dm.allele_freq as frequency,
dm.mate_start as "end",
dm.mate_start - 1 as "end",
'BND' as variantType
FROM data.mate AS dm
JOIN beacon.available_datasets as av
Expand Down

0 comments on commit 8840c49

Please sign in to comment.