Skip to content

Commit

Permalink
Merge 3077c3c into cb06f48
Browse files Browse the repository at this point in the history
  • Loading branch information
talavis committed Mar 8, 2019
2 parents cb06f48 + 3077c3c commit d7d5ca1
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 11 deletions.
16 changes: 16 additions & 0 deletions backend/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,3 +516,19 @@ def build_dict_from_row(row):
continue
d[field] = value
return d


def get_reference_set_for_dataset(dataset):
    """
    Look up the reference set that a dataset points to.

    The dataset is located by matching its ``short_name`` column against
    the supplied value, and the ``reference_set`` attribute of the matching
    row is returned.

    Args:
        dataset (str): short name of the dataset

    Returns:
        ReferenceSet: the reference set of the matching dataset, or None
        when no dataset with that short name exists
    """
    # Building the query does not touch the database; the lookup happens
    # when .get() is called inside the try block below.
    matching_datasets = Dataset.select().where(Dataset.short_name == dataset)
    try:
        found_dataset = matching_datasets.get()
        return found_dataset.reference_set
    except Dataset.DoesNotExist:
        # No dataset carries this short name.
        return None
27 changes: 16 additions & 11 deletions scripts/importer/data_importer/raw_data_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,9 +281,14 @@ def _insert_variants(self):
gq_mids = None
with db.database.atomic():
for filename in self.settings.variant_file:
# gene/transcript dbids; need to add support for version
refgenes = {gene.gene_id: gene.id for gene in db.Gene.select(db.Gene.id, db.Gene.gene_id)}
reftranscripts = {tran.transcript_id: tran.id for tran in db.Transcript.select(db.Transcript.id, db.Transcript.transcript_id)}
ref_set = get_reference_set_for_dataset(self.settings.dataset)
ref_genes = {gene.gene_id: gene.id for gene in (db.Gene.select(db.Gene.id, db.Gene.gene_id)
.where(db.Gene.reference_set == ref_set))}
ref_transcripts = {tran.transcript_id: tran.id for tran in (db.Transcript
.select(db.Transcript.id,
db.Transcript.transcript_id)
.join(db.Gene)
.where(db.Gene.reference_set == ref_set))}
for line in self._open(filename):
line = bytes(line).decode('utf8').strip()

Expand Down Expand Up @@ -395,8 +400,8 @@ def _insert_variants(self):
indexes = []
for entry in batch:
indexes.append(db.Variant.select(db.Variant.id).where(db.Variant.variant_id == entry['variant_id']).get().id)
self.add_variant_genes(indexes, genes, refgenes)
self.add_variant_transcripts(indexes, transcripts, reftranscripts)
self.add_variant_genes(indexes, genes, ref_genes)
self.add_variant_transcripts(indexes, transcripts, ref_transcripts)

genes = []
transcripts = []
Expand Down Expand Up @@ -430,8 +435,8 @@ def _insert_variants(self):
indexes = []
for entry in batch:
indexes.append(db.Variant.select(db.Variant.id).where(db.Variant.variant_id == entry['variant_id']).get().id)
self.add_variant_genes(indexes, genes, refgenes)
self.add_variant_transcripts(indexes, transcripts, reftranscripts)
self.add_variant_genes(indexes, genes, ref_genes)
self.add_variant_transcripts(indexes, transcripts, ref_transcripts)

if self.settings.set_vcf_sampleset_size and samples:
self.sampleset.sample_size = samples
Expand Down Expand Up @@ -479,18 +484,18 @@ def start_import(self):
if not self.settings.beacon_only:
self._insert_coverage()

def add_variant_genes(self, variant_indexes:list, genes_to_add:list, refgenes:dict):
def add_variant_genes(self, variant_indexes:list, genes_to_add:list, ref_genes:dict):
batch = []
for i in range(len(variant_indexes)):
connected_genes = [{'variant':variant_indexes[i], 'gene':refgenes[gene]} for gene in genes_to_add[i] if gene]
connected_genes = [{'variant':variant_indexes[i], 'gene':ref_genes[gene]} for gene in genes_to_add[i] if gene]
batch += connected_genes
if not self.settings.dry_run:
db.VariantGenes.insert_many(batch).execute()

def add_variant_transcripts(self, variant_indexes:list, transcripts_to_add:list, reftranscripts:dict):
def add_variant_transcripts(self, variant_indexes:list, transcripts_to_add:list, ref_transcripts:dict):
batch = []
for i in range(len(variant_indexes)):
connected_transcripts = [{'variant':variant_indexes[i], 'transcript':reftranscripts[transcript]}
connected_transcripts = [{'variant':variant_indexes[i], 'transcript':ref_transcripts[transcript]}
for transcript in transcripts_to_add[i]]
batch += connected_transcripts
if not self.settings.dry_run:
Expand Down

0 comments on commit d7d5ca1

Please sign in to comment.