From d96ab570b196b1b92f65aa945ae6816a60ddb54e Mon Sep 17 00:00:00 2001 From: peterjc Date: Tue, 18 May 2010 19:36:42 +0100 Subject: [PATCH] Store database and primary accessions from DR lines in EMBL files (Bug 3069) --- Bio/GenBank/Scanner.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Bio/GenBank/Scanner.py b/Bio/GenBank/Scanner.py index b7c744e5e95..04b14b62728 100644 --- a/Bio/GenBank/Scanner.py +++ b/Bio/GenBank/Scanner.py @@ -658,7 +658,7 @@ def _feed_header_lines(self, consumer, lines): 'RL' : 'journal', 'OS' : 'organism', 'OC' : 'taxonomy', - #'DR' : data reference? + #'DR' : data reference 'CC' : 'comment', #'XX' : splitter } @@ -729,9 +729,12 @@ def _feed_header_lines(self, consumer, lines): # e.g. # DR MGI; 98599; Tcrb-V4. # - # TODO - Data reference... - # How should we store the secondary identifier (if present)? Ignore it? - pass + # TODO - How should we store any secondary identifier? + parts = data.rstrip(".").split(";") + #Turn it into "database_identifier:primary_identifier" to + #mimic the GenBank parser. e.g. "MGI:98599" + consumer.dblink("%s:%s" % (parts[0].strip(), + parts[1].strip())) elif line_type == 'RA': # Remove trailing ; at end of authors list consumer.authors(data.rstrip(";"))