From d96ab570b196b1b92f65aa945ae6816a60ddb54e Mon Sep 17 00:00:00 2001
From: peterjc
Date: Tue, 18 May 2010 19:36:42 +0100
Subject: [PATCH] Store database and primary accessions from DR lines in EMBL
files (Bug 3069)
---
Bio/GenBank/Scanner.py | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/Bio/GenBank/Scanner.py b/Bio/GenBank/Scanner.py
index b7c744e5e95..04b14b62728 100644
--- a/Bio/GenBank/Scanner.py
+++ b/Bio/GenBank/Scanner.py
@@ -658,7 +658,7 @@ def _feed_header_lines(self, consumer, lines):
'RL' : 'journal',
'OS' : 'organism',
'OC' : 'taxonomy',
- #'DR' : data reference?
+ #'DR' : data reference
'CC' : 'comment',
#'XX' : splitter
}
@@ -729,9 +729,12 @@ def _feed_header_lines(self, consumer, lines):
# e.g.
# DR MGI; 98599; Tcrb-V4.
#
- # TODO - Data reference...
- # How should we store the secondary identifier (if present)? Ignore it?
- pass
+ # TODO - How should we store any secondary identifier?
+ parts = data.rstrip(".").split(";")
+ #Turn it into "database_identifier:primary_identifier" to
+ #mimic the GenBank parser. e.g. "MGI:98599"
+ consumer.dblink("%s:%s" % (parts[0].strip(),
+ parts[1].strip()))
elif line_type == 'RA':
# Remove trailing ; at end of authors list
consumer.authors(data.rstrip(";"))