From d96ab570b196b1b92f65aa945ae6816a60ddb54e Mon Sep 17 00:00:00 2001
From: peterjc <p.j.a.cock@googlemail.com>
Date: Tue, 18 May 2010 19:36:42 +0100
Subject: [PATCH] Store database and primary accessions from DR lines in EMBL
 files (Bug 3069)

---
 Bio/GenBank/Scanner.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/Bio/GenBank/Scanner.py b/Bio/GenBank/Scanner.py
index b7c744e5e95..04b14b62728 100644
--- a/Bio/GenBank/Scanner.py
+++ b/Bio/GenBank/Scanner.py
@@ -658,7 +658,7 @@ def _feed_header_lines(self, consumer, lines):
             'RL' : 'journal',
             'OS' : 'organism',
             'OC' : 'taxonomy',
-            #'DR' : data reference?
+            #'DR' : data reference
             'CC' : 'comment',
             #'XX' : splitter
         }
@@ -729,9 +729,12 @@ def _feed_header_lines(self, consumer, lines):
                     # e.g.
                     # DR   MGI; 98599; Tcrb-V4.
                     #
-                    # TODO - Data reference...
-                    # How should we store the secondary identifier (if present)?  Ignore it?
-                    pass
+                    # TODO - How should we store any secondary identifier?
+                    parts = data.rstrip(".").split(";")
+                    #Store "database_identifier:primary_identifier" to mimic
+                    #the GenBank parser, e.g. "MGI:98599"; rest is ignored.
+                    consumer.dblink("%s:%s" % (parts[0].strip(),
+                                               parts[1].strip()))
                 elif line_type == 'RA':
                     # Remove trailing ; at end of authors list
                     consumer.authors(data.rstrip(";"))