GVKFetcher: Ensure that first letter of subtitle is always capitalized

Minor improvements: * replace repeated calls to datafield.getAttribute("tag") with a single local variable
JabRef · Nov 21, 2015 · 7047a1d · 7047a1d
1 parent b2c8db5
commit 7047a1d
Show file tree

Hide file tree

Showing 3 changed files with 141 additions and 25 deletions.
diff --git a/src/main/java/net/sf/jabref/importer/fetcher/GVKParser.java b/src/main/java/net/sf/jabref/importer/fetcher/GVKParser.java
@@ -17,14 +17,22 @@
 import net.sf.jabref.importer.ImportFormatReader;
 import net.sf.jabref.model.entry.BibtexEntry;
 import net.sf.jabref.model.entry.IdGenerator;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;
 
+import com.google.common.base.Strings;
+
 public class GVKParser {
 
+    private static final Log LOGGER = LogFactory.getLog(GVKParser.class);
+
+
     public List<BibtexEntry> parseEntries(InputStream is)
             throws ParserConfigurationException, SAXException, IOException {
         DocumentBuilder dbuild = DocumentBuilderFactory.newInstance().newDocumentBuilder();
@@ -91,20 +99,21 @@ private BibtexEntry parseEntry(Element e) {
         while (iter.hasNext()) {
             Element datafield = iter.next();
 
-            // System.out.println(datafield.getAttributeValue("tag"));
+            String tag = datafield.getAttribute("tag");
+            LOGGER.debug("tag: " + tag);
 
             // mak
-            if (datafield.getAttribute("tag").equals("002@")) {
+            if (tag.equals("002@")) {
                 mak = getSubfield("0", datafield);
             }
 
             //ppn
-            if (datafield.getAttribute("tag").equals("003@")) {
+            if (tag.equals("003@")) {
                 ppn = getSubfield("0", datafield);
             }
 
             //author
-            if (datafield.getAttribute("tag").equals("028A")) {
+            if (tag.equals("028A")) {
                 String vorname = getSubfield("d", datafield);
                 String nachname = getSubfield("a", datafield);
 
@@ -116,7 +125,7 @@ private BibtexEntry parseEntry(Element e) {
                 author = author.concat(vorname + " " + nachname);
             }
             //author (weiterer)
-            if (datafield.getAttribute("tag").equals("028B")) {
+            if (tag.equals("028B")) {
                 String vorname = getSubfield("d", datafield);
                 String nachname = getSubfield("a", datafield);
 
@@ -129,7 +138,7 @@ private BibtexEntry parseEntry(Element e) {
             }
 
             //editor
-            if (datafield.getAttribute("tag").equals("028C")) {
+            if (tag.equals("028C")) {
                 String vorname = getSubfield("d", datafield);
                 String nachname = getSubfield("a", datafield);
 
@@ -142,24 +151,24 @@ private BibtexEntry parseEntry(Element e) {
             }
 
             //title and subtitle
-            if (datafield.getAttribute("tag").equals("021A")) {
+            if (tag.equals("021A")) {
                 title = getSubfield("a", datafield);
                 subtitle = getSubfield("d", datafield);
             }
 
             //publisher and address
-            if (datafield.getAttribute("tag").equals("033A")) {
+            if (tag.equals("033A")) {
                 publisher = getSubfield("n", datafield);
                 address = getSubfield("p", datafield);
             }
 
             //date
-            if (datafield.getAttribute("tag").equals("011@")) {
+            if (tag.equals("011@")) {
                 date = getSubfield("a", datafield);
             }
 
             //date, volume, number, pages (year bei Zeitschriften (evtl. redundant mit 011@))
-            if (datafield.getAttribute("tag").equals("031A")) {
+            if (tag.equals("031A")) {
                 date = getSubfield("j", datafield);
                 volume = getSubfield("e", datafield);
                 number = getSubfield("a", datafield);
@@ -170,7 +179,7 @@ private BibtexEntry parseEntry(Element e) {
             // 036D seems to contain more information than the other fields
             // overwrite information using that field
             // 036D also contains information normally found in 036E
-            if (datafield.getAttribute("tag").equals("036D")) {
+            if (tag.equals("036D")) {
                 // 021 might have been present
                 if (title != null) {
                     // convert old title (contained in "a" of 021A) to volume
@@ -189,7 +198,7 @@ private BibtexEntry parseEntry(Element e) {
             }
 
             //series and number
-            if (datafield.getAttribute("tag").equals("036E")) {
+            if (tag.equals("036E")) {
                 series = getSubfield("a", datafield);
                 number = getSubfield("l", datafield);
                 String kor = getSubfield("b", datafield);
@@ -200,17 +209,17 @@ private BibtexEntry parseEntry(Element e) {
             }
 
             //note
-            if (datafield.getAttribute("tag").equals("037A")) {
+            if (tag.equals("037A")) {
                 note = getSubfield("a", datafield);
             }
 
             //edition
-            if (datafield.getAttribute("tag").equals("032@")) {
+            if (tag.equals("032@")) {
                 edition = getSubfield("a", datafield);
             }
 
             //isbn
-            if (datafield.getAttribute("tag").equals("004A")) {
+            if (tag.equals("004A")) {
                 String isbn_10 = getSubfield("0", datafield);
                 String isbn_13 = getSubfield("A", datafield);
 
@@ -226,7 +235,7 @@ private BibtexEntry parseEntry(Element e) {
 
             // Hochschulschriftenvermerk
             // Bei einer Verlagsdissertation ist der Ort schon eingetragen
-            if (datafield.getAttribute("tag").equals("037C")) {
+            if (tag.equals("037C")) {
                 if (address == null) {
                     address = getSubfield("b", datafield);
                     address = removeSortCharacters(address);
@@ -252,23 +261,23 @@ private BibtexEntry parseEntry(Element e) {
              * Buchbeiträgen Verlag und Ort wichtig sind
              * (sonst in Kategorie 033A).
              */
-            if (datafield.getAttribute("tag").equals("027D")) {
+            if (tag.equals("027D")) {
                 journal = getSubfield("a", datafield);
                 booktitle = getSubfield("a", datafield);
                 address = getSubfield("p", datafield);
                 publisher = getSubfield("n", datafield);
             }
 
             //pagetotal
-            if (datafield.getAttribute("tag").equals("034D")) {
+            if (tag.equals("034D")) {
                 pagetotal = getSubfield("a", datafield);
 
                 // S, S. etc. entfernen
                 pagetotal = pagetotal.replaceAll(" S\\.?$", "");
             }
 
             // Behandlung von Konferenzen
-            if (datafield.getAttribute("tag").equals("030F")) {
+            if (tag.equals("030F")) {
                 address = getSubfield("k", datafield);
 
                 if (!entryType.equals("proceedings")) {
@@ -292,17 +301,17 @@ private BibtexEntry parseEntry(Element e) {
             //SRU-Schnittstelle gelieferten Daten zur
             //Quelle unvollständig sind (z.B. nicht Serie
             //und Nummer angegeben werden)
-            if (datafield.getAttribute("tag").equals("039B")) {
+            if (tag.equals("039B")) {
                 quelle = getSubfield("8", datafield);
             }
-            if (datafield.getAttribute("tag").equals("046R")) {
+            if (tag.equals("046R")) {
                 if (quelle.equals("") || (quelle == null)) {
                     quelle = getSubfield("a", datafield);
                 }
             }
 
             // URLs behandeln
-            if (datafield.getAttribute("tag").equals("009P")) {
+            if (tag.equals("009P")) {
                 if (datafield.getAttribute("occurrence").equals("03")
                         || datafield.getAttribute("occurrence").equals("05")) {
                     if (url == null) {
@@ -367,8 +376,15 @@ private BibtexEntry parseEntry(Element e) {
         if (title != null) {
             result.setField("title", title);
         }
-        if (subtitle != null) {
-            result.setField("subtitle", subtitle);
+        if (!Strings.isNullOrEmpty(subtitle)) {
+            // Ensure that the first letter of the subtitle is an upper-case letter.
+            // Edge case: the subtitle may be only one character long, so the remainder is appended only when it exists.
+            // This mirrors Apache Commons Lang StringUtils.capitalize (https://commons.apache.org/proper/commons-lang/javadocs/api-release/org/apache/commons/lang3/StringUtils.html#capitalize%28java.lang.String%29); it is hand-rolled here because we don't want to add an additional dependency ('org.apache.commons:commons-lang3:3.4').
+            String newSubtitle = Character.toString(Character.toUpperCase(subtitle.charAt(0)));
+            if (subtitle.length() > 1) {
+                newSubtitle += subtitle.substring(1);
+            }
+            result.setField("subtitle", newSubtitle);
         }
         if (publisher != null) {
             result.setField("publisher", publisher);

diff --git a/src/test/java/net/sf/jabref/importer/fetcher/GVKParserTest.java b/src/test/java/net/sf/jabref/importer/fetcher/GVKParserTest.java
@@ -23,7 +23,7 @@ private void doTest(String xmlName, int expectedSize, List<String> resourceNames
             GVKParser parser = new GVKParser();
             List<BibtexEntry> entries = parser.parseEntries(is);
             Assert.assertNotNull(entries);
-            Assert.assertEquals(entries.size(), expectedSize);
+            Assert.assertEquals(expectedSize, entries.size());
             int i = 0;
             for (String resourceName : resourceNames) {
                 BibtexEntryUtil.doAssertEquals(GVKParser.class, resourceName, entries.get(i));
@@ -46,4 +46,29 @@ public void resultFor797485368() throws Exception {
     public void GMP() throws Exception {
         doTest("gvk_gmp.xml", 2, Arrays.asList(new String[] {"gvk_gmp.1.bib", "gvk_gmp.2.bib"}));
     }
+
+    @Test
+    public void subTitleTest() throws Exception { // checks the first-letter capitalization of parsed subtitles
+        try (InputStream is = GVKParser.class.getResourceAsStream("gvk_artificial_subtitle_test.xml")) {
+            GVKParser parser = new GVKParser();
+            List<BibtexEntry> entries = parser.parseEntries(is);
+            Assert.assertNotNull(entries);
+            Assert.assertEquals(5, entries.size());
+            // record 1: subfield "d" is empty, so no subtitle field is set
+            BibtexEntry entry = entries.get(0);
+            Assert.assertEquals(null, entry.getField("subtitle"));
+            // record 2: single lower-case character "c" is upper-cased
+            entry = entries.get(1);
+            Assert.assertEquals("C", entry.getField("subtitle"));
+            // record 3: single word "word" gets a capital first letter
+            entry = entries.get(2);
+            Assert.assertEquals("Word", entry.getField("subtitle"));
+            // record 4: "word1 word2" - only the very first letter is changed
+            entry = entries.get(3);
+            Assert.assertEquals("Word1 word2", entry.getField("subtitle"));
+            // record 5: "Word1 word2" is already capitalized and stays as it is
+            entry = entries.get(4);
+            Assert.assertEquals("Word1 word2", entry.getField("subtitle"));
+        }
+    }
 }
diff --git a/src/test/resources/net/sf/jabref/importer/fetcher/gvk_artificial_subtitle_test.xml b/src/test/resources/net/sf/jabref/importer/fetcher/gvk_artificial_subtitle_test.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/">
+<zs:version>1.1</zs:version>
+<zs:numberOfRecords>5</zs:numberOfRecords>
+<zs:records>
+<zs:record>
+<zs:recordSchema>picaxml</zs:recordSchema>
+<zs:recordPacking>xml</zs:recordPacking>
+<zs:recordData>
+<record xmlns="info:srw/schema/5/picaXML-v1.0">
+  <datafield tag="021A">
+    <subfield code="d"></subfield>
+  </datafield>
+</record>
+</zs:recordData>
+<zs:recordPosition>1</zs:recordPosition>
+</zs:record>
+<zs:record>
+<zs:recordSchema>picaxml</zs:recordSchema>
+<zs:recordPacking>xml</zs:recordPacking>
+<zs:recordData>
+<record xmlns="info:srw/schema/5/picaXML-v1.0">
+  <datafield tag="021A">
+    <subfield code="d">c</subfield>
+  </datafield>
+</record>
+</zs:recordData>
+<zs:recordPosition>2</zs:recordPosition>
+</zs:record>
+<zs:record>
+<zs:recordSchema>picaxml</zs:recordSchema>
+<zs:recordPacking>xml</zs:recordPacking>
+<zs:recordData>
+<record xmlns="info:srw/schema/5/picaXML-v1.0">
+  <datafield tag="021A">
+    <subfield code="d">word</subfield>
+  </datafield>
+</record>
+</zs:recordData>
+<zs:recordPosition>3</zs:recordPosition>
+</zs:record>
+<zs:record>
+<zs:recordSchema>picaxml</zs:recordSchema>
+<zs:recordPacking>xml</zs:recordPacking>
+<zs:recordData>
+<record xmlns="info:srw/schema/5/picaXML-v1.0">
+  <datafield tag="021A">
+    <subfield code="d">word1 word2</subfield>
+  </datafield>
+</record>
+</zs:recordData>
+<zs:recordPosition>4</zs:recordPosition>
+</zs:record>
+<zs:record>
+<zs:recordSchema>picaxml</zs:recordSchema>
+<zs:recordPacking>xml</zs:recordPacking>
+<zs:recordData>
+<record xmlns="info:srw/schema/5/picaXML-v1.0">
+  <datafield tag="021A">
+    <subfield code="d">Word1 word2</subfield>
+  </datafield>
+</record>
+</zs:recordData>
+<zs:recordPosition>5</zs:recordPosition>
+</zs:record>
+</zs:records>
+<zs:echoedSearchRetrieveRequest>
+<zs:version>1.1</zs:version>
+<zs:query>pica.all=797485368</zs:query>
+<zs:maximumRecords>50</zs:maximumRecords>
+<zs:recordPacking>xml</zs:recordPacking>
+<zs:recordSchema>picaxml</zs:recordSchema>
+<zs:sortKeys>Year,,1</zs:sortKeys>
+</zs:echoedSearchRetrieveRequest>
+</zs:searchRetrieveResponse>