Skip to content

Commit

Permalink
Add the code to parse and store keywords, abstract and language.
Browse files Browse the repository at this point in the history
  • Loading branch information
taojing2002 committed May 16, 2019
1 parent 2c67498 commit 572441f
Showing 1 changed file with 28 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ public class GenericDataPackageParser implements DataPackageParserInterface
protected String datasetTitlePath = null;
protected String datasetCreatorPath = null;
protected String datasetAbstractPath = null;
protected String datasetLanguagePath = null;
protected String datasetKeywordPath =null;
protected String entityAccessPath = null;

//private Hashtable entityHash = new Hashtable();
Expand Down Expand Up @@ -227,6 +229,8 @@ private void initDefaultXPaths() {
datasetTitlePath = "//dataset/title";
datasetCreatorPath = "//dataset/creator";
datasetAbstractPath = "//dataset/abstract";
datasetLanguagePath = "//dataset/language";
datasetKeywordPath = "//dataset/keywordSet/keyword";
entityAccessPath = "physical/distribution/access";
}

Expand Down Expand Up @@ -391,9 +395,31 @@ private void parseDocument(Document doc) throws Exception
}
emlDataPackage.setPubDate(pubDate);

// Parse the dataset abstract text
// Parse and store the dataset language (left null when the element is absent)
String language = null;
Node languageNode = xpathapi.selectSingleNode(doc, datasetLanguagePath);
if (languageNode != null) {
language = languageNode.getTextContent().trim();
}
emlDataPackage.setLanguage(language);

// Parse and store the dataset abstract text
NodeList datasetAbstractNodeList = xpathapi.selectNodeList(doc, datasetAbstractPath);
parseDatasetAbstract(datasetAbstractNodeList);

// Parse and store the dataset keywords, skipping blank entries
NodeList datasetKeywordNodeList = xpathapi.selectNodeList(doc, datasetKeywordPath);
if (datasetKeywordNodeList != null) {
for (int i=0; i<datasetKeywordNodeList.getLength(); i++) {
Node keywordNode = datasetKeywordNodeList.item(i);
if(keywordNode != null) {
String keyword = keywordNode.getTextContent().trim();
if (keyword != null && !keyword.trim().equals("")) {
emlDataPackage.getKeywords().add(keyword);
}
}
}
}

} catch (Exception e) {
e.printStackTrace();
Expand Down Expand Up @@ -1819,6 +1845,7 @@ private void parseDatasetAbstract(NodeList datasetAbstractNodeList) {
stringBuffer.append(" " + paraText);
}
String abstractText = stringBuffer.toString();
emlDataPackage.setAbsctrac(abstractText);
emlDataPackage.checkDatasetAbstract(abstractText);
}
}
Expand Down

0 comments on commit 572441f

Please sign in to comment.