Skip to content

Commit

Permalink
And some PMD cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
oscargus committed Dec 27, 2015
1 parent 896b751 commit e964e2c
Showing 1 changed file with 46 additions and 75 deletions.
121 changes: 46 additions & 75 deletions src/main/java/net/sf/jabref/importer/fetcher/ACMPortalFetcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import java.awt.Dimension;
import java.awt.GridLayout;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.ConnectException;
Expand Down Expand Up @@ -64,38 +62,40 @@ public class ACMPortalFetcher implements PreviewEntryFetcher {
private final UnitFormatter unitFormatter = new UnitFormatter();
private String terms;

private static final String startUrl = "http://portal.acm.org/";
private static final String searchUrlPart = "results.cfm?query=";
private static final String searchUrlPartII = "&dl=";
private static final String endUrl = "&coll=Portal&short=0";//&start=";
private static final String START_URL = "http://portal.acm.org/";
private static final String SEARCH_URL_PART = "results.cfm?query=";
private static final String SEARCH_URL_PART_II = "&dl=";
private static final String END_URL = "&coll=Portal&short=0";//&start=";

private static final String bibtexUrl = "exportformats.cfm?id=";
private static final String bibtexUrlEnd = "&expformat=bibtex";
private static final String abstractUrl = "tab_abstract.cfm?id=";
private static final String BIBTEX_URL = "exportformats.cfm?id=";
private static final String BIBTEX_URL_END = "&expformat=bibtex";
private static final String ABSTRACT_URL = "tab_abstract.cfm?id=";

private final JRadioButton acmButton = new JRadioButton(Localization.lang("The ACM Digital Library"));
private final JRadioButton guideButton = new JRadioButton(Localization.lang("The Guide to Computing Literature"));
private final JCheckBox absCheckBox = new JCheckBox(Localization.lang("Include abstracts"), false);

private static final int perPage = 20; // Fetch only one page. Otherwise, the user will get blocked by ACM. 100 has been the old setting. See Bug 3532752 - https://sourceforge.net/tracker/index.php?func=detail&aid=3532752&group_id=92314&atid=600306
private static final int PER_PAGE = 20; // Fetch only one page. Otherwise, the user will get blocked by ACM. 100 has been the old setting. See Bug 3532752 - https://sourceforge.net/tracker/index.php?func=detail&aid=3532752&group_id=92314&atid=600306
private static final int WAIT_TIME = 200;
private boolean shouldContinue;

// user settings
private boolean fetchAbstract;
private boolean acmOrGuide;

private static final Pattern hitsPattern = Pattern.compile("<strong>(\\d+)</strong> results found");
private static final Pattern maxHitsPattern = Pattern.compile("Result \\d+ &ndash; \\d+ of (\\d+)");
private int piv;

private static final Pattern HITS_PATTERN = Pattern.compile("<strong>(\\d+)</strong> results found");
private static final Pattern MAX_HITS_PATTERN = Pattern.compile("Result \\d+ &ndash; \\d+ of (\\d+)");

private static final Pattern fullCitationPattern = Pattern.compile("<a href=\"(citation.cfm.*)\" target.*");
private static final Pattern FULL_CITATION_PATTERN = Pattern.compile("<a href=\"(citation.cfm.*)\" target.*");

private static final Pattern idPattern = Pattern.compile("citation.cfm\\?id=(\\d+)&.*");
private static final Pattern ID_PATTERN = Pattern.compile("citation.cfm\\?id=(\\d+)&.*");

// Patterns used to extract information for the preview:
private static final Pattern titlePattern = Pattern.compile("<a href=.*?\">([^<]*)</a>");
private static final Pattern absPattern = Pattern.compile("<div .*?>(.*?)</div>");
private static final Pattern sourcePattern = Pattern.compile("<span style=\"padding-left:10px\">([^<]*)</span>");
private static final Pattern TITLE_PATTERN = Pattern.compile("<a href=.*?\">([^<]*)</a>");
private static final Pattern ABSTRACT_PATTERN = Pattern.compile("<div .*?>(.*?)</div>");
private static final Pattern SOURCE_PATTERN = Pattern.compile("<span style=\"padding-left:10px\">([^<]*)</span>");


@Override
Expand Down Expand Up @@ -131,7 +131,7 @@ public boolean processQueryGetPreview(String query, FetcherPreviewDialog preview

String page = Util.getResults(url);

int hits = getNumberOfHits(page, "<div id=\"resfound\">", ACMPortalFetcher.hitsPattern);
int hits = getNumberOfHits(page, "<div id=\"resfound\">", ACMPortalFetcher.HITS_PATTERN);

int index = page.indexOf("<div id=\"resfound\">");
if (index >= 0) {
Expand All @@ -149,23 +149,23 @@ public boolean processQueryGetPreview(String query, FetcherPreviewDialog preview
return false;
}

hits = getNumberOfHits(page, "<div class=\"pagerange\">", ACMPortalFetcher.maxHitsPattern);
parse(page, Math.min(hits, perPage), previews);
hits = getNumberOfHits(page, "<div class=\"pagerange\">", ACMPortalFetcher.MAX_HITS_PATTERN);
parse(page, Math.min(hits, PER_PAGE), previews);
for (Map.Entry<String, JLabel> entry : previews.entrySet()) {
preview.addEntry(entry.getKey(), entry.getValue());
}

return true;

} catch (MalformedURLException e) {
e.printStackTrace();
LOGGER.warn("Problem with ACM fetcher URL", e);
} catch (ConnectException e) {
status.showMessage(Localization.lang("Connection to ACM Portal failed"),
Localization.lang("Search ACM Portal"), JOptionPane.ERROR_MESSAGE);
} catch (IOException e) {
status.showMessage(e.getMessage(),
Localization.lang("Search ACM Portal"), JOptionPane.ERROR_MESSAGE);
e.printStackTrace();
LOGGER.warn("Problem with ACM Portal", e);
}
return false;

Expand Down Expand Up @@ -227,22 +227,19 @@ public boolean processQuery(String query, ImportInspector dialog, OutputPrinter
}

/**
 * Builds the ACM Portal search URL for the currently stored search terms.
 *
 * The result is the portal start URL and search path, then the terms with every
 * space replaced by {@code %20} (no other character is escaped here), then the
 * {@code dl} parameter set to {@code ACM} or {@code GUIDE} depending on
 * {@code acmOrGuide}, and finally the fixed end-of-URL suffix.
 *
 * NOTE(review): this span is taken from a rendered commit diff, so the pre-change
 * and post-change forms of the same statements appear one after the other.
 *
 * @return the complete search URL as a string
 */
private String makeUrl() {
StringBuilder sb = new StringBuilder(ACMPortalFetcher.startUrl).append(ACMPortalFetcher.searchUrlPart);
sb.append(terms.replaceAll(" ", "%20"));
sb.append(ACMPortalFetcher.searchUrlPartII);
StringBuilder sb = new StringBuilder(ACMPortalFetcher.START_URL).append(ACMPortalFetcher.SEARCH_URL_PART)
.append(terms.replaceAll(" ", "%20")).append(ACMPortalFetcher.SEARCH_URL_PART_II);

// Select which ACM collection the search is run against.
if (acmOrGuide) {
sb.append("ACM");
} else {
sb.append("GUIDE");
}
sb.append(ACMPortalFetcher.endUrl);
sb.append(ACMPortalFetcher.END_URL);
return sb.toString();
}


private int piv;


private void parse(String text, int hits, Map<String, JLabel> entries) {
int entryNumber = 1;
Expand All @@ -253,7 +250,7 @@ private void parse(String text, int hits, Map<String, JLabel> entries) {

private static String getEntryBibTeXURL(String fullCitation) {
// Get ID
Matcher idMatcher = ACMPortalFetcher.idPattern.matcher(fullCitation);
Matcher idMatcher = ACMPortalFetcher.ID_PATTERN.matcher(fullCitation);
if (idMatcher.find()) {
return idMatcher.group(1);
}
Expand All @@ -272,19 +269,13 @@ private boolean getNextEntryURL(String allText, int entryNumber,
String text = allText.substring(index, endIndex);
// Always try RIS import first
Matcher fullCitation =
ACMPortalFetcher.fullCitationPattern.matcher(text);
ACMPortalFetcher.FULL_CITATION_PATTERN.matcher(text);
String item;
if (fullCitation.find()) {
String link = getEntryBibTeXURL(fullCitation.group(1));
if (endIndex > 0) {
StringBuilder sb = new StringBuilder();

/*try {
save("part" + entryNumber + ".html", part);
} catch (IOException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
} */

// Find authors:
String authMarker = "<div class=\"authors\">";
int authStart = text.indexOf(authMarker);
Expand All @@ -296,7 +287,7 @@ private boolean getNextEntryURL(String allText, int entryNumber,

}
// Find title:
Matcher titM = ACMPortalFetcher.titlePattern.matcher(text);
Matcher titM = ACMPortalFetcher.TITLE_PATTERN.matcher(text);
if (titM.find()) {
sb.append("<p>").append(titM.group(1)).append("</p>");
}
Expand All @@ -308,18 +299,14 @@ private boolean getNextEntryURL(String allText, int entryNumber,
if (sourceEnd >= 0) {
String sourceText = text.substring(sourceStart, sourceEnd);
// Find source:
Matcher source = ACMPortalFetcher.sourcePattern.matcher(sourceText);
Matcher source = ACMPortalFetcher.SOURCE_PATTERN.matcher(sourceText);
if (source.find()) {
sb.append("<p>").append(source.group(1)).append("</p>");
}
}
}

item = sb.toString();
/*.replaceAll("</tr>", "<br>");
part = part.replaceAll("</td>", "");
part = part.replaceAll("<tr valign=\"[A-Za-z]*\">", "");
part = part.replaceAll("<table style=\"padding: 5px; 5px; 5px; 5px;\" border=\"0\">", "");*/
} else {
item = link;
}
Expand All @@ -335,9 +322,9 @@ private boolean getNextEntryURL(String allText, int entryNumber,
return false;
}

private static BibEntry downloadEntryBibTeX(String ID, boolean downloadAbstract) {
private static BibEntry downloadEntryBibTeX(String id, boolean downloadAbstract) {
try {
URL url = new URL(ACMPortalFetcher.startUrl + ACMPortalFetcher.bibtexUrl + ID + ACMPortalFetcher.bibtexUrlEnd);
URL url = new URL(ACMPortalFetcher.START_URL + ACMPortalFetcher.BIBTEX_URL + id + ACMPortalFetcher.BIBTEX_URL_END);
URLConnection connection = url.openConnection();

// set user-agent to avoid being blocked as a crawler
Expand All @@ -356,9 +343,9 @@ private static BibEntry downloadEntryBibTeX(String ID, boolean downloadAbstract)

// get abstract
if (downloadAbstract) {
url = new URL(ACMPortalFetcher.startUrl + ACMPortalFetcher.abstractUrl + ID);
url = new URL(ACMPortalFetcher.START_URL + ACMPortalFetcher.ABSTRACT_URL + id);
String page = Util.getResults(url);
Matcher absM = ACMPortalFetcher.absPattern.matcher(page);
Matcher absM = ACMPortalFetcher.ABSTRACT_PATTERN.matcher(page);
if (absM.find()) {
entry.setField("abstract", absM.group(1).trim());
}
Expand All @@ -367,7 +354,7 @@ private static BibEntry downloadEntryBibTeX(String ID, boolean downloadAbstract)

return entry;
} catch (NoSuchElementException e) {
LOGGER.info("Bad Bibtex record read at: " + ACMPortalFetcher.bibtexUrl + ID + ACMPortalFetcher.bibtexUrlEnd,
LOGGER.info("Bad Bibtex record read at: " + ACMPortalFetcher.BIBTEX_URL + id + ACMPortalFetcher.BIBTEX_URL_END,
e);
return null;
} catch (MalformedURLException e) {
Expand Down Expand Up @@ -397,23 +384,17 @@ private String convertHTMLChars(String text) {
*/
private static int getNumberOfHits(String page, String marker, Pattern pattern) throws IOException {
int ind = page.indexOf(marker);
if (ind < 0) {
throw new IOException("Cannot parse number of hits");
}
String substring = page.substring(ind, Math.min(ind + 100, page.length()));
Matcher m = pattern.matcher(substring);
if (!m.find()) {
LOGGER.info("Unmatched! " + substring);
} else {
try {
// get rid of ,
String number = m.group(1);
number = number.replaceAll(",", "");
return Integer.parseInt(number);
} catch (NumberFormatException ex) {
throw new IOException("Cannot parse number of hits");
} catch (IllegalStateException e) {
throw new IOException("Cannot parse number of hits");
if (ind >= 0) {
String substring = page.substring(ind, Math.min(ind + 100, page.length()));
Matcher m = pattern.matcher(substring);
if (m.find()) {
try {
return Integer.parseInt(m.group(1));
} catch (IllegalStateException | NumberFormatException ex) {
throw new IOException("Cannot parse number of hits");
}
} else {
LOGGER.info("Unmatched! " + substring);
}
}
throw new IOException("Cannot parse number of hits");
Expand All @@ -429,22 +410,12 @@ public String getHelpPage() {
return "ACMPortalHelp.html";
}

/**
 * Invoked by the dialog when the user has cancelled the import.
 * Clears the {@code shouldContinue} flag.
 */
public void cancelled() {
    this.shouldContinue = false;
}

// This method is called by the dialog when the user has cancelled or
//signalled a stop. It is expected that any long-running fetch operations
//signaled a stop. It is expected that any long-running fetch operations
//will stop after this method is called.
@Override
public void stopFetching() {
    // Clearing the flag is the stop signal: long-running fetch operations
    // are expected to end once it is false.
    this.shouldContinue = false;
}

/**
 * Writes {@code content} to the file named {@code filename}, replacing any
 * existing content of that file.
 *
 * @param filename name of the file to write
 * @param content  text to store in the file
 * @throws IOException if the file cannot be opened or written
 */
private static void save(String filename, String content) throws IOException {
    // Both writers are declared as resources so they are closed on every path.
    try (FileWriter fileWriter = new FileWriter(filename);
            BufferedWriter bufferedWriter = new BufferedWriter(fileWriter)) {
        bufferedWriter.write(content);
    }
}
}

0 comments on commit e964e2c

Please sign in to comment.