Skip to content

Commit

Permalink
Simple replacement of AuthorsFormatter by usage of AuthorList
Browse files: browse the repository at this point in the history
  • Loading branch information
koppor committed Mar 12, 2016
1 parent 5baaff4 commit f8d33fd
Showing 1 changed file with 6 additions and 206 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,12 @@
package net.sf.jabref.logic.formatter.bibtexfields;

import net.sf.jabref.logic.formatter.Formatter;

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.jabref.model.entry.AuthorList;

/**
* Class for normalizing author lists to BibTeX format.
*/
public class AuthorsFormatter implements Formatter {
// Capitalized last name followed by a run of uppercase initials without dots, e.g. "Smith JR".
private static final Pattern LAST_F_F = Pattern.compile("(\\p{javaUpperCase}[\\p{javaLowerCase}]+) (\\p{javaUpperCase}+)");
// Capitalized last name followed by initials built from uppercase letters, dots and spaces, e.g. "Smith J. R.".
private static final Pattern LAST_FDOT_F = Pattern.compile("(\\p{javaUpperCase}[\\p{javaLowerCase}]+) ([\\. \\p{javaUpperCase}]+)");
// Run of uppercase initials without dots followed by a capitalized last name, e.g. "JR Smith".
private static final Pattern F_F_LAST = Pattern.compile("(\\p{javaUpperCase}+) (\\p{javaUpperCase}[\\p{javaLowerCase}]+)");
// Initials built from uppercase letters, dots and spaces followed by a capitalized last name, e.g. "J. R. Smith".
private static final Pattern FDOT_F_LAST = Pattern.compile("([\\. \\p{javaUpperCase}]+) (\\p{javaUpperCase}[\\p{javaLowerCase}]+)");
// One uppercase letter followed by zero or more lowercase letters, e.g. "Smith" or "J".
private static final Pattern SINGLE_NAME = Pattern.compile("(\\p{javaUpperCase}[\\p{javaLowerCase}]*)");

@Override
public String getName() {
Expand All @@ -45,205 +38,12 @@ public String getKey() {
*/
@Override
public String format(String value) {
// Overview: guess how the individual names in value are separated ("and", ";" or ","),
// split accordingly, normalize every name via normalizeName and join the results with " and ".
boolean andSep = false;
// String can contain newlines. Convert each to a space
String noNewlineValue = value.replace("\n", " ");
// The separator is "and " preceded by a space or a comma; the limit of -1 keeps trailing empty strings.
String[] authors = noNewlineValue.split("( |,)and ", -1);
if (authors.length > 1) {
andSep = true;
} else {
/*
If there are no "and" separators in the original string, we assume it either means that
the author list is comma or semicolon separated or that it contains only a single name.
If there is a semicolon, we go by that. If not, we assume commas, and count the parts
separated by commas to determine which it is.
*/
String[] authors2 = noNewlineValue.split("; ");
if (authors2.length > 1) {
authors = authors2;
} else {
authors2 = noNewlineValue.split(", ");
if (authors2.length > 3) { // Probably more than a single author, so we split by commas.
authors = authors2;
} else {
if (authors2.length == 3) {
// This could be a BibTeX formatted name containing a Jr particle,
// e.g. Smith, Jr., Peter
// We check if the middle part is <= 3 characters. If not, we assume we are
// dealing with three authors.
if (authors2[1].length() > 3) {
authors = authors2;
}
}
// With one or two comma-separated parts the value is kept as a single name.
}
}
}

// Remove leading and trailing whitespaces from each name:
for (int i = 0; i < authors.length; i++) {
authors[i] = authors[i].trim();
}

// If we found an and separator, there could possibly be semicolon or
// comma separation before the last separator. If there are two or more
// and separators, we can dismiss this possibility.
// If there is only a single and separator, check closer:
if (andSep && (authors.length == 2)) {
// Check if the first part is semicolon separated:
String[] semiSep = authors[0].split("; ");
if (semiSep.length > 1) {
// Ok, it looks like this is the case. Use separation by semicolons:
// The new array holds every semicolon-separated name plus the name after the "and".
String[] newAuthors = new String[1 + semiSep.length];
for (int i = 0; i < semiSep.length; i++) {
newAuthors[i] = semiSep[i].trim();
}
newAuthors[semiSep.length] = authors[1];
authors = newAuthors;
} else {
// Check if there is a comma in the last name. If so, we can assume that comma
// is not used to separate the names:
// Note: a comma at index 0 of the last name does not count here (the test is >= 1).
boolean lnfn = authors[1].indexOf(',') >= 1;
if (!lnfn) {
String[] cmSep = authors[0].split(", ");
if (cmSep.length > 1) {
// This means that the last name doesn't contain a comma, but the first
// one contains one or more. This indicates that the names leading up to
// the single "and" are comma separated:
String[] newAuthors = new String[1 + cmSep.length];
for (int i = 0; i < cmSep.length; i++) {
newAuthors[i] = cmSep[i].trim();
}
newAuthors[cmSep.length] = authors[1];
authors = newAuthors;
}

}
}
}

// Normalize each name on its own and re-join them with the " and " separator.
StringBuilder stringBuilder = new StringBuilder();
for (int i = 0; i < authors.length; i++) {
String norm = AuthorsFormatter.normalizeName(authors[i]);
stringBuilder.append(norm);
if (i < (authors.length - 1)) {
stringBuilder.append(" and ");
}
}
return stringBuilder.toString();
}
// try to convert to BibTeX format, where multiple names are separated by " and " instead of ";" or other characters
String inputForAuthorList = value.replaceAll(";", " and ");

// Rewrites a single author name into a "Last, First/Initials" form when one of the heuristics
// below applies; a name that matches none of them is returned unchanged.
private static String normalizeName(String oldName) {
String name = oldName;
// Case 1: last name followed by undotted initials -> "Last, F. M."
Matcher matcher = AuthorsFormatter.LAST_F_F.matcher(name);
if (matcher.matches()) {
String initials = matcher.group(2);
StringBuilder stringBuilder = new StringBuilder(matcher.group(1));
stringBuilder.append(", ");
fixInitials(initials, stringBuilder);
return stringBuilder.toString();
}
// Case 2: last name followed by initials that may contain dots and spaces;
// those are stripped before the initials are re-formatted.
matcher = AuthorsFormatter.LAST_FDOT_F.matcher(name);
if (matcher.matches()) {
String initials = matcher.group(2).replaceAll("[\\. ]+", "");
StringBuilder stringBuilder = new StringBuilder(matcher.group(1));
stringBuilder.append(", ");
fixInitials(initials, stringBuilder);
return stringBuilder.toString();
}

// Case 3: undotted initials followed by the last name.
matcher = AuthorsFormatter.F_F_LAST.matcher(name);
if (matcher.matches()) {
String initials = matcher.group(1);
StringBuilder stringBuilder = new StringBuilder(matcher.group(2));
stringBuilder.append(", ");
fixInitials(initials, stringBuilder);
return stringBuilder.toString();
}
// Case 4: initials with dots and/or spaces followed by the last name.
matcher = AuthorsFormatter.FDOT_F_LAST.matcher(name);
if (matcher.matches()) {
String initials = matcher.group(1).replaceAll("[\\. ]+", "");
StringBuilder stringBuilder = new StringBuilder(matcher.group(2));
stringBuilder.append(", ");
fixInitials(initials, stringBuilder);
return stringBuilder.toString();
}

if (name.indexOf(',') >= 0) {
// Name contains comma
int index = name.lastIndexOf(',');
// If the comma is at the end of the name, just remove it to prevent index error:
// NOTE(review): after this shortening, index + 1 is larger than the new name length, so the
// substring(index + 1) call below looks like it throws StringIndexOutOfBoundsException
// for a name ending in a comma (e.g. "Smith,") - confirm and fix.
if (index == (name.length() - 1)) {
name = name.substring(0, name.length() - 1);
}

// Everything before the last comma is kept verbatim as the leading part.
StringBuilder stringBuilder = new StringBuilder(name.substring(0, index));
stringBuilder.append(", ");
// Check if the remainder is a single name:
String firstName = name.substring(index + 1).trim();
String[] firstNameParts = firstName.split(" ");
if (firstNameParts.length > 1) {
// Multiple parts. Add all of them, and add a dot if they are single letter parts:
for (int i = 0; i < firstNameParts.length; i++) {
if (firstNameParts[i].length() == 1) {
stringBuilder.append(firstNameParts[i]).append('.');
} else {
stringBuilder.append(firstNameParts[i]);
}
if (i < (firstNameParts.length - 1)) {
stringBuilder.append(' ');
}
}
} else {
// Only a single part. Check if it looks like a name or initials:
Matcher nameMatcher = AuthorsFormatter.SINGLE_NAME.matcher(firstNameParts[0]);
if (nameMatcher.matches()) {
stringBuilder.append(firstNameParts[0]);
} else {
// It looks like initials.
String initials = firstNameParts[0].replaceAll("[\\.]+", "");
fixInitials(initials, stringBuilder);
}

}
return stringBuilder.toString();
} else {
// Name doesn't contain comma
String[] parts = name.split(" +");
// allNames stays true only if every part matches SINGLE_NAME.
boolean allNames = true;
for (String part : parts) {
matcher = AuthorsFormatter.SINGLE_NAME.matcher(part);
if (!matcher.matches()) {
allNames = false;
break;
}
}
if (allNames) {
// Looks like a name written in full with first name first.
// Change into last name first format:
StringBuilder stringBuilder = new StringBuilder(parts[parts.length - 1]);
if (parts.length > 1) {
stringBuilder.append(',');
for (int i = 0; i < (parts.length - 1); i++) {
stringBuilder.append(' ').append(parts[i]);
if (parts[i].length() == 1) {
stringBuilder.append('.');
}
}
}
return stringBuilder.toString();
}
}

// No heuristic applied: hand the name back unchanged.
return name;
// NOTE(review): the statements below follow an unconditional return; they appear to be the
// added lines of the replacement format() body interleaved by the diff view - confirm.
// AuthorList does the whole magic when the string is a well-formed BibTeX author string
AuthorList list = AuthorList.getAuthorList(inputForAuthorList);
return list.getAuthorsLastFirstAnds(false);
}

/**
 * Appends each character of {@code initials} to {@code stringBuilder}, every one followed by a
 * dot and with a single space between consecutive initials (e.g. "JR" is appended as "J. R.").
 *
 * @param initials      the initials to format, one character per initial
 * @param stringBuilder the builder that receives the formatted initials
 */
private static void fixInitials(final String initials, final StringBuilder stringBuilder) {
    // Empty before the first initial, a single space before every later one.
    String separator = "";
    for (char initial : initials.toCharArray()) {
        stringBuilder.append(separator).append(initial).append('.');
        separator = " ";
    }
}
}

0 comments on commit f8d33fd

Please sign in to comment.