Skip to content

Commit

Permalink
Fixes citation key generation when authors list has "and others" at t… (
Browse files Browse the repository at this point in the history
#9703)

Co-authored-by: Oliver Kopp <kopp.dev@gmail.com>
  • Loading branch information
morganteg and koppor committed Apr 24, 2023
1 parent ba2c8c1 commit 21105b1
Show file tree
Hide file tree
Showing 11 changed files with 438 additions and 169 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We moved the custom entry types dialog into the preferences dialog. [#9760](https://github.com/JabRef/jabref/pull/9760)
- We moved the manage content selectors dialog to the library properties. [#9768](https://github.com/JabRef/jabref/pull/9768)
- We moved the preferences menu command from the options menu to the file menu. [#9768](https://github.com/JabRef/jabref/pull/9768)

- We changed the handling of an "overflow" of authors at `[authIniN]`: JabRef uses `+` to indicate an overflow. Example: `[authIni2]` produces `A+` (instead of `AB`) for `Aachen and Berlin and Chemnitz`. [#9703](https://github.com/JabRef/jabref/pull/9703)


### Fixed
Expand All @@ -65,6 +65,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed the log text color in the event log console when using dark mode. [#9732](https://github.com/JabRef/jabref/issues/9732)
- We fixed an issue where searching for unlinked files would include the current library's .bib file [#9735](https://github.com/JabRef/jabref/issues/9735)
- We fixed an issue where it was no longer possible to connect to a shared mysql database due to an exception [#9761](https://github.com/JabRef/jabref/issues/9761)
- We fixed the citation key generation for [`[authors]`, `[authshort]`, `[authorsAlpha]`, `[authIniN]`, `[authEtAl]`, `[auth.etal]`](https://docs.jabref.org/setup/citationkeypatterns#special-field-markers) to handle `and others` properly. [koppor#626](https://github.com/koppor/jabref/issues/626)

### Removed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,13 @@
import org.slf4j.LoggerFactory;

/**
* The BracketedExpressionExpander provides methods to expand bracketed expressions, such as
* [year]_[author]_[firstpage], using information from a provided BibEntry. The above-mentioned expression would yield
* 2017_Kitsune_123 when expanded using the BibTeX entry "@Article{ authors = {O. Kitsune}, year = {2017},
* pages={123-6}}".
* This class provides methods to expand bracketed expressions, such as
* <code>[year]_[author]_[firstpage]</code>, using information from a provided BibEntry. The above-mentioned expression would yield
* <code>2017_Kitsune_123</code> when expanded using the BibTeX entry <code>@Article{ authors = {O. Kitsune}, year = {2017},
* pages={123-6}}</code>.
* <p>
* The embedding in JabRef is explained at <a href="https://docs.jabref.org/setup/citationkeypatterns">Customize the citation key generator</a>.
* </p>
*/
public class BracketedPattern {
private static final Logger LOGGER = LoggerFactory.getLogger(BracketedPattern.class);
Expand All @@ -51,6 +54,7 @@ public class BracketedPattern {
* The maximum number of characters in the first author's last name.
*/
private static final int CHARS_OF_FIRST = 5;

/**
* The maximum number of name abbreviations that can be used. If there are more authors, {@code MAX_ALPHA_AUTHORS -
* 1} name abbreviations will be displayed, and a + sign will be appended at the end.
Expand All @@ -61,18 +65,23 @@ public class BracketedPattern {
* Matches everything that is not a unicode decimal digit.
*/
private static final Pattern NOT_DECIMAL_DIGIT = Pattern.compile("\\P{Nd}");

/**
* Matches everything that is not an uppercase ASCII letter. The intended use is to remove all lowercase letters
*/
private static final Pattern NOT_CAPITAL_CHARACTER = Pattern.compile("[^A-Z]");

/**
* Matches uppercase English letters between "({" and "})", which should be used to abbreviate the name of an institution.
*/

private static final Pattern INLINE_ABBREVIATION = Pattern.compile("(?<=\\(\\{)[A-Z]+(?=}\\))");

/**
* Matches with "dep"/"dip", case insensitive
* Matches with "dep"/"dip", case-insensitive
*/
private static final Pattern DEPARTMENTS = Pattern.compile("^d[ei]p.*", Pattern.CASE_INSENSITIVE);

private static final Pattern WHITESPACE = Pattern.compile("\\p{javaWhitespace}");

private enum Institution {
Expand All @@ -85,10 +94,12 @@ private enum Institution {
* Matches "uni" at the start of a string, followed by "v", "b", or the end of the string, case-insensitive
*/
private static final Pattern UNIVERSITIES = Pattern.compile("^uni(v|b|$).*", Pattern.CASE_INSENSITIVE);

/**
* Matches with "tech", case insensitive
* Matches with "tech", case-insensitive
*/
private static final Pattern TECHNOLOGICAL_INSTITUTES = Pattern.compile("^tech.*", Pattern.CASE_INSENSITIVE);

/**
* Matches with "dep"/"dip"/"lab", case-insensitive
*/
Expand Down Expand Up @@ -357,7 +368,7 @@ public static String getFieldValue(BibEntry entry, String pattern, Character key
case "authEtAl":
return authEtal(authorList, "", "EtAl");
case "authshort":
return authshort(authorList);
return authShort(authorList);
}

if (pattern.matches("authIni[\\d]+")) {
Expand Down Expand Up @@ -400,7 +411,7 @@ public static String getFieldValue(BibEntry entry, String pattern, Character key
case "edtr.edtr.ea":
return authAuthEa(editorList);
case "edtrshort":
return authshort(editorList);
return authShort(editorList);
}

if (pattern.matches("edtrIni[\\d]+")) {
Expand Down Expand Up @@ -542,9 +553,9 @@ private static boolean isInstitution(Author author) {
/**
* Applies modifiers to a label generated based on a field marker.
*
* @param label The generated label.
* @param parts String array containing the modifiers.
* @param offset The number of initial items in the modifiers array to skip.
* @param label The generated label.
* @param parts String array containing the modifiers.
* @param offset The number of initial items in the modifiers array to skip.
* @param expandBracketContent a function to expand the content in the parentheses.
* @return The modified label.
*/
Expand Down Expand Up @@ -777,13 +788,17 @@ private static String lastAuthorForenameInitials(AuthorList authorList) {
}

/**
* Gets the last name of all authors/editors
* Gets the last name of all authors/editors.
* Pattern <code>[authors]</code>.
* <p>
* <code>and others</code> is converted to <code>EtAl</code>
* </p>
*
* @param authorList an {@link AuthorList}
* @return the sur name of all authors/editors
* @return the surname of all authors/editors
*/
private static String allAuthors(AuthorList authorList) {
return joinAuthorsOnLastName(authorList, authorList.getNumberOfAuthors(), "", "");
static String allAuthors(AuthorList authorList) {
return joinAuthorsOnLastName(authorList, authorList.getNumberOfAuthors(), "", "EtAl");
}

/**
Expand All @@ -792,10 +807,17 @@ private static String allAuthors(AuthorList authorList) {
* @param authorList an {@link AuthorList}
* @return the initials of all authors' names
*/
private static String authorsAlpha(AuthorList authorList) {
static String authorsAlpha(AuthorList authorList) {
StringBuilder alphaStyle = new StringBuilder();
int maxAuthors = authorList.getNumberOfAuthors() <= MAX_ALPHA_AUTHORS ?
authorList.getNumberOfAuthors() : (MAX_ALPHA_AUTHORS - 1);
int maxAuthors;
final boolean maxAuthorsExceeded;
if (authorList.getNumberOfAuthors() <= MAX_ALPHA_AUTHORS) {
maxAuthors = authorList.getNumberOfAuthors();
maxAuthorsExceeded = false;
} else {
maxAuthors = MAX_ALPHA_AUTHORS - 1;
maxAuthorsExceeded = true;
}

if (authorList.getNumberOfAuthors() == 1) {
String[] firstAuthor = authorList.getAuthor(0).getLastOnly()
Expand All @@ -808,8 +830,13 @@ private static String authorsAlpha(AuthorList authorList) {
alphaStyle.append(firstAuthor[firstAuthor.length - 1], 0,
Math.min(3, firstAuthor[firstAuthor.length - 1].length()));
} else {
boolean andOthersPresent = authorList.getAuthor(maxAuthors - 1).equals(Author.OTHERS);
if (andOthersPresent) {
maxAuthors--;
}
List<String> vonAndLastNames = authorList.getAuthors().stream()
.limit(maxAuthors).map(Author::getLastOnly)
.limit(maxAuthors)
.map(Author::getLastOnly)
.collect(Collectors.toList());
for (String vonAndLast : vonAndLastNames) {
// replace all whitespaces by " "
Expand All @@ -820,7 +847,7 @@ private static String authorsAlpha(AuthorList authorList) {
alphaStyle.append(part, 0, 1);
}
}
if (authorList.getNumberOfAuthors() > MAX_ALPHA_AUTHORS) {
if (andOthersPresent || maxAuthorsExceeded) {
alphaStyle.append("+");
}
}
Expand All @@ -834,15 +861,27 @@ private static String authorsAlpha(AuthorList authorList) {
* @param authorList the list of authors
* @param maxAuthors the maximum number of authors in the string
* @param delimiter delimiter separating the last names of the authors
* @param suffix to replace excess authors with
* @param suffix to replace excess authors with. Also used to replace <code>and others</code>.
* @return a string consisting of authors' last names separated by a `delimiter` and with any authors excess of
* `maxAuthors` replaced with `suffix`
*/
private static String joinAuthorsOnLastName(AuthorList authorList, int maxAuthors, String delimiter, String suffix) {
suffix = authorList.getNumberOfAuthors() > maxAuthors ? suffix : "";
private static String joinAuthorsOnLastName(AuthorList authorList, int maxAuthors, String delimiter, final String suffix) {
final String finalSuffix = authorList.getNumberOfAuthors() > maxAuthors ? suffix : "";
return authorList.getAuthors().stream()
.map(Author::getLast).flatMap(Optional::stream)
.limit(maxAuthors).collect(Collectors.joining(delimiter, "", suffix));
.map(author -> {
if (author.equals(Author.OTHERS)) {
if (suffix.startsWith(delimiter)) {
return Optional.of(suffix.substring(delimiter.length()));
} else {
return Optional.of(suffix);
}
} else {
return author.getLast();
}
})
.flatMap(Optional::stream)
.limit(maxAuthors)
.collect(Collectors.joining(delimiter, "", finalSuffix));
}

/**
Expand Down Expand Up @@ -877,54 +916,30 @@ private static String oneAuthorPlusInitials(AuthorList authorList) {
return authorSB.toString();
}

/**
* auth.auth.ea format:
* <ol>
* <li>Isaac Newton and James Maxwell and Albert Einstein (1960)</li>
* <li>Isaac Newton and James Maxwell (1960)</li>
* </ol>
* give:
* <ol>
* <li>Newton.Maxwell.ea</li>
* <li>Newton.Maxwell</li>
* </ol>
*/
private static String authAuthEa(AuthorList authorList) {
static String authAuthEa(AuthorList authorList) {
return joinAuthorsOnLastName(authorList, 2, ".", ".ea");
}

/**
* auth.etal, authEtAl, ... format:
* <ol>
* <li>Isaac Newton and James Maxwell and Albert Einstein (1960)</li>
* <li>Isaac Newton and James Maxwell (1960)</li>
* </ol>
* <p>
* auth.etal give (delim=".", append=".etal"):
* <ol>
* <li>Newton.etal</li>
* <li>Newton.Maxwell</li>
* </ol>
* </p>
* <p>
* authEtAl give (delim="", append="EtAl"):
* <ol>
* <li>NewtonEtAl</li>
* <li>NewtonMaxwell</li>
* </ol>
* </p>
* Note that [authEtAl] equals [authors2]
* auth.etal, authEtAl, ... format
*/
private static String authEtal(AuthorList authorList, String delim, String append) {
if (authorList.getNumberOfAuthors() <= 2) {
static String authEtal(AuthorList authorList, String delim, String append) {
if (authorList.isEmpty()) {
return "";
}
if ((authorList.getNumberOfAuthors() <= 2)
&& ((authorList.getNumberOfAuthors() == 1) || !authorList.getAuthor(1).equals(Author.OTHERS))) {
// in case 1 or two authors, just name them
// exception: If the second author is "and others", then do the appendix handling (in the other branch)
return joinAuthorsOnLastName(authorList, 2, delim, "");
} else {
return authorList.getAuthor(0).getLast().orElse("") + append;
}
}

/**
* The first N characters of the Mth author's or editor's last name. M starts counting from 1
* The first N characters of the Mth author's or editor's last name. M starts counting from 1.
* In case the Mth author is {@link Author#OTHERS}, <code>+</code> is returned.
*/
private static String authNofMth(AuthorList authorList, int n, int m) {
// have m counting from 0
Expand All @@ -934,7 +949,11 @@ private static String authNofMth(AuthorList authorList, int n, int m) {
return "";
}

String lastName = authorList.getAuthor(mminusone).getLast()
Author lastAuthor = authorList.getAuthor(mminusone);
if (lastAuthor.equals(Author.OTHERS)) {
return "+";
}
String lastName = lastAuthor.getLast()
.map(CitationKeyGenerator::removeDefaultUnwantedCharacters).orElse("");
return lastName.length() > n ? lastName.substring(0, n) : lastName;
}
Expand All @@ -947,21 +966,9 @@ private static String authN(AuthorList authorList, int num) {
}

/**
* authshort format:
* <p>
* given author names
* <ol><li>Isaac Newton and James Maxwell and Albert Einstein and N. Bohr</li>
* <li>Isaac Newton and James Maxwell and Albert Einstein</li>
* <li>Isaac Newton and James Maxwell</li>
* <li>Isaac Newton</li></ol>
* yield
* <ol><li>NME+</li>
* <li>NME</li>
* <li>NM</li>
* <li>Newton</li></ol></p>
* {@author added by Kolja Brix, kbx@users.sourceforge.net}
* authshort format
*/
private static String authshort(AuthorList authorList) {
static String authShort(AuthorList authorList) {
StringBuilder author = new StringBuilder();
final int numberOfAuthors = authorList.getNumberOfAuthors();

Expand All @@ -980,37 +987,32 @@ private static String authshort(AuthorList authorList) {
}

/**
* authIniN format:
* <p>
* Each author gets (N div #authors) chars, the remaining (N mod #authors) chars are equally distributed to the
* authors first in the row. If (N < #authors), only the first N authors get mentioned.
* <p>
* For example if
* <ol>
* <li> I. Newton and J. Maxwell and A. Einstein and N. Bohr (..) </li>
* <li> I. Newton and J. Maxwell and A. Einstein </li>
* <li> I. Newton and J. Maxwell </li>
* <li> I. Newton </li>
* </ol>
* authIni4 gives:
* <ol>
* <li> NMEB </li>
* <li> NeME </li>
* <li> NeMa </li>
* <li> Newt </li>
* </ol>
* authIniN format
*
* @param authorList The authors to format.
* @param n The maximum length of the resulting string in characters. A negative number or zero
* leads to "" being returned.
*/
private static String authIniN(AuthorList authorList, int n) {
static String authIniN(AuthorList authorList, int n) {
if ((n <= 0) || authorList.isEmpty()) {
return "";
}

StringBuilder author = new StringBuilder();
final int numberOfAuthors = authorList.getNumberOfAuthors();
final boolean lastAuthorIsOthers = authorList.getAuthor(numberOfAuthors - 1).equals(Author.OTHERS);
if ((n > 1) && ((n < numberOfAuthors) || lastAuthorIsOthers)) {
final int limit = Math.min(n - 1, numberOfAuthors - 1);
// special handling if the last author is "Others"
// This gets the single char "+" only
AuthorList allButOthers = AuthorList.of(
authorList.getAuthors()
.stream()
.limit(limit)
.toList());
return authIniN(allButOthers, n - 1) + "+";
}

StringBuilder author = new StringBuilder();

int charsAll = n / numberOfAuthors;
for (int i = 0; i < numberOfAuthors; i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,23 @@
* This is the utility class of the LabelPattern package.
*/
public class CitationKeyGenerator extends BracketedPattern {
/*
/**
* All single characters that we can use for extending a key to make it unique.
*/
public static final String APPENDIX_CHARACTERS = "abcdefghijklmnopqrstuvwxyz";
public static final String DEFAULT_UNWANTED_CHARACTERS = "-`ʹ:!;?^+";

/**
* List of unwanted characters. These will be removed at the end.
* Note that <code>+</code> is a wanted character to indicate "et al." in authorsAlpha.
* Example: "ABC+". See {@link org.jabref.logic.citationkeypattern.BracketedPatternTest#authorsAlpha()} for examples.
*/
public static final String DEFAULT_UNWANTED_CHARACTERS = "-`ʹ:!;?^";

private static final Logger LOGGER = LoggerFactory.getLogger(CitationKeyGenerator.class);

// Source of disallowed characters : https://tex.stackexchange.com/a/408548/9075
private static final List<Character> DISALLOWED_CHARACTERS = Arrays.asList('{', '}', '(', ')', ',', '=', '\\', '"', '#', '%', '~', '\'');

private final AbstractCitationKeyPattern citeKeyPattern;
private final BibDatabase database;
private final CitationKeyPatternPreferences citationKeyPatternPreferences;
Expand Down

0 comments on commit 21105b1

Please sign in to comment.