Skip to content

Commit

Permalink
Extract and refactor CaseChangers
Browse files Browse the repository at this point in the history
  • Loading branch information
stefan-kolb committed Oct 29, 2015
1 parent f881803 commit 2b79053
Show file tree
Hide file tree
Showing 15 changed files with 362 additions and 332 deletions.
1 change: 0 additions & 1 deletion src/main/java/net/sf/jabref/gui/actions/CleanUpAction.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
import com.jgoodies.forms.builder.FormBuilder;
import com.jgoodies.forms.layout.FormLayout;
import net.sf.jabref.logic.cleanup.PageNumbersCleanup;
import net.sf.jabref.logic.formatter.FieldFormatters;
import net.sf.jabref.logic.l10n.Localization;
import net.sf.jabref.model.entry.BibtexEntry;
import net.sf.jabref.logic.util.DOI;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package net.sf.jabref.logic.cleanup;

import net.sf.jabref.logic.formatter.FieldFormatters;
import net.sf.jabref.logic.formatter.PageNumbersFormatter;
import net.sf.jabref.logic.formatter.BibtexFieldFormatters;
import net.sf.jabref.model.entry.BibtexEntry;

/**
Expand All @@ -24,7 +23,7 @@ public void cleanup() {
final String field = "pages";

String value = entry.getField(field);
String newValue = FieldFormatters.PAGE_NUMBERS.format(value);
String newValue = BibtexFieldFormatters.PAGE_NUMBERS.format(value);
entry.setField(field, newValue);
}
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package net.sf.jabref.logic.formatter;

import net.sf.jabref.logic.formatter.bibtexfields.PageNumbersFormatter;

import java.util.Arrays;
import java.util.List;

public class FieldFormatters {
public class BibtexFieldFormatters {
public static final PageNumbersFormatter PAGE_NUMBERS = new PageNumbersFormatter();

public static final List<Formatter> ALL = Arrays.asList(PAGE_NUMBERS);
Expand Down
324 changes: 3 additions & 321 deletions src/main/java/net/sf/jabref/logic/formatter/CaseChangers.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
*/
package net.sf.jabref.logic.formatter;

import net.sf.jabref.logic.formatter.casechanger.*;

import java.util.*;
import java.util.stream.Collectors;

/**
* Class with static methods for changing the case of strings and arrays of strings.
Expand All @@ -28,330 +29,11 @@
* This can be done by starting at the letter position and moving backward and forward to see if there is a '{' and a '}', respectively.
*/
public class CaseChangers {

public static final Set<String> SMALLER_WORDS;

static {
Set<String> smallerWords = new HashSet<>();
// NOTE: before JabRef 2.80, it was SKIP_WORDS = {"a", "an", "the", "for", "on", "of"}; in net.sf.jabref.logic.labelPattern.LabelPatternUtil.SKIP_WORDS

This comment has been minimized.

Copy link
@koppor

koppor Oct 29, 2015

Member

Why was this comment removed? I think, it helps when thinking of the functionality, don't you?

This comment has been minimized.

Copy link
@stefan-kolb

stefan-kolb Oct 29, 2015

Author Member

How does it help? It is just a list of former words that are all included in the new lists?

This comment has been minimized.

Copy link
@koppor

koppor Oct 29, 2015

Member

I don't have a source for the current "smallerWords" list. My personal English thinking only has the former words as smaller words, and I would not have written "beneath" in small letters. Instead of opening an architectural decision document explaining why the other words have been included etc., I would have left the old behavior as a note to support others thinking about the issue when they do. - I think having some comment is more useful than no comment. I can also follow the argument that a quick comment is worse than no comment. In that case I'll begin writing a decision document referencing that code and will fill it with TODOs (because I don't know the reason for the other words - someone should fill that in).

This comment has been minimized.

Copy link
@stefan-kolb

stefan-kolb Oct 30, 2015

Author Member

Sorry about that. I didn't write the class (@simonharrer); I just extracted the subclasses in this commit.

This comment has been minimized.

Copy link
@matthiasgeiger

matthiasgeiger Oct 30, 2015

Member

But if you want to discuss this, a comment pointing to the old behavior is not the best idea, is it?

I agree that it might be worth providing a source for the lists of prepositions - perhaps @oscargus remembers it?

So why not add a more concrete note at

Set<String> smallerWords = new HashSet<>();
// Articles
smallerWords.addAll(Arrays.asList("a", "an", "the"));
// Prepositions
smallerWords.addAll(Arrays.asList("above", "about", "across", "against", "along", "among", "around", "at", "before", "behind", "below", "beneath", "beside", "between", "beyond", "by", "down", "during", "except", "for", "from", "in", "inside", "into", "like", "near", "of", "off", "on", "onto", "since", "to", "toward", "through", "under", "until", "up", "upon", "with", "within", "without"));
// Conjunctions
smallerWords.addAll(Arrays.asList("and", "but", "for", "nor", "or", "so", "yet"));
:

List of all English articles, prepositions and conjunctions which should remain in lowercase in Titles - source TBD

Of course this is only sensible if we actually have a source 😉


// Articles
smallerWords.addAll(Arrays.asList("a", "an", "the"));
// Prepositions
smallerWords.addAll(Arrays.asList("above", "about", "across", "against", "along", "among", "around", "at", "before", "behind", "below", "beneath", "beside", "between", "beyond", "by", "down", "during", "except", "for", "from", "in", "inside", "into", "like", "near", "of", "off", "on", "onto", "since", "to", "toward", "through", "under", "until", "up", "upon", "with", "within", "without"));
// Conjunctions
smallerWords.addAll(Arrays.asList("and", "but", "for", "nor", "or", "so", "yet"));

// unmodifiable for thread safety
SMALLER_WORDS = Collections.unmodifiableSet(smallerWords);
}

/**
 * Represents a word in a title of a bibtex entry.
 * <p>
 * A word can have protected chars (enclosed in '{' '}') and may be a small (a, an, the, ...) word.
 * Every case-changing method leaves protected characters untouched and mutates the word in place.
 */
private static final class Word {

    private final char[] chars;
    private final boolean[] protectedChars;

    /**
     * @param chars          the characters of the word; modified in place by the case-changing methods
     * @param protectedChars flags, parallel to {@code chars}, marking characters that must not change case
     * @throws NullPointerException     if either array is {@code null}
     * @throws IllegalArgumentException if the two arrays differ in length
     */
    public Word(char[] chars, boolean[] protectedChars) {
        this.chars = Objects.requireNonNull(chars);
        this.protectedChars = Objects.requireNonNull(protectedChars);

        if (this.chars.length != this.protectedChars.length) {
            throw new IllegalArgumentException("the chars and the protectedChars array must be of same length");
        }
    }

    /**
     * Only change letters of the word that are unprotected to upper case.
     */
    public void toUpperCase() {
        for (int i = 0; i < chars.length; i++) {
            if (!protectedChars[i]) {
                chars[i] = Character.toUpperCase(chars[i]);
            }
        }
    }

    /**
     * Only change letters of the word that are unprotected to lower case.
     */
    public void toLowerCase() {
        for (int i = 0; i < chars.length; i++) {
            if (!protectedChars[i]) {
                chars[i] = Character.toLowerCase(chars[i]);
            }
        }
    }

    /**
     * Changes the character at index 0 to upper case and all following characters to lower case,
     * skipping every protected character.
     */
    public void toUpperFirst() {
        for (int i = 0; i < chars.length; i++) {
            if (protectedChars[i]) {
                continue;
            }

            chars[i] = (i == 0) ? Character.toUpperCase(chars[i]) : Character.toLowerCase(chars[i]);
        }
    }

    /**
     * @return {@code true} if this word, ignoring case and any ':' characters, is contained in
     *         {@code SMALLER_WORDS}
     */
    public boolean isSmallerWord() {
        // "word:" is still a small "word"
        // The word list is English, so lower-case with a fixed locale: with the default locale
        // a Turkish environment would turn "IN" into "ın" and miss the entry "in".
        return SMALLER_WORDS.contains(this.toString().replace(":", "").toLowerCase(Locale.ENGLISH));
    }

    /**
     * @return the negation of {@link #isSmallerWord()}
     */
    public boolean isLargerWord() {
        return !isSmallerWord();
    }

    /**
     * @return the current characters of the word as a string
     */
    @Override
    public String toString() {
        return new String(chars);
    }

    /**
     * @return {@code true} if the last character of the word is ':'
     */
    public boolean endsWithColon() {
        return this.toString().endsWith(":");
    }
}

/**
 * Parses a title to a list of words.
 * <p>
 * Words are maximal runs of non-whitespace characters. Each word carries, per character, whether
 * that character is protected, i.e. located inside a '{' ... '}' group of the title.
 * An instance keeps parsing state in fields and is therefore not meant to be shared between threads.
 */
private static final class TitleParser {

    // Characters of the word currently being collected (local use only, so no synchronized StringBuffer needed).
    private StringBuilder buffer;
    // Index in the title at which the current word starts, or -1 if no word has been started.
    private int wordStart;

    /**
     * Splits the given title at whitespace.
     *
     * @param title the title to parse
     * @return the words of the title in order of appearance; empty if the title contains only whitespace
     */
    public List<Word> parse(String title) {
        List<Word> words = new ArrayList<>();

        boolean[] isProtected = determineProtectedChars(title);

        reset();

        for (int index = 0; index < title.length(); index++) {
            char c = title.charAt(index);
            if (Character.isWhitespace(c)) {
                // whitespace terminates the current word (if any)
                createWord(isProtected).ifPresent(words::add);
            } else {
                if (wordStart == -1) {
                    wordStart = index;
                }
                buffer.append(c);
            }
        }
        // flush the trailing word that is not followed by whitespace
        createWord(isProtected).ifPresent(words::add);

        return words;
    }

    /**
     * Builds a word from the buffered characters and resets the parsing state.
     *
     * @param isProtected protection flags for the whole title
     * @return the word, or an empty Optional if no characters were buffered
     */
    private Optional<Word> createWord(boolean[] isProtected) {
        if (buffer.length() <= 0) {
            return Optional.empty();
        }

        char[] chars = buffer.toString().toCharArray();
        boolean[] protectedChars = new boolean[chars.length];

        // a word is a contiguous slice of the title, so its flags are a contiguous slice as well
        System.arraycopy(isProtected, wordStart, protectedChars, 0, chars.length);

        reset();

        return Optional.of(new Word(chars, protectedChars));
    }

    private void reset() {
        wordStart = -1;
        buffer = new StringBuilder();
    }

    /**
     * Marks every character that lies inside at least one '{' ... '}' group.
     * The brace characters themselves are never marked as protected.
     *
     * @param title the title to inspect
     * @return flags parallel to the characters of the title
     */
    private static boolean[] determineProtectedChars(String title) {
        boolean[] isProtected = new boolean[title.length()];

        int depth = 0;
        for (int i = 0; i < title.length(); i++) {
            char c = title.charAt(i);
            if (c == '{') {
                depth++;
            } else if (c == '}') {
                // Ignore unmatched closing braces: a negative depth would make a later,
                // properly balanced group appear unprotected.
                if (depth > 0) {
                    depth--;
                }
            } else {
                isProtected[i] = depth > 0;
            }
        }

        return isProtected;
    }

}

/**
 * Represents a title of a bibtex entry as the ordered list of its words.
 */
private static final class Title {

    // ArrayList instead of LinkedList: callers access words by index (get(i)),
    // which costs O(n) per call on a linked list.
    private final List<Word> words;

    /**
     * @param title the raw title; it is split into words by {@code TitleParser}
     */
    public Title(String title) {
        this.words = new ArrayList<>(new TitleParser().parse(title));
    }

    /**
     * @return the live, mutable list of words backing this title
     */
    public List<Word> getWords() {
        return words;
    }

    /**
     * @return the first word, or an empty Optional if the title has no words
     */
    public Optional<Word> getFirstWord() {
        if (words.isEmpty()) {
            return Optional.empty();
        }
        return Optional.of(words.get(0));
    }

    /**
     * @return the last word, or an empty Optional if the title has no words
     */
    public Optional<Word> getLastWord() {
        if (words.isEmpty()) {
            return Optional.empty();
        }
        return Optional.of(words.get(words.size() - 1));
    }

    /**
     * @return all words joined by a single space
     */
    @Override
    public String toString() {
        return words.stream().map(Word::toString).collect(Collectors.joining(" "));
    }

}

public static class LowerCaseChanger implements Formatter {

    /**
     * Lower-cases every unprotected character of the input, i.e. every character
     * that is not enclosed in '{' ... '}'.
     *
     * @param input the text to convert
     * @return the converted words joined by single spaces
     */
    @Override
    public String format(String input) {
        Title parsed = new Title(input);

        for (Word word : parsed.getWords()) {
            word.toLowerCase();
        }

        return parsed.toString();
    }

    @Override
    public String getName() {
        return "lower";
    }
}

public static class UpperCaseChanger implements Formatter {

    /**
     * Upper-cases every unprotected character of the input, i.e. every character
     * that is not enclosed in '{' ... '}'.
     *
     * @param input the text to convert
     * @return the converted words joined by single spaces
     */
    @Override
    public String format(String input) {
        Title parsed = new Title(input);

        for (Word word : parsed.getWords()) {
            word.toUpperCase();
        }

        return parsed.toString();
    }

    @Override
    public String getName() {
        return "UPPER";
    }
}

public static class UpperFirstCaseChanger implements Formatter {

    /**
     * First lower-cases the whole input via {@code LOWER}, then upper-cases the first character
     * of the first word. Characters enclosed in '{' ... '}' are left unchanged.
     *
     * @param input the text to convert
     * @return the converted words joined by single spaces
     */
    @Override
    public String format(String input) {
        String lowered = LOWER.format(input);
        Title parsed = new Title(lowered);

        parsed.getFirstWord().ifPresent(Word::toUpperFirst);

        return parsed.toString();
    }

    @Override
    public String getName() {
        return "Upper first";
    }
}

public static class UpperEachFirstCaseChanger implements Formatter {

    /**
     * Upper-cases the first character of every word and lower-cases the remaining ones.
     * Characters enclosed in '{' ... '}' are left unchanged.
     *
     * @param input the text to convert
     * @return the converted words joined by single spaces
     */
    @Override
    public String format(String input) {
        Title parsed = new Title(input);

        for (Word word : parsed.getWords()) {
            word.toUpperFirst();
        }

        return parsed.toString();
    }

    @Override
    public String getName() {
        return "Upper Each First";
    }
}

public static class TitleCaseChanger implements Formatter {

    /**
     * Applies title case:
     * smaller words (articles, prepositions, conjunctions) are lower-cased, all other words get an
     * upper-case first character; the first word, the last word and any word following a word that
     * ends with ':' always get an upper-case first character.
     * Characters enclosed in '{' ... '}' are left unchanged.
     *
     * @param input the text to convert
     * @return the converted words joined by single spaces
     */
    @Override
    public String format(String input) {
        Title parsed = new Title(input);
        List<Word> words = parsed.getWords();

        for (Word word : words) {
            if (word.isSmallerWord()) {
                word.toLowerCase();
            } else {
                word.toUpperFirst();
            }
        }

        parsed.getFirstWord().ifPresent(Word::toUpperFirst);
        parsed.getLastWord().ifPresent(Word::toUpperFirst);

        // Capitalize the word after a colon. The pair (second-to-last, last) is skipped on purpose:
        // the last word has already been capitalized above.
        int end = words.size() - 2;
        for (int i = 0; i < end; i++) {
            Word current = words.get(i);
            if (current.endsWithColon()) {
                words.get(i + 1).toUpperFirst();
            }
        }

        return parsed.toString();
    }

    @Override
    public String getName() {
        return "Title";
    }
}

// Shared formatter instances, one per case-changing strategy.
/** Formatter named "lower": lower-cases all unprotected characters. */
public static final LowerCaseChanger LOWER = new LowerCaseChanger();
/** Formatter named "UPPER": upper-cases all unprotected characters. */
public static final UpperCaseChanger UPPER = new UpperCaseChanger();
/** Formatter named "Upper first": lower-cases everything, then capitalizes the first word. */
public static final UpperFirstCaseChanger UPPER_FIRST = new UpperFirstCaseChanger();
/** Formatter named "Upper Each First": capitalizes the first character of every word. */
public static final UpperEachFirstCaseChanger UPPER_EACH_FIRST = new UpperEachFirstCaseChanger();
/** Formatter named "Title": title case with lower-cased articles, prepositions and conjunctions. */
public static final TitleCaseChanger TITLE = new TitleCaseChanger();

public static final List<Formatter> ALL = Arrays.asList(CaseChangers.LOWER, CaseChangers.UPPER, CaseChangers.UPPER_FIRST, CaseChangers.UPPER_EACH_FIRST, CaseChangers.TITLE);
public static final List<Formatter> ALL = Arrays.asList(LOWER, UPPER, UPPER_FIRST, UPPER_EACH_FIRST, TITLE);
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
package net.sf.jabref.logic.formatter;
package net.sf.jabref.logic.formatter.bibtexfields;

import net.sf.jabref.logic.formatter.Formatter;

import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand Down

0 comments on commit 2b79053

Please sign in to comment.