Skip to content

Commit

Permalink
More generous subtitle capitalization
Browse files Browse the repository at this point in the history
  • Loading branch information
fbennett committed Apr 21, 2019
1 parent f7eb9e2 commit 19951b0
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 10 deletions.
2 changes: 1 addition & 1 deletion src/build.js
Expand Up @@ -692,7 +692,7 @@ CSL.Engine.prototype.retrieveItem = function (id) {
}
// Add support for main_title_from_short_title
if (this.opt.development_extensions.main_title_from_short_title) {
CSL.extractTitleAndSubtitle(Item);
CSL.extractTitleAndSubtitle.call(this, Item);
}
var isLegalType = ["bill","legal_case","legislation","gazette","regulation"].indexOf(Item.type) > -1;
if (this.opt.development_extensions.force_jurisdiction && isLegalType) {
Expand Down
134 changes: 125 additions & 9 deletions src/load.js
Expand Up @@ -631,7 +631,7 @@ var CSL = {
"container-title"
],
TITLE_FIELD_SPLITS: function(seg) {
var keys = ["title", "short", "main", "sub"];
var keys = ["title", "short", "main", "sub", "subjoin"];
var ret = {};
for (var i=0,ilen=keys.length;i<ilen;i++) {
ret[keys[i]] = seg + "title" + (keys[i] === "title" ? "" : "-" + keys[i]);
Expand Down Expand Up @@ -665,6 +665,8 @@ var CSL = {
},

extractTitleAndSubtitle: function (Item) {
// XXX In this function, split on split-char, but prefer exact match
// XXX of subtitle to a split-char in title if found.
var segments = ["", "container-"];
for (var i=0,ilen=segments.length;i<ilen;i++) {
var seg = segments[i];
Expand All @@ -691,12 +693,63 @@ var CSL = {
}
vals[title.main] = vals[title.title];
vals[title.sub] = false;
if (vals[title.title] && vals[title["short"]]) {
var shortTitle = vals[title["short"]];
var offset = shortTitle.length;
if (vals[title.title].slice(0,offset) === shortTitle && vals[title.title].slice(offset).match(/^\s*:/)) {
vals[title.main] = vals[title.title].slice(0,offset).replace(/\s+$/,"");
vals[title.sub] = vals[title.title].slice(offset).replace(/^\s*:\s*/,"");
var shortTitle = vals[title["short"]];
if (vals[title.title]) {
// Rules
// TITLE_SPLIT eliminates split-points of period-space preceded by a capital letter.
// If short title exists and matches exactly to a split-point, use that split-point only.
// Otherwise if there is just one split-point, use that as main/sub split.
// Otherwise use all split-points ... which is handled in titleCaseSentenceOrNormal, not here.
if (shortTitle && shortTitle === vals[title.title]) {
vals[title.main] = vals[title.title];
vals[title.subjoin] = "";
vals[title.sub] = "";
} else if (shortTitle) {
// check for valid match to shortTitle
var checkAhead = vals[title.title].slice(shortTitle.replace(/[\?\!]+$/, "").length);
var m = CSL.TITLE_SPLIT_REGEXP.matchfirst.exec(checkAhead);
if (m) {
vals[title.main] = shortTitle;
vals[title.subjoin] = m[1].replace(/[\?\!]+(\s*)$/, "$1");
vals[title.sub] = checkAhead.replace(CSL.TITLE_SPLIT_REGEXP.matchfirst, "");
} else {
var splitTitle = CSL.TITLE_SPLIT(vals[title.title]);
if (splitTitle.length == 3) {
vals[title.main] = splitTitle[0];
vals[title.subjoin] = splitTitle[1];
vals[title.sub] = splitTitle[2];
} else {
vals[title.main] = vals[title.title];
vals[title.subjoin] = "";
vals[title.sub] = "";
}
}
} else {
var splitTitle = CSL.TITLE_SPLIT(vals[title.title]);
if (splitTitle.length == 3) {
vals[title.main] = splitTitle[0];
vals[title.subjoin] = splitTitle[1];
vals[title.sub] = splitTitle[2];
} else {
vals[title.main] = vals[title.title];
vals[title.subjoin] = "";
vals[title.sub] = "";
}
}
if (vals[title.subjoin]) {
if (vals[title.subjoin].match(/([\?\!])/)) {
var m = vals[title.subjoin].match(/(\s*)$/)
vals[title.main] = vals[title.main] + vals[title.subjoin].trim();
vals[title.subjoin] = m[1];
}
}
}
if (vals[title.subjoin]) {
if (vals[title.subjoin].indexOf(":") > -1) {
vals[title.subjoin] = ": ";
}
if (vals[title.subjoin].indexOf("-") > -1 || vals[title.subjoin].indexOf("—") > -1) {
vals[title.subjoin] = "—";
}
}
if (lang) {
Expand All @@ -716,6 +769,12 @@ var CSL = {
},

titlecaseSentenceOrNormal: function(state, Item, seg, lang, sentenceCase) {
// Hold on here.
// What is seg here?
// It's ... either "" or "container-". Which is ugly, but works.
// But this ALWAYS returns the full title, never short.
// So sentence-casing cannot be applied to short.
// Goes unnoticed because forced sentence-casing almost never appears in styles.
var title = CSL.TITLE_FIELD_SPLITS(seg);
var vals = {};
if (lang && Item.multi) {
Expand All @@ -728,24 +787,45 @@ var CSL = {
if (Item.multi._keys[title.sub]) {
vals[title.sub] = Item.multi._keys[title.sub][lang];
}
if (Item.multi._keys[title.subjoin]) {
vals[title.subjoin] = Item.multi._keys[title.subjoin][lang];
}
} else {
vals[title.title] = Item[title.title];
vals[title.main] = Item[title.main];
vals[title.sub] = Item[title.sub];
vals[title.subjoin] = Item[title.subjoin];
}
if (vals[title.main] && vals[title.sub]) {
var mainTitle = vals[title.main];
var subJoin = vals[title.subjoin];
var subTitle = vals[title.sub];
if (sentenceCase) {
mainTitle = CSL.Output.Formatters.sentence(state, mainTitle);
subTitle = CSL.Output.Formatters.sentence(state, subTitle);
} else if (state.opt.development_extensions.uppercase_subtitles) {
subTitle = CSL.Output.Formatters["capitalize-first"](state, subTitle);
}
return [mainTitle, subTitle].join(vals[title.title].slice(mainTitle.length, -1 * subTitle.length));
return [mainTitle, subJoin, subTitle].join("");
} else {
if (sentenceCase) {
return CSL.Output.Formatters.sentence(state, vals[title.title]);
} else if (state.opt.development_extensions.uppercase_subtitles) {
// Split and apply everywhere.
var splits = CSL.TITLE_SPLIT(vals[title.title]);
for (var i=0,ilen=splits.length; i<ilen; i += 2) {
splits[i] = CSL.Output.Formatters["capitalize-first"](state, splits[i]);
}
for (var i=1, ilen=splits.length-1; i < ilen; i += 2) {
if (splits[i].indexOf(":") > -1) {
splits[i] = ": ";
}
if (splits[i].indexOf("-") > -1 || splits[i].indexOf("—") > -1) {
splits[i] = "—";
}
}
vals[title.title] = splits.join("");
return vals[title.title];
} else {
return vals[title.title];
}
Expand Down Expand Up @@ -1063,5 +1143,41 @@ var CSL = {
"csl_reverse_lookup_support",
"main_title_from_short_title",
"uppercase_subtitles"
]
],

TITLE_SPLIT_REGEXP: (function() {
var splits = [
"\\.\\s+",
"\\!\\s+",
"\\?\\s+",
"\\s*::*\\s+",
"\\s*—\\s*",
"\\s+\\-\\s+",
"\\s*\\-\\-\\-*\\s*"
]
return {
match: new RegExp("(" + splits.join("|") + ")", "g"),
matchfirst: new RegExp("^(" + splits.join("|") + ")"),
split: new RegExp("(?:" + splits.join("|") + ")")
}
})(),

TITLE_SPLIT: function(str) {
if (!str) {
return str;
}
var m = str.match(CSL.TITLE_SPLIT_REGEXP.match);
var lst = str.split(CSL.TITLE_SPLIT_REGEXP.split);
for (var i=lst.length-2; i>-1; i--) {
if (lst[i] && lst[i].slice(-1).toLowerCase() !== lst[i].slice(-1)) {
// recombine
lst[i] = lst[i] + m[i] + lst[i+1];
lst = lst.slice(0, i+1).concat(lst.slice(i+2))
} else {
// merge
lst = lst.slice(0, i+1).concat([m[i]]).concat(lst.slice(i+1))
}
}
return lst;
}
};

0 comments on commit 19951b0

Please sign in to comment.