This repository has been archived by the owner on Aug 20, 2021. It is now read-only.
/
citeproc.py
238 lines (203 loc) · 7.73 KB
/
citeproc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
"""
For processing citations and bibliographies using the Citation Style
Language (CSL).
>>> STYLE = Style('apa.csl')
>>> REFS = json.loads(open('refs.json').read())
>>> PROCESSED = process_bibliography(STYLE, REFS)
>>> format_bibliography(PROCESSED)
"""
from xml.etree.ElementTree import Element, SubElement, ElementTree, tostring
import json
CSLNS = '{http://purl.org/net/xbiblio/csl}'
# >>> classes <<<
class Style(ElementTree):
"""
An ElementTree wrapper to easily parse and work with a CSL instance.
>>> style = Style('some.csl')
>>> style.title
"Some Style"
"""
def __init__(self, csl_fname):
_style = self.parse(csl_fname)
_info = _style.find(CSLNS + 'info')
self.title = _info.find(CSLNS + 'title').text
self.updated = _info.find(CSLNS + 'updated').text
self.macros = _style.findall(CSLNS + 'macro')
self.citation = _style.find(CSLNS + 'citation')
self.bibliography = _style.find(CSLNS + 'bibliography')
if self.citation:
self.citation.layout = self.citation.find(CSLNS + 'layout')
self.citation.options = self.citation.findall(CSLNS + 'option')
if self.bibliography:
self.bibliography.options = self.bibliography.find(CSLNS + 'option')
self.bibliography.layout = self.bibliography.find(CSLNS + 'layout')
# >>> processing functions <<<
def get_macro(name, macros):
"""
Return macro from macro list by name.
"""
for themacro in (macro for macro in macros if macro.get('name') == name):
if themacro:
return(themacro)
else:
pass
def get_property(lname):
"""
When given a CSL variable name, returns the corresponding RDF property.
"""
pmap = {
"title": "dcterms:title",
"issued": "dcterms:issued",
"volume": "bibo:volume",
"issue": "bibo:issue",
"doi": "bibo:doi",
"uri": "bibo:uri",
"isbn": "bibo:isbn",
# return a dict for relations; need to adjust other code
"container-title": {"dcterms:isPartOf": "dcterms:title"}
}
return(pmap[lname])
def sortkey(style, reference, context='bibliography'):
"""
When give a Reference and a Style, returns a sorting key.
"""
return(reference['title'], reference['date'])
def process_group(style_node, reference):
"""
When given a style node and a reference, return an evaluated cs:group.
"""
pass
def process_names(style_node, reference):
"""
When given a style node and a reference, returns an evaluated list of
contributor names.
"""
pass
def condition(condition_attributes, reference):
"""
Evaluates a condition.
"""
conditions = []
if 'variable' in condition_attributes:
variables = condition_attributes['variable'].split(" ")
for variable in variables:
conditions.append(variable in reference)
if 'type' in condition_attributes:
reftypes = condition_attributes['type'].split(" ")
for reftype in reftypes:
conditions.append(reftype == reference['type'])
if 'match' in condition_attributes:
match = condition_attributes['match']
if match == 'none':
return(True not in conditions)
elif match == 'all':
return(False not in conditions)
else:
return(True in conditions)
def process_children(parent, style_node, style_macros, reference):
children = style_node.getchildren()
for child in children:
return(process_node(parent, child, style_macros, reference))
def process_choose(parent, style_node, style_macros, reference):
"""
When given a style node and a reference, return an evaluated cs:choose.
"""
children = style_node.getchildren()
for child in children:
# first we check if there are attributes; if there are, it means
# the node is either a cs:if or a cs:else-if element
if child.attrib:
if condition(child.attrib, reference):
process_children(parent, child, style_macros, reference)
break
else:
pass
else:
process_children(parent, child, style_macros, reference)
def process_text(parent, style_node, style_macros, reference):
"""
When given a style node and a reference, return an evaludated cs:text.
"""
formatting = style_node.attrib
variable = style_node.get('variable')
macro = style_node.get('macro')
if variable:
content = reference[style_node.get('variable')] if variable in reference else None
if content:
node = SubElement(parent, "span", attrib=formatting)
node.set('property', get_property(node.attrib.pop('variable')))
node.text = content
return(node)
elif macro:
macro_result = process_macro(parent, get_macro(macro, style_macros), style_macros, reference)
return(macro_result)
else:
pass
def process_node(parent, style_node, style_macros, reference):
"""
Passes of style node processing to appropriate function.
"""
if style_node.tag == CSLNS + "group":
return(process_group(style_node, reference))
elif style_node.tag == CSLNS + "names":
return(process_names(style_node, reference))
elif style_node.tag == CSLNS + "choose":
return(process_choose(parent, style_node, style_macros, reference))
elif style_node.tag == CSLNS + "text":
return(process_text(parent, style_node, style_macros, reference))
def process_macro(parent, macro, style_macros, reference):
"""
When given a macro and a reference, return an evaluated macro
(a list of FormattedNode objects).
"""
mlist = [process_node(parent, style_node, style_macros, reference) for style_node in macro]
return(mlist)
def process_citation(style, reference_list, citation):
"""
With a Style, a list of References and the list of citation groups
(the list of citations with their locator), produce the
formatted output for each citation group.
"""
processed_citation = Element("span", attrib={"class":"citation"})
for reference in reference_list:
citeref = SubElement(processed_citation, "span")
for style_node in style.citation.layout:
process_node(citeref, style_node, style.macros, reference)
return(processed_citation)
def process_bibliography(style, reference_list):
"""
With a Style and the list of references produce a list of formatted
bibliographc entries.
"""
processed_bibliography = Element("ol", attrib={"class":"bibliography"})
for reference in reference_list:
ref = SubElement(processed_bibliography, "li", attrib={"property":"dcterms:references"})
for style_node in style.bibliography.layout:
process_node(ref, style_node, style.macros, reference)
return(processed_bibliography)
def format_bibliography(processed_bibliography, oformat='html+rdfa'):
"""
Generates final output.
"""
if oformat == 'html+rdfa':
print(tostring(processed_bibliography))
elif oformat == 'text':
result = ""
for node in processed_bibliography.getiterator():
if node.text:
result += node.text
print(result)
def citeproc(style, reference_list):
"""
With a Style, a list of References and the list of citation
groups (the list of citations with their locator), produce the
FormattedOutput for each citation group and the bibliography.
"""
pass
def add_year_suffix(reference_list):
"""
Given the list of References, compare year and contributors' names and,
when they collide, generate a suffix to append to the year for
disambiguation.
"""
pass