-
Notifications
You must be signed in to change notification settings - Fork 0
/
WikiPage.py
289 lines (278 loc) · 9.44 KB
/
WikiPage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
#!/Library/Frameworks/Python.framework/Versions/3.1/bin/python3
import os
import sys
# Languages a page may have a section for; getHtml renders them in this order.
languages = ('English', 'German', 'French', 'Dutch', 'Russian')
# Flag image basenames, index-aligned with `languages` (see Flagged).
flags = ('EN', 'DE', 'FR', 'NL', 'RU')
# Opening markers of the inline markup recognised by MDText.
code = ('**', '[[', '`')
class Bunch:
    """Plain attribute container: every keyword argument becomes an attribute."""

    def __init__(self, **kwds):
        # Store each keyword under its own name on the instance.
        for attr_name, attr_value in kwds.items():
            setattr(self, attr_name, attr_value)
class WikiPage:
    """A glossary page parsed from a Markdown-like wiki file.

    The file is split into sections introduced by ``## <name>`` headers.
    Inside a section, a bullet whose text starts with an underscore lists
    the section's terms (``* _term_; _other term_``); every other bullet is
    a ``* Key: value`` pair.  Values are wrapped in Publication, MDText or
    Entry objects depending on the key; terms are kept as plain strings.
    """

    def __init__(self, fname):
        """Read and parse the wiki file *fname*.

        Lines that match none of the known shapes are reported on stdout
        as 'Strange line' and otherwise ignored.
        """
        self.main = fname
        # Section names in the order they first received a value.
        self.order = []
        # section name -> key -> list of values
        self.sections = {}
        # section name -> keys in the order they were first added
        self.orders = {}
        # The context manager closes the file even if reading fails.
        with open(fname, 'r') as f:
            self.text = f.readlines()
        cur = ''
        for line in self.text:
            if line.startswith('## '):
                cur = line[3:].strip()
                self.orders[cur] = []
                continue
            if not cur or not line.strip():
                # skip blank lines and lines before the first section
                continue
            # TODO: fragile heuristic - a terms line is recognised only by an
            # underscore directly after the two-character bullet prefix.
            # Slicing (rather than indexing) keeps very short lines from
            # raising IndexError.
            if line[2:3] == '_':
                for s in line[2:].split('; '):
                    z = s.strip()
                    if z.startswith('_') and z.endswith('_'):
                        z = z[1:-1]
                    self.addValue(cur, 'Terms', z)
                continue
            wrds = line.split(': ')
            if len(wrds) > 1:
                bullet = wrds[0].split('* ')
                if len(bullet) < 2:
                    # "key: value" without a leading bullet: report it
                    # instead of failing on the missing key.
                    print('Strange line:', line)
                    continue
                lhs = bullet[1]
                rhs = ': '.join(wrds[1:]).strip()
                # TODO: decide whether this is a temporary fix or a robustness invariant
                if rhs.startswith('_') and rhs.endswith('_'):
                    rhs = rhs[1:-1]
                if lhs == 'Publication':
                    e = Publication(rhs)
                elif lhs == 'Definition':
                    e = MDText(rhs)
                else:
                    e = Entry(rhs)
                self.addValue(cur, lhs, e)
            elif line.strip():
                print('Strange line:', line)

    def getValues(self, key1, key2):
        """Return the values stored under section *key1*, key *key2* ([] if absent)."""
        if key1 not in self.sections or key2 not in self.sections[key1]:
            return []
        return self.sections[key1][key2]

    def getKeys(self, key):
        """Return the keys of section *key* in insertion order ([] if the section has no values)."""
        if key not in self.sections:
            return []
        return self.orders[key]

    def addValue(self, key1, key2, v):
        """Append *v* to the values of key *key2* in section *key1*.

        The section and the key are created on first use.
        """
        if key1 not in self.sections:
            self.sections[key1] = {}
            self.order.append(key1)
        if key2 not in self.sections[key1]:
            self.sections[key1][key2] = [v]
            # setdefault: the section may not have been announced by a
            # '## ' header (e.g. when addValue is called directly).
            self.orders.setdefault(key1, []).append(key2)
        else:
            self.sections[key1][key2].append(v)

    def who(self):
        """Return the name of this object's class."""
        return self.__class__.__name__

    def validate(self):
        """Compare the re-serialised page with the original text, line by line.

        Prints one message for every serialised line missing from the
        original and one for every non-blank original line that was not
        reproduced.
        """
        lines = [stripped for stripped in (raw.strip() for raw in self.text) if stripped]
        for line in str(self).split('\n'):
            if not line:
                continue
            if line in lines:
                lines.remove(line)
            else:
                print(' * The original is expected to have line "%s"' % line)
        for line in lines:
            print(' * The original has unmatched line "%s"' % line)

    def getLanguages(self):
        """Return the sorted names of all sections that hold at least one value."""
        return sorted(self.sections.keys())

    def getNames(self, lang):
        """Return the 'Terms' values followed by the 'Short' values of section *lang*."""
        return self.getValues(lang, 'Terms') + self.getValues(lang, 'Short')

    def getKeywords(self):
        """Return every section name, each followed by that section's terms."""
        kws = []
        for lang in self.orders:
            kws.append(lang)
            kws.extend(self.getValues(lang, 'Terms'))
        return kws

    def getHtml(self, main):
        """Render the page as an XHTML document.

        *main* is the name the page is rendered under: it is used in the
        title and as the image caption of figures, and terms or short names
        equal to it are not hyperlinked.
        """
        parts = ['''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xhtml="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<meta name="keywords" content="software linguistics, software language engineering, book of knowledge, glossary, %s"/>
<title>SL(E)BOK — SLEG — %s</title>
<link href="www/sleg.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<div class="left">
<a href="index.html"><img src="www/sleg.200.png" alt="Software Language Engineering Glossary (SLEG)" class="pad"/></a><br/>
<div class="pad">[<a href="http://github.com/grammarware/sleg/%s/_edit">Edit!</a>]</div><br/>
<a href="http://creativecommons.org/licenses/by-sa/3.0/" title="CC-BY-SA"><img src="www/cc-by-sa.png" alt="CC-BY-SA"/></a><br/>
<a href="http://creativecommons.org/licenses/by-sa/3.0/" title="Open Knowledge"><img src="www/open-knowledge.png" alt="Open Knowledge" class="pad" /></a><br/>
<a href="http://validator.w3.org/check/referer" title="XHTML 1.0 W3C Rec"><img src="www/xhtml10.png" alt="XHTML 1.0 W3C Rec" /></a><br/>
<a href="http://jigsaw.w3.org/css-validator/check/referer" title="CSS 2.1 W3C CanRec"><img src="www/css21.png" alt="CSS 2.1 W3C CanRec" class="pad" /></a><br/>
<div>[<a href="mailto:vadim@grammarware.net">Complain!</a>]</div>
</div>
<div class="main">
''' % ('; '.join(self.getKeywords()), main, self.main.split('.md')[0].replace(' ', '-'))]
        for lang in languages:
            if lang not in self.sections:
                continue
            # main loop
            parts.append('<h2>%s</h2>\n<ul><li>' % Flagged(lang))
            ts = []
            for t in self.getValues(lang, 'Terms'):
                if t == main:
                    # do not hyperlink the page to itself
                    ts.append('<strong>%s</strong>' % t)
                else:
                    ts.append('<a href="%s.html"><strong>%s</strong></a>' % (t, t))
            parts.append('; '.join(ts))
            if 'Short' in self.getKeys(lang):
                z = []
                for short in self.getValues(lang, 'Short'):
                    # Compare the entry's text, not the entry object itself:
                    # an object never equals the string `main`, which made
                    # self-references always hyperlinked.
                    if short.text == main or not short.text.isalnum():
                        z.append('%s' % short.getHtml())
                    else:
                        z.append('<a href="%s.html">%s</a>' % (short, short.getHtml()))
                parts.append(' (%s)' % '; '.join(z))
            parts.append('</li>\n')
            for k in self.getKeys(lang):
                if k == 'Short' or k == 'Terms':
                    # already rendered in the heading bullet above
                    continue
                for rhs in self.getValues(lang, k):
                    if k == 'Figure':
                        parts.append('<div class="fig"><a href="http://github.com/grammarware/sleg/blob/master/figures/%s"><img src="http://github.com/grammarware/sleg/raw/master/figures/%s" alt="%s" title="%s"/></a><br/>(<a href="http://github.com/grammarware/sleg/blob/master/figures/%s.info.txt">info</a>)</div>' % (rhs, rhs, main, main, rhs))
                    elif k == 'Definition':
                        parts.append('<li class="def">%s</li>\n' % rhs.getHtml())
                    else:
                        parts.append('<li>%s: %s</li>' % (k, rhs.getHtml()))
            parts.append('</ul>')
        parts.append('''</div><div style="clear:both"/><hr />
<div class="last">
<em>
<a href="http://github.com/grammarware/sleg">Software Language Engineering Glossary</a> (SLEG) is
created and maintained by <a href="http://grammarware.net">Dr. Vadim Zaytsev</a>.
</em>
</div></body></html>''')
        return ''.join(parts)

    def __str__(self):
        """Serialise the page back to its wiki-text form.

        Sections appear in the order they first received a value; within a
        section the terms line comes first, then the remaining keys in
        insertion order.
        """
        s = ''
        for lang in self.order:
            terms = '; '.join(['_%s_' % term for term in self.getValues(lang, 'Terms')])
            s += '\n## %s\n* %s\n' % (lang, terms)
            for k in self.getKeys(lang):
                if k == 'Terms':
                    continue
                for v in self.getValues(lang, k):
                    s += '* %s: %s\n' % (k, v)
        return s.strip() + '\n'
# Publication: [*Generalized multitext grammars*](http://dx.doi.org/10.3115/1218955.1219039)
class Publication:
    """A publication reference written as a Markdown link: ``[*title*](url)``."""

    def __init__(self, s):
        """Extract the title and the link from the Markdown link *s*."""
        after_bracket = s.split('[')[1]
        title = after_bracket.split(']')[0]
        # Drop one layer of emphasis per marker: first asterisks, then underscores.
        for marker in ('*', '_'):
            if title.startswith(marker) and title.endswith(marker):
                title = title[1:-1]
        self.title = title
        target = s.split('](')[1]
        # The last character is taken to be the closing parenthesis.
        self.link = target[:-1]

    def who(self):
        """Return the name of this object's class."""
        return self.__class__.__name__

    def getHtml(self):
        """Return the publication as an emphasised HTML hyperlink."""
        return '<em><a href="%s">%s</a></em>' % (self.link, self.title)

    def __str__(self):
        """Return the publication as a Markdown link with an asterisk-emphasised title."""
        return '[*%s*](%s)' % (self.title, self.link)
# English: Wikipedia: http://en.wikipedia.org/wiki/Algebraic_data_type
class Entry:
    """A generic value of a wiki bullet: a URL, an inline code span or plain text."""

    def __init__(self, s):
        """Store the raw value text *s*."""
        self.text = s

    def who(self):
        """Return the name of this object's class."""
        return self.__class__.__name__

    def getHtml(self):
        """Return the value as HTML.

        URLs become hyperlinks, text starting with a backtick becomes a
        ``<code>`` element holding the first backtick-delimited span, and
        anything else is returned unchanged.
        """
        # Accept both schemes; previously https:// sources were emitted as
        # plain, unclickable text.
        if self.text.startswith(('http://', 'https://')):
            return '<a class="src" href="%s">%s</a>' % (self.text, self.text)
        elif self.text.startswith('`'):
            return '<code>%s</code>' % self.text.split('`')[1]
        else:
            return '%s' % self.text

    def __str__(self):
        """Return the raw value text."""
        return self.text
class MDText:
    """A line of text with inline markup, parsed into a flat list of chunks.

    Recognised markup: ``**bold**``, ``[[wiki link]]`` and backtick-quoted
    code.  Everything else is kept as bare text.  Nesting is not supported.
    """

    def __init__(self, s):
        """Split *s* into MDBold, MDLink, MDCode and MDBare chunks.

        An opening marker without a matching closing marker is kept as
        bare text, so no input characters are lost.
        """
        self.chunks = []
        while s:
            if s.startswith('**'):
                end = s.find('**', 2)
                if end < 0:
                    # Unterminated bold: keep the marker literally.
                    self.chunks.append(MDBare('**'))
                    s = s[2:]
                    continue
                self.chunks.append(MDBold(s[2:end]))
                s = s[end+2:]
            elif s.startswith('[['):
                end = s.find(']]', 2)
                if end < 0:
                    # Unterminated link: keep the marker literally.
                    self.chunks.append(MDBare('[['))
                    s = s[2:]
                    continue
                self.chunks.append(MDLink(s[2:end]))
                s = s[end+2:]
            elif s.startswith('`'):
                end = s.find('`', 1)
                if end < 0:
                    # Unterminated code span: keep the backtick literally.
                    self.chunks.append(MDBare('`'))
                    s = s[1:]
                    continue
                self.chunks.append(MDCode(s[1:end]))
                s = s[end+1:]
            else:
                # Bare text runs up to the next opening marker, if any.
                # s does not start with a marker here, so the cut is > 0
                # and the loop always makes progress.
                hits = [pos for pos in (s.find(marker) for marker in code) if pos != -1]
                cut = min(hits) if hits else len(s)
                self.chunks.append(MDBare(s[:cut]))
                s = s[cut:]

    def getHtml(self):
        """Return the concatenated HTML of all chunks."""
        return ''.join(chunk.getHtml() for chunk in self.chunks)

    def __str__(self):
        """Return the concatenated Markdown of all chunks."""
        return ''.join(map(str, self.chunks))
# not good with nesting
class MDBold:
    """A bold span of inline text."""

    # Output templates for the two serialisations.
    _HTML = '<strong>%s</strong>'
    _MARKDOWN = '**%s**'

    def __init__(self, s):
        """Store the text between the bold markers."""
        self.text = s

    def getHtml(self):
        """Return the span wrapped in a ``<strong>`` element."""
        return self._HTML % self.text

    def __str__(self):
        """Return the span in its Markdown form."""
        return self._MARKDOWN % self.text
# wiki link: [[goal]] or [[text|goal]] (the part after the bar is the link target)
class MDLink:
    """A wiki link: ``[[goal]]`` or ``[[text|goal]]``."""

    def __init__(self, s):
        """Parse the link content *s* (the text between the double brackets).

        Without a bar, *s* is both the visible text and the link target.
        With a bar, the part before the first bar is the visible text and
        the rest is the target.
        """
        # partition splits on the first bar only, so content with several
        # bars no longer fails on tuple unpacking.
        text, bar, goal = s.partition('|')
        if bar:
            self.text, self.goal = text, goal
        else:
            self.goal = self.text = s

    def getHtml(self):
        """Return a hyperlink to ``<goal>.html`` showing the link text."""
        return '<a href="%s.html">%s</a>' % (self.goal, self.text)  # .capitalize()?

    def __str__(self):
        """Return the link in wiki syntax, omitting the bar when text and goal coincide."""
        if self.goal == self.text:
            return '[[%s]]' % self.text
        else:
            return '[[%s|%s]]' % (self.text, self.goal)
class MDCode:
    """An inline code span."""

    # Output templates for the two serialisations.
    _HTML = '<code>%s</code>'
    _MARKDOWN = '`%s`'

    def __init__(self, s):
        """Store the text between the backticks."""
        self.text = s

    def getHtml(self):
        """Return the span wrapped in a ``<code>`` element."""
        return self._HTML % self.text

    def __str__(self):
        """Return the span in its Markdown form."""
        return self._MARKDOWN % self.text
class MDBare:
    """A run of text without markup; both serialisations return it unchanged."""

    def __init__(self, s):
        """Store the plain text."""
        self.text = s

    def getHtml(self):
        """Return the text as is."""
        plain = self.text
        return plain

    def __str__(self):
        """Return the text as is."""
        plain = self.text
        return plain
class Flagged:
    """A language name that renders as its flag image followed by the name."""

    def __init__(self, lang):
        """Look up the flag matching *lang*; *lang* must be listed in ``languages``."""
        self.lang = lang
        # `flags` is index-aligned with `languages`.
        position = languages.index(lang)
        self.flag = flags[position]

    def __str__(self):
        """Return the HTML: a flag ``<img>`` followed by the language name."""
        name = self.lang
        return '<img src="www/%s.png" alt="%s"/> %s' % (self.flag, name, name)