This repository has been archived by the owner on Nov 9, 2017. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 70
/
blog.py
603 lines (544 loc) · 24 KB
/
blog.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
# The MIT License
#
# Copyright (c) 2008 William T. Katz
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
"""A simple RESTful blog/homepage app for Google App Engine
This simple homepage application tries to follow the ideas put forth in the
book 'RESTful Web Services' by Leonard Richardson & Sam Ruby. It follows a
Resource-Oriented Architecture where each URL specifies a resource that
accepts HTTP verbs.
Rather than create new URLs to handle web-based form submission of resources,
this app embeds form submissions through javascript. The ability to send
HTTP verbs POST, PUT, and DELETE is delivered through javascript within the
GET responses. In other words, a rich client gets transmitted with each GET.
This app's API should be reasonably clean and easily targeted by other
clients, like a Flex app or a desktop program.
"""
__author__ = 'William T. Katz'
import datetime
import string
import re
import os
import cgi
import urllib
import logging
from google.appengine.ext import webapp
from google.appengine.api import users
from google.appengine.ext import db
from google.appengine.ext.webapp import template
from google.appengine.api import mail
from google.appengine.api import urlfetch
from handlers import restful
from utils import authorized
from utils import sanitizer
import models
import view
import config
import legacy_aliases # This can be either manually created or
# autogenerated using the drupal_uploader utility
# Permalink builders keyed by article type.  Each callable takes the
# article's title and a date and returns the permalink string.
permalink_funcs = {
    # Plain articles use only the URL-friendly title.
    'article': lambda title, date: get_friendly_url(title),
    # Blog entries are prefixed with the (unpadded) year and month.
    'blog entry': lambda title, date: "/".join(
        [str(date.year), str(date.month), get_friendly_url(title)]),
}
# We allow a mapping from some old url pattern to the current query
# using a regex's matched string.  The first group of each pattern
# captures the legacy id of the article.
_LEGACY_URL_PATTERNS = {
    'Drupal': r'node/(\d+)/?$',
    'Serendipity': r'archives/(\d+)-.*\.html$',
}


def legacy_id_mapping(path, legacy_program):
    """Look up an article by the legacy id embedded in an old-style URL.

    Args:
        path: request path to test against the legacy URL pattern.
        legacy_program: name of the previous blog software ('Drupal' or
            'Serendipity'); any other value disables the lookup.

    Returns:
        The result of the datastore query on `legacy_id` when `path`
        matches the program's pattern, otherwise None.
    """
    pattern = _LEGACY_URL_PATTERNS.get(legacy_program)
    if pattern is None:
        return None
    found = re.match(pattern, path)
    if found is None:
        return None
    article_query = db.Query(models.blog.Article)
    return article_query.filter('legacy_id =', found.group(1)).get()
# Module methods to handle incoming data
def get_datetime(time_string = None):
    """Parse a 'YYYY-MM-DD HH:MM:SS' string into a datetime.

    When `time_string` is empty or None the current local time is
    returned instead.  A non-empty string in any other layout raises
    ValueError from `strptime`.
    """
    if not time_string:
        return datetime.datetime.now()
    return datetime.datetime.strptime(time_string, '%Y-%m-%d %H:%M:%S')
def get_format(format_string):
    """Return `format_string` if it is 'html' or 'textile', else 'html'."""
    supported_formats = ('html', 'textile')
    if format_string in supported_formats:
        return format_string
    return 'html'
def get_tag_key(tag_name):
    """Return the key of the Tag obtained via `get_or_insert(tag_name)`."""
    return models.blog.Tag.get_or_insert(tag_name).key()
def process_tag(tag_name, tags):
    """Coerce `tag_name` to the spelling of an existing tag, if any.

    Args:
        tag_name: raw tag text; surrounding whitespace is stripped.
        tags: iterable of mappings, each with a 'name' entry.

    Returns:
        The 'name' of the first entry in `tags` that equals the stripped
        `tag_name` case-insensitively, or the stripped `tag_name` itself
        when nothing matches.
    """
    cleaned = tag_name.strip()
    wanted = cleaned.lower()
    for known in tags:
        canonical = known['name']
        if canonical.lower() == wanted:
            return canonical
    return cleaned
def get_tags(tags_string):
    """Split a comma-separated tag string into a list of tag names.

    Each name is passed through `process_tag`, which strips it and
    coerces it to the spelling of an already-known tag from `Tag.list()`.

    Args:
        tags_string: comma-separated tag names, or an empty value.

    Returns:
        A list of tag names, or None when `tags_string` is empty/None.
    """
    logging.debug("get_tags: tag_string = %s", tags_string)
    if not tags_string:
        return None
    from models.blog import Tag
    tags = Tag.list()
    logging.debug(" tags = %s", tags)
    # Test for blank segments *after* trimming: inputs such as "a, ,b" or
    # a trailing ", " would otherwise produce an empty tag name, which is
    # later used as a datastore key name by get_tag_key().
    return [process_tag(segment, tags)
            for segment in tags_string.split(",") if segment.strip()]
def get_friendly_url(title):
    """Turn a title into a URL-friendly slug.

    The title is stripped, whitespace runs become hyphens, every
    character that is neither a word character nor a hyphen is removed,
    and runs of hyphens are collapsed to a single hyphen.
    """
    slug = re.sub(r'\s+', '-', title.strip())
    slug = re.sub(r'[^\w-]', '', slug)
    return re.sub(r'-+', '-', slug)
def get_html(body, markup_type):
    """Return `body` as HTML.

    Only 'textile' markup is converted (through the bundled textile
    library); any other `markup_type` returns `body` unchanged.
    """
    if markup_type != 'textile':
        return body
    # Imported lazily so the library is only loaded when it is needed.
    from external.libs import textile
    return textile.textile(body)
def get_captcha(key):
    """Derive a captcha of up to six uppercase hex digits from `key`.

    The value is the hash of `key` (as a string) concatenated with the
    configured blog title, rendered in hex and truncated to six
    characters.
    """
    seed = str(key) + config.BLOG['title']
    hex_digits = "%X" % abs(hash(seed))
    return hex_digits[:6]
def get_sanitizer_func(handler, **kwargs):
    """Build a one-argument HTML sanitizer bound to the request's charset.

    Args:
        handler: request handler; its request's CONTENT_TYPE header is
            inspected for a `charset=` parameter.
        **kwargs: extra options forwarded to `sanitizer.sanitize_html`.

    Returns:
        A callable taking an HTML string and returning the result of
        `sanitizer.sanitize_html(html, **options)`, where `options` is
        `kwargs` plus an 'encoding' entry (lower-cased charset) when the
        Content-Type header declares one.
    """
    # A request may arrive without a Content-Type header; treat that as
    # "no charset declared" rather than failing with KeyError.
    content_type = handler.request.headers.get('CONTENT_TYPE') or ''
    match_obj = re.match(r'.*;\s*charset=(?P<charset>[\w-]+)', content_type)
    options = dict(kwargs)
    if match_obj:
        options['encoding'] = match_obj.group('charset').lower()
    logging.debug("Content-type: %s", content_type)
    logging.debug("In sanitizer: %s", options)
    return lambda html: sanitizer.sanitize_html(html, **options)
def do_sitemap_ping():
    """Notify Google that the sitemap at <root_url>/sitemap.xml changed.

    The ping is best-effort: a failed fetch is logged and otherwise
    ignored so that it cannot fail the request that triggered it.
    """
    form_fields = { "sitemap": "%s/sitemap.xml" % (config.BLOG['root_url'],) }
    # urlfetch only sends `payload` for POST/PUT requests, so for a GET the
    # sitemap location has to travel in the query string.
    ping_url = ("http://www.google.com/webmasters/tools/ping?" +
                urllib.urlencode(form_fields))
    try:
        urlfetch.fetch(url=ping_url, method=urlfetch.GET)
    except urlfetch.Error:
        logging.warning("Sitemap ping to %s failed", ping_url, exc_info=True)
def process_embedded_code(article):
    """Run the article's HTML through the code highlighter.

    Replaces `article.html` with the processed HTML and stores the
    second value returned by the highlighter in `article.embedded_code`.
    """
    # TODO -- Check for embedded code, escape opening triangular brackets
    # within code, and set article embedded_code strings so we can
    # use proper javascript.
    from utils import codehighlighter
    processed_html, languages = codehighlighter.process_html(article.html)
    article.html = processed_html
    article.embedded_code = languages
def process_article_edit(handler, permalink):
    """Apply the properties sent in a PUT body to an existing article.

    Args:
        handler: request handler whose request body holds the
            URI-encoded parameters.
        permalink: permalink of the article to update.

    Responds with 400 when no usable properties were sent and with 404
    when no article has the given permalink.  On success the article is
    stored, tag counters are adjusted for added/removed tags, the view
    cache is invalidated and a successful response pointing at the
    article is sent.
    """
    # For http PUT, the parameters are passed in URIencoded string in body
    body = handler.request.body
    # parse_qs yields a list per key; only the first value is used.
    params = dict((key, values[0])
                  for key, values in cgi.parse_qs(body).iteritems())
    property_hash = restful.get_sent_properties(params.get,
        ['title',
         ('body', get_sanitizer_func(handler, trusted_source=True)),
         ('format', get_format),
         ('updated', get_datetime),
         ('tags', get_tags),
         ('html', get_html, 'body', 'format')])
    if not property_hash:
        handler.error(400)
        return

    article = db.Query(models.blog.Article). \
        filter('permalink =', permalink).get()
    if not article:
        # Unknown permalink: report it instead of failing on None below.
        handler.error(404)
        return

    if 'tags' in property_hash:
        property_hash['tag_keys'] = [get_tag_key(name)
                                     for name in property_hash['tags']]
    before_tags = set(article.tag_keys)
    for key, value in property_hash.iteritems():
        setattr(article, key, value)
    after_tags = set(article.tag_keys)
    # Keep the per-tag article counters in step with the tag changes.
    for removed_tag in before_tags - after_tags:
        db.get(removed_tag).counter.decrement()
    for added_tag in after_tags - before_tags:
        db.get(added_tag).counter.increment()
    process_embedded_code(article)
    article.put()
    restful.send_successful_response(handler, '/' + article.permalink)
    view.invalidate_cache()
def process_article_submission(handler, article_type):
    """Create and store a new article from the submitted request fields.

    Args:
        handler: request handler supplying the submitted fields.
        article_type: key into `permalink_funcs` ('article' or
            'blog entry'); also stored on the new article.

    Responds with 400 when no usable properties were sent.  Otherwise
    the article is stored, its tag counters are incremented, a sitemap
    ping is issued, a successful response pointing at the new permalink
    is sent and the view cache is invalidated.
    """
    request_get = handler.request.get
    wanted_properties = [
        'title',
        ('body', get_sanitizer_func(handler, trusted_source=True)),
        'legacy_id',
        ('format', get_format),
        ('published', get_datetime),
        ('updated', get_datetime),
        ('tags', get_tags),
        ('html', get_html, 'body', 'format'),
        ('permalink', permalink_funcs[article_type], 'title', 'published'),
    ]
    submitted = restful.get_sent_properties(request_get, wanted_properties)
    if not submitted:
        handler.error(400)
        return

    if 'tags' in submitted:
        submitted['tag_keys'] = [get_tag_key(tag_name)
                                 for tag_name in submitted['tags']]
    submitted['format'] = 'html'    # For now, convert all to HTML
    submitted['article_type'] = article_type

    article = models.blog.Article(**submitted)
    associated_data = {
        'relevant_links': handler.request.get('relevant_links'),
        'amazon_items': handler.request.get('amazon_items'),
    }
    article.set_associated_data(associated_data)
    process_embedded_code(article)
    article.put()
    for tag_key in article.tag_keys:
        db.get(tag_key).counter.increment()
    do_sitemap_ping()
    restful.send_successful_response(handler, '/' + article.permalink)
    view.invalidate_cache()
def process_comment_submission(handler, article):
    """Validate, store and render a comment submitted for `article`.

    Args:
        handler: request handler supplying the submitted fields and
            receiving the rendered comment.
        article: the article entity being commented on.

    Status codes set on failure:
        400 -- no usable properties, an unusable parent comment key, no
               thread string could be generated, or the comment could
               not be stored.
        401 -- wrong/missing captcha for a non-admin user, or neither
               'key' nor 'thread' was submitted.

    On success the article's comment count is incremented, the comment
    is stored, an optional notification mail is sent, the rendered
    comment is written to the response and the view cache is invalidated.
    """
    sanitize_comment = get_sanitizer_func(handler,
                                          allow_attributes=['href', 'src'],
                                          blacklist_tags=['img'])
    property_hash = restful.get_sent_properties(handler.request.get,
        ['name',
         'email',
         'homepage',
         'title',
         ('body', sanitize_comment),
         'key',
         'thread',    # If it's given, use it.  Else generate it.
         'captcha',
         ('published', get_datetime)])
    if not property_hash:
        # Same convention as the article submission/edit functions.
        handler.error(400)
        return

    # If we aren't administrator, abort if bad captcha
    if not users.is_current_user_admin():
        expected_captcha = get_captcha(article.key())
        # .get(): a submission without any captcha must be rejected,
        # not crash with KeyError.
        received_captcha = property_hash.get('captcha')
        if received_captcha != expected_captcha:
            logging.info("Received captcha (%s) != %s",
                         received_captcha, expected_captcha)
            handler.error(401)      # Unauthorized
            return

    if 'key' not in property_hash and 'thread' not in property_hash:
        handler.error(401)
        return

    # Generate a thread string.
    if 'thread' not in property_hash:
        matchobj = re.match(r'[^#]+#comment-(?P<key>\w+)',
                            property_hash['key'])
        if matchobj:
            logging.debug("Comment has parent: %s", matchobj.group('key'))
            comment_key = matchobj.group('key')
            # TODO -- Think about GQL injection security issue since
            #         it can be submitted by public
            # The key comes from the public, so it may be malformed, of
            # the wrong kind, or refer to a comment that does not exist.
            try:
                parent = models.blog.Comment.get(db.Key(comment_key))
            except (db.BadKeyError, db.KindError):
                parent = None
            if not parent:
                handler.error(400)
                return
            thread_string = parent.next_child_thread_string()
        else:
            logging.debug("Comment is off main article")
            thread_string = article.next_comment_thread_string()
        if not thread_string:
            handler.error(400)
            return
        property_hash['thread'] = thread_string

    # 'key' only serves to locate the parent comment and must not reach
    # the Comment constructor; pop() also copes with it being absent
    # (a submission that supplied 'thread' directly).
    property_hash.pop('key', None)

    # Get and store some pieces of information from parent article.
    # TODO: See if this overhead can be avoided
    if not article.num_comments:
        article.num_comments = 1
    else:
        article.num_comments += 1
    property_hash['article'] = article.put()

    try:
        comment = models.blog.Comment(**property_hash)
        comment.put()
    except Exception:
        logging.debug("Bad comment: %s", property_hash, exc_info=True)
        handler.error(400)
        return

    # Notify the author of a new comment (from matteocrippa.it)
    if config.BLOG['send_comment_notification']:
        recipient = "%s <%s>" % (config.BLOG['author'], config.BLOG['email'],)
        body = ("A new comment has just been posted on %s/%s by %s."
                % (config.BLOG['root_url'], article.permalink, comment.name))
        mail.send_mail(sender=config.BLOG['email'],
                       to=recipient,
                       subject="New comment by %s" % (comment.name,),
                       body=body)

    # Render just this comment and send it to client
    view_path = view.find_file(view.templates, "bloog/blog/comment.html")
    response = template.render(
        os.path.join("views", view_path),
        { 'comment': comment, "use_gravatars": config.BLOG["use_gravatars"] },
        debug=config.DEBUG)
    handler.response.out.write(response)
    view.invalidate_cache()
def render_article(handler, article):
    """Write `article` to the response as JSON or as a rendered page.

    A falsy `article` produces a 404 status plus the 'notfound' view.
    When the first entry of the Accept header is 'application/json' the
    article's JSON form is written; otherwise the article page is
    rendered together with its captcha halves and display flags.
    """
    if not article:
        # This didn't fall into any of our pages or aliases.
        # Page not found.
        # could do --> self.redirect('/404.html')
        handler.error(404)
        not_found_page = view.ViewPage(cache_time=36000)
        not_found_page.render(handler, {'module_name': 'blog',
                                        'handler_name': 'notfound'})
        return

    # Check if client is requesting javascript and
    # return json if javascript is #1 in Accept header.
    try:
        accept_list = handler.request.headers['Accept']
    except KeyError:
        logging.error("Had no accept header: %s", handler.request.headers)
        accept_list = None

    wants_json = bool(accept_list) and \
        accept_list.split(',')[0] == 'application/json'
    if wants_json:
        handler.response.headers['Content-Type'] = 'application/json'
        handler.response.out.write(article.to_json())
        return

    # Generate two parts of a captcha that will use
    # display:none in between.  This step in the anti-spam
    # war race due to the following article:
    # http://techblog.tilllate.com/2008/07/20/ten-methods-to-obfuscate-e-mail-addresses-compared/
    captcha = get_captcha(article.key())

    # An unset (None) flag falls back to a computed default.
    two_columns = article.two_columns
    if two_columns is None:
        two_columns = article.is_big()
    allow_comments = article.allow_comments
    if allow_comments is None:
        age_in_days = (datetime.datetime.now() - article.published).days
        allow_comments = (age_in_days <= config.BLOG['days_can_comment'])

    template_values = {
        "two_columns": two_columns,
        "allow_comments": allow_comments,
        "article": article,
        "captcha1": captcha[:3],
        "captcha2": captcha[3:6],
        "use_gravatars": config.BLOG['use_gravatars'],
    }
    page = view.ViewPage()
    page.render(handler, template_values)
class NotFoundHandler(webapp.RequestHandler):
    """Answers GET with status 404 and this handler's rendered view."""

    def get(self):
        self.error(404)
        page = view.ViewPage(cache_time=36000)
        page.render(self)
class UnauthorizedHandler(webapp.RequestHandler):
    """Answers GET with status 403 and this handler's rendered view."""

    def get(self):
        self.error(403)
        page = view.ViewPage(cache_time=36000)
        page.render(self)
class RootHandler(restful.Controller):
    """Root resource: GET lists blog entries, POST creates an article."""

    def get(self):
        """Render blog entries, most recently published first."""
        logging.debug("RootHandler#get")
        page = view.ViewPage()
        entries_query = db.Query(models.blog.Article)
        entries_query = entries_query.filter('article_type =', 'blog entry')
        entries_query = entries_query.order('-published')
        page.render_query(self, 'articles', entries_query)

    @authorized.role("admin")
    def post(self):
        """Create a new undated article (admin only)."""
        logging.debug("RootHandler#post")
        process_article_submission(handler=self, article_type='article')
class ArticlesHandler(restful.Controller):
    """Lists undated articles."""

    def get(self):
        """Render articles of type 'article' ordered by title (limit 20)."""
        logging.debug("ArticlesHandler#get")
        page = view.ViewPage()
        articles_query = db.Query(models.blog.Article)
        articles_query = articles_query.filter('article_type =', 'article')
        articles_query = articles_query.order('title')
        page.render_query(self, 'articles', articles_query, num_limit=20)
# Articles are off root url
# TODO -- Make it DRY by combining Article/MonthHandler
class ArticleHandler(restful.Controller):
    """Resource for undated articles addressed by their permalink path."""

    def get(self, path):
        """Render the article at `path`, honouring legacy URL schemes.

        Precomputed legacy aliases redirect first.  If no article has
        `path` as its permalink, the legacy-id mapping is tried and,
        when configured, answered with a redirect to the real permalink.
        """
        logging.debug("ArticleHandler#get on path (%s)", path)
        # Handle precomputed legacy aliases
        # TODO: Use hash for case-insensitive lookup
        lowered_path = path.lower()
        for alias in legacy_aliases.redirects:
            if lowered_path == alias.lower():
                self.redirect(legacy_aliases.redirects[alias])
                return

        # Check undated pages
        article = db.Query(models.blog.Article). \
            filter('permalink =', path).get()

        if not article:
            # This lets you map arbitrary URL patterns like /node/3
            # to article properties, e.g. 3 -> legacy_id property
            article = legacy_id_mapping(path,
                                        config.BLOG["legacy_blog_software"])
            if article and config.BLOG["legacy_entry_redirect"]:
                self.redirect('/' + article.permalink)
                return

        render_article(self, article)

    @restful.methods_via_query_allowed
    def post(self, path):
        """Add a comment to the article at `path` (400 if none exists)."""
        article = db.Query(models.blog.Article). \
            filter('permalink =', path).get()
        if article:
            process_comment_submission(self, article)
        else:
            # Same handling as BlogEntryHandler.post.
            logging.debug("No article attached to submitted comment")
            self.error(400)

    @authorized.role("admin")
    def put(self, path):
        """Edit the article at `path` (admin only)."""
        logging.debug("ArticleHandler#put")
        process_article_edit(self, permalink = path)

    @authorized.role("admin")
    def delete(self, path):
        """
        By using DELETE on /Article, /Comment, /Tag, you can delete the first
        entity of the desired kind.
        This is useful for writing utilities like clear_datastore.py.

        Any other path is treated as the permalink of an article to
        delete; an unknown permalink is answered with 404.
        """
        model_class = path.lower()
        logging.debug("ArticleHandler#delete on %s", path)

        bulk_kinds = {
            'article': models.blog.Article,
            'comment': models.blog.Comment,
            'tag': models.blog.Tag,
        }
        if model_class in bulk_kinds:
            self._delete_first_entity(model_class,
                                      bulk_kinds[model_class].all())
            return

        article = db.Query(models.blog.Article). \
            filter('permalink =', path).get()
        if not article:
            self.error(404)
            return
        for key in article.tag_keys:
            db.get(key).counter.decrement()
        article.delete()
        view.invalidate_cache()
        restful.send_successful_response(self, "/")

    def _delete_first_entity(self, model_class, query):
        """Delete the first entity of `query` and report what was deleted.

        Sets status 204 when the query yields no entity.
        """
        targets = query.fetch(limit=1)
        if not targets:
            self.response.set_status(204,
                                     'No more ' + model_class + ' entities')
            return
        target = targets[0]
        if hasattr(target, 'title'):
            title = target.title
        elif hasattr(target, 'name'):
            title = target.name
        else:
            title = ''
        # A None title/name cannot be concatenated into the message below.
        title = title or ''
        logging.debug('Deleting %s %s', model_class, title)
        target.delete()
        self.response.out.write('Deleted ' + model_class + ' ' + title)
        view.invalidate_cache()
# Blog entries are dated articles
class BlogEntryHandler(restful.Controller):
    """Resource for blog entries addressed as <year>/<month>/<stem>."""

    @staticmethod
    def _permalink(year, month, perm_stem):
        """Join the URL parts into the permalink stored on the article."""
        return year + '/' + month + '/' + perm_stem

    def get(self, year, month, perm_stem):
        """Render the blog entry (or the not-found page)."""
        logging.debug("BlogEntryHandler#get for year %s, "
                      "month %s, and perm_link %s",
                      year, month, perm_stem)
        article = db.Query(models.blog.Article). \
            filter('permalink =',
                   self._permalink(year, month, perm_stem)).get()
        render_article(self, article)

    @restful.methods_via_query_allowed
    def post(self, year, month, perm_stem):
        """Add a comment to the blog entry (400 if it does not exist)."""
        logging.debug("Adding comment for blog entry %s", self.request.path)
        permalink = self._permalink(year, month, perm_stem)
        article = db.Query(models.blog.Article). \
            filter('permalink =', permalink).get()
        if article:
            process_comment_submission(self, article)
        else:
            logging.debug("No article attached to submitted comment")
            self.error(400)

    @authorized.role("admin")
    def put(self, year, month, perm_stem):
        """Edit the blog entry (admin only)."""
        permalink = self._permalink(year, month, perm_stem)
        logging.debug("BlogEntryHandler#put")
        process_article_edit(handler = self, permalink = permalink)

    @authorized.role("admin")
    def delete(self, year, month, perm_stem):
        """Delete the blog entry (admin only); 404 if it does not exist."""
        permalink = self._permalink(year, month, perm_stem)
        logging.debug("Deleting blog entry %s", permalink)
        article = db.Query(models.blog.Article). \
            filter('permalink =', permalink).get()
        if not article:
            # Nothing to delete; avoid dereferencing None below.
            self.error(404)
            return
        for key in article.tag_keys:
            db.get(key).counter.decrement()
        article.delete()
        view.invalidate_cache()
        restful.send_successful_response(self, "/")
class TagHandler(restful.Controller):
    """Lists the articles carrying a given tag."""

    def get(self, encoded_tag):
        """Render articles tagged with the decoded `encoded_tag`.

        The tag may arrive singly ('%20') or doubly ('%2520')
        percent-escaped; both forms are decoded in one pass.
        """
        # Work on bytes so that multi-byte UTF-8 escapes can be
        # reassembled and decoded as a whole afterwards.
        if isinstance(encoded_tag, unicode):
            encoded_tag = encoded_tag.encode('utf-8')
        # Escapes are two *hex* digits; matching only decimal digits
        # would leave sequences such as %2F or %C3 undecoded.
        tag = re.sub(r'(?:%25|%)([0-9A-Fa-f]{2})',
                     lambda cmatch: chr(int(cmatch.group(1), 16)),
                     encoded_tag)
        tag = tag.decode('utf-8', 'replace')
        page = view.ViewPage()
        page.render_query(
            self, 'articles',
            db.Query(models.blog.Article).filter('tags =',
                                                 tag).order('-published'),
            {'tag': tag})
class SearchHandler(restful.Controller):
    """Full-text search over articles using the 's' request parameter."""

    def get(self):
        """Render matching articles, or an error message when the
        datastore reports that the search needs a missing index."""
        from google.appengine.api import datastore_errors
        search_term = self.request.get("s")
        query_string = 's=' + urllib.quote_plus(search_term) + '&'
        page = view.ViewPage()
        try:
            matches = models.blog.Article.all().search(search_term)
            matches = matches.order('-published')
            template_values = {'search_term': search_term,
                               'query_string': query_string}
            page.render_query(self, 'articles', matches, template_values)
        except datastore_errors.NeedIndexError:
            error_message = (
                "\nSorry, full-text searches are currently limited"
                "\nto single words until a later AppEngine update.\n")
            page.render(self, {'search_term': search_term,
                               'search_error_message': error_message})
class YearHandler(restful.Controller):
    """Lists the articles published in a given year."""

    def get(self, year):
        """Render articles published between Jan 1 00:00:00 and
        Dec 31 23:59:59 of `year`, newest first."""
        logging.debug("YearHandler#get for year %s", year)
        year_number = int(year)
        start_date = datetime.datetime(year_number, 1, 1)
        end_date = datetime.datetime(year_number, 12, 31, 23, 59, 59)
        page = view.ViewPage()
        year_query = db.Query(models.blog.Article).order('-published')
        year_query = year_query.filter('published >=', start_date)
        year_query = year_query.filter('published <=', end_date)
        page.render_query(
            self, 'articles', year_query,
            {'title': 'Articles for ' + year, 'year': year})
class MonthHandler(restful.Controller):
    """Lists the articles of a month and accepts new blog entries."""

    def get(self, year, month):
        """Render articles published during `month` of `year`, newest
        first.  A year/month that is not a valid date yields a 404."""
        logging.debug("MonthHandler#get for year %s, month %s", year, month)
        try:
            start_date = datetime.datetime(int(year), int(month), 1)
            # The upper bound is the first instant of the following
            # month.  Hard-coding day 31 raises ValueError for every
            # month that has fewer than 31 days.
            if start_date.month == 12:
                next_month = datetime.datetime(start_date.year + 1, 1, 1)
            else:
                next_month = datetime.datetime(start_date.year,
                                               start_date.month + 1, 1)
        except ValueError:
            # e.g. month 0 or 13, or a year outside datetime's range
            self.error(404)
            return
        page = view.ViewPage()
        page.render_query(
            self, 'articles',
            db.Query(models.blog.Article).order('-published'). \
                filter('published >=', start_date). \
                filter('published <', next_month),
            {'title': 'Articles for ' + month + '/' + year,
             'year': year, 'month': month})

    @authorized.role("admin")
    def post(self, year, month):
        """ Add a blog entry. Since we are POSTing, the server handles
        creation of the permalink url. """
        logging.debug("MonthHandler#post on date %s, %s", year, month)
        process_article_submission(handler=self, article_type='blog entry')
class AtomHandler(webapp.RequestHandler):
    """Serves the Atom feed of the ten most recently published entries."""

    def get(self):
        logging.debug("Sending Atom feed")
        feed_query = db.Query(models.blog.Article)
        feed_query = feed_query.filter('article_type =', 'blog entry')
        articles = feed_query.order('-published').fetch(limit=10)
        # The feed timestamp comes from the first fetched entry, if any.
        if articles:
            updated = articles[0].rfc3339_updated()
        else:
            updated = ''

        self.response.headers['Content-Type'] = 'application/atom+xml'
        template_values = {"blog_updated_timestamp": updated,
                           "articles": articles,
                           "ext": "xml"}
        page = view.ViewPage()
        page.render(self, template_values)
class SitemapHandler(webapp.RequestHandler):
    """Serves the XML sitemap listing up to 1000 articles."""

    def get(self):
        logging.debug("Sending Sitemap")
        newest_first = db.Query(models.blog.Article).order('-published')
        articles = newest_first.fetch(1000)
        if not articles:
            # Nothing is written when there are no articles.
            return
        self.response.headers['Content-Type'] = 'text/xml'
        template_values = {
            "articles": articles,
            "ext": "xml",
            "root_url": config.BLOG['root_url'],
        }
        page = view.ViewPage()
        page.render(self, template_values)