diff --git a/apps/customercare/cron.py b/apps/customercare/cron.py
new file mode 100644
index 00000000000..8d3d1511c09
--- /dev/null
+++ b/apps/customercare/cron.py
@@ -0,0 +1,123 @@
+import calendar
+from contextlib import closing
+from datetime import datetime
+import json
+import logging
+import re
+import rfc822
+import urllib
+
+from django.conf import settings
+
+import cronjobs
+
+from .models import Tweet
+
+
+SEARCH_URL = 'http://search.twitter.com/search.json'
+
+# Raw strings keep the backslash escapes intact for the regex engine.
+LINK_REGEX = re.compile(r'https?\:', re.IGNORECASE)
+MENTION_REGEX = re.compile(r'(^|\W)@')
+RT_REGEX = re.compile(r'^rt\W', re.IGNORECASE)
+
+log = logging.getLogger('k.twitter')
+
+
+@cronjobs.register
+def collect_tweets():
+    """
+    Collect new tweets about Firefox.
+
+    Queries the Twitter search API, passes every result through
+    _filter_tweet() and saves the remaining ones as Tweet rows. Afterwards
+    the stored tweets are truncated to (approx.) settings.CC_MAX_TWEETS.
+    """
+    search_options = {
+        'q': 'firefox',
+        'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
+        'result_type': 'recent',  # Retrieve tweets by date.
+    }
+
+    # If we already have some tweets, collect nothing older than what we have.
+    try:
+        latest_tweet = Tweet.objects.all()[0]
+    except IndexError:
+        log.debug('No existing tweets. Retrieving %d tweets from search.',
+                  settings.CC_TWEETS_PERPAGE)
+    else:
+        search_options['since_id'] = latest_tweet.tweet_id
+        # since_id is exclusive: only tweets with a greater id are returned.
+        log.debug('Retrieving tweets with id > %s', latest_tweet.tweet_id)
+
+    # Retrieve Tweets
+    try:
+        url = '%s?%s' % (SEARCH_URL, urllib.urlencode(search_options))
+        # closing() releases the connection even when parsing fails.
+        with closing(urllib.urlopen(url)) as response:
+            raw_data = json.load(response)
+    except Exception as e:
+        # Best effort: a failed request only skips this cron run.
+        log.warning('Twitter request failed: %s', e)
+        return
+
+    if not ('results' in raw_data and raw_data['results']):
+        log.info('Twitter returned 0 results.')
+        return
+
+    # Drop tweets into DB
+    for item in raw_data['results']:
+        log.debug('Handling tweet %d: %s...', item['id'], item['text'][:50])
+        # Apply filters to tweet before saving
+        item = _filter_tweet(item)
+        if not item:
+            continue
+
+        # parsedate() ignores the UTC offset, so this assumes created_at is
+        # reported in UTC (+0000).
+        created_date = datetime.utcfromtimestamp(calendar.timegm(
+            rfc822.parsedate(item['created_at'])))
+
+        tweet = Tweet(tweet_id=item['id'], raw_json=json.dumps(item),
+                      created=created_date)
+        tweet.save()
+        log.debug('Tweet %d saved.', item['id'])
+
+    # When all is done, truncate list of tweets to (approx.) maximum number.
+    try:
+        keep_tweet = Tweet.objects.all()[settings.CC_MAX_TWEETS]
+    except IndexError:
+        # No more than the maximum number of tweets stored: nothing to do.
+        pass
+    else:
+        log.debug('Truncating tweet list: Removing tweets older than %s.',
+                  keep_tweet.created)
+        Tweet.objects.filter(created__lte=keep_tweet.created).delete()
+
+
+def _filter_tweet(item):
+    """
+    Apply some filters to an incoming tweet.
+
+    Used to exclude replies, mentions, retweets and tweets containing
+    links. Returns the tweet if it passes all filters, or None if it is
+    to be discarded.
+    """
+    text = item['text']
+
+    # No replies, no mentions
+    if item.get('to_user_id') or MENTION_REGEX.search(text):
+        log.debug('Tweet %d discarded (reply).', item['id'])
+        return None
+
+    # No retweets
+    if RT_REGEX.search(text) or '(via ' in text:
+        log.debug('Tweet %d discarded (retweet).', item['id'])
+        return None
+
+    # No links
+    if LINK_REGEX.search(text):
+        log.debug('Tweet %d discarded (link).', item['id'])
+        return None
+
+    return item
diff --git a/apps/customercare/models.py b/apps/customercare/models.py
index e69de29bb2d..78b84f49fe7 100644
--- a/apps/customercare/models.py
+++ b/apps/customercare/models.py
@@ -0,0 +1,15 @@
+from datetime import datetime
+
+from django.db import models
+
+from sumo.models import ModelBase
+
+
+class Tweet(ModelBase):
+    """An entry on twitter."""
+    tweet_id = models.BigIntegerField()
+    raw_json = models.TextField()
+    created = models.DateTimeField(default=datetime.now, db_index=True)
+
+    class Meta:
+        ordering = ('-created',)
diff --git a/migrations/41-customercare-tweets.sql b/migrations/41-customercare-tweets.sql
new file mode 100644
index 00000000000..c59d95de1c3
--- /dev/null
+++ b/migrations/41-customercare-tweets.sql
@@ -0,0 +1,10 @@
+BEGIN;
+CREATE TABLE `customercare_tweet` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `tweet_id` bigint NOT NULL,
+    `raw_json` longtext NOT NULL,
+    `created` datetime NOT NULL
+)
+;
+CREATE INDEX `customercare_tweet_3216ff68` ON `customercare_tweet` (`created`);
+COMMIT;
diff --git a/settings.py b/settings.py
index 036e0ab2421..80f5f8f2f11 100644
--- a/settings.py
+++ b/settings.py
@@ -188,6 +188,7 @@
     'product_details',
     'wiki',
     'gallery',
+    'customercare',
 )
 
 # Extra apps for testing
@@ -418,3 +419,7 @@ def JINJA_CONFIG():
 GALLERY_VIDEO_THUMBNAIL_PATH = 'uploads/gallery/videos/thumbnails/'
 THUMBNAIL_PROGRESS_URL = MEDIA_URL + 'img/wait-trans.gif'
 VIDEO_MAX_FILESIZE = 16777216  # 16 megabytes, in bytes
+
+# Customer care tweet collection settings
+CC_MAX_TWEETS = 500  # Max. no. of tweets in DB
+CC_TWEETS_PERPAGE = 100  # How many tweets to collect in one go. Max: 100.