Fork of code from ScraperWiki at https://classic.scraperwiki.com/scrapers/basic_twitter_scrapertejpal_tehelka/
Jook3r · Nov 1, 2015 · 813a344 · 813a344
commit 813a344
Show file tree

Hide file tree

Showing 2 changed files with 36 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+# Ignore output of scraper
+data.sqlite
diff --git a/scraper.py b/scraper.py
@@ -0,0 +1,34 @@
+###################################################################################
+# Twitter scraper - designed to be forked and used for more interesting things
+###################################################################################
+
+import scraperwiki
+import simplejson
+import urllib2
+
+# Change QUERY to your search term of choice.
+# Examples: 'newsnight', 'from:bbcnewsnight', 'to:bbcnewsnight'
+QUERY = 'Tejpal'  # URL-quoted and sent as the `q` parameter of each request
+RESULTS_PER_PAGE = '100'  # sent as the `rpp` parameter of each request
+LANGUAGE = 'en'  # sent as the `lang` parameter of each request
+NUM_PAGES = 1000  # pages 1..NUM_PAGES are requested unless a failure ends the loop early
+
+# Request the result pages in order, saving every result; stop at the first failure.
+for page in range(1, NUM_PAGES+1):
+    base_url = 'http://search.twitter.com/search.json?q=%s&rpp=%s&lang=%s&page=%s' \
+         % (urllib2.quote(QUERY), RESULTS_PER_PAGE, LANGUAGE, page)
+    try:
+        results_json = simplejson.loads(scraperwiki.scrape(base_url))
+        for result in results_json['results']:
+            # Copy only the fields that get stored; `id` is passed to save() as the key.
+            data = {}
+            data['id'] = result['id']
+            data['text'] = result['text']
+            data['from_user'] = result['from_user']
+            data['created_at'] = result['created_at']
+            print data['from_user'], data['text']
+            scraperwiki.sqlite.save(["id"], data)
+    except Exception as err:  # not a bare except: let Ctrl-C / SystemExit propagate
+        print 'Oh dear, failed to scrape %s' % base_url
+        print 'Reason: %r' % (err,)
+        break