/
twitter-search.rb
49 lines (32 loc) · 1.29 KB
/
twitter-search.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#! /opt/local/bin/ruby
require 'rubygems'
require 'couchrest'
require 'hpricot'
require 'open-uri'
# COUCHDB = "http://127.0.0.1:5984/twitterprops"
# CouchRest::Model.default_database = CouchRest.database!(COUCHDB)
require 'config/database.rb'
# Fetches the Twitter search Atom feed for "prop(s)"/"drop(s)" tweets, converts
# each entry whose title matches the props/drops pattern into a Message hash,
# and stores all of them in CouchDB with a single bulk save.

# Default CouchRest database (presumably set up by config/database.rb).
@db = CouchRest::Model.default_database
uri = "http://search.twitter.com/search.atom?q=prop+OR+props+OR+drop+OR+drops"
#data = File.open("/Users/integrum/Desktop/twitter.xml")
doc = Hpricot(open(uri))
record_set = []
#@db.view("messages/by_all")['rows'].each {|n| @db.delete(n['value']) }

(doc/'entry').each do |entry|
  title = (entry/'title').inner_html

  # Capture 1 is the action ("prop"/"drop", case-insensitive), capture 2 the
  # receiver's handle. Entries whose title does not match are skipped.
  parsed = title.match(/^@?([pd]rop)s?(?:\sto\s)?\s*@?(\w+)\s*(.*)/i)
  next unless parsed

  record = { :action => parsed[1], :receiver => parsed[2] }
  record[:content] = (entry/'content').inner_html

  # The handle is the first whitespace-delimited token of the author name; the
  # display name is whatever sits inside parentheses. A name without
  # parentheses yields a nil display name instead of aborting the import.
  name = (entry/'author'/'name').inner_html
  display_name = name.match(/\((.*)\)/)
  record[:author] = {
    :handle => name.split(" ").first,
    :name   => display_name ? display_name[1] : nil
  }

  record[:published_at] = (entry/'published').inner_html
  record[:"couchrest-type"] = "Message"

  # Either link may be absent from an entry; store nil rather than calling
  # ['href'] on nil.
  status_link = (entry/'link[@rel="alternate"]').first
  image_link  = (entry/'link[@rel="image"]').first
  record[:status_uri] = status_link && status_link['href']
  record[:image_uri]  = image_link && image_link['href']

  # The tweet id is the last colon-separated segment of the Atom <id>.
  record[:tweet_id] = (entry/'id').inner_html.split(":").last

  record_set << record
end

@db.bulk_save(record_set)