# public
# Description: Snippets of code that I wrote
# Homepage: http://www.hubeify.com
# Clone URL: git://github.com/hwong/hwong.git
# hwong / twitter / public_timeline_link_farmer.rb
# 100644 50 lines (43 sloc) 1.038 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
require 'net/http'
require 'rexml/document'
require 'mechanize'
 
# Finds the HTTP URLs contained in a piece of text.
#
# The text is split on whitespace and each word is searched for a substring
# that starts with "http://". The match is greedy but has to end on a word
# boundary, so trailing non-word characters (such as a final "." or "/")
# are dropped. Only the "http://" scheme is recognised, and at most one URL
# is taken from each word.
#
# @param text [String, nil] the text to scan; nil is treated as empty text
# @return [Array<String>] the URLs found, in order of appearance
def urlfinder(text)
  # An XML element without text content yields nil rather than "".
  return [] if text.nil?

  # String#[] with a regexp returns the matched substring, or nil if no match.
  text.split(" ").map { |word| word[/http:\/\/.*\/?\b/] }.compact
end
 
# Download the public timeline and keep the response body (XML) as a string
url = "http://twitter.com/statuses/public_timeline.xml"
xml_data = Net::HTTP.get(URI.parse(url))
 
# Extract all the statuses.
# The status text is read positionally: it is the text of the third child
# element of every child of the document's first (root) element.
# Entries that have no third child element, or whose third child has no
# text, are skipped so that `statuses` only ever contains strings.
statuses = []
document = REXML::Document.new(xml_data)
root = document.elements[1]
if root
  root.elements.each do |element|
    text_element = element.elements[3]
    status_text = text_element && text_element.text
    statuses << status_text if status_text
  end
end
 
# Extract all the links.
# urlfinder returns a flat array of URL strings for each status, so the
# per-status arrays are simply concatenated. (Iterating each returned string
# with #each fails on Ruby 1.9+, where String no longer responds to #each.)
links = statuses.flat_map { |status| urlfinder(status) }
 
# Print each link.
# A link that answers with a success status is printed as-is. A link that
# answers with a redirect is printed as its redirect target instead; only
# that first hop is followed. Any other response raises via #error!, which
# stops the loop.
links.each do |link|
  response = Net::HTTP.get_response(URI.parse(link))
  case response
  when Net::HTTPSuccess
    puts link
  when Net::HTTPRedirection
    location = response['location']
    # A redirect without a Location header cannot be followed.
    response.error! unless location
    # Location may be relative, so resolve it against the requested link.
    redirect = URI.join(link, location)
    # The target is still requested so that an unreachable target raises
    # here; the response itself is not inspected.
    Net::HTTP.get_response(redirect)
    puts redirect
  else
    response.error!
  end
end