public
Description: A Ruby RSS Archiver
Homepage: http://cecs.anu.edu.au/~mreid/code/feed_bag.html
Clone URL: git://github.com/mreid/feed-bag.git
feed-bag / feedbag.rb
100755 108 lines (99 sloc) 2.788 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/local/bin/ruby
#
# Feed Bag - An RSS Feed Archiver
#
# USAGE
# feedbag [OPTIONS] [RSS_URL ...]
#
# ARGUMENTS
#
# When provided, each RSS_URL is added to the database (unless a feed with
# that URL already exists) so that it is scanned on the next run.
#
# When no arguments are provided, all the existing feeds in the database
# will be scanned for new items.
#
# OPTIONS
# -d --db DB Use the given SQLite3 database file DB
# -C --clean Destroy the database and rebuild (be careful!)
# -l --list List all the feeds
# -h --help Show a help message
#
# AUTHOR
# Mark D. Reid <mark.reid@anu.edu.au>
#
# CREATED
# 2008-01-18
 
require 'rubygems'
require 'feed-normalizer'
require 'Sequel'
require 'optparse'
 
# Rebuilds the feeds table by calling Feed.create_table!; part of the
# --clean operation that wipes the database.
def clean_feeds
  Feed.create_table!
end
# Rebuilds the entries table by calling Entry.create_table!; part of the
# --clean operation that wipes the database.
def clean_entries
  Entry.create_table!
end
 
# Wipes the entire database clean by rebuilding both tables:
# entries first, then feeds.
def clean
  Entry.create_table!
  Feed.create_table!
end
 
# Points Sequel at the SQLite database file +db+, loads the model
# definitions found next to this script, and creates the feed and entry
# tables when they do not exist yet.
def use(db)
  Sequel.open "sqlite:///#{db}"

  # 'models' is resolved relative to this script's directory and is
  # required only here, after the connection has been opened.
  script_dir = File.expand_path(File.dirname(__FILE__))
  $LOAD_PATH << script_dir
  require 'models'

  # First run, or a run following a clean: create whichever table is missing.
  Feed.create_table if !Feed.table_exists?
  Entry.create_table if !Entry.table_exists?
end
 
# Scans a single feed for new items and archives them.
#
# Opens feed.url, parses the result with FeedNormalizer and saves an Entry
# for every item published after feed.last_checked. A tab-indented title is
# printed for each saved item and a "." for each item that is skipped.
# feed.tick is called once all items have been processed (it is defined in
# the models; presumably it advances last_checked -- confirm there).
#
# Two situations are handled explicitly rather than raising NoMethodError:
# * the document cannot be parsed (parse returns nil): a warning is written
#   to stderr and feed.tick is NOT called.
# * an item carries no publication date: it cannot be compared with
#   feed.last_checked, so it is skipped like an old item.
#
# feed - an object responding to url, last_checked, id and tick.
def scan(feed)
  feedin = FeedNormalizer::FeedNormalizer.parse open(feed.url)
  if feedin.nil?
    warn "\tUnable to parse feed at #{feed.url}"
    return
  end

  feedin.items.each do |item|
    published = item.date_published
    if published && published > feed.last_checked
      puts "\t#{item.title}"
      entry = Entry.new
      entry.url = item.url
      entry.title = item.title
      entry.content = item.content
      # Do not store the description when it merely duplicates the content.
      entry.description = item.description unless item.description == item.content
      entry.time = published
      entry.feed_id = feed.id
      entry.save
    else
      print "."
    end
  end
  feed.tick
end
 
# Parse the command-line options.
#
# The handlers only record what was requested; the database is opened and the
# requested action performed after parsing has finished. This makes the result
# independent of option order (`-l -d DB` behaves like `-d DB -l`) and lets a
# missing -d be reported clearly instead of failing later on an undefined Feed.
db_file = nil
action = nil

opts = OptionParser.new
opts.banner = "Usage: feedbag.rb [options] [feed_url]+"
opts.on('-d', '--db DB', 'Use feed database DB') { |db| db_file = db }
# When both -l and -C are given, the first one on the command line wins.
opts.on('-l', '--list', 'List all the feeds') { action ||= :list }
opts.on('-C', '--clean', 'Wipes the current feed DB') { action ||= :clean }
opts.on_tail("-h", "--help", "Show this message") do
  puts opts
  exit
end

begin
  opts.parse!
rescue OptionParser::ParseError => e
  # Unknown option or missing argument: show the problem and the usage text.
  abort "#{e.message}\n#{opts}"
end

# Every remaining code path needs the models, which only exist once a
# database has been opened.
abort "No feed database given (use -d DB)\n#{opts}" if db_file.nil?
use(db_file) ; puts "Using #{db_file} for Feed DB"

case action
when :list
  Feed.each { |feed| puts "#{feed.id}: #{feed.name} (Checked: #{feed.last_checked}) - #{feed.entries.size}\n" }
  exit
when :clean
  clean ; puts "Cleaned DB!"
  exit
end
 
# Without URL arguments, scan every feed already in the database for new
# items. Otherwise register each URL given on the command line, reporting the
# ones that already exist.
if ARGV.empty?
  Feed.each { |feed| puts "\nScanning #{feed.name}"; scan feed }
else
  # Add RSS URLs to the database
  ARGV.each do |arg|
    # A hash condition is used instead of the block form `{:url == arg}`:
    # as plain Ruby that block just evaluates to false, and it only worked
    # through Sequel's old ParseTree-based block translation. One lookup
    # also replaces the original's two identical queries.
    feed = Feed.filter(:url => arg).first
    if feed.nil?
      puts "Creating new feed for #{arg}"
      Feed.create(:url => arg)
    else
      puts "Feed entitled '#{feed.name}' already exists for #{arg}"
    end
  end
end