require "rubygems"
gem 'actionpack', '~> 2.2'
require 'builder'
require 'action_view/helpers/atom_feed_helper'
require 'hpricot'
require 'net/http'
require 'uri'
require 'iconv'
require 'sqlite3'
# Mix the Atom helper into the top-level object so that atom_feed can be
# called from the plain methods defined in this script (see render_feed).
include ActionView::Helpers::AtomFeedHelper
# Path of the SQLite file used as the article cache. CORE_PATH is concatenated
# as-is (no separator is inserted), so it has to end with a path separator.
# When CORE_PATH is unset, nil interpolates to "" and the path becomes the
# relative "pudelek.db".
PUDELEK_DB_FILE = "#{ENV['CORE_PATH']}pudelek.db"
# Base URL used for the feed id and for the feed's links in render_feed.
BLOG_URL = "http://blog.mocna-kawa.com"
# Downloads +address+ over HTTP and returns the response body transcoded from
# ISO-8859-2 to UTF-8.
#
# address - absolute http:// URL as a String.
#
# Returns the converted body as a String. The HTTP status is not inspected;
# errors raised by Net::HTTP or Iconv propagate to the caller.
def fetch_remote_file(address)
  url = URI.parse(address)
  response = Net::HTTP.start(url.host, url.port) do |http|
    # request_uri keeps the query string and yields "/" for an empty path.
    # url.path alone silently dropped "?..." and produced an empty request
    # target for addresses such as "http://host".
    http.get(url.request_uri)
  end
  Iconv.conv('UTF-8', 'ISO-8859-2', response.body)
end
# Fetches an article page and extracts the HTML of its main container.
#
# article_url - URL of the article page as a String.
#
# Returns a two-element Array [body_html, timestamp] built from the first
# "#article_container" element. The timestamp is the time of the scrape
# (Time.now), not a date read from the page. When no container matches, the
# empty result of the search is returned instead, which destructures to
# nil, nil at the call site.
def get_article_body_and_date(article_url)
  page = Hpricot(fetch_remote_file(article_url))
  page.search("#article_container").each do |container|
    # Drop every element matched by this selector list before serialising.
    unwanted = container / "script, h2, .tags, #goto_sg, #boom, .spacer, .date"
    unwanted.remove
    # Only the first container is used: returning here leaves the method.
    return [container.inner_html, Time.now]
  end
end
# Small wrapper around a database handle that reads and writes scraped
# articles in the +items+ table.
class DB
  # db - handle responding to #execute(sql, *bind_values); the script passes
  #      a SQLite3::Database.
  def initialize(db)
    @db = db
  end

  # Looks up a stored article by its id.
  #
  # Returns the first matching row, or nil when no row matches. Despite the
  # "?" suffix the row itself is returned, and callers index into it.
  def article_exists?(id)
    @db.execute('select * from items where id = ?', id).first
  end

  # Stores one article. The three values are bound positionally in the order
  # id, date, body, so they rely on the column order of +items+.
  #
  # Returns whatever the underlying #execute call returns.
  def insert_article(article_id, article_date, article_body)
    @db.execute('insert into items values(?, ?, ?)', article_id, article_date, article_body)
  end
end
# Builds one feed item from a link element, using the cache when possible.
#
# db           - object responding to #article_exists?(id) and
#                #insert_article(id, date, body).
# item_url     - link element responding to #inner_html and #attributes
#                (an Hpricot element in this script).
# title_prefix - String put in front of the link text (default: '').
#
# Returns a Hash with the keys :title, :url, :id, :body and :date. :date is
# always a Time; :body is nil when the article page could not be parsed.
def extract_item(db, item_url, title_prefix = '')
  article_title = title_prefix + item_url.inner_html
  article_url = item_url.attributes['href']
  # The id is taken from the fifth "/"-separated piece of the href.
  # NOTE(review): this assumes absolute URLs shaped like
  # http://host/<section>/<id>/... — confirm against the live markup.
  article_id = article_url.split('/')[4]

  if (cached = db.article_exists?(article_id))
    # Cached rows are read positionally: [id, date, body].
    article_body = cached[2]
    article_date = Time.at(cached[1].to_i)
  else
    article_body, article_date = get_article_body_and_date(article_url)
    # A page without the expected container yields nil for both values.
    # Previously that was stored as a NULL body with timestamp 0, so the
    # article was never fetched again, and :date came back as nil.
    article_date ||= Time.now
    db.insert_article(article_id, article_date.to_i, article_body) if article_body
  end

  { :title => article_title, :url => article_url, :id => article_id,
    :body => article_body, :date => article_date }
end
# Scrapes the pudelek.pl front page and turns its article links into feed
# items.
#
# db - cache object handed through to extract_item.
#
# Returns an Array of item Hashes (see extract_item) ordered by :id,
# descending. Ids are compared with <=> as they come from extract_item, so
# String ids sort lexicographically.
def fetch_items(db)
  front_page = Hpricot(fetch_remote_file('http://www.pudelek.pl/'))

  # Both link lists are walked the same way now; the second one used to hand
  # its block straight to #search instead of calling #each on the result.
  article_items = front_page.search("#left_column_container ul li h4 a").map do |link|
    extract_item(db, link)
  end
  # Links from the middle column get a "FOTO: " title prefix.
  photo_items = front_page.search("#middle_column_container ul li.teaser_box span.link a").map do |link|
    extract_item(db, link, 'FOTO: ')
  end

  (article_items + photo_items).sort { |a, b| b[:id] <=> a[:id] }
end
# Renders the scraped items as an Atom feed through ActionView's atom_feed
# helper.
#
# items - Array of Hashes with the keys :title, :url, :body and :date, newest
#         first (as produced by fetch_items). May be empty.
#
# Returns whatever atom_feed returns; the script writes that value to the
# output file, so it is presumably the rendered XML.
def render_feed(items)
  # NOTE(review): `xml` looks unused, but ActionPack 2.x's atom_feed appears
  # to look up a local named `xml` through the block's binding and use it as
  # the output builder — confirm before renaming or removing it.
  xml = Builder::XmlMarkup.new
  newest = items.first
  atom_feed({ :id => BLOG_URL + "/pudelek", :root_url => BLOG_URL, :language => 'pl_PL', :url => BLOG_URL }) do |feed|
    feed.title("Pudelek Nieoficjalnie")
    feed.url(BLOG_URL)
    # An empty item list used to raise NoMethodError on items[0][:date];
    # an empty feed is stamped with the current time instead.
    feed.updated(newest ? newest[:date] : Time.now)
    items.each do |item|
      # No record object is passed (nil); id, link and publication date all
      # come from the explicit options.
      feed.entry(nil, { :id => item[:url], :published => item[:date], :url => item[:url] }) do |entry|
        entry.title(item[:title])
        entry.content(item[:body], :type => 'html')
      end
    end
  end
end
# Writes +content+ to +filename+, creating the file or truncating an existing
# one.
#
# filename - path of the file to write.
# content  - value appended with <<.
#
# Returns nil. Errors from opening or writing propagate to the caller.
def to_file(filename, content)
  # The block form closes the handle even when the write raises; the earlier
  # open / << / close sequence leaked it in that case.
  File.open(filename, 'w') { |file| file << content }
  nil
end
# Script entry point. Expects the output file path as the first command-line
# argument and an already initialised SQLite file at PUDELEK_DB_FILE; prints
# a usage or error message otherwise.
output_path = ARGV.first
if output_path.nil?
  puts "Usage:\n\truby pudelek_rss.rb [output_file]"
elsif File.exist?(PUDELEK_DB_FILE)
  # Scrape, render and write the feed in one pass.
  store = DB.new(SQLite3::Database.new(PUDELEK_DB_FILE))
  to_file(output_path, render_feed(fetch_items(store)))
else
  puts "Error:\n\tDatabase has not been created (file: #{PUDELEK_DB_FILE}). Run 'ruby sqlite_init.rb' to init DB."
end