0
+require 'rexml/document'
0
+require 'feedparser/textconverters'
0
+require 'feedparser/rexml_patch'
0
+ class UnknownFeedTypeException < RuntimeError
0
+ attr_reader :type, :title, :link, :description, :creator, :encoding, :items
0
+ # REXML::Element for this feed.
0
+ # parse str to build a Feed
0
+ def initialize(str = nil)
0
+ # Determines all the fields using a string containing an XML document.
0
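+ # Dirty hack: some feeds contain a bare & char followed by whitespace. It must be changed to &amp; (NOTE(review): the replacement string in the gsub! below appears to have lost its "&amp;" entity as well - confirm).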
0
+ str.gsub!(/&(\s+)/, '&\1')
0
+ doc = REXML::Document.new(str)
0
+ @encoding = doc.encoding
0
+ @title,@link,@description,@creator = nil
0
+ if doc.root.elements['channel'] || doc.root.elements['rss:channel']
0
+ if (e = doc.root.elements['channel/title'] ||
0
+ doc.root.elements['rss:channel/rss:title']) && e.text
0
+ @title = e.text.toUTF8(@encoding).rmWhiteSpace!
0
+ if (e = doc.root.elements['channel/link'] ||
0
+ doc.root.elements['rss:channel/rss:link']) && e.text
0
+ @link = e.text.rmWhiteSpace!
0
+ if (e = doc.root.elements['channel/description'] ||
0
+ doc.root.elements['rss:channel/rss:description']) && e.text
0
+ @description = e.text.toUTF8(@encoding).rmWhiteSpace!
0
+ if ((e = doc.root.elements['channel/dc:creator']) && e.text) ||
0
+ ((e = doc.root.elements['channel/author'] ||
0
+ doc.root.elements['rss:channel/rss:author']) && e.text)
0
+ @creator = e.text.toUTF8(@encoding).rmWhiteSpace!
0
+ if doc.root.elements['channel/item']
0
+ query = 'channel/item'
0
+ elsif doc.root.elements['item']
0
+ elsif doc.root.elements['rss:channel/rss:item']
0
+ query = 'rss:channel/rss:item'
0
+ doc.root.each_element(query) { |e| @items << RSSItem::new(e, self) }
0
+ elsif doc.root.elements['/feed']
0
+ # We have an ATOM feed!
0
+ if (e = doc.root.elements['/feed/title']) && e.text
0
+ @title = e.text.toUTF8(@encoding).rmWhiteSpace!
0
+ doc.root.each_element('/feed/link') do |e|
0
+ if e.attribute('type') and (
0
+ e.attribute('type').value == 'text/html' or
0
+ e.attribute('type').value == 'application/xhtml' or
0
+ e.attribute('type').value == 'application/xhtml+xml')
0
+ if (h = e.attribute('href')) && h
0
+ @link = h.value.rmWhiteSpace!
0
+ if e = doc.root.elements['/feed/info']
0
+ e = e.elements['div'] || e
0
+ @description = e.to_s.toUTF8(@encoding).rmWhiteSpace!
0
+ doc.root.each_element('/feed/entry') do |e|
0
+ @items << AtomItem::new(e, self)
0
+ raise UnknownFeedTypeException::new
0
+ s += "Type: #{@type}\n"
0
+ s += "Encoding: #{@encoding}\n"
0
+ s += "Title: #{@title}\n"
0
+ s += "Link: #{@link}\n"
0
+ s += "Description: #{@description}\n"
0
+ s += "Creator: #{@creator}\n"
0
+ @items.each { |i| s += i.to_s }
0
+ attr_accessor :title, :link, :content, :date, :creator, :subject,
0
+ :category, :cacheditem
0
+ # REXML::Element for this item
0
+ def initialize(item = nil, feed = nil)
0
+ @title, @link, @content, @date, @creator, @subject, @category = nil
0
+ raise "parse() should be implemented by subclasses!"
0
+ s = "--------------------------------\n" +
0
+ "Title: #{@title}\nLink: #{@link}\n" +
0
+ "Date: #{@date.to_s}\nCreator: #{@creator}\n" +
0
+ "Subject: #{@subject}\nCategory: #{@category}\nContent:\n#{content}\n"
0
+ if defined?(@enclosures) and @enclosures.length > 0
0
+ @enclosures.each do |e|
0
+ s2 += e.join(' ') + "\n"
0
+ class RSSItem < FeedItem
0
+ # The item's enclosure children: an array of (url, length, type) triplets.
0
+ attr_accessor :enclosures
0
+ # Title. If no title, use the pubDate as fallback.
0
+ if ((e = item.elements['title'] || item.elements['rss:title']) &&
0
+ ((e = item.elements['pubDate'] || item.elements['rss:pubDate']) &&
0
+ @title = e.text.toUTF8(@feed.encoding).rmWhiteSpace!
0
+ if ((e = item.elements['link'] || item.elements['rss:link']) && e.text)||
0
+ (e = item.elements['guid'] || item.elements['rss:guid'] and
0
+ not (e.attribute('isPermaLink') and
0
+ e.attribute('isPermaLink').value == 'false'))
0
+ @link = e.text.rmWhiteSpace!
0
+ if (e = item.elements['content:encoded']) ||
0
+ (e = item.elements['description'] || item.elements['rss:description'])
0
+ @content = FeedParser::getcontent(e, @feed)
0
+ if e = item.elements['dc:date'] || item.elements['pubDate'] ||
0
+ item.elements['rss:pubDate']
0
+ @date = Time::xmlschema(e.text)
0
+ @date = Time::rfc2822(e.text)
0
+ @date = Time::parse(e.text)
0
+ @creator = @feed.creator
0
+ if (e = item.elements['dc:creator'] || item.elements['author'] ||
0
+ item.elements['rss:author']) && e.text
0
+ @creator = e.text.toUTF8(@feed.encoding).rmWhiteSpace!
0
+ if (e = item.elements['dc:subject']) && e.text
0
+ @subject = e.text.toUTF8(@feed.encoding).rmWhiteSpace!
0
+ if (e = item.elements['dc:category'] || item.elements['category'] ||
0
+ item.elements['rss:category']) && e.text
0
+ @category = e.text.toUTF8(@feed.encoding).rmWhiteSpace!
0
+ item.each_element('enclosure') do |e|
0
+ url = e.attribute('url').value if e.attribute('url')
0
+ length = e.attribute('length').value if e.attribute('length')
0
+ type = e.attribute('type').value if e.attribute('type')
0
+ @enclosures << [ url, length, type ]
0
+ class AtomItem < FeedItem
0
+ if (e = item.elements['title']) && e.text
0
+ @title = e.text.toUTF8(@feed.encoding).rmWhiteSpace!
0
+ item.each_element('link') do |e|
0
+ if e.attribute('type') and (
0
+ e.attribute('type').value == 'text/html' or
0
+ e.attribute('type').value == 'application/xhtml' or
0
+ e.attribute('type').value == 'application/xhtml+xml')
0
+ if (h = e.attribute('href')) && h.value
0
+ if e = item.elements['content'] || item.elements['summary']
0
+ if (e.attribute('mode') and e.attribute('mode').value == 'escaped') &&
0
+ @content = e.text.toUTF8(@feed.encoding).rmWhiteSpace!
0
+ @content = FeedParser::getcontent(e, @feed)
0
+ if (e = item.elements['issued'] || e = item.elements['created']) && e.text
0
+ @date = Time::xmlschema(e.text)
0
+ @date = Time::rfc2822(e.text)
0
+ @date = Time::parse(e.text)
0
+ @creator = @feed.creator
0
+ if (e = item.elements['author/name']) && e.text
0
+ @creator = e.text.toUTF8(@feed.encoding).rmWhiteSpace!
0
+ def FeedParser::getcontent(e, feed = nil)
0
+ encoding = feed ? feed.encoding : 'utf-8'
0
+ children = e.children.reject do |i|
0
+ i.class == REXML::Text and i.to_s.chomp == ''
0
+ if children.length > 1
0
+ children.each { |c| s += c.to_s }
0
+ return s.toUTF8(encoding).rmWhiteSpace!.text2html
0
+ elsif children.length == 1
0
+ if c.class == REXML::Text
0
+ return e.text.toUTF8(encoding).rmWhiteSpace!.text2html
0
+ if c.class == REXML::CData
0
+ return c.to_s.toUTF8(encoding).rmWhiteSpace!.text2html
0
+ return c.text.toUTF8(encoding).text2html