public
Description: Ruby libxml library targeting speed and ease of use. Provides an Hpricot-like interface to XML.
Homepage: http://trac.hasno.info/fastxml
Clone URL: git://github.com/segfault/fastxml.git
fastxml / specs / basic_html_spec.rb
100644 70 lines (56 sloc) 1.709 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# encoding: utf-8
# Append the relative ext/ and lib/ directories (both the ./ and ../ variants)
# to the load path so the `require 'fastxml'` below can be resolved.
$LOAD_PATH.concat(%w[../ext ./ext ../lib ./lib])
 
require 'fastxml'
 
# Specs for HTML parsing through FastXml::Doc (with :html => true) and the
# FastHtml helper, driven by the HTML fixtures under ./test_data.
describe FastXml::Doc, " doing html parsing" do
  # Load the hasno feed fixture once and expose it in three forms: the open
  # file handle, an array of lines, and a single joined string.
  before(:all) do
    # File.open rather than Kernel#open: the argument is always a plain path.
    @data_raw = File.open("./test_data/hasno_feed.html")
    @data_ary = @data_raw.readlines
    @data_str = @data_ary.join('')
  end

  # readlines leaves the handle at EOF; rewind so each example starts from
  # the beginning of the file if it reads from the handle.
  before do
    @data_raw.rewind if @data_raw
  end

  # Release the fixture handle opened in before(:all).
  after(:all) do
    @data_raw.close if @data_raw
  end

  it 'should parse string input' do
    @data_str.should_not be_nil
    doc = FastXml::Doc.new(@data_str, {:html=>true})
    doc.should_not be_nil
    doc.to_s.should_not be_nil
  end

  it 'should parse array input' do
    @data_ary.should_not be_nil
    doc = FastXml::Doc.new(@data_ary, {:html=>true})
    doc.should_not be_nil
    doc.to_s.should_not be_nil
  end

  it 'should be able to parse hasno and search' do
    doc = FastHtml(@data_str)
    descs = (doc/"p[class=description]")
    descs.should_not be_nil
    descs.each do |d|
      d.should_not be_nil
      # Was `should_be >= 1`; use the same `should >=` operator form as the
      # other examples in this group so the expectation is actually checked.
      d.length.should >= 1
    end
  end

  it 'should handle the twitter public timeline' do
    # File.read returns the whole file and closes it, instead of leaking the
    # handle returned by a bare open(...).
    raw_data = File.read("./test_data/twitter_public.html")
    doc = FastHtml(raw_data)
    doc.should_not be_nil
    doc.to_s.should_not be_nil
    doc.to_s.length.should >= 30000
    doc.root.should_not be_nil
    (doc/"").should_not be_nil
    doc.root.children.should_not be_nil
  end

  it 'should be able to handle the cnn site' do
    # Same as above: read and close the fixture in one call.
    raw_data = File.read("./test_data/cnn_main.html")
    doc = FastHtml(raw_data)
    doc.should_not be_nil
    doc.to_s.should_not be_nil
    doc.to_s.length.should >= 10000
    (doc/"").should_not be_nil
    doc.root.children.should_not be_nil
  end
end