public
Description: Ruby libxml library targeting speed and ease of use. Provides an Hpricot-like interface to XML.
Homepage: http://trac.hasno.info/fastxml
Clone URL: git://github.com/segfault/fastxml.git
started adding more hpricot-esque functionality and adding support for 
blocks being passed to search


git-svn-id: svn://hasno.info/fastxml/trunk@40 
b3082176-f867-4bde-be85-e3c57d66f029
segfault (author)
Wed Sep 05 22:07:56 -0700 2007
commit  ecdb845a0ca9053bc4c21b1ea3560fe344b076a2
tree    16158e5515f13441e8d182f6a66b3c7237035aaf
parent  11af53f7703d180394292941e1f7f32bed7dfb45
...
10
11
12
13
 
 
14
15
16
...
20
21
22
 
 
 
23
24
25
26
 
27
28
29
...
61
62
63
 
64
65
66
...
177
178
179
180
 
181
182
183
...
10
11
12
 
13
14
15
16
17
...
21
22
23
24
25
26
27
28
29
 
30
31
32
33
...
65
66
67
68
69
70
71
...
182
183
184
 
185
186
187
188
0
@@ -10,7 +10,8 @@
0
 VALUE rb_cFastXmlDoc;
0
 VALUE rb_cFastXmlNode;
0
 VALUE rb_cFastXmlNodeList;
0
-
0
+ID s_readlines;
0
+ID s_to_s;
0
 
0
 void Init_fastxml()
0
 {
0
@@ -20,10 +21,13 @@ void Init_fastxml()
0
     if (xmlHasFeature(XML_WITH_XPATH) == 0)
0
         rb_raise( rb_eRuntimeError, "libxml not built with xpath support" );
0
 
0
+ s_readlines = rb_intern("readlines");
0
+ s_to_s = rb_intern("to_s");
0
+
0
     xmlInitParser();
0
     xmlXPathInit();
0
     VALUE rb_mFastXml = rb_define_module( "FastXml" );
0
- //rb_define_const( rb_mFastXml, "VERSION", rb_str_new2( "0.1" ) );
0
+ rb_define_const( rb_mFastXml, "LIBXML_VERSION", rb_str_new2( LIBXML_DOTTED_VERSION ) );
0
     rb_cFastXmlDoc = rb_define_class_under( rb_mFastXml, "Doc", rb_cObject );
0
     rb_cFastXmlNode = rb_define_class_under( rb_mFastXml, "Node", rb_cObject );
0
     rb_cFastXmlNodeList = rb_define_class_under( rb_mFastXml, "NodeList", rb_cObject );
0
@@ -61,6 +65,7 @@ void Init_fastxml()
0
     rb_define_method( rb_cFastXmlNodeList, "length", fastxml_nodelist_length, 0 );
0
     rb_define_method( rb_cFastXmlNodeList, "each", fastxml_nodelist_each, 0 );
0
     rb_define_method( rb_cFastXmlNodeList, "entry", fastxml_nodelist_entry, 1 );
0
+ rb_define_method( rb_cFastXmlNodeList, "to_ary", fastxml_nodelist_entry, 0 );
0
   
0
   rb_require( "lib/fastxml_lib" );
0
 
0
@@ -177,7 +182,7 @@ VALUE munge_xpath_namespace( VALUE orig_expr, xmlChar *root_ns )
0
   return rb_ary_join( ret_ary, slash );
0
 }
0
 
0
-VALUE fastxml_xpath_search(VALUE self, VALUE raw_xpath)
0
+VALUE fastxml_xpath_search(VALUE self, VALUE raw_xpath, VALUE blk)
0
 {
0
     VALUE ret, dv, xpath_s;
0
   xmlXPathCompExprPtr xpath_xpr;
...
38
39
40
41
 
 
 
 
42
43
44
...
38
39
40
 
41
42
43
44
45
46
47
0
@@ -38,7 +38,10 @@ typedef struct {
0
 RUBY_EXTERN VALUE rb_cFastXmlDoc;
0
 RUBY_EXTERN VALUE rb_cFastXmlNode;
0
 
0
-RUBY_EXTERN VALUE fastxml_xpath_search(VALUE self, VALUE raw_xpath);
0
+RUBY_EXTERN ID s_readlines;
0
+RUBY_EXTERN ID s_to_s;
0
+
0
+RUBY_EXTERN VALUE fastxml_xpath_search(VALUE self, VALUE raw_xpath, VALUE blk);
0
 RUBY_EXTERN VALUE fastxml_raw_node_to_obj(xmlNodePtr cur);
0
 RUBY_EXTERN VALUE fastxml_nodeset_to_obj(xmlXPathObjectPtr xpath_obj, fxml_data_t *data);
0
 RUBY_EXTERN VALUE fastxml_nodelist_to_obj(xmlNodePtr root, int len);
...
13
14
15
16
 
17
18
 
19
20
 
21
22
23
...
82
83
84
85
 
86
87
 
88
89
90
...
121
122
123
124
 
125
126
127
...
132
133
134
135
136
137
 
 
 
 
 
 
 
 
138
139
140
...
13
14
15
 
16
17
18
19
20
 
21
22
23
24
...
83
84
85
 
86
87
 
88
89
90
91
...
122
123
124
 
125
126
127
128
...
133
134
135
 
 
 
136
137
138
139
140
141
142
143
144
145
146
0
@@ -13,11 +13,12 @@ VALUE fastxml_doc_inspect(VALUE self)
0
 {
0
     VALUE *argv;
0
     argv = ALLOCA_N( VALUE, 3 );
0
- argv[0] = rb_str_new2( "#<%s:0x%x>" );
0
+ argv[0] = rb_str_new2( "#<%s:0x%x %s>" );
0
     argv[1] = CLASS_OF( self );
0
     argv[2] = rb_obj_id( self );
0
+ argv[3] = fastxml_doc_to_s( self );
0
 
0
- return rb_f_sprintf( 3, argv );
0
+ return rb_f_sprintf( 4, argv );
0
 }
0
 
0
 VALUE fastxml_doc_children(VALUE self)
0
@@ -82,9 +83,9 @@ VALUE fastxml_doc_transform(VALUE self, VALUE xform)
0
   return ret;
0
 }
0
 
0
-VALUE fastxml_doc_search(VALUE self, VALUE raw_xpath)
0
+VALUE fastxml_doc_search(VALUE self, VALUE raw_xpath, VALUE blk)
0
 {
0
- return fastxml_xpath_search( self, raw_xpath );
0
+ return fastxml_xpath_search( self, raw_xpath, blk );
0
 }
0
 
0
 VALUE fastxml_doc_to_s(VALUE self)
0
@@ -121,7 +122,7 @@ VALUE fastxml_doc_root(VALUE self)
0
 
0
 VALUE fastxml_doc_initialize(VALUE self, VALUE xml_doc_str)
0
 {
0
- VALUE data_s, dv;
0
+ VALUE data_s, dv, lines;
0
     fxml_data_t *data;
0
     int parser_opts = XML_PARSE_NOERROR | XML_PARSE_NOWARNING;
0
     int parse_dtd = XML_PARSE_DTDLOAD | XML_PARSE_DTDATTR | XML_PARSE_DTDVALID;
0
@@ -132,9 +133,14 @@ VALUE fastxml_doc_initialize(VALUE self, VALUE xml_doc_str)
0
         return Qnil;
0
     }
0
 
0
-
0
- data_s = rb_obj_as_string( xml_doc_str );
0
- rb_iv_set( self, "@raw_data", data_s );
0
+ if (rb_respond_to( xml_doc_str, s_readlines )) {
0
+ lines = rb_funcall( xml_doc_str, s_readlines, 0 );
0
+ data_s = rb_funcall( lines, s_to_s, 0 );
0
+ }
0
+ else
0
+ data_s = rb_obj_as_string( xml_doc_str );
0
+
0
+ rb_iv_set( self, "@raw_data", data_s );
0
 
0
     data = ALLOC(fxml_data_t);
0
     memset( data, (int)NULL, sizeof(fxml_data_t) );
...
5
6
7
8
 
9
10
11
...
5
6
7
 
8
9
10
11
0
@@ -5,7 +5,7 @@
0
 #ifndef fastxml_doc_h
0
 #define fastxml_doc_h
0
 RUBY_EXTERN VALUE fastxml_doc_initialize(VALUE self, VALUE data);
0
-RUBY_EXTERN VALUE fastxml_doc_search(VALUE self, VALUE raw_xpath);
0
+RUBY_EXTERN VALUE fastxml_doc_search(VALUE self, VALUE raw_xpath, VALUE blk);
0
 RUBY_EXTERN VALUE fastxml_doc_to_s(VALUE self);
0
 RUBY_EXTERN VALUE fastxml_doc_root(VALUE self);
0
 RUBY_EXTERN VALUE fastxml_doc_transform(VALUE self, VALUE xform);
...
229
230
231
232
 
233
234
 
235
236
237
...
229
230
231
 
232
233
 
234
235
236
237
0
@@ -229,9 +229,9 @@ VALUE fastxml_node_to_s(VALUE self)
0
     return ret;
0
 }
0
 
0
-VALUE fastxml_node_search(VALUE self, VALUE raw_xpath)
0
+VALUE fastxml_node_search(VALUE self, VALUE raw_xpath, VALUE blk)
0
 {
0
- return fastxml_xpath_search( self, raw_xpath );
0
+ return fastxml_xpath_search( self, raw_xpath, blk );
0
 }
0
 
0
 
...
5
6
7
8
 
9
10
11
...
5
6
7
 
8
9
10
11
0
@@ -5,7 +5,7 @@
0
 #ifndef fastxml_node_h
0
 #define fastxml_node_h
0
 RUBY_EXTERN VALUE fastxml_node_initialize(VALUE self);
0
-RUBY_EXTERN VALUE fastxml_node_search(VALUE self,VALUE raw_xpath);
0
+RUBY_EXTERN VALUE fastxml_node_search(VALUE self,VALUE raw_xpath, VALUE blk);
0
 RUBY_EXTERN VALUE fastxml_node_name(VALUE self);
0
 RUBY_EXTERN VALUE fastxml_node_value(VALUE self);
0
 RUBY_EXTERN VALUE fastxml_node_value_set(VALUE self, VALUE new_val);
...
54
55
56
57
 
58
59
60
...
68
69
70
71
 
72
73
74
...
89
90
91
92
 
93
94
95
 
96
97
98
...
100
101
102
 
 
 
 
 
 
 
 
 
 
103
104
105
...
54
55
56
 
57
58
59
60
...
68
69
70
 
71
72
73
74
...
89
90
91
 
92
93
94
 
95
96
97
98
...
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
0
@@ -54,7 +54,7 @@ VALUE fastxml_nodelist_length(VALUE self)
0
   return rb_int2inum( data->list_len );
0
 }
0
 
0
-VALUE fastxml_nodelist_to_ary(fxml_data_t *root)
0
+VALUE fastxml_nodelist_obj_to_ary(fxml_data_t *root)
0
 {
0
   VALUE ret;
0
     xmlNodePtr cur = root->list;  
0
@@ -68,7 +68,7 @@ VALUE fastxml_nodelist_to_ary(fxml_data_t *root)
0
   return ret;
0
 }
0
 
0
-VALUE fastxml_nodeset_to_ary(fxml_data_t *root)
0
+VALUE fastxml_nodeset_obj_to_ary(fxml_data_t *root)
0
 {
0
   VALUE ret;
0
   xmlNodePtr cur = root->xpath_obj->nodesetval->nodeTab;  
0
@@ -89,10 +89,10 @@ VALUE fastxml_nodelist_gen_list(VALUE self, fxml_data_t *data)
0
 
0
   if (lst == Qnil) {
0
      if (data->xpath_obj != NULL) {
0
- lst = fastxml_nodeset_to_ary( data );
0
+ lst = fastxml_nodeset_obj_to_ary( data );
0
       rb_iv_set( self, "@list", lst );
0
     } else {
0
- lst = fastxml_nodelist_to_ary( data );
0
+ lst = fastxml_nodelist_obj_to_ary( data );
0
       rb_iv_set( self, "@list", lst );
0
     }
0
   }
0
@@ -100,6 +100,16 @@ VALUE fastxml_nodelist_gen_list(VALUE self, fxml_data_t *data)
0
   return lst;
0
 }
0
 
0
+VALUE fastxml_nodelist_to_ary(VALUE self)
0
+{
0
+ VALUE dv;
0
+ fxml_data_t *data;
0
+
0
+ dv = rb_iv_get( self, "@lxml_doc" );
0
+ Data_Get_Struct( dv, fxml_data_t, data );
0
+ return fastxml_nodelist_gen_list( self, data );
0
+}
0
+
0
 VALUE fastxml_nodelist_each(VALUE self)
0
 {
0
   VALUE lst, dv;
...
9
10
11
 
12
...
9
10
11
12
13
0
@@ -9,4 +9,5 @@ RUBY_EXTERN VALUE fastxml_nodelist_inspect(VALUE self);
0
 RUBY_EXTERN VALUE fastxml_nodelist_length(VALUE self);
0
 RUBY_EXTERN VALUE fastxml_nodelist_entry(VALUE self, long idx);
0
 RUBY_EXTERN VALUE fastxml_nodelist_each(VALUE self);
0
+RUBY_EXTERN VALUE fastxml_nodelist_to_ary(VALUE self);
0
 #endif /*fastxml_nodelist_h*/
...
35
36
37
 
 
 
 
38
39
40
...
46
47
48
49
 
50
51
52
...
60
61
62
 
 
 
 
 
 
 
63
64
65
...
35
36
37
38
39
40
41
42
43
44
...
50
51
52
 
53
54
55
56
...
64
65
66
67
68
69
70
71
72
73
74
75
76
0
@@ -35,6 +35,10 @@ class FastXml::Doc
0
   def doctype?
0
     nil
0
   end
0
+
0
+ def xpath
0
+ "/"
0
+ end
0
 end
0
 
0
 class FastXml::Node
0
@@ -46,7 +50,7 @@ end
0
 
0
 class FastXml::NodeList
0
   def [](idx)
0
- self.entry(idx)
0
+ self.entry(idx)
0
   end
0
   
0
   def first
0
@@ -60,6 +64,13 @@ class FastXml::NodeList
0
   def empty?
0
     return (length == 0)
0
   end
0
+
0
+ def at(tgt)
0
+ return self.entry( tgt.to_i ) if tgt =~ /^\d+$/
0
+ ret = []
0
+ each { |nd| ret << (nd/tgt).to_ary }
0
+ ret.flatten!
0
+ end
0
 end
0
 
0
 
...
33
34
35
 
 
 
 
 
 
 
 
 
 
 
36
37
...
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
0
@@ -33,5 +33,16 @@ describe FastXml::Doc, " when created" do
0
     doc.should_not be_nil
0
     doc.to_s.should_not be_nil
0
   end
0
+
0
+ it 'should be able to parse large files' do
0
+ raw_data = open( "./test_data/xslspec.xml" ).readlines.join('')
0
+ doc = FastXml( raw_data )
0
+ doc.should_not be_nil
0
+ doc.to_s.should_not be_nil
0
+ doc.to_s.length.should >= 1584496
0
+ doc.root.should_not be_nil
0
+ (doc/"").should_not be_nil
0
+ doc.root.children.should_not be_nil
0
+ end
0
 
0
 end
...
6
7
8
9
 
 
10
11
12
...
22
23
24
25
26
27
28
29
30
31
...
56
57
58
 
 
 
 
 
 
 
59
...
6
7
8
 
9
10
11
12
13
...
23
24
25
 
 
 
 
26
27
28
...
53
54
55
56
57
58
59
60
61
62
63
0
@@ -6,7 +6,8 @@ require 'fastxml'
0
 
0
 describe FastXml::Doc, " functionality" do
0
   before(:all) do
0
- data_raw = open( "./test_data/hasno_feed.xml" )
0
+ @filename = "./test_data/hasno_feed.xml"
0
+ data_raw = open( @filename )
0
     @data_ary = data_raw.readlines
0
     data_raw.close
0
     @data_str = @data_ary.join('')
0
@@ -22,10 +23,6 @@ describe FastXml::Doc, " functionality" do
0
     @doc.should respond_to( :children )
0
     @doc.children.should_not be_nil
0
   end
0
-
0
- #it 'should be able to run namespaced xpath searches' do
0
- # @doc.search( "/Atom:feed/Atom:entry").length.should > 1
0
- #end
0
   
0
   it 'should be able to run default namespace xpath searches' do
0
     @doc.should respond_to( :search )
0
@@ -56,4 +53,11 @@ describe FastXml::Doc, " functionality" do
0
     @doc.should respond_to( :at )
0
     @doc.at( "/feed" ).should_not be_nil
0
   end
0
+
0
+ it 'should be able to parse objects with a readlines method' do
0
+ rlm = FastXml( open( @filename ) )
0
+ rlm.should_not be_nil
0
+ rlm.to_s.should == @doc.to_s
0
+ rlm.should_not == @doc
0
+ end
0
 end

Comments

    No one has commented yet.