copiousfreetime / amalgalite

SQLite database engine embedded in a ruby extension.

This URL has Read+Write access

amalgalite / examples / define_aggregate.rb
100644 76 lines (65 sloc) 1.931 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env ruby
 
require 'rubygems'
$: << "../lib"
$: << "../ext"
require 'amalgalite'
 
#--
# Create a database and a table to put some results from the functions in
#--
db = Amalgalite::Database.new( ":memory:" )
db.execute( "CREATE TABLE atest( words )" )
 
#------------------------------------------------------------------------------
# Create unique word count aggregate
#------------------------------------------------------------------------------
class UniqueWordCount < ::Amalgalite::Aggregate
  attr_accessor :words
 
  def initialize
    @name = 'unique_word_count'
    @arity = 1
    @words = Hash.new { |h,k| h[k] = 0 }
  end
 
  def step( str )
    str.split(/\W+/).each do |word|
      words[ word.downcase ] += 1
    end
    return nil
  end
 
  def finalize
    return words.size
  end
end
 
db.define_aggregate( 'unique_word_count', UniqueWordCount )
 
#------------------------------------------------------------------------------
# Now we have a new aggregate function, lets insert some rows into the database
# and see what we can find.
#------------------------------------------------------------------------------
sql = "INSERT INTO atest( words ) VALUES( ? )"
verify = {}
db.prepare( sql ) do |stmt|
  DATA.each do |words|
    words.strip!
    puts "Inserting #{words}"
    stmt.execute( words )
    words.split(/\W+/).each { |w| verify[w] = true }
  end
end
 
#------------------------------------------------------------------------------
# And show the results
#------------------------------------------------------------------------------
puts
puts "Getting results..."
puts
all_rows = db.execute("SELECT unique_word_count( words ) AS uwc FROM atest")
puts "#{all_rows.first['uwc']} unique words found"
puts "#{verify.size} unique words to verify"
 
__END__
some random
words with
which
to play
and there should
be a couple of different
words that appear
more than once and
some that appear only
once