public
Description: Speed testing for a data munging task
Homepage: http://anyall.org/blog/?p=652
Clone URL: git://github.com/brendano/awkspeed.git
awkspeed / 2num.rb
100644 23 lines (22 sloc) 0.473 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Rewrites whitespace-separated "item feature value" files so that items and
# features are replaced by small integer ids.
#
# For every input path FILE an output file named FILE + "n" is written, one
# output line per input line: "<item id> <feature id> <value>".
#
# * Item ids start at 1 for each input file and are handed out in order of
#   first appearance within that file.
# * Feature ids start at 1 and are shared across all input files, handed out
#   in order of first appearance. Each newly seen feature is appended to the
#   vocabulary file, so line N of that file is the feature with id N.
#
# Every file is opened with the block form of File.open so its handle is
# closed even when an error interrupts processing. File.open is used instead
# of Kernel#open so a path beginning with "|" is treated as a file name and
# never executed as a command.
#
# @param files [Array<String>] paths of the input files to convert
# @param vocab_path [String] path of the vocabulary file to (over)write
def renumber_files(files, vocab_path = "vocab")
  feature_ids = {}

  File.open(vocab_path, "w") do |vocab|
    files.each do |file|
      # Item numbering restarts for every input file.
      item_ids = {}

      File.open(file + "n", "w") do |out|
        File.open(file) do |input|
          input.each_line do |line|
            item, feat, val = line.strip.split

            # Ids are always >= 1 (truthy), so ||= only assigns on first sight.
            item_ids[item] ||= item_ids.size + 1

            unless feature_ids.key?(feat)
              feature_ids[feat] = feature_ids.size + 1
              vocab.puts feat
            end

            out.puts([item_ids[item], feature_ids[feat], val].join(" "))
          end
        end
      end
    end
  end
end

# Script entry point: convert every file named on the command line.
renumber_files(ARGV)