public
Description: Speed testing for a data munging task
Homepage: http://anyall.org/blog/?p=652
Clone URL: git://github.com/brendano/awkspeed.git
awkspeed / 2num.pl
100644 18 lines (17 sloc) 0.401 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Convert whitespace-separated "item feature value" triples to numeric ids.
#
# Usage: 2num.pl FILE...
#
# For each FILE named on the command line, every input line is split on
# whitespace into (item, feature, value) and rewritten to "<FILE>n" as
#
#   <item id> <feature id> <value>
#
# Item ids are numbered from 1 separately for each input file, in order of
# first appearance.  Feature ids are numbered from 1 across all input files.
# Each feature name is written to the file "vocab" the first time it is seen,
# so line N of "vocab" holds the name of feature id N.
#
# The script dies with a diagnostic if any file cannot be opened or if a
# written file cannot be flushed on close.
use strict;
use warnings;

my $vocab_path = 'vocab';
open(my $vocab_fh, '>', $vocab_path)
  or die "Cannot open $vocab_path for writing: $!";

# Feature name -> feature id; shared by every input file.
my %feat_id;
my $feat_count = 0;

foreach my $file (@ARGV) {
  my $out_path = "${file}n";

  # Three-argument open: the file name is never interpreted as a mode or pipe.
  open(my $in_fh, '<', $file)
    or die "Cannot open $file for reading: $!";
  open(my $out_fh, '>', $out_path)
    or die "Cannot open $out_path for writing: $!";

  # Item name -> item id.  Item numbering restarts for every input file, so
  # the map only needs to live as long as the file is being processed.
  my %item_id;
  my $item_count = 0;

  while (my $line = <$in_fh>) {
    # split ' ' discards leading whitespace and the trailing newline.  The
    # padding makes fields that are missing on a short line come out as
    # empty strings rather than undef.
    my ($item, $feat, $val) = (split(' ', $line), ('') x 3);

    if (!exists $item_id{$item}) {
      $item_id{$item} = ++$item_count;
    }
    if (!exists $feat_id{$feat}) {
      $feat_id{$feat} = ++$feat_count;
      print $vocab_fh $feat . "\n";
    }
    print $out_fh "$item_id{$item} $feat_id{$feat} $val\n";
  }

  close($in_fh)
    or die "Error closing $file: $!";
  # Buffered write errors (e.g. a full disk) only surface at close.
  close($out_fh)
    or die "Error closing $out_path: $!";
}

close($vocab_fh)
  or die "Error closing $vocab_path: $!";