public
Description: Speed testing for a data munging task
Homepage: http://anyall.org/blog/?p=652
Clone URL: git://github.com/brendano/awkspeed.git
awkspeed / 2num_c.cc
100644 44 lines (43 sloc) 1.316 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#include <stdio.h>
#include <string>
using namespace std;
#include <ext/hash_map>
using namespace __gnu_cxx;
// hash_map ships no hash function for std::string keys, so one has to be defined by hand. See http://www.gamedev.net/community/forums/topic.asp?topic_id=119766
namespace __gnu_cxx {
  // Hash functor specialization that lets std::string be used as a hash_map
  // key. It hands the string's NUL-terminated character data to
  // __stl_hash_string and returns that result unchanged.
  template<>
  struct hash< std::string > {
    size_t operator( )( const std::string &text ) const {
      const char *const chars = text.c_str( );
      return __stl_hash_string( chars );
    }
  };
}
int main(int argc, const char **argv)
{
  hash_map<string, int> imap;
  hash_map<string, int> jmap;
  hash_map<string, int> I;
  int J=0;
  FILE *vocab = fopen("vocab", "w");
  for (int file_i=1; file_i<argc; file_i++) {
    printf("%s\n",argv[file_i]);
    string filename = argv[file_i];
    FILE *in = fopen(argv[file_i], "r");
    string outname = argv[file_i];
    outname += "n";
    FILE *out = fopen(outname.c_str(), "w");
    char item[1000], feat[1000], val[1000];
    while (fscanf(in, "%s %s %s", item, feat, val) != EOF) {
      string key = filename + " " + item;
      if (imap.find(key) == imap.end()) {
        imap[key] = ++I[filename];
      }
      if (jmap.find(feat) == jmap.end()) {
        jmap[feat] = ++J;
        fprintf(vocab, "%s\n", feat);
      }
      fprintf(out, "%d %d %s\n", imap[key], jmap[feat], val);
    }
  }
  return 0;
}