Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

added hashmap

  • Loading branch information...
commit f241ebf41331a3e0b203438462cde69488c3fd50 1 parent 1dec7b8
@hal3 hal3 authored
Showing with 221 additions and 0 deletions.
  1. +70 −0 vowpalwabbit/v_hashmap.cc
  2. +151 −0 vowpalwabbit/v_hashmap.h
View
70 vowpalwabbit/v_hashmap.cc
@@ -0,0 +1,70 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+#include "v_array.h"
+#include "v_hashmap.h"
+
+// Hash function used by the test driver. It is intentionally poor: only
+// three distinct values are ever produced, which forces collisions and so
+// exercises the probing logic of v_hashmap.
+// A better-spread alternative would be (size_t)(k * 382490328 + 34280).
+size_t myhash(int k)
+{
+  const size_t widened = (size_t)k;  // negative k wraps to a large unsigned value
+  return widened % 3;
+}
+
+// Key-equality predicate for int keys; handed to v_hashmap as its
+// `equivalent` callback.
+bool inteq(int a, int b)
+{
+  const bool same = (a == b);
+  return same;
+}
+
+// Callback for v_hashmap::iter(): writes one (key, value) pair to stdout.
+void printit(int k, float v)
+{
+  fprintf(stdout, " iter(%d, %g)\n", k, v);
+}
+
+// Prints the result of get(k) for every k in [lo, hi], then a blank line.
+static void show_lookups(v_hashmap<int,float>& hm, int lo, int hi)
+{
+  for (int k=lo; k<=hi; k++)
+    printf("get(%d,%zu) -> %g\n", k, myhash(k), hm.get(k, myhash(k)));
+  printf("\n");
+}
+
+// Stores (k -> (float)k) for k = lo, lo+step, ... while k <= hi, logging each
+// insertion, then prints a blank line.
+static void store_keys(v_hashmap<int,float>& hm, int lo, int hi, int step)
+{
+  for (int k=lo; k<=hi; k+=step) {
+    printf("put(%d,%zu) <- %g\n", k, myhash(k), (float)k);
+    hm.put(k, myhash(k), (float)k);
+  }
+  printf("\n");
+}
+
+// Manual smoke test for v_hashmap<int,float>: interleaves lookups, inserts,
+// an explicit double_size() and iteration, printing everything to stdout so
+// the output can be inspected by hand. Uses the collision-heavy myhash().
+void test_v_hashmap()
+{
+  // Direct-initialise the map. The copy-initialisation form
+  // (hm = v_hashmap<int,float>(...)) relies on copy elision, which is only
+  // guaranteed from C++17 on; v_hashmap's destructor frees its buffer and the
+  // class declares no copy constructor, so an actual copy must be avoided.
+  v_hashmap<int,float> hm(15, FLT_MAX, &inteq);
+
+  // Empty map: every lookup yields the default value (FLT_MAX).
+  show_lookups(hm, -5, 5);
+
+  store_keys(hm, -5, 5, 5);
+  show_lookups(hm, -5, 5);
+
+  // Contents must survive an explicit resize.
+  hm.iter(&printit);
+  hm.double_size();
+  hm.iter(&printit);
+
+  // Two back-to-back lookup passes; get() does not alter the stored entries,
+  // so both passes print the same values.
+  show_lookups(hm, -5, 5);
+  show_lookups(hm, -5, 5);
+
+  store_keys(hm, -5, 5, 2);
+  show_lookups(hm, -5, 5);
+
+  // Insert enough keys to trigger automatic growth inside put().
+  store_keys(hm, -10, 10, 1);
+  show_lookups(hm, -5, 5);
+
+  hm.iter(&printit);
+}
+
View
151 vowpalwabbit/v_hashmap.h
@@ -0,0 +1,151 @@
+#ifndef V_HASHMAP_H
+#define V_HASHMAP_H
+
+#include <stdio.h>
+#include <iostream>
+#include <stdlib.h>
+#include <string.h>
+#include "v_array.h"
+
+// Open-addressing hash map (linear probing) from K to V, backed by a
+// v_array<elem>. The caller supplies the hash value for each key on every
+// call; the map itself never computes hashes.
+//
+// Slots are cleared with memset and copied by value, so K and V are expected
+// to be plain data types for which an all-zero bit pattern is acceptable.
+//
+// NOTE(review): the class frees dat.begin in its destructor but declares no
+// copy constructor or assignment operator; presumably copying a v_hashmap
+// would leave two objects freeing the same buffer -- verify against v_array.
+template<class K, class V> class v_hashmap{
+ public:
+  struct elem {
+    bool occupied;   // true when this slot holds a live entry
+    K key;
+    V val;
+    size_t hash;     // caller-supplied hash, kept so double_size() can re-insert
+  };
+
+  bool (*equivalent)(K,K);   // key equality; NULL means "same hash == same key"
+  size_t (*hash)(K);         // not called by any method here; initialised to NULL
+  V default_value;           // returned by get() when the key is absent
+  v_array<elem> dat;         // the slot table
+  size_t last_position;      // slot index where the most recent get() stopped
+  size_t num_occupants;      // number of occupied slots
+
+
+  // Number of slots in the table (occupied or not).
+  size_t base_size() {
+    return dat.end_array-dat.begin;
+  }
+
+  // Creates a map with min_size slots, `def` as the value reported for
+  // missing keys, and `eq` as the key-equality predicate (may be NULL).
+  v_hashmap(size_t min_size, V def, bool (*eq)(K,K)) {
+    init(min_size, def, eq);
+  }
+
+  // Creates a map with 1023 slots and no equality predicate.
+  // This previously wrote `v_hashmap(1023, def, NULL);`, which constructs and
+  // immediately destroys an unrelated temporary and leaves *this
+  // uninitialised; both constructors now share init() instead.
+  v_hashmap(V def) { init(1023, def, NULL); }
+
+  void set_equivalent(bool (*eq)(K,K)) { equivalent = eq; }
+
+  ~v_hashmap() {
+    dat.erase();
+    free(dat.begin);
+  }
+
+  // Marks every slot as empty without changing the table size.
+  void clear() {
+    memset(dat.begin, 0, base_size()*sizeof(elem));
+    last_position = 0;
+    num_occupants = 0;
+  }
+
+  // Calls func(key, val) for every occupied slot, in slot order.
+  void iter(void (*func)(K,V)) {
+    for (elem* e=dat.begin; e!=dat.end_array; e++) {
+      if (e->occupied)
+        func(e->key, e->val);
+    }
+  }
+
+
+  // Writes (key, hash, val) into the slot at last_position without touching
+  // num_occupants and without growing. Only meaningful right after a get()
+  // for the same key has positioned last_position.
+  void put_after_get_nogrow(K key, size_t hash, V val) {
+    dat[last_position].occupied = true;
+    dat[last_position].key = key;
+    dat[last_position].val = val;
+    dat[last_position].hash = hash;
+  }
+
+  // Doubles the number of slots and re-inserts every existing entry.
+  // num_occupants is unchanged.
+  void double_size() {
+    // remember the old occupants
+    v_array<elem>tmp = v_array<elem>();
+    reserve(tmp, num_occupants+10);
+    for (elem* e=dat.begin; e!=dat.end_array; e++)
+      if (e->occupied)
+        push(tmp, *e);
+
+    // double the size and clear
+    reserve(dat, base_size()*2);
+    memset(dat.begin, 0, base_size()*sizeof(elem));
+
+    // re-insert occupants: get() locates the free slot for each entry in the
+    // enlarged table, then the entry is written there
+    for (elem* e=tmp.begin; e!=tmp.end; e++) {
+      get(e->key, e->hash);
+      put_after_get_nogrow(e->key, e->hash, e->val);
+    }
+    tmp.erase();
+    free(tmp.begin);
+  }
+
+  // Looks up `key` (whose hash is `hash`) and returns its value, or
+  // default_value when it is not present. As a side effect last_position is
+  // left at the matching slot, or at the first empty slot of the probe
+  // sequence, which is where put_after_get() will write.
+  // Terminates the process with exit(-1) if the probe wraps all the way
+  // around, i.e. the table is full and does not contain the key.
+  V get(K key, size_t hash) {
+    size_t sz = base_size();
+    size_t first_position = hash % sz;
+    last_position = first_position;
+    while (true) {
+      // if there's nothing there, obviously we don't contain it
+      if (!dat[last_position].occupied)
+        return default_value;
+
+      // there's something there: maybe it's us
+      if ((dat[last_position].hash == hash) &&
+          ((equivalent == NULL) ||
+           (equivalent(key, dat[last_position].key))))
+        return dat[last_position].val;
+
+      // there's something there that's NOT us -- advance pointer
+      last_position++;
+      if (last_position >= sz)
+        last_position = 0;
+
+      // check to make sure we haven't cycled around -- this is a bug!
+      if (last_position == first_position) {
+        std::cerr << "error: v_hashmap did not grow enough!" << std::endl;
+        exit(-1);
+      }
+    }
+  }
+
+  // only call put_after_get(key, hash, val) if you've already
+  // run get(key, hash). if you haven't already run get, then
+  // you should use put() rather than put_after_get(). these
+  // both will overwrite previous values, if they exist.
+  void put_after_get(K key, size_t hash, V val) {
+    if (!dat[last_position].occupied) {
+      // new key: count it, and grow when we're a quarter full
+      num_occupants++;
+      if (num_occupants*4 >= base_size()) {
+        double_size();
+        // the table was rebuilt, so re-run get() to find the slot for this
+        // key in the new layout
+        get(key, hash);
+      }
+    }
+
+    // now actually insert it
+    put_after_get_nogrow(key, hash, val);
+  }
+
+  // Inserts or overwrites the value stored for `key`.
+  void put(K key, size_t hash, V val) {
+    get(key, hash);
+    put_after_get(key, hash, val);
+  }
+
+ private:
+  // Shared constructor body: allocates the slot table and resets all state.
+  void init(size_t min_size, V def, bool (*eq)(K,K)) {
+    // a zero-slot table would make get() evaluate hash % 0
+    if (min_size == 0)
+      min_size = 1;
+
+    dat = v_array<elem>();
+    reserve(dat, min_size); // reserve sets to 0 ==> occupied=false
+
+    default_value = def;
+    equivalent = eq;
+    hash = NULL;
+
+    last_position = 0;
+    num_occupants = 0;
+  }
+};
+
+void test_v_hashmap();
+
+
+#endif
Please sign in to comment.
Something went wrong with that request. Please try again.