Skip to content

Commit

Permalink
hashmap: add API to disable item counting when threaded
Browse files Browse the repository at this point in the history
This is to address concerns raised by ThreadSanitizer on the mailing list
about threaded unprotected R/W access to map.size with my previous "disallow
rehash" change (0607e10).

See:
https://public-inbox.org/git/adb37b70139fd1e2bac18bfd22c8b96683ae18eb.1502780344.git.martin.agren@gmail.com/

Add API to hashmap to disable item counting and thus automatic rehashing.
Also include API to later re-enable them.

When item counting is disabled, the map.size field is invalid.  So to
prevent accidents, the field has been renamed to map.private_size to
communicate this, and an accessor function hashmap_get_size() has been
added.  All direct references to the field have been updated to use
the accessor.

Here is the relevant output from ThreadSanitizer showing the problem:

WARNING: ThreadSanitizer: data race (pid=10554)
  Read of size 4 at 0x00000082d488 by thread T2 (mutexes: write M16):
    #0 hashmap_add hashmap.c:209
    #1 hash_dir_entry_with_parent_and_prefix name-hash.c:302
    #2 handle_range_dir name-hash.c:347
    #3 handle_range_1 name-hash.c:415
    #4 lazy_dir_thread_proc name-hash.c:471
    #5 <null> <null>

  Previous write of size 4 at 0x00000082d488 by thread T1 (mutexes: write M31):
    #0 hashmap_add hashmap.c:209
    #1 hash_dir_entry_with_parent_and_prefix name-hash.c:302
    #2 handle_range_dir name-hash.c:347
    #3 handle_range_1 name-hash.c:415
    #4 handle_range_dir name-hash.c:380
    #5 handle_range_1 name-hash.c:415
    #6 lazy_dir_thread_proc name-hash.c:471
    #7 <null> <null>

Martin gives instructions for running TSan on test t3008 in this post:
https://public-inbox.org/git/CAN0heSoJDL9pWELD6ciLTmWf-a=oyxe4EXXOmCKvsG5MSuzxsA@mail.gmail.com/

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
jeffhostetler authored and gitster committed Sep 7, 2017
1 parent 238e487 commit 8b604d1
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 40 deletions.
15 changes: 9 additions & 6 deletions attr.c
Expand Up @@ -151,10 +151,12 @@ struct all_attrs_item {
static void all_attrs_init(struct attr_hashmap *map, struct attr_check *check)
{
int i;
unsigned int size;

hashmap_lock(map);

if (map->map.size < check->all_attrs_nr)
size = hashmap_get_size(&map->map);
if (size < check->all_attrs_nr)
die("BUG: interned attributes shouldn't be deleted");

/*
Expand All @@ -163,13 +165,13 @@ static void all_attrs_init(struct attr_hashmap *map, struct attr_check *check)
* field), reallocate the provided attr_check instance's all_attrs
* field and fill each entry with its corresponding git_attr.
*/
if (map->map.size != check->all_attrs_nr) {
if (size != check->all_attrs_nr) {
struct attr_hash_entry *e;
struct hashmap_iter iter;
hashmap_iter_init(&map->map, &iter);

REALLOC_ARRAY(check->all_attrs, map->map.size);
check->all_attrs_nr = map->map.size;
REALLOC_ARRAY(check->all_attrs, size);
check->all_attrs_nr = size;

while ((e = hashmap_iter_next(&iter))) {
const struct git_attr *a = e->value;
Expand Down Expand Up @@ -237,10 +239,11 @@ static const struct git_attr *git_attr_internal(const char *name, int namelen)

if (!a) {
FLEX_ALLOC_MEM(a, name, name, namelen);
a->attr_nr = g_attr_hashmap.map.size;
a->attr_nr = hashmap_get_size(&g_attr_hashmap.map);

attr_hashmap_add(&g_attr_hashmap, a->name, namelen, a);
assert(a->attr_nr == (g_attr_hashmap.map.size - 1));
assert(a->attr_nr ==
(hashmap_get_size(&g_attr_hashmap.map) - 1));
}

hashmap_unlock(&g_attr_hashmap);
Expand Down
2 changes: 1 addition & 1 deletion builtin/describe.c
Expand Up @@ -508,7 +508,7 @@ int cmd_describe(int argc, const char **argv, const char *prefix)

hashmap_init(&names, commit_name_cmp, NULL, 0);
for_each_rawref(get_name, NULL);
if (!names.size && !always)
if (!hashmap_get_size(&names) && !always)
die(_("No names found, cannot describe anything."));

if (argc == 0) {
Expand Down
26 changes: 17 additions & 9 deletions hashmap.c
Expand Up @@ -116,9 +116,6 @@ static void rehash(struct hashmap *map, unsigned int newsize)
unsigned int i, oldsize = map->tablesize;
struct hashmap_entry **oldtable = map->table;

if (map->disallow_rehash)
return;

alloc_table(map, newsize);
for (i = 0; i < oldsize; i++) {
struct hashmap_entry *e = oldtable[i];
Expand Down Expand Up @@ -166,6 +163,12 @@ void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
while (initial_size > size)
size <<= HASHMAP_RESIZE_BITS;
alloc_table(map, size);

/*
* Keep track of the number of items in the map and
* allow the map to automatically grow as necessary.
*/
map->do_count_items = 1;
}

void hashmap_free(struct hashmap *map, int free_entries)
Expand Down Expand Up @@ -206,9 +209,11 @@ void hashmap_add(struct hashmap *map, void *entry)
map->table[b] = entry;

/* fix size and rehash if appropriate */
map->size++;
if (map->size > map->grow_at)
rehash(map, map->tablesize << HASHMAP_RESIZE_BITS);
if (map->do_count_items) {
map->private_size++;
if (map->private_size > map->grow_at)
rehash(map, map->tablesize << HASHMAP_RESIZE_BITS);
}
}

void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)
Expand All @@ -224,9 +229,12 @@ void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)
old->next = NULL;

/* fix size and rehash if appropriate */
map->size--;
if (map->size < map->shrink_at)
rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS);
if (map->do_count_items) {
map->private_size--;
if (map->private_size < map->shrink_at)
rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS);
}

return old;
}

Expand Down
72 changes: 51 additions & 21 deletions hashmap.h
Expand Up @@ -183,7 +183,7 @@ struct hashmap {
const void *cmpfn_data;

/* total number of entries (0 means the hashmap is empty) */
unsigned int size;
unsigned int private_size; /* use hashmap_get_size() */

/*
* tablesize is the allocated size of the hash table. A non-0 value
Expand All @@ -196,8 +196,7 @@ struct hashmap {
unsigned int grow_at;
unsigned int shrink_at;

/* See `hashmap_disallow_rehash`. */
unsigned disallow_rehash : 1;
unsigned int do_count_items : 1;
};

/* hashmap functions */
Expand Down Expand Up @@ -252,6 +251,18 @@ static inline void hashmap_entry_init(void *entry, unsigned int hash)
e->next = NULL;
}

/*
 * Return the number of items currently in the map.
 * Calling this while item counting is disabled is a bug.
 */
static inline unsigned int hashmap_get_size(struct hashmap *map)
{
if (!map->do_count_items) {
BUG("hashmap_get_size: size not set");
return 0;
}
return map->private_size;
}

/*
* Returns the hashmap entry for the specified key, or NULL if not found.
*
Expand Down Expand Up @@ -344,24 +355,6 @@ extern void *hashmap_remove(struct hashmap *map, const void *key,
*/
int hashmap_bucket(const struct hashmap *map, unsigned int hash);

/*
* Disallow/allow rehashing of the hashmap.
* This is useful if the caller knows that the hashmap needs multi-threaded
* access. The caller is still required to guard/lock searches and inserts
* in a manner appropriate to their usage. This simply prevents the table
* from being unexpectedly re-mapped.
*
* It is up to the caller to ensure that the hashmap is initialized to a
* reasonable size to prevent poor performance.
*
* A call to allow rehashing does not force a rehash; that might happen
* with the next insert or delete.
*/
static inline void hashmap_disallow_rehash(struct hashmap *map, unsigned value)
{
map->disallow_rehash = value;
}

/*
* Used to iterate over all entries of a hashmap. Note that it is
* not safe to add or remove entries to the hashmap while
Expand All @@ -387,6 +380,43 @@ static inline void *hashmap_iter_first(struct hashmap *map,
return hashmap_iter_next(iter);
}

/*
 * Disable item counting and automatic rehashing when adding/removing items.
 *
 * Normally, the hashmap keeps track of the number of items in the map
 * and uses it to dynamically resize it. This (both the counting and
 * the resizing) can cause problems when the map is being used by
 * threaded callers (because the hashmap code does not know about the
 * locking strategy used by the threaded callers and therefore, does
 * not know how to protect the "private_size" counter).
 *
 * While counting is disabled, private_size is not maintained, so
 * hashmap_get_size() must not be called until counting is re-enabled
 * with hashmap_enable_item_counting().
 */
static inline void hashmap_disable_item_counting(struct hashmap *map)
{
map->do_count_items = 0;
}

/*
 * Re-enable item counting when adding/removing items.
 * If counting is currently disabled, this forces an accurate recount
 * of the existing items by iterating over the whole map.
 * It WILL NOT automatically rehash the map.
 */
static inline void hashmap_enable_item_counting(struct hashmap *map)
{
void *item;
unsigned int n = 0;
struct hashmap_iter iter;

/* already counting: private_size is valid, nothing to do */
if (map->do_count_items)
return;

/* walk the map to rebuild an accurate private_size */
hashmap_iter_init(map, &iter);
while ((item = hashmap_iter_next(&iter)))
n++;

map->do_count_items = 1;
map->private_size = n;
}

/* String interning */

/*
Expand Down
10 changes: 8 additions & 2 deletions name-hash.c
Expand Up @@ -584,9 +584,15 @@ static void lazy_init_name_hash(struct index_state *istate)
hashmap_init(&istate->dir_hash, dir_entry_cmp, NULL, istate->cache_nr);

if (lookup_lazy_params(istate)) {
hashmap_disallow_rehash(&istate->dir_hash, 1);
/*
* Disable item counting and automatic rehashing because
* we do per-chain (mod n) locking rather than whole hashmap
* locking and we need to prevent the table-size from changing
* and bucket items from being redistributed.
*/
hashmap_disable_item_counting(&istate->dir_hash);
threaded_lazy_init_name_hash(istate);
hashmap_disallow_rehash(&istate->dir_hash, 0);
hashmap_enable_item_counting(&istate->dir_hash);
} else {
int nr;
for (nr = 0; nr < istate->cache_nr; nr++)
Expand Down
3 changes: 2 additions & 1 deletion t/helper/test-hashmap.c
Expand Up @@ -235,7 +235,8 @@ int cmd_main(int argc, const char **argv)
} else if (!strcmp("size", cmd)) {

/* print table sizes */
printf("%u %u\n", map.tablesize, map.size);
printf("%u %u\n", map.tablesize,
hashmap_get_size(&map));

} else if (!strcmp("intern", cmd) && l1) {

Expand Down

0 comments on commit 8b604d1

Please sign in to comment.