Skip to content

Commit

Permalink
bpo-41493: Refactoring dictresize (pythonGH-21751)
Browse files Browse the repository at this point in the history
Split newsize calculation into new function. dictresize() now accepts exact newsize.
  • Loading branch information
methane committed Aug 7, 2020
1 parent 5f0769a commit d9323a8
Showing 1 changed file with 41 additions and 26 deletions.
67 changes: 41 additions & 26 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ converting the dict to the combined table.
#define PyDict_MINSIZE 8

#include "Python.h"
#include "pycore_bitutils.h" // _Py_bit_length
#include "pycore_gc.h" // _PyObject_GC_IS_TRACKED()
#include "pycore_object.h" // _PyObject_GC_TRACK()
#include "pycore_pyerrors.h" // _PyErr_Fetch()
Expand Down Expand Up @@ -236,7 +237,7 @@ lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject **value_addr);

static int dictresize(PyDictObject *mp, Py_ssize_t minused);
static int dictresize(PyDictObject *mp, Py_ssize_t newsize);

static PyObject* dict_iter(PyDictObject *dict);

Expand Down Expand Up @@ -411,18 +412,40 @@ dictkeys_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
*/
#define USABLE_FRACTION(n) (((n) << 1)/3)

/* ESTIMATE_SIZE is reverse function of USABLE_FRACTION.
/* Find the smallest dk_size >= minsize. */
static inline Py_ssize_t
calculate_keysize(Py_ssize_t minsize)
{
#if SIZEOF_LONG == SIZEOF_SIZE_T
minsize = (minsize | PyDict_MINSIZE) - 1;
return 1LL << _Py_bit_length(minsize | (PyDict_MINSIZE-1));
#elif defined(_MSC_VER)
// On 64bit Windows, sizeof(long) == 4.
minsize = (minsize | PyDict_MINSIZE) - 1;
unsigned long msb;
_BitScanReverse64(&msb, (uint64_t)minsize);
return 1LL << (msb + 1);
#else
Py_ssize_t size;
for (size = PyDict_MINSIZE;
size < minsize && size > 0;
size <<= 1)
;
return size;
#endif
}

/* estimate_keysize is reverse function of USABLE_FRACTION.
*
* This can be used to reserve enough size to insert n entries without
* resizing.
*/
#define ESTIMATE_SIZE(n) (((n)*3+1) >> 1)
static inline Py_ssize_t
estimate_keysize(Py_ssize_t n)
{
return calculate_keysize((n*3 + 1) / 2);
}

/* Alternative fraction that is otherwise close enough to 2n/3 to make
* little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10.
* 32 * 2/3 = 21, 32 * 5/8 = 20.
* Its advantage is that it is faster to compute on machines with slow division.
* #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
*/

/* GROWTH_RATE. Growth rate upon hitting maximum load.
* Currently set to used*3.
Expand Down Expand Up @@ -1036,7 +1059,7 @@ find_empty_slot(PyDictKeysObject *keys, Py_hash_t hash)
static int
insertion_resize(PyDictObject *mp)
{
return dictresize(mp, GROWTH_RATE(mp));
return dictresize(mp, calculate_keysize(GROWTH_RATE(mp)));
}

/*
Expand Down Expand Up @@ -1194,22 +1217,19 @@ After resizing a table is always combined,
but can be resplit by make_keys_shared().
*/
static int
dictresize(PyDictObject *mp, Py_ssize_t minsize)
dictresize(PyDictObject *mp, Py_ssize_t newsize)
{
Py_ssize_t newsize, numentries;
Py_ssize_t numentries;
PyDictKeysObject *oldkeys;
PyObject **oldvalues;
PyDictKeyEntry *oldentries, *newentries;

/* Find the smallest table size > minused. */
for (newsize = PyDict_MINSIZE;
newsize < minsize && newsize > 0;
newsize <<= 1)
;
if (newsize <= 0) {
PyErr_NoMemory();
return -1;
}
assert(IS_POWER_OF_2(newsize));
assert(newsize >= PyDict_MINSIZE);

oldkeys = mp->ma_keys;

Expand Down Expand Up @@ -1355,13 +1375,8 @@ _PyDict_NewPresized(Py_ssize_t minused)
newsize = max_presize;
}
else {
Py_ssize_t minsize = ESTIMATE_SIZE(minused);
newsize = PyDict_MINSIZE*2;
while (newsize < minsize) {
newsize <<= 1;
}
newsize = estimate_keysize(minused);
}
assert(IS_POWER_OF_2(newsize));

new_keys = new_keys_object(newsize);
if (new_keys == NULL)
Expand Down Expand Up @@ -1930,7 +1945,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
PyObject *key;
Py_hash_t hash;

if (dictresize(mp, ESTIMATE_SIZE(PyDict_GET_SIZE(iterable)))) {
if (dictresize(mp, estimate_keysize(PyDict_GET_SIZE(iterable)))) {
Py_DECREF(d);
return NULL;
}
Expand All @@ -1949,7 +1964,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
PyObject *key;
Py_hash_t hash;

if (dictresize(mp, ESTIMATE_SIZE(PySet_GET_SIZE(iterable)))) {
if (dictresize(mp, estimate_keysize(PySet_GET_SIZE(iterable)))) {
Py_DECREF(d);
return NULL;
}
Expand Down Expand Up @@ -2558,7 +2573,7 @@ dict_merge(PyObject *a, PyObject *b, int override)
* that there will be no (or few) overlapping keys.
*/
if (USABLE_FRACTION(mp->ma_keys->dk_size) < other->ma_used) {
if (dictresize(mp, ESTIMATE_SIZE(mp->ma_used + other->ma_used))) {
if (dictresize(mp, estimate_keysize(mp->ma_used + other->ma_used))) {
return -1;
}
}
Expand Down

0 comments on commit d9323a8

Please sign in to comment.