
kmalloc: Use 'fls' to round up the size to the nearest power of 2

In tests conducted on Intel i3, i7 and Xeon E3 CPUs in x86_64 mode, the
fls version is on average 3 times faster than the simple loop version.

Submitted-by: vsrinivas@

Also use the M_POWEROF2 flag to round the size up to the nearest power
of 2, instead of a separate function (was kmalloc_powerof2).

Suggested-by: sjg@, vsrinivas@
1 parent 16348f3, commit 1e57f8673c953ace8a25ada0a39ab83008646ce3, Sepherosa Ziehau committed Oct 8, 2012
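
The speedup claim in the commit message is easy to sanity-check outside the kernel. The harness below is only an illustrative sketch, not the committed code: the roundup_pow2_* names are made up for this example, and __builtin_clzl() stands in for the kernel's flsl(). The loop version does one shift-and-compare per bit of the result, while the fls approach reduces the rounding to a single bit scan plus a shift, which is consistent with the measured ~3x difference.

#include <stdio.h>

/* Loop version (what kmalloc_powerof2 used to do): shift a power of 2
 * up until it covers the requested size. */
static unsigned long
roundup_pow2_loop(unsigned long size)
{
	unsigned long n;

	for (n = 1; n < size; n <<= 1)
		;	/* EMPTY */
	return n;
}

/* fls version: locate the highest set bit, then bump it one position
 * unless the size is already an exact power of 2.
 * __builtin_clzl() stands in for the kernel's flsl(). */
static unsigned long
roundup_pow2_fls(unsigned long size)
{
	int i;

	if (size == 0)
		return 0;
	i = (int)(sizeof(size) * 8) - __builtin_clzl(size);	/* flsl(size) */
	if ((size & ~(1UL << (i - 1))) == 0)
		--i;	/* already a power of 2, keep it as-is */
	return 1UL << i;
}

int
main(void)
{
	unsigned long sizes[] = { 1, 7, 64, 100, 4096, 5000 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i) {
		printf("%6lu -> loop %6lu  fls %6lu\n", sizes[i],
		    roundup_pow2_loop(sizes[i]),
		    roundup_pow2_fls(sizes[i]));
	}
	return 0;
}

Both functions agree on every input; wrapping the calls in a timing loop gives the flavor of the measurement, although absolute numbers depend on the CPU.
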
@@ -492,6 +492,22 @@ slab_record_source(SLZone *z, const char *file, int line)
#endif
+static __inline unsigned long
+powerof2_size(unsigned long size)
+{
+	unsigned long wt;
+	int i;
+
+	if (size == 0)
+		return 0;
+
+	/* flsl() returns the 1-based index of the highest set bit. */
+	i = flsl(size);
+	/* Mask off the highest bit; if nothing remains, the size is
+	 * already a power of 2 and must not be rounded up further. */
+	wt = size & ~(1UL << (i - 1));
+	if (!wt)
+		--i;
+
+	return (1UL << i);
+}
+
/*
* kmalloc() (SLAB ALLOCATOR)
*
@@ -505,6 +521,7 @@ slab_record_source(SLZone *z, const char *file, int line)
* M_ZERO - zero the returned memory.
* M_USE_RESERVE - allow greater drawdown of the free list
* M_USE_INTERRUPT_RESERVE - allow the freelist to be exhausted
+ * M_POWEROF2 - roundup size to the nearest power of 2
*
* MPSAFE
*/
@@ -545,6 +562,9 @@ kmalloc(unsigned long size, struct malloc_type *type, int flags)
	}
	++type->ks_calls;
+	if (flags & M_POWEROF2)
+		size = powerof2_size(size);
+
	/*
	 * Handle the case where the limit is reached. Panic if we can't return
	 * NULL. The original malloc code looped, but this tended to
@@ -1566,21 +1586,11 @@ kmem_slab_free(void *ptr, vm_size_t size)
crit_exit();
}
-void *
-kmalloc_powerof2(unsigned long size_alloc, struct malloc_type *type, int flags)
-{
-	unsigned long size;
-
-	for (size = 1; size < size_alloc; size <<= 1)
-		;	/* EMPTY */
-	return kmalloc(size, type, flags);
-}
-
void *
kmalloc_cachealign(unsigned long size_alloc, struct malloc_type *type,
    int flags)
{
	if (size_alloc < __VM_CACHELINE_SIZE)
		size_alloc = __VM_CACHELINE_SIZE;
-	return kmalloc_powerof2(size_alloc, type, flags);
+	return kmalloc(size_alloc, type, flags | M_POWEROF2);
}
@@ -574,7 +574,8 @@ bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
		maxsize = check_kmalloc(dmat, *vaddr, 0);
		if (maxsize) {
			kfree(*vaddr, M_DEVBUF);
-			*vaddr = kmalloc_powerof2(maxsize, M_DEVBUF, mflags);
+			*vaddr = kmalloc(maxsize, M_DEVBUF,
+			    mflags | M_POWEROF2);
			check_kmalloc(dmat, *vaddr, 1);
		}
	} else {
@@ -574,7 +574,8 @@ bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
		maxsize = check_kmalloc(dmat, *vaddr, 0);
		if (maxsize) {
			kfree(*vaddr, M_DEVBUF);
-			*vaddr = kmalloc_powerof2(maxsize, M_DEVBUF, mflags);
+			*vaddr = kmalloc(maxsize, M_DEVBUF,
+			    mflags | M_POWEROF2);
			check_kmalloc(dmat, *vaddr, 1);
		}
	} else {
@@ -63,6 +63,7 @@
#define M_PASSIVE_ZERO 0x0800 /* (internal to the slab code only) */
#define M_USE_INTERRUPT_RESERVE \
0x1000 /* can exhaust free list entirely */
+#define M_POWEROF2 0x2000 /* roundup size to the nearest power of 2 */
/*
* M_NOWAIT has to be a set of flags for equivalence to prior use.
@@ -204,8 +205,6 @@ char *kstrdup (const char *, struct malloc_type *);
#define kstrdup_debug(str, type, file, line) \
kstrdup(str, type)
#endif
-void *kmalloc_powerof2 (unsigned long size, struct malloc_type *type,
- int flags);
void *kmalloc_cachealign (unsigned long size, struct malloc_type *type,
int flags);
void kfree (void *addr, struct malloc_type *type);
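
As a usage note (not part of the diff above), any caller that previously went through kmalloc_powerof2() now just ORs the new flag into its kmalloc() call. The buffer size below is hypothetical; M_DEVBUF appears in the diff and M_WAITOK is the usual blocking-allocation flag:

	void *buf;

	/* Hypothetical call: request 1500 bytes; with M_POWEROF2 the slab
	 * allocator rounds the request up to 2048 bytes before allocating. */
	buf = kmalloc(1500, M_DEVBUF, M_WAITOK | M_POWEROF2);
	/* ... use buf ... */
	kfree(buf, M_DEVBUF);
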
