Skip to content

Commit

Permalink
lru: various fixups after review
Browse files Browse the repository at this point in the history
- upgrade key and value lengths to 16 bits;
- convert array indexing to unsigned types;
- use two memory contexts;
- remove vectorization #pragma (no noticeable difference anymore);
- benchmark: fix an off-by-one out-of-bounds access and some nitpicks;
- benchmark: report real capacity, output CSV;
- Makefile: fixup cleaning bench/*
  • Loading branch information
vcunat committed Sep 7, 2016
1 parent d64d58c commit 509dec6
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 60 deletions.
2 changes: 1 addition & 1 deletion Makefile
Expand Up @@ -5,7 +5,7 @@ include platform.mk
all: info lib daemon modules
install: lib-install daemon-install modules-install etc-install
check: all tests
clean: contrib-clean lib-clean daemon-clean modules-clean tests-clean doc-clean
clean: contrib-clean lib-clean daemon-clean modules-clean tests-clean doc-clean bench-clean
doc: doc-html
.PHONY: all install check clean doc info

Expand Down
12 changes: 6 additions & 6 deletions bench/bench.mk
Expand Up @@ -27,9 +27,9 @@ $(foreach bench,$(bench_BIN),$(eval $(call make_bench,$(bench))))
.PHONY: bench bench-clean
bench-clean: $(foreach bench,$(bench_BIN),$(bench)-clean)
bench: $(foreach bench,$(bench_BIN),bench/$(bench))
# Test LRU with increasing overfill, misses should increase ~ linearly
@./bench/bench_lru 22 bench/bench_lru_set1.tsv - 65536 # fill = 1
@./bench/bench_lru 23 bench/bench_lru_set1.tsv - 32768 # fill = 2
@./bench/bench_lru 23 bench/bench_lru_set1.tsv - 16384 # fill = 4
@./bench/bench_lru 23 bench/bench_lru_set1.tsv - 8192 # fill = 8
@./bench/bench_lru 23 bench/bench_lru_set1.tsv - 4096 # fill = 16
@echo "Test LRU with increasing overfill, misses should increase ~ linearly" >&2
@./bench/bench_lru 23 bench/bench_lru_set1.tsv - 65536 # fill ~ 1
@./bench/bench_lru 23 bench/bench_lru_set1.tsv - 32768 # fill ~ 2
@./bench/bench_lru 23 bench/bench_lru_set1.tsv - 16384 # fill ~ 4
@./bench/bench_lru 23 bench/bench_lru_set1.tsv - 8192 # fill ~ 8
@./bench/bench_lru 23 bench/bench_lru_set1.tsv - 4096 # fill ~ 16
85 changes: 64 additions & 21 deletions bench/bench_lru.c
Expand Up @@ -2,6 +2,7 @@
#include <math.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/time.h>
#include <unistd.h>

Expand All @@ -11,17 +12,25 @@

typedef kr_nsrep_lru_t lru_bench_t;

/* Output helpers: p_out() prints to stdout and flushes after every call;
 * p_err() prints to stderr.  The benchmark sends the measured values
 * (comma-terminated) through p_out() and their human-readable labels through
 * p_err(); usage() states that standard output carries csv-formatted lines.
 * NOTE(review): the flush presumably keeps the two streams in order when both
 * end up on the same terminal -- confirm. */
#define p_out(...) do { \
printf(__VA_ARGS__); \
fflush(stdout); \
} while (0)
#define p_err(...) fprintf(stderr, __VA_ARGS__)

/** Report a fatal error and terminate the process.
 * Prints "<cause>: <strerror(errno)>" to stderr and calls exit(1), so control
 * never returns to the caller; the declared int result is never produced. */
static int die(const char *cause) {
static int die(const char *cause)
{
fprintf(stderr, "%s: %s\n", cause, strerror(errno));
exit(1);
}

/// Store the current time in *tv using gettimeofday(); if that call reports
/// failure, the process is terminated through die().
static void time_get(struct timeval *tv) {
static void time_get(struct timeval *tv)
{
if (gettimeofday(tv, NULL))
die("gettimeofday");
}
static void time_print_diff(struct timeval *tv, size_t op_count) {
static void time_print_diff(struct timeval *tv, size_t op_count)
{
struct timeval now;
time_get(&now);
now.tv_sec -= tv->tv_sec;
Expand All @@ -33,11 +42,16 @@ static void time_print_diff(struct timeval *tv, size_t op_count) {

size_t speed = round((double)(op_count) / 1000
/ (now.tv_sec + (double)(now.tv_usec)/1000000));
printf("\t%ld.%06d s, \t %zd kop/s\n", now.tv_sec, (int)now.tv_usec, speed);

p_out("%ld.%06d", now.tv_sec, (int)now.tv_usec);
p_err(" s"); p_out(","); p_err("\t");
p_out("%zd", speed);
p_err(" kops/s"); p_out(","); p_err("\n");
}

/// initialize seed for random()
static int ssrandom(char *s) {
static int ssrandom(char *s)
{
if (*s == '-') { // initialize from time
struct timeval now;
time_get(&now);
Expand All @@ -60,7 +74,8 @@ struct key {
};

/// read lines from a file and reorder them randomly
static struct key * read_lines(const char *fname, size_t *count) {
static struct key * read_lines(const char *fname, size_t *count, char **pfree)
{
// read the file at once
int fd = open(fname, O_RDONLY);
if (fd < 0)
Expand All @@ -70,6 +85,7 @@ static struct key * read_lines(const char *fname, size_t *count) {
die("stat");
size_t flen = (size_t)st.st_size;
char *fbuf = malloc(flen + 1);
*pfree = fbuf;
if (fbuf == NULL)
die("malloc");
if (read(fd, fbuf, flen) < 0)
Expand All @@ -86,7 +102,11 @@ static struct key * read_lines(const char *fname, size_t *count) {
}
*count = lines;
size_t avg_len = (flen + 1) / lines - 1;
printf("%zu lines read, average length %zu\n", lines, avg_len);

p_err("lines read: ");
p_out("%zu,", lines);
p_err("\taverage length ");
p_out("%zu,", avg_len);

struct key *result = calloc(lines, sizeof(struct key));
result[0].chars = fbuf;
Expand Down Expand Up @@ -122,47 +142,61 @@ static struct key * read_lines(const char *fname, size_t *count) {
#define lru_get_try lru_get
#endif

/// Print the command-line synopsis for `progname` to stderr and exit with
/// status 1; this function does not return.
static void usage(const char *progname) {
fprintf(stderr, "usage: %s <log_count> <input> <seed> [lru_size]\n"
"The seed must be at least 12 characters or \"-\".\n" , progname);
static void usage(const char *progname)
{
p_err("usage: %s <log_count> <input> <seed> [lru_size]\n", progname);
p_err("The seed must be at least 12 characters or \"-\".\n"
"Standard output contains csv-formatted lines.\n");
exit(1);
}

int main(int argc, char ** argv) {

int main(int argc, char ** argv)
{
if (argc != 4 && argc != 5)
usage(argv[0]);
if (ssrandom(argv[3]) < 0)
usage(argv[0]);

p_out("\n");
size_t key_count;
struct key *keys = read_lines(argv[2], &key_count);
char *data_to_free = NULL;
struct key *keys = read_lines(argv[2], &key_count, &data_to_free);
size_t run_count;
{
size_t run_log = atoi(argv[1]);
assert(run_log < 64);
run_count = 1ULL << run_log;
printf("test run length: 2^%zd\n", run_log);
p_err("\ntest run length:\t2^");
p_out("%zd,", run_log);
}

struct timeval time;
const int lru_size = argc > 4 ? atoi(argv[4]) : LRU_RTT_SIZE;

lru_bench_t *lru;
#ifdef lru_create
lru_create(&lru, lru_size, NULL);
lru_create(&lru, lru_size, NULL, NULL);
#else
lru = malloc(lru_size(lru_bench_t, lru_size));
if (lru)
lru_init(lru, lru_size);
#endif
if (!lru)
die("malloc");
printf("LRU size:\t%d\n", lru_size);
p_err("\nLRU capacity:\t");
p_out("%d,",
#ifdef lru_capacity
lru_capacity(lru) // report real capacity, if provided
#else
lru_size
#endif
);

size_t miss = 0;
p_err("\nload everything:\t");
time_get(&time);
printf("load everything:");
for (size_t i = 0, ki = key_count; i < run_count; ++i, --ki) {
for (size_t i = 0, ki = key_count - 1; i < run_count; ++i, --ki) {
unsigned *r = lru_get_new(lru, keys[ki].chars, keys[ki].len);
if (!r || *r == 0)
++miss;
Expand All @@ -172,20 +206,29 @@ int main(int argc, char ** argv) {
ki = key_count;
}
time_print_diff(&time, run_count);
printf("LRU misses:\t%zd%%\n", (miss * 100 + 50) / run_count);
p_err("LRU misses [%%]:\t");
p_out("%zd,",(miss * 100 + 50) / run_count);
p_err("\n");

unsigned accum = 0; // compute something to make sure compiler can't remove code
p_err("search everything:\t");
time_get(&time);
printf("search everything:");
for (size_t i = 0, ki = key_count; i < run_count; ++i, --ki) {
for (size_t i = 0, ki = key_count - 1; i < run_count; ++i, --ki) {
unsigned *r = lru_get_try(lru, keys[ki].chars, keys[ki].len);
if (r)
accum += *r;
if (unlikely(ki == 0))
ki = key_count;
}
time_print_diff(&time, run_count);
printf("ignore: %u\n", accum);
p_err("ignore: %u\n", accum);

// free memory, at least with new LRU
#ifdef lru_create
lru_free(lru);
#endif
free(keys);
free(data_to_free);

return 0;
}
Expand Down
6 changes: 3 additions & 3 deletions daemon/engine.c
Expand Up @@ -461,9 +461,9 @@ static int init_resolver(struct engine *engine)
kr_zonecut_init(&engine->resolver.root_hints, (const uint8_t *)"", engine->pool);
kr_zonecut_set_sbelt(&engine->resolver, &engine->resolver.root_hints);
/* Open NS rtt + reputation cache */
lru_create(&engine->resolver.cache_rtt, LRU_RTT_SIZE, engine->pool);
lru_create(&engine->resolver.cache_rep, LRU_REP_SIZE, engine->pool);
lru_create(&engine->resolver.cache_cookie, LRU_COOKIES_SIZE, engine->pool);
lru_create(&engine->resolver.cache_rtt, LRU_RTT_SIZE, engine->pool, NULL);
lru_create(&engine->resolver.cache_rep, LRU_REP_SIZE, engine->pool, NULL);
lru_create(&engine->resolver.cache_cookie, LRU_COOKIES_SIZE, engine->pool, NULL);

/* Load basic modules */
engine_register(engine, "iterate", NULL, NULL);
Expand Down
39 changes: 19 additions & 20 deletions lib/generic/lru.c
Expand Up @@ -20,8 +20,8 @@
typedef struct lru_group lru_group_t;

/** @internal One stored entry: two length fields followed by a flexible array
 * member.  The key bytes are copied to the start of data[]; the value is
 * located through item_val(). */
struct lru_item {
uint8_t key_len, val_len; /**< Single byte should be enough for our purposes. */
char data[]; /**< Place for both key and value. */
uint16_t key_len, val_len; /**< Two bytes should be enough for our purposes. */
char data[]; /**< Place for both key and value. */
};

/** @internal Compute offset of value in struct lru_item. */
Expand All @@ -48,20 +48,20 @@ static uint item_size(uint key_len, uint val_len)
/** @internal Release every item stored in the LRU through the lru->mm context.
 * Only the items are freed here; the group array and the lru structure itself
 * are not touched by this function.
 * @param lru  the LRU whose items are released; must not be NULL (asserted). */
KR_EXPORT void lru_free_items_impl(struct lru *lru)
{
	assert(lru);
	// Widen the shifted value, not the shift count: `1 << (size_t)n` is still
	// evaluated in int, whereas `(size_t)1 << n` yields the count in size_t.
	const size_t group_count = (size_t)1 << lru->log_groups;
	for (size_t i = 0; i < group_count; ++i) {
		lru_group_t *g = &lru->groups[i];
		for (unsigned j = 0; j < LRU_ASSOC; ++j)
			mm_free(lru->mm, g->items[j]);
	}
}

/** @internal See lru_apply. */
KR_EXPORT void lru_apply_impl(struct lru *lru, lru_apply_fun f, void *baton) // TODO: re-read
KR_EXPORT void lru_apply_impl(struct lru *lru, lru_apply_fun f, void *baton)
{
assert(lru);
for (int i = 0; i < (1 << lru->log_groups); ++i) {
for (size_t i = 0; i < (1 << (size_t)lru->log_groups); ++i) {
lru_group_t *g = &lru->groups[i];
for (int j = 0; j < LRU_ASSOC; ++j) {
for (uint j = 0; j < LRU_ASSOC; ++j) {
struct lru_item *it = g->items[j];
if (!it)
continue;
Expand All @@ -78,7 +78,7 @@ KR_EXPORT void lru_apply_impl(struct lru *lru, lru_apply_fun f, void *baton) //
}

/** @internal See lru_create. */
KR_EXPORT struct lru * lru_create_impl(uint max_slots, knot_mm_t *mm)
KR_EXPORT struct lru * lru_create_impl(uint max_slots, knot_mm_t *mm_array, knot_mm_t *mm)
{
assert(max_slots);
// let lru->log_groups = ceil(log2(max_slots / (float) assoc))
Expand All @@ -91,11 +91,12 @@ KR_EXPORT struct lru * lru_create_impl(uint max_slots, knot_mm_t *mm)
assert(max_slots <= group_count * LRU_ASSOC && group_count * LRU_ASSOC < 2 * max_slots);

size_t size = offsetof(struct lru, groups[group_count]);
struct lru *lru = mm_alloc(mm, size);
struct lru *lru = mm_alloc(mm_array, size);
if (unlikely(lru == NULL))
return NULL;
*lru = (struct lru){
.mm = mm,
.mm_array = mm_array,
.log_groups = log_groups,
};
// zeros are a good init
Expand All @@ -105,20 +106,16 @@ KR_EXPORT struct lru * lru_create_impl(uint max_slots, knot_mm_t *mm)

/** Swap two places; it could be made public if useful elsewhere.
 * Both arguments must be lvalues of equal size: when the sizes differ, the
 * temporary buffer is declared with length -1, which fails to compile.
 * The contents are exchanged bytewise with memcpy through that temporary. */
#define swap(x, y) do { /* http://stackoverflow.com/a/3982430/587396 */ \
unsigned char swap_temp[sizeof(x) == sizeof(y) ? (signed)sizeof(x) : -1]; \
unsigned char swap_temp[sizeof(x) == sizeof(y) ? (ssize_t)sizeof(x) : -1]; \
memcpy(swap_temp, &y, sizeof(x)); \
memcpy(&y, &x, sizeof(x)); \
memcpy(&x, swap_temp, sizeof(x)); \
} while(0)

#if defined(NDEBUG) && defined(__GNUC__)
#pragma GCC optimize "-ftree-vectorize"
#endif

/** @internal Decrement all counters within a group.
 * The last counter, counts[LRU_TRACKED], is first reset to LRU_TRACKED; then
 * each of the LRU_TRACKED + 1 counters is decremented unless it is already
 * zero, so no counter wraps below zero. */
static void group_dec_counts(lru_group_t *g) {
g->counts[LRU_TRACKED] = LRU_TRACKED;
for (int i = 0; i < LRU_TRACKED + 1; ++i) // vectorized?
for (uint i = 0; i < LRU_TRACKED + 1; ++i)
if (likely(g->counts[i]))
--g->counts[i];
}
Expand All @@ -131,17 +128,19 @@ static void group_inc_count(lru_group_t *g, int i) {
// We could've decreased or halved all of them, but let's keep the max.
}

/** @internal Implementation of both getting and insertion. */
/** @internal Implementation of both getting and insertion.
* Note: val_len is only meaningful if do_insert. */
KR_EXPORT void * lru_get_impl(struct lru *lru, const char *key, uint key_len,
uint val_len, bool do_insert)
{
assert(lru && (key || !key_len) && key_len < 256);
assert(lru && (key || !key_len) && key_len <= UINT16_MAX
&& (!do_insert || val_len <= UINT16_MAX));
// find the right group
uint32_t khash = hash(key, key_len);
uint16_t khash_top = khash >> 16;
lru_group_t *g = &lru->groups[khash & ((1 << lru->log_groups) - 1)];
struct lru_item *it;
int i;
uint i;
// scan the *stored* elements in the group
for (i = 0; i < LRU_ASSOC; ++i)
if (g->hashes[i] == khash_top) {
Expand All @@ -162,7 +161,7 @@ KR_EXPORT void * lru_get_impl(struct lru *lru, const char *key, uint key_len,
if (!do_insert)
return NULL;
// check if we trumped some stored key
for (int j = 0; j < LRU_ASSOC; ++j)
for (uint j = 0; j < LRU_ASSOC; ++j)
if (unlikely(g->counts[i] > g->counts[j])) {
// evict key j, i.e. swap with i
--g->counts[i]; // we increment it below
Expand All @@ -180,7 +179,7 @@ KR_EXPORT void * lru_get_impl(struct lru *lru, const char *key, uint key_len,
group_dec_counts(g);
return NULL;
insert: // insert into position i (incl. key)
assert(i >= 0 && i < LRU_ASSOC);
assert(i < LRU_ASSOC);
g->hashes[i] = khash_top;
it = g->items[i];
uint new_size = item_size(key_len, val_len);
Expand All @@ -196,7 +195,7 @@ KR_EXPORT void * lru_get_impl(struct lru *lru, const char *key, uint key_len,
memcpy(it->data, key, key_len);
memset(item_val(it), 0, val_len); // clear the value
found: // key and hash OK on g->items[i]; now update stamps
assert(i >= 0 && i < LRU_ASSOC);
assert(i < LRU_ASSOC);
group_inc_count(g, i);
return item_val(g->items[i]);
}
Expand Down

0 comments on commit 509dec6

Please sign in to comment.