Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial support for keys following zipfian #166

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bash-completion/memtier_benchmark
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ _memtier_completions()

options_no_args=("--debug" "--show-config" "--hide-histogram" "--distinct-client-seed" "--randomize"\
"--random-data" "--data-verify" "--verify-only" "--generate-keys" "--key-stddev"\
"--key-median" "--no-expiry" "--cluster-mode" "--help" "--version"\
"--key-median" "--key-zipf-exp" "--no-expiry" "--cluster-mode" "--help" "--version"\
"-D" "-R" "-h" "-v")

options_comp=("--protocol" "-P" "--key-pattern" "--data-size-pattern" "--command-key-pattern")
Expand Down
4 changes: 4 additions & 0 deletions client.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ class client : public connections_manager {
return OBJECT_GENERATOR_KEY_RANDOM;
} else if (cfg->key_pattern[index] == 'G') {
return OBJECT_GENERATOR_KEY_GAUSSIAN;
} else if (cfg->key_pattern[index] == 'Z') {
return OBJECT_GENERATOR_KEY_ZIPFIAN;
} else {
if (index == key_pattern_set)
return OBJECT_GENERATOR_KEY_SET_ITER;
Expand All @@ -137,6 +139,8 @@ class client : public connections_manager {
return OBJECT_GENERATOR_KEY_RANDOM;
} else if (cmd->key_pattern == 'G') {
return OBJECT_GENERATOR_KEY_GAUSSIAN;
} else if (cmd->key_pattern == 'Z') {
return OBJECT_GENERATOR_KEY_ZIPFIAN;
} else {
return index;
}
Expand Down
5 changes: 5 additions & 0 deletions memtier_benchmark.1
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ Key ID maximum value (default: 10000000)
\fB\-\-key\-pattern\fR=\fI\,PATTERN\/\fR
Set:Get pattern (default: R:R)
G for Gaussian distribution.
Z for Zipfian distribution (will limit keys to positive).
R for uniform Random.
S for Sequential.
P for Parallel (Sequential where each client has a subset of the key\-range).
Expand All @@ -195,6 +196,10 @@ The standard deviation used in the Gaussian distribution
\fB\-\-key\-median\fR
The median point used in the Gaussian distribution
(default is the center of the key range)
.TP
\fB\-\-key\-zipf\-exp\fR
The exponent used in the Zipfian distribution, limited to the open interval (0, 5)
(default is 1, though any number >2 seems insane)
.SS "WAIT Options:"
.TP
\fB\-\-wait\-ratio\fR=\fI\,RATIO\/\fR
Expand Down
25 changes: 24 additions & 1 deletion memtier_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ static void config_print_to_json(json_handler * jsonhandler, struct benchmark_co
jsonhandler->write_obj("key_pattern" ,"\"%s\"", cfg->key_pattern);
jsonhandler->write_obj("key_stddev" ,"%f", cfg->key_stddev);
jsonhandler->write_obj("key_median" ,"%f", cfg->key_median);
jsonhandler->write_obj("key_zipf_exp" ,"%f", cfg->key_zipf_exp);
jsonhandler->write_obj("reconnect_interval","%u", cfg->reconnect_interval);
jsonhandler->write_obj("multi_key_get" ,"%u", cfg->multi_key_get);
jsonhandler->write_obj("authenticate" ,"\"%s\"", cfg->authenticate ? cfg->authenticate : "");
Expand Down Expand Up @@ -365,6 +366,7 @@ static int config_parse_args(int argc, char *argv[], struct benchmark_config *cf
o_key_pattern,
o_key_stddev,
o_key_median,
o_key_zipf_exp,
o_show_config,
o_hide_histogram,
o_print_percentiles,
Expand Down Expand Up @@ -439,6 +441,7 @@ static int config_parse_args(int argc, char *argv[], struct benchmark_config *cf
{ "key-pattern", 1, 0, o_key_pattern },
{ "key-stddev", 1, 0, o_key_stddev },
{ "key-median", 1, 0, o_key_median },
{ "key-zipf-exp", 1, 0, o_key_zipf_exp},
{ "reconnect-interval", 1, 0, o_reconnect_interval },
{ "multi-key-get", 1, 0, o_multi_key_get },
{ "authenticate", 1, 0, 'a' },
Expand Down Expand Up @@ -690,19 +693,29 @@ static int config_parse_args(int argc, char *argv[], struct benchmark_config *cf
return -1;
}
break;
case o_key_zipf_exp:
endptr = NULL;
cfg->key_zipf_exp = strtod(optarg, &endptr);
if (cfg->key_zipf_exp <= 0 || cfg->key_zipf_exp >= 5 || !endptr || *endptr != '\0') {
fprintf(stderr, "error: key-zipf-exp must be within interval (0, 5).\n");
return -1;
}
break;
case o_key_pattern:
cfg->key_pattern = optarg;

if (strlen(cfg->key_pattern) != 3 || cfg->key_pattern[key_pattern_delimiter] != ':' ||
(cfg->key_pattern[key_pattern_set] != 'R' &&
cfg->key_pattern[key_pattern_set] != 'S' &&
cfg->key_pattern[key_pattern_set] != 'G' &&
cfg->key_pattern[key_pattern_set] != 'Z' &&
cfg->key_pattern[key_pattern_set] != 'P') ||
(cfg->key_pattern[key_pattern_get] != 'R' &&
cfg->key_pattern[key_pattern_get] != 'S' &&
cfg->key_pattern[key_pattern_get] != 'G' &&
cfg->key_pattern[key_pattern_get] != 'Z' &&
cfg->key_pattern[key_pattern_get] != 'P')) {
fprintf(stderr, "error: key-pattern must be in the format of [S/R/G/P]:[S/R/G/P].\n");
fprintf(stderr, "error: key-pattern must be in the format of [S/R/G/P/Z]:[S/R/G/P/Z].\n");
return -1;
}

Expand Down Expand Up @@ -939,12 +952,15 @@ void usage() {
" --key-pattern=PATTERN Set:Get pattern (default: R:R)\n"
" G for Gaussian distribution.\n"
" R for uniform Random.\n"
" Z for zipf distribution (will limit keys to positive).\n"
" S for Sequential.\n"
" P for Parallel (Sequential were each client has a subset of the key-range).\n"
" --key-stddev The standard deviation used in the Gaussian distribution\n"
" (default is key range / 6)\n"
" --key-median The median point used in the Gaussian distribution\n"
" (default is the center of the key range)\n"
" --key-zipf-exp The exponent used in the zipf distribution, limit to (0, 5)\n"
" (default is 1, though any number >2 seems insane)\n"
"\n"
"WAIT Options:\n"
" --wait-ratio=RATIO Set:Wait ratio (default is no WAIT commands - 1:0)\n"
Expand Down Expand Up @@ -1465,6 +1481,13 @@ int main(int argc, char *argv[])
obj_gen->set_key_distribution(cfg.key_stddev, cfg.key_median);
}
obj_gen->set_expiry_range(cfg.expiry_range.min, cfg.expiry_range.max);
if (cfg.key_pattern[key_pattern_set] == 'Z' || cfg.key_pattern[key_pattern_get] == 'Z') {
if (cfg.key_zipf_exp == 0.0) {
// user can't specify 0.0, so 0.0 means unset
cfg.key_zipf_exp = 1.0;
}
obj_gen->set_key_zipf_distribution(cfg.key_zipf_exp);
}

// Prepare output file
FILE *outfile;
Expand Down
1 change: 1 addition & 0 deletions memtier_benchmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ struct benchmark_config {
unsigned long long key_maximum;
double key_stddev;
double key_median;
double key_zipf_exp;
const char *key_pattern;
unsigned int reconnect_interval;
int multi_key_get;
Expand Down
106 changes: 105 additions & 1 deletion obj_gen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,14 @@ object_generator::object_generator(size_t n_key_iterators/*= OBJECT_GENERATOR_KE
m_key_max(0),
m_key_stddev(0),
m_key_median(0),
m_key_zipf_min(0),
m_key_zipf_max(0),
m_key_zipf_exp(1),
m_key_zipf_1mexp(0),
m_key_zipf_1mexpInv(0),
m_key_zipf_Hmin(0),
m_key_zipf_Hmax(0),
m_key_zipf_s(0),
m_value_buffer(NULL),
m_random_fd(-1),
m_value_buffer_size(0),
Expand All @@ -172,6 +180,14 @@ object_generator::object_generator(const object_generator& copy) :
m_key_max(copy.m_key_max),
m_key_stddev(copy.m_key_stddev),
m_key_median(copy.m_key_median),
m_key_zipf_min(copy.m_key_zipf_min),
m_key_zipf_max(copy.m_key_zipf_max),
m_key_zipf_exp(copy.m_key_zipf_exp),
m_key_zipf_1mexp(copy.m_key_zipf_1mexp),
m_key_zipf_1mexpInv(copy.m_key_zipf_1mexpInv),
m_key_zipf_Hmin(copy.m_key_zipf_Hmin),
m_key_zipf_Hmax(copy.m_key_zipf_Hmax),
m_key_zipf_s(copy.m_key_zipf_s),
m_value_buffer(NULL),
m_random_fd(-1),
m_value_buffer_size(0),
Expand Down Expand Up @@ -348,6 +364,47 @@ void object_generator::set_key_distribution(double key_stddev, double key_median
m_key_median = key_median;
}

// Configure the Zipfian key sampler for exponent key_exp and cache the
// constants that zipf_distribution() reuses on every draw.
// Reads m_key_min / m_key_max, so it should be called after set_key_range
// (as done in memtier_benchmark.cpp).
void object_generator::set_key_zipf_distribution(double key_exp)
{
    const double tolerance = 1e-4;
    const bool near_zero = key_exp < tolerance;
    const bool near_one = !near_zero && fabs(key_exp - 1) < tolerance;

    // Exponents within tolerance of 0 or 1 are snapped onto the exact value.
    m_key_zipf_exp = near_zero ? 0. : (near_one ? 1. : key_exp);

    // The formulas below divide by the minimum key, so a minimum of 0 is
    // replaced by 1; the maximum is never allowed below the adjusted minimum.
    m_key_zipf_min = (m_key_min == 0) ? 1 : m_key_min;
    m_key_zipf_max = (m_key_max <= m_key_zipf_min) ? m_key_zipf_min : m_key_max;

    if (near_zero)
        return; // degenerated to uniform distribution, nothing to precompute

    if (near_one) {
        // Exponent 1: logarithmic form of the bounds and of the shortcut threshold.
        const double tail = log(m_key_zipf_min + 1.5) - 1. / (m_key_zipf_min + 1);
        m_key_zipf_Hmin = log(m_key_zipf_min + 0.5) - 1. / m_key_zipf_min;
        m_key_zipf_Hmax = log(m_key_zipf_max + 0.5);
        m_key_zipf_s = m_key_zipf_min + 1 - exp(tail);
        return;
    }

    // General exponent: power form, with (1 - exp) and its inverse cached.
    m_key_zipf_1mexp = 1. - m_key_zipf_exp;
    m_key_zipf_1mexpInv = 1. / m_key_zipf_1mexp;
    m_key_zipf_Hmin = pow(m_key_zipf_min + 0.5, m_key_zipf_1mexp) -
                      m_key_zipf_1mexp * pow(m_key_zipf_min, -m_key_zipf_exp);
    m_key_zipf_Hmax = pow(m_key_zipf_max + 0.5, m_key_zipf_1mexp);
    const double tail = pow(m_key_zipf_min + 1.5, m_key_zipf_1mexp) -
                        m_key_zipf_1mexp * pow(m_key_zipf_min + 1, -m_key_zipf_exp);
    m_key_zipf_s = m_key_zipf_min + 1 - pow(tail, m_key_zipf_1mexpInv);
}

// return a random number between r_min and r_max
unsigned long long object_generator::random_range(unsigned long long r_min, unsigned long long r_max)
{
Expand All @@ -361,15 +418,62 @@ unsigned long long object_generator::normal_distribution(unsigned long long r_mi
return m_random.gaussian_distribution_range(r_stddev, r_median, r_min, r_max);
}

// Draw one key index from the Zipfian distribution whose constants were
// cached by set_key_zipf_distribution().
//
// The sampler is based on:
//   Rejection-inversion to generate variates from monotone discrete distributions
//   ACM Transactions on Modeling and Computer Simulation.
//   Volume 6 Issue 3 July 1996 pp 169–184
//   https://doi.org/10.1145/235025.235029
//
// An exponent of (approximately) 0 degenerates to a uniform draw over
// [m_key_zipf_min, m_key_zipf_max]. Otherwise a candidate x is obtained by
// inverting the integral bound, clamped so that rounding always yields a key
// inside [m_key_zipf_min, m_key_zipf_max], and then accepted or rejected.
unsigned long long object_generator::zipf_distribution()
{
    const double eps = 1e-4;

    if (m_key_zipf_exp < eps)
        return random_range(m_key_zipf_min, m_key_zipf_max);
    else if (fabs(m_key_zipf_exp - 1.0) < eps) {
        while (true) {
            double p = m_random.get_random() / (double)(m_random.get_random_max());
            double u = p * (m_key_zipf_Hmax - m_key_zipf_Hmin) + m_key_zipf_Hmin;
            double x = exp(u);
            // Floating-point rounding can push x marginally outside the valid
            // interval; clamp to the nearest key so that k below stays in range
            // (the lower bound must map to min itself, not to min + 1).
            if (x < m_key_zipf_min - 0.5)
                x = m_key_zipf_min;
            if (x >= m_key_zipf_max + 0.5)
                x = m_key_zipf_max;
            double k = floor(x + 0.5);
            // Cheap acceptance test first; avoids evaluating the log below.
            if (k - x <= m_key_zipf_s)
                return static_cast<unsigned long long>(k);
            if (u > log(k + 0.5) - 1. / k)
                return static_cast<unsigned long long>(k);
        }
    } else {
        while (true) {
            double p = m_random.get_random() / (double)(m_random.get_random_max());
            double u = p * (m_key_zipf_Hmax - m_key_zipf_Hmin) + m_key_zipf_Hmin;
            double x = pow(u, m_key_zipf_1mexpInv);
            // Same clamping as in the exponent-1 branch above.
            if (x < m_key_zipf_min - 0.5)
                x = m_key_zipf_min;
            if (x >= m_key_zipf_max + 0.5)
                x = m_key_zipf_max;
            double k = floor(x + 0.5);
            // Cheap acceptance test first; avoids evaluating the pow calls below.
            if (k - x <= m_key_zipf_s)
                return static_cast<unsigned long long>(k);
            double t = (u - pow(k + 0.5, m_key_zipf_1mexp));
            if (m_key_zipf_1mexpInv * t > -pow(k, -m_key_zipf_exp))
                return static_cast<unsigned long long>(k);
        }
    }
}

unsigned long long object_generator::get_key_index(int iter)
{
assert(iter < static_cast<int>(m_next_key.size()) && iter >= OBJECT_GENERATOR_KEY_GAUSSIAN);
assert(iter < static_cast<int>(m_next_key.size()) && iter >= OBJECT_GENERATOR_KEY_ZIPFIAN);

unsigned long long k;
if (iter==OBJECT_GENERATOR_KEY_RANDOM) {
k = random_range(m_key_min, m_key_max);
} else if(iter==OBJECT_GENERATOR_KEY_GAUSSIAN) {
k = normal_distribution(m_key_min, m_key_max, m_key_stddev, m_key_median);
} else if(iter == OBJECT_GENERATOR_KEY_ZIPFIAN) {
k = zipf_distribution();
} else {
if (m_next_key[iter] < m_key_min)
m_next_key[iter] = m_key_min;
Expand Down
18 changes: 17 additions & 1 deletion obj_gen.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class gaussian_noise: public random_generator {
private:
double gaussian_distribution(const double &stddev);
bool m_hasSpare;
double m_spare;
double m_spare;
};

class data_object {
Expand Down Expand Up @@ -75,6 +75,7 @@ class data_object {
#define OBJECT_GENERATOR_KEY_GET_ITER 0
#define OBJECT_GENERATOR_KEY_RANDOM -1
#define OBJECT_GENERATOR_KEY_GAUSSIAN -2
#define OBJECT_GENERATOR_KEY_ZIPFIAN -3

class object_generator {
public:
Expand All @@ -98,6 +99,19 @@ class object_generator {
unsigned long long m_key_max;
double m_key_stddev;
double m_key_median;

// zipf is only used for key generation
// adjusted min and max key for zipf; may differ from the user-specified range
unsigned long long m_key_zipf_min;
unsigned long long m_key_zipf_max;
// other persist data across generations
double m_key_zipf_exp;
double m_key_zipf_1mexp;
double m_key_zipf_1mexpInv;
double m_key_zipf_Hmin;
double m_key_zipf_Hmax;
double m_key_zipf_s;

data_object m_object;

std::vector<unsigned long long> m_next_key;
Expand All @@ -121,6 +135,7 @@ class object_generator {

unsigned long long random_range(unsigned long long r_min, unsigned long long r_max);
unsigned long long normal_distribution(unsigned long long r_min, unsigned long long r_max, double r_stddev, double r_median);
unsigned long long zipf_distribution();

void set_random_data(bool random_data);
void set_data_size_fixed(unsigned int size);
Expand All @@ -131,6 +146,7 @@ class object_generator {
void set_key_prefix(const char *key_prefix);
void set_key_range(unsigned long long key_min, unsigned long long key_max);
void set_key_distribution(double key_stddev, double key_median);
void set_key_zipf_distribution(double key_exp);
void set_random_seed(int seed);

unsigned long long get_key_index(int iter);
Expand Down