Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

os/bluestore: introduce power 2 macros for block alignment and rounding #10128

Merged
merged 4 commits into from Jul 7, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
51 changes: 46 additions & 5 deletions src/include/intarith.h
Expand Up @@ -16,25 +16,66 @@
#define CEPH_INTARITH_H

/*
 * Generic integer helper macros. Each one is wrapped in #ifndef so that a
 * definition already provided elsewhere is left in place.
 *
 * These are function-like macros: unless noted otherwise an argument can be
 * expanded, and therefore evaluated, more than once, so avoid passing
 * expressions with side effects.
 */

/* the smaller of a and b */
#ifndef MIN
#define MIN(a,b) ((a) < (b) ? (a):(b))
#endif

/* the larger of a and b */
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a):(b))
#endif

/*
 * n / d rounded up to the next integer (assuming n >= 0 and d > 0).
 * The intermediate sum n + d - 1 is computed in the operands' type, so it
 * can overflow for n close to the maximum of that type.
 */
#ifndef DIV_ROUND_UP
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#endif

/*
 * n rounded up to the next multiple of d; n is returned unchanged when it
 * is already a multiple. d does not have to be a power of 2.
 */
#ifndef ROUND_UP_TO
#define ROUND_UP_TO(n, d) ((n)%(d) ? ((n)+(d)-(n)%(d)) : (n))
#endif

/*
 * x / 2^y rounded up, i.e. (x + 2^y - 1) >> y.
 *
 * The 2^y term is built from (0 ? (x) : 1) rather than a plain 1: the
 * conditional yields the value 1 converted to the common type of x and int
 * and never evaluates its unselected operand, so the shift is carried out
 * in x's width (a plain int 1 breaks for y >= 31 with a 64-bit x) while x
 * is still evaluated only once.
 */
#ifndef SHIFT_ROUND_UP
#define SHIFT_ROUND_UP(x,y) (((x)+((0 ? (x) : 1)<<(y))-1) >> (y))
#endif

/*
 * Macro to determine if value is a power of 2.
 * Note that this also evaluates to true for x == 0, and that x is
 * evaluated twice.
 */
#define ISP2(x) (((x) & ((x) - 1)) == 0)

/*
 * Macros for various sorts of alignment and rounding. The "align" must
 * be a power of 2. Oftentimes it is a block, sector, or page.
 *
 * x and align may be integers of different widths. Wherever a negated
 * value is used as a mask, the operand is first brought to the common type
 * of both arguments with (0 ? other : operand): the conditional yields
 * "operand" after the usual arithmetic conversions and never evaluates
 * "other". Without this, negating a narrower unsigned operand produces a
 * mask whose upper bits are zero once it is widened, which corrupts the
 * high bits of the result. Every argument is still evaluated exactly once.
 */

/*
 * return x rounded down to an align boundary
 * eg, P2ALIGN(1200, 1024) == 1024 (1*align)
 * eg, P2ALIGN(1024, 1024) == 1024 (1*align)
 * eg, P2ALIGN(0x1234, 0x100) == 0x1200 (0x12*align)
 * eg, P2ALIGN(0x5600, 0x100) == 0x5600 (0x56*align)
 */
#define P2ALIGN(x, align) ((x) & -(0 ? (x) : (align)))

/*
 * return x % (mod) align
 * eg, P2PHASE(0x1234, 0x100) == 0x34 (x-0x12*align)
 * eg, P2PHASE(0x5600, 0x100) == 0x00 (x-0x56*align)
 */
#define P2PHASE(x, align) ((x) & ((align) - 1))

/*
 * return how much space is left in this block (but if it's perfectly
 * aligned, return 0).
 * eg, P2NPHASE(0x1234, 0x100) == 0xcc (0x13*align-x)
 * eg, P2NPHASE(0x5600, 0x100) == 0x00 (0x56*align-x)
 */
#define P2NPHASE(x, align) (-(x) & ((align) - 1))

/*
 * return x rounded up to an align boundary
 * eg, P2ROUNDUP(0x1234, 0x100) == 0x1300 (0x13*align)
 * eg, P2ROUNDUP(0x5600, 0x100) == 0x5600 (0x56*align)
 */
#define P2ROUNDUP(x, align) \
  (-(-(0 ? (align) : (x)) & -(0 ? (x) : (align))))

// count trailing zeros.
// NOTE: the builtin is nondeterministic on 0 input
static inline unsigned ctz(unsigned v) {
Expand Down
96 changes: 58 additions & 38 deletions src/os/bluestore/BlueStore.cc
Expand Up @@ -1746,18 +1746,38 @@ int BlueStore::_open_fm(bool create)
dout(1) << __func__ << " pre-fragmenting freespace, using "
<< g_conf->bluestore_debug_prefill << " with max free extent "
<< g_conf->bluestore_debug_prefragment_max << dendl;
uint64_t start = ROUND_UP_TO(reserved, min_alloc_size);
uint64_t start = P2ROUNDUP(reserved, min_alloc_size);
uint64_t max_b = g_conf->bluestore_debug_prefragment_max / min_alloc_size;
float r = g_conf->bluestore_debug_prefill;
while (start < end) {
r /= 1.0 - r;
bool stop = false;

while (!stop && start < end) {
uint64_t l = (rand() % max_b + 1) * min_alloc_size;
if (start + l > end)
if (start + l > end) {
l = end - start;
l = ROUND_UP_TO(l, min_alloc_size);
uint64_t u = 1 + (uint64_t)(r * (double)l / (1.0 - r));
u = ROUND_UP_TO(u, min_alloc_size);
l = P2ALIGN(l, min_alloc_size);
}
assert(start + l <= end);

uint64_t u = 1 + (uint64_t)(r * (double)l);
u = P2ROUNDUP(u, min_alloc_size);
if (start + l + u > end) {
u = end - (start + l);
// trim to align so we don't overflow again
u = P2ALIGN(u, min_alloc_size);
stop = true;
}
assert(start + l + u <= end);

dout(20) << " free 0x" << std::hex << start << "~" << l
<< " use 0x" << u << std::dec << dendl;

if (u == 0) {
// break if u has been trimmed to nothing
break;
}

fm->allocate(start + l, u, t);
start += l + u;
}
Expand Down Expand Up @@ -2018,7 +2038,7 @@ int BlueStore::_open_db(bool create)
g_conf->bluestore_bluefs_gift_ratio);
initial = MAX(initial, g_conf->bluestore_bluefs_min);
// align to bluefs's alloc_size
initial = ROUND_UP_TO(initial, g_conf->bluefs_alloc_size);
initial = P2ROUNDUP(initial, g_conf->bluefs_alloc_size);
initial += g_conf->bluefs_alloc_size - BLUEFS_START;
bluefs->add_block_extent(bluefs_shared_bdev, BLUEFS_START, initial);
bluefs_extents.insert(BLUEFS_START, initial);
Expand Down Expand Up @@ -2296,7 +2316,7 @@ int BlueStore::_balance_bluefs_freespace(vector<bluestore_pextent_t> *extents,

if (gift) {
// round up to alloc size
gift = ROUND_UP_TO(gift, min_alloc_size);
gift = P2ROUNDUP(gift, min_alloc_size);

// hard cap to fit into 32 bits
gift = MIN(gift, 1ull<<31);
Expand Down Expand Up @@ -2327,7 +2347,7 @@ int BlueStore::_balance_bluefs_freespace(vector<bluestore_pextent_t> *extents,
// reclaim from bluefs?
if (reclaim) {
// round up to alloc size
reclaim = ROUND_UP_TO(reclaim, min_alloc_size);
reclaim = P2ROUNDUP(reclaim, min_alloc_size);

// hard cap to fit into 32 bits
reclaim = MIN(reclaim, 1ull<<31);
Expand Down Expand Up @@ -5686,6 +5706,7 @@ void BlueStore::_do_write_small(
dout(10) << __func__ << " 0x" << std::hex << offset << "~" << length
<< std::dec << dendl;
assert(length < min_alloc_size);
uint64_t end = offset + length;

bufferlist bl;
blp.copy(length, bl);
Expand All @@ -5705,7 +5726,7 @@ void BlueStore::_do_write_small(
break;
}
int64_t blob = ep->second.blob;
b = c->get_blob(o, ep->second.blob);
b = c->get_blob(o, blob);
if (!b->blob.is_mutable() || b->blob.is_compressed()) {
dout(20) << __func__ << " ignoring immutable " << blob << ": " << *b
<< dendl;
Expand All @@ -5724,15 +5745,16 @@ void BlueStore::_do_write_small(

// can we pad our head/tail out with zeros?
uint64_t chunk_size = b->blob.get_chunk_size(block_size);
uint64_t head_pad = offset % chunk_size;
uint64_t head_pad = P2PHASE(offset, chunk_size);
if (head_pad && o->onode.has_any_lextents(offset - head_pad, chunk_size)) {
head_pad = 0;
}
uint64_t tail_pad =
ROUND_UP_TO(offset + length, chunk_size) - (offset + length);
if (tail_pad && o->onode.has_any_lextents(offset + length, tail_pad)) {

uint64_t tail_pad = P2NPHASE(end, chunk_size);
if (tail_pad && o->onode.has_any_lextents(end, tail_pad)) {
tail_pad = 0;
}

bufferlist padded = bl;
if (head_pad) {
bufferlist z;
Expand Down Expand Up @@ -5786,9 +5808,8 @@ void BlueStore::_do_write_small(
}

// read some data to fill out the chunk?
uint64_t head_read = b_off % chunk_size;
uint64_t tail_read =
ROUND_UP_TO(b_off + b_len, chunk_size) - (b_off + b_len);
uint64_t head_read = P2PHASE(b_off, chunk_size);
uint64_t tail_read = P2NPHASE(b_off + b_len, chunk_size);
if ((head_read || tail_read) &&
(b->blob.get_ondisk_length() >= b_off + b_len + tail_read) &&
head_read + tail_read < min_alloc_size) {
Expand Down Expand Up @@ -5865,12 +5886,12 @@ void BlueStore::_do_write_small(
// new blob.
b = o->blob_map.new_blob(c->cache);
unsigned alloc_len = min_alloc_size;
uint64_t b_off = offset % alloc_len;
uint64_t b_off = P2PHASE(offset, alloc_len);
b->bc.write(txc->seq, b_off, bl, wctx->buffered ? 0 : Buffer::FLAG_NOCACHE);
_pad_zeros(&bl, &b_off, block_size);
o->onode.punch_hole(offset, length, &wctx->lex_old);
bluestore_lextent_t& lex = o->onode.extent_map[offset] =
bluestore_lextent_t(b->id, offset % min_alloc_size, length);
bluestore_lextent_t(b->id, P2PHASE(offset, alloc_len), length);
b->blob.ref_map.get(lex.offset, lex.length);
txc->statfs_delta.stored() += lex.length;
dout(20) << __func__ << " lex 0x" << std::hex << offset << std::dec
Expand Down Expand Up @@ -5960,10 +5981,10 @@ int BlueStore::_do_alloc_write(
::encode(chdr, compressed_bl);
compressed_bl.claim_append(t);
uint64_t rawlen = compressed_bl.length();
uint64_t newlen = ROUND_UP_TO(rawlen, min_alloc_size);
uint64_t newlen = P2ROUNDUP(rawlen, min_alloc_size);
uint64_t dstlen = final_length *
g_conf->bluestore_compression_required_ratio;
dstlen = ROUND_UP_TO(dstlen, min_alloc_size);
dstlen = P2ROUNDUP(dstlen, min_alloc_size);
if (newlen <= dstlen && newlen < final_length) {
// Cool. We compressed at least as much as we were hoping to.
// pad out to min_alloc_size
Expand Down Expand Up @@ -6163,28 +6184,27 @@ int BlueStore::_do_write(
// we fall within the same block
_do_write_small(txc, c, o, offset, length, p, &wctx);
} else {
uint64_t head_offset = 0, head_length = 0;
uint64_t middle_offset = 0, middle_length = 0;
uint64_t tail_offset = 0, tail_length = 0;
if (offset % min_alloc_size) {
head_offset = offset;
head_length = min_alloc_size - (offset % min_alloc_size);
assert(head_length < length);
uint64_t head_offset, head_length;
uint64_t middle_offset, middle_length;
uint64_t tail_offset, tail_length;

head_offset = offset;
head_length = P2NPHASE(offset, min_alloc_size);

tail_offset = P2ALIGN(end, min_alloc_size);
tail_length = P2PHASE(end, min_alloc_size);

middle_offset = head_offset + head_length;
middle_length = length - head_length - tail_length;

if (head_length) {
_do_write_small(txc, c, o, head_offset, head_length, p, &wctx);
middle_offset = offset + head_length;
middle_length = length - head_length;
} else {
middle_offset = offset;
middle_length = length;
}
if (end % min_alloc_size) {
tail_length = end % min_alloc_size;
tail_offset = end - tail_length;
middle_length -= tail_length;
}

if (middle_length) {
_do_write_big(txc, c, o, middle_offset, middle_length, p, &wctx);
}

if (tail_length) {
_do_write_small(txc, c, o, tail_offset, tail_length, p, &wctx);
}
Expand Down
19 changes: 19 additions & 0 deletions src/test/test_intarith.cc
Expand Up @@ -61,3 +61,22 @@ TEST(intarith, ctz) {
ASSERT_EQ(20u, ctzll(0xffffffff00000));
ASSERT_EQ(48u, ctzll(0xff000000000000ull));
}

// Checks the power-of-2 helper macros (ISP2, P2ALIGN, P2PHASE, P2NPHASE,
// P2ROUNDUP) against fixed example values.
TEST(intarith, p2family) {
// ISP2: true for a value with a single bit set, false for one with several.
ASSERT_TRUE(ISP2(0x100));
ASSERT_FALSE(ISP2(0x1234));

// P2ALIGN: rounds down to a multiple of align; an aligned value is unchanged.
ASSERT_EQ(1024, P2ALIGN(1200, 1024));
ASSERT_EQ(1024, P2ALIGN(1024, 1024));
ASSERT_EQ(0x1200, P2ALIGN(0x1234, 0x100));
ASSERT_EQ(0x5600, P2ALIGN(0x5600, 0x100));

// P2PHASE: offset of x past the preceding align boundary (0 when aligned).
ASSERT_EQ(0x34, P2PHASE(0x1234, 0x100));
ASSERT_EQ(0x00, P2PHASE(0x5600, 0x100));

// P2NPHASE: distance from x up to the next align boundary (0 when aligned).
ASSERT_EQ(0xcc, P2NPHASE(0x1234, 0x100));
ASSERT_EQ(0x00, P2NPHASE(0x5600, 0x100));

// P2ROUNDUP: rounds up to a multiple of align; an aligned value is unchanged.
ASSERT_EQ(0x1300, P2ROUNDUP(0x1234, 0x100));
ASSERT_EQ(0x5600, P2ROUNDUP(0x5600, 0x100));
}