Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

os: move_ranges_destroy_src #11237

Merged
merged 9 commits into from
Oct 13, 2016
27 changes: 27 additions & 0 deletions src/os/ObjectStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ class ObjectStore {
OP_COLL_HINT = 40, // cid, type, bl

OP_TRY_RENAME = 41, // oldcid, oldoid, newoid
OP_MERGE_DELETE = 42, //move tempobj to base object. cid, oid, newoid, vector of tuple <src offset, dest offset, len>
};

// Transaction hint type
Expand Down Expand Up @@ -690,6 +691,7 @@ class ObjectStore {

case OP_CLONERANGE2:
case OP_CLONE:
case OP_MERGE_DELETE:
assert(op->cid < cm.size());
assert(op->oid < om.size());
assert(op->dest_oid < om.size());
Expand Down Expand Up @@ -1001,6 +1003,9 @@ class ObjectStore {
void decode_attrset(map<string,bufferlist>& aset) {
::decode(aset, data_bl_p);
}
// Decode a vector of range tuples from data_bl_p into move_info.
// This is the read-side counterpart of the ::encode(move_info, data_bl)
// done when an OP_MERGE_DELETE op is queued; for that op each tuple is
// <src offset, dest offset, len>.
// NOTE(review): data_bl_p is presumably the iterator over the
// transaction's data bufferlist and is advanced by the decode -- confirm.
void decode_move_info(vector<boost::tuple<uint64_t, uint64_t, uint64_t >>& move_info) {
::decode(move_info, data_bl_p);
}
void decode_attrset_bl(bufferlist *pbl) {
decode_str_str_map_to_bl(data_bl_p, pbl);
}
Expand Down Expand Up @@ -1367,6 +1372,28 @@ class ObjectStore {
}
data.ops++;
}

/**
 * Queue an OP_MERGE_DELETE: move byte ranges from a source object into
 * a base object, then delete the source object.
 *
 * Only the data portion is copied from the source object to the base
 * object. The ranges to copy are given by move_info.
 *
 * @param cid       collection recorded on the op
 * @param src_oid   source object; stored as the op's oid and deleted
 *                  once all ranges have been moved
 * @param oid       base (destination) object; stored as the op's dest_oid
 * @param move_info one tuple per range: <src offset, dest offset, length>;
 *                  encoded into the transaction's data bufferlist
 */
void move_ranges_destroy_src(
  const coll_t& cid,
  const ghobject_t& src_oid,
  const ghobject_t& oid,
  const vector<boost::tuple<uint64_t, uint64_t, uint64_t>>& move_info) {
  Op* _op = _get_next_op();
  _op->op = OP_MERGE_DELETE;
  _op->cid = _get_coll_id(cid);
  // The source goes in ->oid and the base in ->dest_oid; the backends
  // read them back in that role.
  _op->oid = _get_object_id(src_oid);
  _op->dest_oid = _get_object_id(oid);
  ::encode(move_info, data_bl);
  data.ops++;
}

/// Create the collection
void create_collection(const coll_t& cid, int bits) {
if (use_tbl) {
Expand Down
48 changes: 48 additions & 0 deletions src/os/bluestore/BlueStore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6720,6 +6720,19 @@ void BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t)
}
break;

// OP_MERGE_DELETE: move the ranges listed in the op's move_info from the
// source onode into the destination onode, then remove the source.
case Transaction::OP_MERGE_DELETE:
{
// Resolve the destination onode. 'no' is a reference into ovec, so an
// onode fetched here is stored back in the slot for op->dest_oid.
const ghobject_t& noid = i.get_oid(op->dest_oid);
OnodeRef& no = ovec[op->dest_oid];
if (!no) {
// NOTE(review): the 'true' argument presumably asks get_onode to create
// the onode if it does not exist -- confirm against get_onode.
no = c->get_onode(noid, true);
}
// The range list was encoded as op data when the op was queued.
vector<boost::tuple<uint64_t, uint64_t, uint64_t>> move_info;
i.decode_move_info(move_info);
// NOTE(review): 'o' is defined outside this hunk; presumably the onode
// for op->oid (the source object) -- confirm in the enclosing function.
r = _move_ranges_destroy_src(txc, c, o, no, move_info);
}
break;

case Transaction::OP_COLL_ADD:
assert(0 == "not implemented");
break;
Expand Down Expand Up @@ -8183,6 +8196,41 @@ int BlueStore::_clone_range(TransContext *txc,
return r;
}

/*
 * Copy every range listed in move_info from srco into baseo, then
 * remove srco.
 *
 * Each move_info entry is a <src offset, dest offset, length> tuple and
 * is applied with _clone_range. The first negative result from
 * _clone_range is returned immediately and srco is left in place;
 * otherwise the result of _do_remove(srco) is returned.
 */
int BlueStore::_move_ranges_destroy_src(TransContext *txc,
					CollectionRef& c,
					OnodeRef& srco,
					OnodeRef& baseo,
					const vector<boost::tuple<uint64_t, uint64_t, uint64_t>> move_info)
{
  dout(15) << __func__ << " " << c->cid << " " << srco->oid << " -> " << baseo->oid << dendl;

  // Walk the whole range list, cloning src -> base one range at a time.
  for (const auto& range : move_info) {
    const uint64_t src_offset = range.get<0>();
    const uint64_t dst_offset = range.get<1>();
    const uint64_t length = range.get<2>();

    dout(15) << __func__ << " " << c->cid << " " << srco->oid << " -> "
	     << baseo->oid << " from 0x" << std::hex << src_offset << "~" << length
	     << " to offset 0x" << dst_offset << std::dec << dendl;

    const int cloned = _clone_range(txc, c, srco, baseo, src_offset, length, dst_offset);
    if (cloned < 0)
      return cloned;
  }

  // All ranges moved (or there were none): drop the source object.
  return _do_remove(txc, c, srco);
}

int BlueStore::_rename(TransContext *txc,
CollectionRef& c,
OnodeRef& oldo,
Expand Down
5 changes: 5 additions & 0 deletions src/os/bluestore/BlueStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -1872,6 +1872,11 @@ class BlueStore : public ObjectStore,
OnodeRef& oldo,
OnodeRef& newo,
uint64_t srcoff, uint64_t length, uint64_t dstoff);
// Move the <src offset, dest offset, length> ranges in move_info from
// oldo (the source object) into newo (the base object), then remove oldo.
// Returns a negative value on failure.
int _move_ranges_destroy_src(TransContext *txc,
CollectionRef& c,
OnodeRef& oldo,
OnodeRef& newo,
const vector<boost::tuple<uint64_t, uint64_t, uint64_t>> move_info);
int _rename(TransContext *txc,
CollectionRef& c,
OnodeRef& oldo,
Expand Down
125 changes: 111 additions & 14 deletions src/os/filestore/FileStore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
*/
#include "include/compat.h"
#include "include/int_types.h"
#include "boost/tuple/tuple.hpp"

#include <unistd.h>
#include <stdlib.h>
Expand Down Expand Up @@ -2669,13 +2670,14 @@ void FileStore::_do_transaction(
{
coll_t cid = i.get_cid(op->cid);
ghobject_t oid = i.get_oid(op->oid);
_kludge_temp_object_collection(cid, oid);
coll_t ncid = cid;
ghobject_t noid = i.get_oid(op->dest_oid);
_kludge_temp_object_collection(cid, noid);
_kludge_temp_object_collection(cid, oid);
_kludge_temp_object_collection(ncid, noid);
uint64_t off = op->off;
uint64_t len = op->len;
tracepoint(objectstore, clone_range_enter, osr_name, len);
r = _clone_range(cid, oid, noid, off, len, off, spos);
r = _clone_range(cid, oid, ncid, noid, off, len, off, spos);
tracepoint(objectstore, clone_range_exit, r);
}
break;
Expand All @@ -2684,18 +2686,35 @@ void FileStore::_do_transaction(
{
coll_t cid = i.get_cid(op->cid);
ghobject_t oid = i.get_oid(op->oid);
_kludge_temp_object_collection(cid, oid);
coll_t ncid = cid;
ghobject_t noid = i.get_oid(op->dest_oid);
_kludge_temp_object_collection(cid, noid);
_kludge_temp_object_collection(cid, oid);
_kludge_temp_object_collection(ncid, noid);
uint64_t srcoff = op->off;
uint64_t len = op->len;
uint64_t dstoff = op->dest_off;
tracepoint(objectstore, clone_range2_enter, osr_name, len);
r = _clone_range(cid, oid, noid, srcoff, len, dstoff, spos);
r = _clone_range(cid, oid, ncid, noid, srcoff, len, dstoff, spos);
tracepoint(objectstore, clone_range2_exit, r);
}
break;

case Transaction::OP_MERGE_DELETE:
  {
    // The op carries a single collection id; source and base each get
    // their own copy so _kludge_temp_object_collection can be applied
    // to them separately.
    ghobject_t from_oid = i.get_oid(op->oid);
    coll_t base_cid = i.get_cid(op->cid);
    ghobject_t base_oid = i.get_oid(op->dest_oid);
    coll_t from_cid = i.get_cid(op->cid);
    _kludge_temp_object_collection(base_cid, base_oid);
    _kludge_temp_object_collection(from_cid, from_oid);

    // Range list: <src offset, dest offset, length> per entry.
    vector<boost::tuple<uint64_t, uint64_t, uint64_t>> ranges;
    i.decode_move_info(ranges);

    tracepoint(objectstore, move_ranges_destroy_src_enter, osr_name);
    r = _move_ranges_destroy_src(from_cid, from_oid, base_cid, base_oid, ranges, spos);
    tracepoint(objectstore, move_ranges_destroy_src_exit, r);
  }
  break;

case Transaction::OP_MKCOLL:
{
coll_t cid = i.get_cid(op->cid);
Expand Down Expand Up @@ -2812,7 +2831,7 @@ void FileStore::_do_transaction(

case Transaction::OP_COLL_SETATTR:
case Transaction::OP_COLL_RMATTR:
assert(0 == "collection attr methods no longer implmented");
assert(0 == "collection attr methods no longer implemented");
break;

case Transaction::OP_STARTSYNC:
Expand Down Expand Up @@ -3709,22 +3728,22 @@ int FileStore::_do_copy_range(int from, int to, uint64_t srcoff, uint64_t len, u
return r;
}

int FileStore::_clone_range(const coll_t& cid, const ghobject_t& oldoid, const ghobject_t& newoid,
int FileStore::_clone_range(const coll_t& oldcid, const ghobject_t& oldoid, const coll_t& newcid, const ghobject_t& newoid,
uint64_t srcoff, uint64_t len, uint64_t dstoff,
const SequencerPosition& spos)
{
dout(15) << "clone_range " << cid << "/" << oldoid << " -> " << cid << "/" << newoid << " " << srcoff << "~" << len << " to " << dstoff << dendl;
dout(15) << "clone_range " << oldcid << "/" << oldoid << " -> " << newcid << "/" << newoid << " " << srcoff << "~" << len << " to " << dstoff << dendl;

if (_check_replay_guard(cid, newoid, spos) < 0)
if (_check_replay_guard(newcid, newoid, spos) < 0)
return 0;

int r;
FDRef o, n;
r = lfn_open(cid, oldoid, false, &o);
r = lfn_open(oldcid, oldoid, false, &o);
if (r < 0) {
goto out2;
}
r = lfn_open(cid, newoid, true, &n);
r = lfn_open(newcid, newoid, true, &n);
if (r < 0) {
goto out;
}
Expand All @@ -3741,11 +3760,89 @@ int FileStore::_clone_range(const coll_t& cid, const ghobject_t& oldoid, const g
out:
lfn_close(o);
out2:
dout(10) << "clone_range " << cid << "/" << oldoid << " -> " << cid << "/" << newoid << " "
dout(10) << "clone_range " << oldcid << "/" << oldoid << " -> " << newcid << "/" << newoid << " "
<< srcoff << "~" << len << " to " << dstoff << " = " << r << dendl;
return r;
}

/*
 * Move the ranges listed in move_info from the source object
 * (src_cid/src_oid) into the base object (cid/oid). Once every range has
 * been moved successfully, delete the source object.
 *
 * Each move_info entry is a <src offset, dest offset, length> tuple.
 *
 * Returns 0 on success, or when a replay guard on the base or source
 * object says this op must not be applied again. Returns a negative
 * errno if a range could not be cloned (the source is then left in
 * place) or the source could not be unlinked outside of replay.
 *
 * move_info is taken by value to match the declaration in FileStore.h.
 */
int FileStore::_move_ranges_destroy_src(const coll_t& src_cid, const ghobject_t& src_oid, const coll_t& cid, const ghobject_t& oid,
			   const vector<boost::tuple<uint64_t, uint64_t, uint64_t>> move_info,
			   const SequencerPosition& spos)
{
  int r = 0;

  dout(10) << __func__ << src_cid << "/" << src_oid << " -> " << cid << "/" << oid << dendl;

  // check replay guard for base object. If not possible to replay, return.
  int dstcmp = _check_replay_guard(cid, oid, spos);
  if (dstcmp < 0)
    return 0;

  // check the src name too; it might have a newer guard, and we don't
  // want to clobber it
  int srccmp = _check_replay_guard(src_cid, src_oid, spos);
  if (srccmp < 0)
    return 0;

  FDRef b;
  r = lfn_open(cid, oid, true, &b);
  if (r < 0) {
    // NOTE(review): a failure to open/create the base object is reported
    // as success here, whereas _clone_range propagates the lfn_open
    // error. Left unchanged in case it is deliberate -- confirm.
    return 0;
  }

  FDRef t;
  r = lfn_open(src_cid, src_oid, false, &t);
  // If we are replaying, it is possible that we do not find the src obj
  // because it was already deleted before the crash.
  if (r < 0) {
    dout(10) << __func__ << " replaying -->" << replaying << dendl;
    if (replaying) {
      // Record the guard while the base fd is still held, then release it.
      _set_replay_guard(**b, spos, &oid);
      r = 0;
    } else {
      r = -ENOENT;
    }
    lfn_close(b);
    return r;
  }

  for (const auto& range : move_info) {
    const uint64_t srcoff = range.get<0>();
    const uint64_t dstoff = range.get<1>();
    const uint64_t len = range.get<2>();

    r = _do_clone_range(**t, **b, srcoff, len, dstoff);
    // Stop at the first failure: a later successful range must not mask
    // the error, otherwise the source would be unlinked below with part
    // of its data never copied.
    if (r < 0)
      break;
  }

  dout(10) << __func__ << cid << "/" << oid << " " <<  " = " << r << dendl;

  lfn_close(t);

  // In case a crash occurs here, replay will have to do the cloning again.
  // Only if every clone succeeded, go ahead with deleting the source object.
  if (r >= 0) {
    r = lfn_unlink(src_cid, src_oid, spos, true);
    // If a crash occurred between unlink and set guard, the replayed
    // unlink may not find the already deleted object; that is not an error.
    if (r < 0 && replaying) {
      r = 0;
    }

    if (r >= 0) {
      // set replay guard for base obj coll_t, as this api is not idempotent.
      _set_replay_guard(**b, spos, &oid);
    }
  }

  lfn_close(b);
  dout(10) << __func__ << cid << "/" << oid << " " <<  " = " << r << dendl;
  return r;
}

class SyncEntryTimeout : public Context {
public:
explicit SyncEntryTimeout(int commit_timeo)
Expand Down Expand Up @@ -4639,7 +4736,7 @@ int FileStore::list_collections(vector<coll_t>& ls, bool include_temp)
continue;
coll_t cid;
if (!cid.parse(de->d_name)) {
derr << "ignoging invalid collection '" << de->d_name << "'" << dendl;
derr << "ignoring invalid collection '" << de->d_name << "'" << dendl;
continue;
}
if (!cid.is_temp() || include_temp)
Expand Down
5 changes: 4 additions & 1 deletion src/os/filestore/FileStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -557,9 +557,12 @@ class FileStore : public JournalingObjectStore,
int _truncate(const coll_t& cid, const ghobject_t& oid, uint64_t size);
int _clone(const coll_t& cid, const ghobject_t& oldoid, const ghobject_t& newoid,
const SequencerPosition& spos);
int _clone_range(const coll_t& cid, const ghobject_t& oldoid, const ghobject_t& newoid,
int _clone_range(const coll_t& oldcid, const ghobject_t& oldoid, const coll_t& newcid, const ghobject_t& newoid,
uint64_t srcoff, uint64_t len, uint64_t dstoff,
const SequencerPosition& spos);
// Move the <src offset, dest offset, length> ranges in move_info from the
// source object (temp_cid/temp_oid) into the base object (cid/oid), then
// unlink the source object. Returns 0 on success or a negative errno.
int _move_ranges_destroy_src(const coll_t& temp_cid, const ghobject_t& temp_oid, const coll_t& cid, const ghobject_t& oid,
const vector<boost::tuple<uint64_t, uint64_t, uint64_t> > move_info,
const SequencerPosition& spos);
int _do_clone_range(int from, int to, uint64_t srcoff, uint64_t len, uint64_t dstoff);
int _do_sparse_copy_range(int from, int to, uint64_t srcoff, uint64_t len, uint64_t dstoff);
int _do_copy_range(int from, int to, uint64_t srcoff, uint64_t len, uint64_t dstoff, bool skip_sloppycrc=false);
Expand Down