diff --git a/src/cls/rgw/cls_rgw_types.h b/src/cls/rgw/cls_rgw_types.h index 7dac3e8bd412d..30cbabda1c4e9 100644 --- a/src/cls/rgw/cls_rgw_types.h +++ b/src/cls/rgw/cls_rgw_types.h @@ -854,7 +854,7 @@ struct cls_rgw_obj_chain { cls_rgw_obj_chain() {} - void push_obj(const string& pool, cls_rgw_obj_key& key, string& loc) { + void push_obj(const string& pool, const cls_rgw_obj_key& key, const string& loc) { cls_rgw_obj obj; obj.pool = pool; obj.key = key; diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc index fd9dfa3a407a0..1ae31f069f48b 100644 --- a/src/rgw/rgw_bucket.cc +++ b/src/rgw/rgw_bucket.cc @@ -659,13 +659,12 @@ int rgw_remove_bucket_bypass_gc(RGWRados *store, rgw_bucket& bucket, } if (astate->has_manifest) { - rgw_obj head_obj; RGWObjManifest& manifest = astate->manifest; RGWObjManifest::obj_iterator miter = manifest.obj_begin(); + rgw_obj head_obj = manifest.get_obj(); + rgw_raw_obj raw_head_obj; + RGWRados::obj_to_raw(head_obj, &raw_head_obj); - if (miter.get_location().ns.empty()) { - head_obj = miter.get_location(); - } for (; miter != manifest.obj_end() && max_aio--; ++miter) { if (!max_aio) { @@ -677,13 +676,13 @@ int rgw_remove_bucket_bypass_gc(RGWRados *store, rgw_bucket& bucket, max_aio = concurrent_max; } - rgw_obj last_obj = miter.get_location(); - if (last_obj == head_obj) { + rgw_raw_obj last_obj = miter.get_location(); + if (last_obj == raw_head_obj) { // have the head obj deleted at the end continue; } - ret = store->delete_obj_aio(last_obj, bucket, info, astate, handles, keep_index_consistent); + ret = store->delete_raw_obj_aio(last_obj, handles); if (ret < 0) { lderr(store->ctx()) << "ERROR: delete obj aio failed with " << ret << dendl; return ret; diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 154b733d27666..46ca21da46add 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -830,7 +830,7 @@ struct rgw_raw_obj { oid = _oid; } - bool empty() { + bool empty() const { return oid.empty(); } @@ -873,6 +873,10 
@@ struct rgw_raw_obj { return (r < 0); } + bool operator==(const rgw_raw_obj& o) const { + return (pool == o.pool && oid == o.oid && loc == o.loc); + } + void dump(Formatter *f) const; void decode_json(JSONObj *obj); }; @@ -1684,7 +1688,7 @@ class rgw_obj { } } - bool empty() { + bool empty() const { return object.empty(); } @@ -1692,7 +1696,7 @@ class rgw_obj { return instance == "null"; } - bool have_instance() { + bool have_instance() const { return !instance.empty(); } @@ -1780,7 +1784,7 @@ class rgw_obj { } } - string& get_hash_object() { + const string& get_hash_object() const { return index_hash_source.empty() ? orig_obj : index_hash_source; } /** @@ -1880,6 +1884,14 @@ class rgw_obj { return in_extra_data; } + const rgw_pool& get_data_pool() const { + if (!in_extra_data || bucket.placement.data_extra_pool.empty()) { + return bucket.placement.data_pool; /* also the fallback when no extra pool is set, so an empty pool is never returned; NOTE(review): confirm this matches placement.get_data_extra_pool() as used by rgw_obj_to_raw() */ + } else { + return bucket.placement.data_extra_pool; + } + } + void encode(bufferlist& bl) const { ENCODE_START(5, 3, bl); ::encode(bucket.name, bl); diff --git a/src/rgw/rgw_dencoder.cc b/src/rgw/rgw_dencoder.cc index 766960476828b..d12340719dbf8 100644 --- a/src/rgw/rgw_dencoder.cc +++ b/src/rgw/rgw_dencoder.cc @@ -112,11 +112,11 @@ void RGWObjManifest::obj_iterator::seek(uint64_t o) void RGWObjManifest::obj_iterator::update_location() { if (manifest->explicit_objs) { - location = explicit_iter->second.loc; + RGWRados::obj_to_raw(explicit_iter->second.loc, &location); return; } - const rgw_obj& head = manifest->get_head(); + const rgw_raw_obj& head = manifest->get_head(); if (ofs < manifest->get_head_size()) { location = head; @@ -159,7 +159,7 @@ void RGWObjManifest::generate_test_instances(std::list& o) o.push_back(new RGWObjManifest); } -void RGWObjManifest::get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_obj *location) +void RGWObjManifest::get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_raw_obj *location) { string oid; if
(!override_prefix || override_prefix->empty()) { @@ -194,17 +194,21 @@ void RGWObjManifest::get_implicit_location(uint64_t cur_part_id, uint64_t cur_st rgw_bucket *bucket; + rgw_obj loc; + if (!tail_bucket.name.empty()) { bucket = &tail_bucket; } else { - bucket = &head_obj.bucket; + bucket = &obj.bucket; } - location->init_ns(*bucket, oid, ns); + loc.init_ns(*bucket, oid, ns); // Always overwrite instance with tail_instance // to get the right shadow object location - location->set_instance(tail_instance); + loc.set_instance(tail_instance); + + RGWRados::obj_to_raw(loc, location); } diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index be6a065e54170..68c2e7a7d56b8 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -2567,9 +2567,9 @@ int RGWPutObjProcessor_Multipart::prepare(RGWRados *store, string *oid_rand) return r; } - head_obj = manifest_gen.get_cur_obj(); + cur_obj = manifest_gen.get_cur_obj(); + rgw_raw_obj_to_obj(bucket, cur_obj, &head_obj); head_obj.index_hash_source = obj_str; - cur_obj = head_obj; return 0; } @@ -4734,7 +4734,10 @@ void RGWAbortMultipart::execute() store->update_gc_chain(meta_obj, obj_part.manifest, &chain); RGWObjManifest::obj_iterator oiter = obj_part.manifest.obj_begin(); if (oiter != obj_part.manifest.obj_end()) { - rgw_obj head = oiter.get_location(); + rgw_obj head; + rgw_raw_obj raw_head = oiter.get_location(); + rgw_raw_obj_to_obj(s->bucket, raw_head, &head); + rgw_obj_key key; head.get_index_key(&key); remove_objs.push_back(key); diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index b52242331624c..f9200aa6b9c7a 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -1525,7 +1525,7 @@ static inline int put_data_and_throttle(RGWPutObjProcessor *processor, do { void *handle; - rgw_obj obj; + rgw_raw_obj obj; int ret = processor->handle_data(data, ofs, hash, &handle, &obj, &again); if (ret < 0) diff --git a/src/rgw/rgw_orphan.cc b/src/rgw/rgw_orphan.cc index 2538e55417ae4..af37a61586fbc 100644 --- a/src/rgw/rgw_orphan.cc 
+++ b/src/rgw/rgw_orphan.cc @@ -437,8 +437,8 @@ int RGWOrphanSearch::handle_stat_result(map >& oids, RGWRados: RGWObjManifest::obj_iterator miter; for (miter = manifest.obj_begin(); miter != manifest.obj_end(); ++miter) { - const rgw_obj& loc = miter.get_location(); - string s = bucket.bucket_id + "_" + loc.get_object(); + const rgw_raw_obj& loc = miter.get_location(); + string s = loc.oid; obj_oids.insert(obj_fingerprint(s)); } } diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index a4a25579f8d81..55b207e220f81 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -1903,13 +1903,12 @@ void RGWObjManifest::obj_iterator::operator++() update_location(); } -int RGWObjManifest::generator::create_begin(CephContext *cct, RGWObjManifest *_m, rgw_bucket& _b, rgw_obj& _h) +int RGWObjManifest::generator::create_begin(CephContext *cct, RGWObjManifest *_m, rgw_bucket& _b, rgw_obj& _obj) { manifest = _m; - bucket = _b; manifest->set_tail_bucket(_b); - manifest->set_head(_h, 0); + manifest->set_head(_obj, 0); last_ofs = 0; if (manifest->get_prefix().empty()) { @@ -1942,7 +1941,7 @@ int RGWObjManifest::generator::create_begin(CephContext *cct, RGWObjManifest *_m manifest->get_implicit_location(cur_part_id, cur_stripe, 0, NULL, &cur_obj); // Normal object which not generated through copy operation - manifest->set_tail_instance(_h.get_instance()); + manifest->set_tail_instance(_obj.get_instance()); manifest->update_iterators(); @@ -2098,10 +2097,16 @@ void RGWObjManifest::convert_to_explicit() while (iter != obj_end()) { RGWObjManifestPart& part = objs[iter.get_stripe_ofs()]; - part.loc = iter.get_location(); + const rgw_raw_obj& raw_loc = iter.get_location(); part.loc_ofs = 0; uint64_t ofs = iter.get_stripe_ofs(); + + if (ofs == 0) { + part.loc = obj; + } else { + rgw_raw_obj_to_obj(tail_bucket, raw_loc, &part.loc); + } ++iter; uint64_t next_ofs = iter.get_stripe_ofs(); @@ -2181,9 +2186,13 @@ RGWPutObjProcessor_Aio::~RGWPutObjProcessor_Aio() if (is_complete) 
return; - set::iterator iter; - bool is_multipart_obj = false; - rgw_obj multipart_obj; + set::iterator iter; + bool need_to_remove_head = false; + rgw_raw_obj raw_head; + + if (!head_obj.empty()) { + RGWRados::obj_to_raw(head_obj, &raw_head); + } /** * We should delete the object in the "multipart" namespace to avoid race condition. @@ -2191,32 +2200,36 @@ RGWPutObjProcessor_Aio::~RGWPutObjProcessor_Aio() * upload, when it is deleted, a second upload would start with the same suffix("2/"), therefore, objects * written by the second upload may be deleted by the first upload. * details is describled on #11749 + * + * The above comment still stands, but instead of searching for a specific object in the multipart + * namespace, we just make sure that we remove the object that is marked as the head object after + * we remove all the other raw objects. Note that we use a different call to remove the head object, + * as this one needs to go via the bucket index prepare/complete 2-phase commit scheme.
*/ for (iter = written_objs.begin(); iter != written_objs.end(); ++iter) { - const rgw_obj &obj = *iter; - if (RGW_OBJ_NS_MULTIPART == obj.ns) { - ldout(store->ctx(), 5) << "NOTE: we should not process the multipart object (" << obj << ") here" << dendl; - multipart_obj = *iter; - is_multipart_obj = true; + const rgw_raw_obj& obj = *iter; + if (!head_obj.empty() && obj == raw_head) { + ldout(store->ctx(), 5) << "NOTE: we should not process the head object (" << obj << ") here" << dendl; + need_to_remove_head = true; continue; } - int r = store->delete_obj(obj_ctx, bucket_info, obj, 0, 0); + int r = store->delete_raw_obj(obj); if (r < 0 && r != -ENOENT) { ldout(store->ctx(), 5) << "WARNING: failed to remove obj (" << obj << "), leaked" << dendl; } } - if (true == is_multipart_obj) { - ldout(store->ctx(), 5) << "NOTE: we are going to process the multipart obj (" << multipart_obj << dendl; - int r = store->delete_obj(obj_ctx, bucket_info, multipart_obj, 0, 0); + if (need_to_remove_head) { + ldout(store->ctx(), 5) << "NOTE: we are going to process the head obj (" << raw_head << ")" << dendl; + int r = store->delete_obj(obj_ctx, bucket_info, head_obj, 0, 0); if (r < 0 && r != -ENOENT) { - ldout(store->ctx(), 0) << "WARNING: failed to remove obj (" << multipart_obj << "), leaked" << dendl; + ldout(store->ctx(), 0) << "WARNING: failed to remove obj (" << raw_head << "), leaked" << dendl; } } } -int RGWPutObjProcessor_Aio::handle_obj_data(rgw_obj& obj, bufferlist& bl, off_t ofs, off_t abs_ofs, void **phandle, bool exclusive) +int RGWPutObjProcessor_Aio::handle_obj_data(rgw_raw_obj& obj, bufferlist& bl, off_t ofs, off_t abs_ofs, void **phandle, bool exclusive) { if ((uint64_t)abs_ofs + bl.length() > obj_len) obj_len = abs_ofs + bl.length(); @@ -2273,7 +2286,7 @@ int RGWPutObjProcessor_Aio::drain_pending() return ret; } -int RGWPutObjProcessor_Aio::throttle_data(void *handle, const rgw_obj& obj, bool need_to_wait) +int RGWPutObjProcessor_Aio::throttle_data(void *handle, 
const rgw_raw_obj& obj, bool need_to_wait) { bool _wait = need_to_wait; @@ -2308,7 +2321,7 @@ int RGWPutObjProcessor_Aio::throttle_data(void *handle, const rgw_obj& obj, bool return 0; } -int RGWPutObjProcessor_Atomic::write_data(bufferlist& bl, off_t ofs, void **phandle, rgw_obj *pobj, bool exclusive) +int RGWPutObjProcessor_Atomic::write_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool exclusive) { if (ofs >= next_part_ofs) { int r = prepare_next_part(ofs); @@ -2322,7 +2335,7 @@ int RGWPutObjProcessor_Atomic::write_data(bufferlist& bl, off_t ofs, void **phan return RGWPutObjProcessor_Aio::handle_obj_data(cur_obj, bl, ofs - cur_part_ofs, ofs, phandle, exclusive); } -int RGWPutObjProcessor_Atomic::handle_data(bufferlist& bl, off_t ofs, MD5 *hash, void **phandle, rgw_obj *pobj, bool *again) +int RGWPutObjProcessor_Atomic::handle_data(bufferlist& bl, off_t ofs, MD5 *hash, void **phandle, rgw_raw_obj *pobj, bool *again) { *again = false; @@ -2415,7 +2428,7 @@ int RGWPutObjProcessor_Atomic::prepare(RGWRados *store, string *oid_rand) manifest.set_trivial_rule(max_chunk_size, store->ctx()->_conf->rgw_obj_stripe_size); - r = manifest_gen.create_begin(store->ctx(), &manifest, bucket, head_obj); + r = manifest_gen.create_begin(store->ctx(), &manifest, head_obj.bucket, head_obj); if (r < 0) { return r; } @@ -2459,7 +2472,7 @@ int RGWPutObjProcessor_Atomic::complete_writing_data() } while (pending_data_bl.length()) { void *handle; - rgw_obj obj; + rgw_raw_obj obj; uint64_t max_write_size = MIN(max_chunk_size, (uint64_t)next_part_ofs - data_ofs); if (max_write_size > pending_data_bl.length()) { max_write_size = pending_data_bl.length(); @@ -3040,10 +3053,10 @@ RGWDataSyncStatusManager* RGWRados::get_data_sync_manager(const std::string& sou return thread->second->get_manager(); } -int RGWRados::get_required_alignment(rgw_bucket& bucket, uint64_t *alignment) +int RGWRados::get_required_alignment(const rgw_pool& pool, uint64_t *alignment) { IoCtx ioctx; - 
int r = open_pool_ctx(bucket.placement.data_pool, ioctx); + int r = open_pool_ctx(pool, ioctx); if (r < 0) { ldout(cct, 0) << "ERROR: open_pool_ctx() returned " << r << dendl; return r; @@ -3076,10 +3089,15 @@ int RGWRados::get_required_alignment(rgw_bucket& bucket, uint64_t *alignment) return 0; } -int RGWRados::get_max_chunk_size(rgw_bucket& bucket, uint64_t *max_chunk_size) +int RGWRados::get_required_alignment(const rgw_bucket& bucket, uint64_t *alignment) +{ + return get_required_alignment(bucket.placement.data_pool, alignment); +} + +int RGWRados::get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size) { uint64_t alignment; - int r = get_required_alignment(bucket, &alignment); + int r = get_required_alignment(pool, &alignment); if (r < 0) { return r; } @@ -3103,6 +3121,11 @@ int RGWRados::get_max_chunk_size(rgw_bucket& bucket, uint64_t *max_chunk_size) return 0; } +int RGWRados::get_max_chunk_size(const rgw_bucket& bucket, uint64_t *max_chunk_size) +{ + return get_max_chunk_size(bucket.placement.data_pool, max_chunk_size); +} + void RGWRados::finalize() { if (run_sync_thread) { @@ -5487,7 +5510,6 @@ int RGWRados::create_pools(vector& names, vector& retcodes) return 0; } - int RGWRados::get_obj_ioctx(const rgw_obj& obj, librados::IoCtx *ioctx) { const rgw_bucket& bucket = obj.bucket; @@ -5555,14 +5577,9 @@ int RGWRados::get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref, rgw_po void RGWRados::obj_to_raw(const rgw_obj& obj, rgw_raw_obj *raw_obj) { - get_obj_bucket_and_oid_loc(obj, raw_obj->oid, raw_obj->loc); - - if (!obj.is_in_extra_data()) { - raw_obj->pool = obj.bucket.placement.data_pool; - } else { - raw_obj->pool = obj.bucket.placement.get_data_extra_pool(); - } + rgw_obj_to_raw(obj, raw_obj); } + int RGWRados::get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref, rgw_pool *pool) { return get_raw_obj_ref(obj, ref, pool); @@ -5747,10 +5764,13 @@ int RGWRados::fix_tail_obj_locator(rgw_bucket& bucket, rgw_obj_key& key, bool fi 
RGWObjManifest::obj_iterator miter; RGWObjManifest& manifest = astate->manifest; for (miter = manifest.obj_begin(); miter != manifest.obj_end(); ++miter) { - rgw_obj loc = miter.get_location(); + rgw_raw_obj raw_loc = miter.get_location(); + rgw_obj loc; string oid; string locator; + rgw_raw_obj_to_obj(manifest.get_tail_bucket(), raw_loc, &loc); + if (loc.ns.empty()) { /* continue, we're only interested in tail objects */ continue; @@ -5795,7 +5815,7 @@ int RGWRados::fix_tail_obj_locator(rgw_bucket& bucket, rgw_obj_key& key, bool fi return 0; } -int RGWRados::BucketShard::init(rgw_bucket& _bucket, rgw_obj& obj) +int RGWRados::BucketShard::init(const rgw_bucket& _bucket, const rgw_obj& obj) { bucket = _bucket; @@ -6389,24 +6409,13 @@ int RGWRados::put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl, * attrs: all the given attrs are written to bucket storage for the given object * Returns: 0 on success, -ERR# otherwise. */ -int RGWRados::put_obj_data(void *ctx, rgw_obj& obj, - const char *data, off_t ofs, size_t len, bool exclusive) -{ - void *handle; - bufferlist bl; - bl.append(data, len); - int r = aio_put_obj_data(ctx, obj, bl, ofs, exclusive, &handle); - if (r < 0) - return r; - return aio_wait(handle); -} -int RGWRados::aio_put_obj_data(void *ctx, rgw_obj& obj, bufferlist& bl, +int RGWRados::aio_put_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl, off_t ofs, bool exclusive, void **handle) { rgw_rados_ref ref; - int r = get_obj_ref(obj, &ref); + int r = get_raw_obj_ref(obj, &ref); if (r < 0) { return r; } @@ -6469,7 +6478,7 @@ class RGWRadosPutObj : public RGWGetDataCB do { void *handle; - rgw_obj obj; + rgw_raw_obj obj; int ret = processor->handle_data(bl, ofs, NULL, &handle, &obj, &again); if (ret < 0) return ret; @@ -7032,7 +7041,7 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx, return ret; } - vector ref_objs; + vector ref_objs; if (remote_dest) { /* dest is in a different zonegroup, copy it there */ @@ -7085,7 +7094,7 @@ int 
RGWRados::copy_obj(RGWObjectCtx& obj_ctx, } rgw_rados_ref ref; - ret = get_obj_ref(miter.get_location(), &ref); + ret = get_raw_obj_ref(miter.get_location(), &ref); if (ret < 0) { return ret; } @@ -7128,11 +7137,10 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx, for (; miter != astate->manifest.obj_end(); ++miter) { ObjectWriteOperation op; cls_refcount_get(op, tag, true); - const rgw_obj& loc = miter.get_location(); - get_obj_bucket_and_oid_loc(loc, oid, key); - ref.ioctx.locator_set_key(key); + const rgw_raw_obj& loc = miter.get_location(); + ref.ioctx.locator_set_key(loc.loc); - ret = ref.ioctx.operate(oid, &op); + ret = ref.ioctx.operate(loc.oid, &op); if (ret < 0) { goto done_ret; } @@ -7177,7 +7185,7 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx, done_ret: if (!copy_itself) { - vector::iterator riter; + vector::iterator riter; string oid, key; @@ -7186,10 +7194,9 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx, ObjectWriteOperation op; cls_refcount_put(op, tag, true); - get_obj_bucket_and_oid_loc(*riter, oid, key); - ref.ioctx.locator_set_key(key); + ref.ioctx.locator_set_key(riter->loc); - int r = ref.ioctx.operate(oid, &op); + int r = ref.ioctx.operate(riter->oid, &op); if (r < 0) { ldout(cct, 0) << "ERROR: cleanup after error failed to drop reference on obj=" << *riter << dendl; } @@ -7244,7 +7251,7 @@ int RGWRados::copy_obj_data(RGWObjectCtx& obj_ctx, do { void *handle; - rgw_obj obj; + rgw_raw_obj obj; ret = processor.handle_data(bl, ofs, NULL, &handle, &obj, &again); if (ret < 0) { @@ -7463,14 +7470,14 @@ int RGWRados::Object::complete_atomic_modification() void RGWRados::update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain) { RGWObjManifest::obj_iterator iter; + rgw_raw_obj raw_head; + obj_to_raw(head_obj, &raw_head); for (iter = manifest.obj_begin(); iter != manifest.obj_end(); ++iter) { - const rgw_obj& mobj = iter.get_location(); - if (mobj == head_obj) + const rgw_raw_obj& mobj = iter.get_location(); + if (mobj 
== raw_head) continue; - string oid, loc; - get_obj_bucket_and_oid_loc(mobj, oid, loc); - cls_rgw_obj_key key(oid); - chain->push_obj(mobj.bucket.placement.data_pool.to_str(), key, loc); + cls_rgw_obj_key key(mobj.oid); + chain->push_obj(mobj.pool.to_str(), key, mobj.loc); } } @@ -7895,6 +7902,25 @@ int RGWRados::delete_obj(RGWObjectCtx& obj_ctx, return del_op.delete_obj(); } +int RGWRados::delete_raw_obj(const rgw_raw_obj& obj) +{ + rgw_rados_ref ref; + rgw_pool pool; + int r = get_raw_obj_ref(obj, &ref, &pool); + if (r < 0) { + return r; + } + + ObjectWriteOperation op; + + op.remove(); + r = ref.ioctx.operate(ref.oid, &op); + if (r < 0) + return r; + + return 0; +} + int RGWRados::delete_system_obj(rgw_raw_obj& obj, RGWObjVersionTracker *objv_tracker) { rgw_rados_ref ref; @@ -7918,7 +7944,7 @@ int RGWRados::delete_system_obj(rgw_raw_obj& obj, RGWObjVersionTracker *objv_tra return 0; } -int RGWRados::delete_obj_index(rgw_obj& obj) +int RGWRados::delete_obj_index(const rgw_obj& obj) { std::string oid, key; get_obj_bucket_and_oid_loc(obj, oid, key); @@ -7949,7 +7975,7 @@ static void generate_fake_tag(CephContext *cct, map& attrset if (mi != manifest.obj_end()) { if (manifest.has_tail()) // first object usually points at the head, let's skip to a more unique part ++mi; - tag = mi.get_location().get_object(); + tag = mi.get_location().oid; tag.append("_"); } @@ -7985,7 +8011,7 @@ static bool has_olh_tag(map& attrs) return (iter != attrs.end()); } -int RGWRados::get_olh_target_state(RGWObjectCtx& obj_ctx, rgw_obj& obj, RGWObjState *olh_state, +int RGWRados::get_olh_target_state(RGWObjectCtx& obj_ctx, const rgw_obj& obj, RGWObjState *olh_state, RGWObjState **target_state) { assert(olh_state->is_olh); @@ -8048,7 +8074,7 @@ int RGWRados::get_system_obj_state(RGWObjectCtx *rctx, rgw_raw_obj& obj, RGWRawO return ret; } -int RGWRados::get_obj_state_impl(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state, bool follow_olh) +int RGWRados::get_obj_state_impl(RGWObjectCtx 
*rctx, const rgw_obj& obj, RGWObjState **state, bool follow_olh) { bool need_follow_olh = follow_olh && !obj.have_instance(); @@ -8178,7 +8204,7 @@ int RGWRados::get_obj_state_impl(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState * return 0; } -int RGWRados::get_obj_state(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state, bool follow_olh) +int RGWRados::get_obj_state(RGWObjectCtx *rctx, const rgw_obj& obj, RGWObjState **state, bool follow_olh) { int ret; @@ -8313,7 +8339,7 @@ int RGWRados::system_obj_get_attr(rgw_raw_obj& obj, const char *name, bufferlist return 0; } -int RGWRados::append_atomic_test(RGWObjectCtx *rctx, rgw_obj& obj, +int RGWRados::append_atomic_test(RGWObjectCtx *rctx, const rgw_obj& obj, ObjectOperation& op, RGWObjState **pstate) { if (!rctx) @@ -8674,6 +8700,7 @@ int RGWRados::Object::Read::prepare(int64_t *pofs, int64_t *pend) } state.obj = astate->obj; + RGWRados::obj_to_raw(state.obj, &state.head_obj); r = store->get_obj_ioctx(state.obj, &state.io_ctx); if (r < 0) { @@ -8948,7 +8975,7 @@ int RGWRados::Object::Read::read(int64_t ofs, int64_t end, bufferlist& bl) CephContext *cct = store->ctx(); std::string oid, key; - rgw_obj read_obj = state.obj; + rgw_raw_obj read_obj; uint64_t read_ofs = ofs; uint64_t len, read_len; bool reading_from_head = true; @@ -8959,9 +8986,6 @@ int RGWRados::Object::Read::read(int64_t ofs, int64_t end, bufferlist& bl) bufferlist read_bl; uint64_t max_chunk_size; - - get_obj_bucket_and_oid_loc(state.obj, oid, key); - RGWObjState *astate; int r = source->get_state(&astate, true); if (r < 0) @@ -8980,16 +9004,14 @@ int RGWRados::Object::Read::read(int64_t ofs, int64_t end, bufferlist& bl) read_obj = iter.get_location(); len = min(len, iter.get_stripe_size() - (ofs - stripe_ofs)); read_ofs = iter.location_ofs() + (ofs - stripe_ofs); - reading_from_head = (read_obj == state.obj); - - if (!reading_from_head) { - get_obj_bucket_and_oid_loc(read_obj, oid, key); - } + reading_from_head = (read_obj == state.head_obj); + } 
else { + read_obj = state.head_obj; } - r = store->get_max_chunk_size(read_obj.bucket, &max_chunk_size); + r = store->get_max_chunk_size(read_obj.pool, &max_chunk_size); if (r < 0) { - ldout(cct, 0) << "ERROR: failed to get max_chunk_size() for bucket " << read_obj.bucket << dendl; + ldout(cct, 0) << "ERROR: failed to get max_chunk_size() for pool " << read_obj.pool << dendl; return r; } @@ -8997,13 +9019,13 @@ int RGWRados::Object::Read::read(int64_t ofs, int64_t end, bufferlist& bl) len = max_chunk_size; - state.io_ctx.locator_set_key(key); + state.io_ctx.locator_set_key(read_obj.loc); read_len = len; if (reading_from_head) { /* only when reading from the head object do we need to do the atomic test */ - r = store->append_atomic_test(&source->get_ctx(), read_obj, op, &astate); + r = store->append_atomic_test(&source->get_ctx(), state.obj, op, &astate); if (r < 0) return r; @@ -9030,7 +9052,7 @@ int RGWRados::Object::Read::read(int64_t ofs, int64_t end, bufferlist& bl) ldout(cct, 20) << "rados->read obj-ofs=" << ofs << " read_ofs=" << read_ofs << " read_len=" << read_len << dendl; op.read(read_ofs, read_len, pbl, NULL); - r = state.io_ctx.operate(oid, &op, NULL); + r = state.io_ctx.operate(read_obj.oid, &op, NULL); ldout(cct, 20) << "rados->read r=" << r << " bl.length=" << bl.length() << dendl; if (r < 0) { @@ -9311,11 +9333,11 @@ struct get_obj_data : public RefCountedObject { } }; -static int _get_obj_iterate_cb(rgw_obj& obj, off_t obj_ofs, off_t read_ofs, off_t len, bool is_head_obj, RGWObjState *astate, void *arg) +static int _get_obj_iterate_cb(const rgw_obj& obj, const rgw_raw_obj& read_obj, off_t obj_ofs, off_t read_ofs, off_t len, bool is_head_obj, RGWObjState *astate, void *arg) { struct get_obj_data *d = (struct get_obj_data *)arg; - return d->rados->get_obj_iterate_cb(d->ctx, astate, obj, obj_ofs, read_ofs, len, is_head_obj, arg); + return d->rados->get_obj_iterate_cb(d->ctx, astate, obj, read_obj, obj_ofs, read_ofs, len, is_head_obj, arg); } static 
void _get_obj_aio_completion_cb(completion_t cb, void *arg) @@ -9400,7 +9422,8 @@ int RGWRados::flush_read_list(struct get_obj_data *d) } int RGWRados::get_obj_iterate_cb(RGWObjectCtx *ctx, RGWObjState *astate, - rgw_obj& obj, + const rgw_obj& obj, + const rgw_raw_obj& read_obj, off_t obj_ofs, off_t read_ofs, off_t len, bool is_head_obj, void *arg) @@ -9442,8 +9465,6 @@ int RGWRados::get_obj_iterate_cb(RGWObjectCtx *ctx, RGWObjState *astate, } } - get_obj_bucket_and_oid_loc(obj, oid, key); - d->throttle.get(len); if (d->is_cancelled()) { return d->get_err_code(); @@ -9454,13 +9475,13 @@ int RGWRados::get_obj_iterate_cb(RGWObjectCtx *ctx, RGWObjState *astate, */ d->add_io(obj_ofs, len, &pbl, &c); - ldout(cct, 20) << "rados->get_obj_iterate_cb oid=" << oid << " obj-ofs=" << obj_ofs << " read_ofs=" << read_ofs << " len=" << len << dendl; + ldout(cct, 20) << "rados->get_obj_iterate_cb oid=" << read_obj.oid << " obj-ofs=" << obj_ofs << " read_ofs=" << read_ofs << " len=" << len << dendl; op.read(read_ofs, len, pbl, NULL); librados::IoCtx io_ctx(d->io_ctx); - io_ctx.locator_set_key(key); + io_ctx.locator_set_key(read_obj.loc); - r = io_ctx.aio_operate(oid, c, &op, NULL); + r = io_ctx.aio_operate(read_obj.oid, c, &op, NULL); ldout(cct, 20) << "rados->aio_operate r=" << r << " bl.length=" << pbl->length() << dendl; if (r < 0) goto done_err; @@ -9523,15 +9544,18 @@ int RGWRados::Object::Read::iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb) int RGWRados::iterate_obj(RGWObjectCtx& obj_ctx, rgw_obj& obj, off_t ofs, off_t end, uint64_t max_chunk_size, - int (*iterate_obj_cb)(rgw_obj&, off_t, off_t, off_t, bool, RGWObjState *, void *), + int (*iterate_obj_cb)(const rgw_obj& obj, const rgw_raw_obj&, off_t, off_t, off_t, bool, RGWObjState *, void *), void *arg) { - rgw_obj read_obj = obj; + rgw_raw_obj head_obj; + rgw_raw_obj read_obj; uint64_t read_ofs = ofs; uint64_t len; bool reading_from_head = true; RGWObjState *astate = NULL; + obj_to_raw(obj, &head_obj); + int r = 
get_obj_state(&obj_ctx, obj, &astate, NULL); if (r < 0) { return r; @@ -9561,8 +9585,8 @@ int RGWRados::iterate_obj(RGWObjectCtx& obj_ctx, rgw_obj& obj, read_len = max_chunk_size; } - reading_from_head = (read_obj == obj); - r = iterate_obj_cb(read_obj, ofs, read_ofs, read_len, reading_from_head, astate, arg); + reading_from_head = (read_obj == head_obj); + r = iterate_obj_cb(obj, read_obj, ofs, read_ofs, read_len, reading_from_head, astate, arg); if (r < 0) { return r; } @@ -9573,9 +9597,10 @@ int RGWRados::iterate_obj(RGWObjectCtx& obj_ctx, rgw_obj& obj, } } else { while (ofs <= end) { + read_obj = head_obj; uint64_t read_len = min(len, max_chunk_size); - r = iterate_obj_cb(obj, ofs, ofs, read_len, reading_from_head, astate, arg); + r = iterate_obj_cb(obj, read_obj, ofs, ofs, read_len, reading_from_head, astate, arg); if (r < 0) { return r; } @@ -9588,7 +9613,7 @@ int RGWRados::iterate_obj(RGWObjectCtx& obj_ctx, rgw_obj& obj, return 0; } -int RGWRados::obj_operate(rgw_obj& obj, ObjectWriteOperation *op) +int RGWRados::obj_operate(const rgw_obj& obj, ObjectWriteOperation *op) { rgw_rados_ref ref; int r = get_obj_ref(obj, &ref); @@ -9599,7 +9624,7 @@ int RGWRados::obj_operate(rgw_obj& obj, ObjectWriteOperation *op) return ref.ioctx.operate(ref.oid, op); } -int RGWRados::obj_operate(rgw_obj& obj, ObjectReadOperation *op) +int RGWRados::obj_operate(const rgw_obj& obj, ObjectReadOperation *op) { rgw_rados_ref ref; int r = get_obj_ref(obj, &ref); @@ -9612,7 +9637,7 @@ int RGWRados::obj_operate(rgw_obj& obj, ObjectReadOperation *op) return ref.ioctx.operate(ref.oid, op, &outbl); } -int RGWRados::olh_init_modification_impl(RGWObjState& state, rgw_obj& olh_obj, string *op_tag) +int RGWRados::olh_init_modification_impl(RGWObjState& state, const rgw_obj& olh_obj, string *op_tag) { ObjectWriteOperation op; @@ -9711,7 +9736,7 @@ int RGWRados::olh_init_modification_impl(RGWObjState& state, rgw_obj& olh_obj, s return 0; } -int RGWRados::olh_init_modification(RGWObjState& state, 
rgw_obj& obj, string *op_tag) +int RGWRados::olh_init_modification(RGWObjState& state, const rgw_obj& obj, string *op_tag) { int ret; @@ -9723,7 +9748,7 @@ int RGWRados::olh_init_modification(RGWObjState& state, rgw_obj& obj, string *op return ret; } -int RGWRados::bucket_index_link_olh(RGWObjState& olh_state, rgw_obj& obj_instance, bool delete_marker, +int RGWRados::bucket_index_link_olh(RGWObjState& olh_state, const rgw_obj& obj_instance, bool delete_marker, const string& op_tag, struct rgw_bucket_dir_entry_meta *meta, uint64_t olh_epoch, @@ -9759,7 +9784,7 @@ void RGWRados::bucket_index_guard_olh_op(RGWObjState& olh_state, ObjectOperation op.cmpxattr(RGW_ATTR_OLH_ID_TAG, CEPH_OSD_CMPXATTR_OP_EQ, olh_state.olh_tag); } -int RGWRados::bucket_index_unlink_instance(rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch) +int RGWRados::bucket_index_unlink_instance(const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch) { rgw_rados_ref ref; int r = get_obj_ref(obj_instance, &ref); @@ -9783,7 +9808,7 @@ int RGWRados::bucket_index_unlink_instance(rgw_obj& obj_instance, const string& return 0; } -int RGWRados::bucket_index_read_olh_log(RGWObjState& state, rgw_obj& obj_instance, uint64_t ver_marker, +int RGWRados::bucket_index_read_olh_log(RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver_marker, map > *log, bool *is_truncated) { @@ -9813,7 +9838,7 @@ int RGWRados::bucket_index_read_olh_log(RGWObjState& state, rgw_obj& obj_instanc return 0; } -int RGWRados::bucket_index_trim_olh_log(RGWObjState& state, rgw_obj& obj_instance, uint64_t ver) +int RGWRados::bucket_index_trim_olh_log(RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver) { rgw_rados_ref ref; int r = get_obj_ref(obj_instance, &ref); @@ -9843,7 +9868,7 @@ int RGWRados::bucket_index_trim_olh_log(RGWObjState& state, rgw_obj& obj_instanc return 0; } -int RGWRados::bucket_index_clear_olh(RGWObjState& state, rgw_obj& 
obj_instance) +int RGWRados::bucket_index_clear_olh(RGWObjState& state, const rgw_obj& obj_instance) { rgw_rados_ref ref; int r = get_obj_ref(obj_instance, &ref); @@ -9871,7 +9896,7 @@ int RGWRados::bucket_index_clear_olh(RGWObjState& state, rgw_obj& obj_instance) return 0; } -int RGWRados::apply_olh_log(RGWObjectCtx& obj_ctx, RGWObjState& state, RGWBucketInfo& bucket_info, rgw_obj& obj, +int RGWRados::apply_olh_log(RGWObjectCtx& obj_ctx, RGWObjState& state, RGWBucketInfo& bucket_info, const rgw_obj& obj, bufferlist& olh_tag, map >& log, uint64_t *plast_ver) { @@ -10002,7 +10027,7 @@ int RGWRados::apply_olh_log(RGWObjectCtx& obj_ctx, RGWObjState& state, RGWBucket /* * read olh log and apply it */ -int RGWRados::update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, RGWBucketInfo& bucket_info, rgw_obj& obj) +int RGWRados::update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, RGWBucketInfo& bucket_info, const rgw_obj& obj) { map > log; bool is_truncated; @@ -10022,7 +10047,7 @@ int RGWRados::update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, RGWBucketInf return 0; } -int RGWRados::set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta, +int RGWRados::set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta, uint64_t olh_epoch, real_time unmod_since, bool high_precision_time) { string op_tag; @@ -10082,7 +10107,7 @@ int RGWRados::set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, rgw_obj return 0; } -int RGWRados::unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, rgw_obj& target_obj, +int RGWRados::unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, uint64_t olh_epoch) { string op_tag; @@ -10167,7 +10192,7 @@ static void filter_attrset(map& unfiltered_attrset, const st } } -int RGWRados::get_olh(rgw_obj& obj, RGWOLHInfo *olh) +int 
RGWRados::get_olh(const rgw_obj& obj, RGWOLHInfo *olh) { map unfiltered_attrset; @@ -10231,7 +10256,7 @@ void RGWRados::check_pending_olh_entries(map& pending_entrie } } -int RGWRados::remove_olh_pending_entries(RGWObjState& state, rgw_obj& olh_obj, map& pending_attrs) +int RGWRados::remove_olh_pending_entries(RGWObjState& state, const rgw_obj& olh_obj, map& pending_attrs) { ObjectWriteOperation op; @@ -10261,7 +10286,7 @@ int RGWRados::remove_olh_pending_entries(RGWObjState& state, rgw_obj& olh_obj, m return 0; } -int RGWRados::follow_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, rgw_obj& olh_obj, rgw_obj *target) +int RGWRados::follow_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target) { map pending_entries; filter_attrset(state->attrset, RGW_ATTR_OLH_PENDING_PREFIX, &pending_entries); @@ -11658,7 +11683,9 @@ int RGWRados::check_disk_state(librados::IoCtx io_ctx, RGWObjManifest::obj_iterator miter; RGWObjManifest& manifest = astate->manifest; for (miter = manifest.obj_begin(); miter != manifest.obj_end(); ++miter) { - rgw_obj loc = miter.get_location(); + const rgw_raw_obj& raw_loc = miter.get_location(); + rgw_obj loc; + rgw_raw_obj_to_obj(manifest.get_obj().bucket, raw_loc, &loc); if (loc.ns == RGW_OBJ_NS_MULTIPART) { dout(10) << "check_disk_state(): removing manifest part from index: " << loc << dendl; @@ -12364,7 +12391,32 @@ librados::Rados* RGWRados::get_rados_handle() } } -int RGWRados::delete_obj_aio(rgw_obj& obj, rgw_bucket& bucket, +int RGWRados::delete_raw_obj_aio(const rgw_raw_obj& obj, list& handles) +{ + rgw_rados_ref ref; + int ret = get_raw_obj_ref(obj, &ref); + if (ret < 0) { + lderr(cct) << "ERROR: failed to get obj ref with ret=" << ret << dendl; + return ret; + } + + ObjectWriteOperation op; + list prefixes; + cls_rgw_remove_obj(op, prefixes); + + AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL); + ret = ref.ioctx.aio_operate(ref.oid, c, &op); + if (ret < 0) { + lderr(cct) << 
"ERROR: AioOperate failed with ret=" << ret << dendl; + return ret; + } + + handles.push_back(c); + + return 0; +} + +int RGWRados::delete_obj_aio(const rgw_obj& obj, rgw_bucket& bucket, RGWBucketInfo& bucket_info, RGWObjState *astate, list& handles, bool keep_index_consistent) { diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index 1dfb9c5774d63..440dfb70d1d7f 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -73,6 +73,38 @@ static inline void get_obj_bucket_and_oid_loc(const rgw_obj& obj, string& oid, s int rgw_policy_from_attrset(CephContext *cct, map& attrset, RGWAccessControlPolicy *policy); +static inline void rgw_obj_to_raw(const rgw_obj& obj, rgw_raw_obj *raw_obj) +{ + get_obj_bucket_and_oid_loc(obj, raw_obj->oid, raw_obj->loc); + + if (!obj.is_in_extra_data()) { + raw_obj->pool = obj.bucket.placement.data_pool; + } else { + raw_obj->pool = obj.bucket.placement.get_data_extra_pool(); + } +} + +static inline bool rgw_raw_obj_to_obj(const rgw_bucket& bucket, const rgw_raw_obj& raw_obj, rgw_obj *obj) +{ + string name; + string instance; + string ns; + + ssize_t pos = raw_obj.oid.find('_'); + if (pos < 0) { + return false; + } + + if (!rgw_obj::parse_raw_oid(raw_obj.oid.substr(pos + 1), &name, &instance, &ns)) { + return false; + } + + obj->init_ns(bucket, name, ns); + obj->set_instance(instance); + + return true; +} + struct RGWOLHInfo { rgw_obj target; bool removed; @@ -161,7 +193,7 @@ struct RGWCloneRangeInfo { }; struct RGWObjManifestPart { - rgw_obj loc; /* the object where the data is located */ + rgw_obj loc; /* the object where the data is located */ uint64_t loc_ofs; /* the offset at that object where the data is located */ uint64_t size; /* the part size */ @@ -246,13 +278,14 @@ class RGWObjManifest { uint64_t obj_size; - rgw_obj head_obj; + rgw_obj obj; + rgw_raw_obj head_obj; uint64_t head_size; uint64_t max_head_size; string prefix; rgw_bucket tail_bucket; /* might be different than the original bucket, - as object might have 
been copied across buckets */ + as object might have been copied across pools */ map rules; string tail_instance; /* tail object's instance */ @@ -276,6 +309,7 @@ class RGWObjManifest { explicit_objs = rhs.explicit_objs; objs = rhs.objs; obj_size = rhs.obj_size; + obj = rhs.obj; head_obj = rhs.head_obj; head_size = rhs.head_size; max_head_size = rhs.max_head_size; @@ -304,7 +338,7 @@ class RGWObjManifest { objs.swap(_objs); } - void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_obj *location); + void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_raw_obj *location); void set_trivial_rule(uint64_t tail_ofs, uint64_t stripe_max_size) { RGWObjManifestRule rule(0, tail_ofs, 0, stripe_max_size); @@ -320,27 +354,28 @@ class RGWObjManifest { } void encode(bufferlist& bl) const { - ENCODE_START(5, 3, bl); + ENCODE_START(6, 3, bl); ::encode(obj_size, bl); ::encode(objs, bl); ::encode(explicit_objs, bl); - ::encode(head_obj, bl); + ::encode(obj, bl); ::encode(head_size, bl); ::encode(max_head_size, bl); ::encode(prefix, bl); ::encode(rules, bl); ::encode(tail_bucket, bl); ::encode(tail_instance, bl); + ::encode(head_obj, bl); ENCODE_FINISH(bl); } void decode(bufferlist::iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN_32(5, 2, 2, bl); + DECODE_START_LEGACY_COMPAT_LEN_32(6, 2, 2, bl); ::decode(obj_size, bl); ::decode(objs, bl); if (struct_v >= 3) { ::decode(explicit_objs, bl); - ::decode(head_obj, bl); + ::decode(obj, bl); ::decode(head_size, bl); ::decode(max_head_size, bl); ::decode(prefix, bl); @@ -349,7 +384,7 @@ class RGWObjManifest { explicit_objs = true; if (!objs.empty()) { map::iterator iter = objs.begin(); - head_obj = iter->second.loc; + obj = iter->second.loc; head_size = iter->second.size; max_head_size = head_size; } @@ -362,9 +397,8 @@ class RGWObjManifest { * when the explicit objs manifest was around, and it got copied. 
*/ rgw_obj& obj_0 = objs[0].loc; - if (!obj_0.get_object().empty() && obj_0.ns.empty()) { - objs[0].loc = head_obj; + objs[0].loc = obj; objs[0].size = head_size; } } @@ -376,7 +410,13 @@ class RGWObjManifest { if (struct_v >= 5) { ::decode(tail_instance, bl); } else { // old object created before 'tail_instance' field added to manifest - tail_instance = head_obj.get_instance(); + tail_instance = obj.get_instance(); + } + + if (struct_v >= 6) { + ::decode(head_obj, bl); + } else { + rgw_obj_to_raw(obj, &head_obj); } update_iterators(); @@ -404,8 +444,8 @@ class RGWObjManifest { if (explicit_objs) { if (objs.size() == 1) { map::iterator iter = objs.begin(); - rgw_obj& obj = iter->second.loc; - return !(head_obj == obj); + rgw_obj& o = iter->second.loc; + return !(obj == o); } return (objs.size() >= 2); } @@ -413,16 +453,21 @@ class RGWObjManifest { } void set_head(const rgw_obj& _o, uint64_t _s) { - head_obj = _o; + obj = _o; + rgw_obj_to_raw(obj, &head_obj); head_size = _s; if (explicit_objs && head_size > 0) { - objs[0].loc = head_obj; + objs[0].loc = obj; objs[0].size = head_size; } } - const rgw_obj& get_head() { + const rgw_obj& get_obj() { + return obj; + } + + const rgw_raw_obj& get_head() { return head_obj; } @@ -487,7 +532,7 @@ class RGWObjManifest { int cur_stripe; string cur_override_prefix; - rgw_obj location; + rgw_raw_obj location; map::iterator rule_iter; map::iterator next_rule_iter; @@ -537,7 +582,7 @@ class RGWObjManifest { bool operator!=(const obj_iterator& rhs) { return (ofs != rhs.ofs); } - const rgw_obj& get_location() { + const rgw_raw_obj& get_location() { return location; } @@ -596,8 +641,8 @@ class RGWObjManifest { string oid_prefix; - rgw_obj cur_obj; - rgw_bucket bucket; + rgw_raw_obj cur_obj; + rgw_pool pool; RGWObjManifestRule rule; @@ -605,11 +650,11 @@ class RGWObjManifest { public: generator() : manifest(NULL), last_ofs(0), cur_part_ofs(0), cur_part_id(0), cur_stripe(0), cur_stripe_size(0) {} - int create_begin(CephContext *cct, 
RGWObjManifest *manifest, rgw_bucket& bucket, rgw_obj& head); + int create_begin(CephContext *cct, RGWObjManifest *manifest, rgw_bucket& bucket, rgw_obj& obj); int create_next(uint64_t ofs); - const rgw_obj& get_cur_obj() { return cur_obj; } + const rgw_raw_obj& get_cur_obj() { return cur_obj; } /* total max size of current stripe (including head obj) */ uint64_t cur_stripe_max_size() { @@ -1783,7 +1828,7 @@ class RGWObjectCtxImpl { public: RGWObjectCtxImpl(RGWRados *_store) : store(_store), lock("RGWObjectCtxImpl") {} - S *get_state(T& obj) { + S *get_state(const T& obj) { S *result; typename std::map::iterator iter; lock.get_read(); @@ -1931,11 +1976,11 @@ class RGWRados int get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref, rgw_pool *pool = NULL); uint64_t max_bucket_id; - int get_olh_target_state(RGWObjectCtx& rctx, rgw_obj& obj, RGWObjState *olh_state, + int get_olh_target_state(RGWObjectCtx& rctx, const rgw_obj& obj, RGWObjState *olh_state, RGWObjState **target_state); int get_system_obj_state_impl(RGWObjectCtx *rctx, rgw_raw_obj& obj, RGWRawObjState **state, RGWObjVersionTracker *objv_tracker); - int get_obj_state_impl(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state, bool follow_olh); - int append_atomic_test(RGWObjectCtx *rctx, rgw_obj& obj, + int get_obj_state_impl(RGWObjectCtx *rctx, const rgw_obj& obj, RGWObjState **state, bool follow_olh); + int append_atomic_test(RGWObjectCtx *rctx, const rgw_obj& obj, librados::ObjectOperation& op, RGWObjState **state); int update_placement_map(); @@ -2115,8 +2160,10 @@ class RGWRados return obj_tombstone_cache; } - int get_required_alignment(rgw_bucket& bucket, uint64_t *alignment); - int get_max_chunk_size(rgw_bucket& bucket, uint64_t *max_chunk_size); + int get_required_alignment(const rgw_pool& pool, uint64_t *alignment); + int get_required_alignment(const rgw_bucket& bucket, uint64_t *alignment); + int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size); + int 
get_max_chunk_size(const rgw_bucket& bucket, uint64_t *max_chunk_size); int get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref, rgw_pool *pool = NULL); static void obj_to_raw(const rgw_obj& obj, rgw_raw_obj *raw_obj); @@ -2279,7 +2326,7 @@ class RGWRados string bucket_obj; explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {} - int init(rgw_bucket& _bucket, rgw_obj& obj); + int init(const rgw_bucket& _bucket, const rgw_obj& obj); }; class Object { @@ -2341,6 +2388,7 @@ class RGWRados struct GetObjState { librados::IoCtx io_ctx; rgw_obj obj; + rgw_raw_obj head_obj; } state; struct ConditionParams { @@ -2499,7 +2547,7 @@ class RGWRados bool blind; public: - UpdateIndex(RGWRados::Bucket *_target, rgw_obj& _obj, RGWObjState *_state) : target(_target), obj(_obj), obj_state(_state), bilog_flags(0), + UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj, RGWObjState *_state) : target(_target), obj(_obj), obj_state(_state), bilog_flags(0), bs(target->get_store()), bs_initialized(false) { blind = (target->get_bucket_info().index_type == RGWBIType_Indexless); } @@ -2567,10 +2615,8 @@ class RGWRados virtual int put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl, off_t ofs, bool exclusive); - int put_obj_data(void *ctx, rgw_obj& obj, const char *data, - off_t ofs, size_t len, bool exclusive); - int aio_put_obj_data(void *ctx, rgw_obj& obj, bufferlist& bl, - off_t ofs, bool exclusive, void **handle); + int aio_put_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl, + off_t ofs, bool exclusive, void **handle); int put_system_obj(void *ctx, rgw_raw_obj& obj, const char *data, size_t len, bool exclusive, ceph::real_time *mtime, map& attrs, RGWObjVersionTracker *objv_tracker, @@ -2742,11 +2788,14 @@ class RGWRados uint16_t bilog_flags = 0, const ceph::real_time& expiration_time = ceph::real_time()); + /** Delete a raw object.*/ + int delete_raw_obj(const rgw_raw_obj& obj); + /* Delete a system object */ virtual int 
delete_system_obj(rgw_raw_obj& src_obj, RGWObjVersionTracker *objv_tracker = NULL); /** Remove an object from the bucket index */ - int delete_obj_index(rgw_obj& obj); + int delete_obj_index(const rgw_obj& obj); /** * Get the attributes for an object. @@ -2780,8 +2829,8 @@ class RGWRados map* rmattrs); int get_system_obj_state(RGWObjectCtx *rctx, rgw_raw_obj& obj, RGWRawObjState **state, RGWObjVersionTracker *objv_tracker); - int get_obj_state(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state, bool follow_olh); - int get_obj_state(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState **state) { + int get_obj_state(RGWObjectCtx *rctx, const rgw_obj& obj, RGWObjState **state, bool follow_olh); + int get_obj_state(RGWObjectCtx *rctx, const rgw_obj& obj, RGWObjState **state) { return get_obj_state(rctx, obj, state, true); } @@ -2805,13 +2854,14 @@ class RGWRados int iterate_obj(RGWObjectCtx& ctx, rgw_obj& obj, off_t ofs, off_t end, uint64_t max_chunk_size, - int (*iterate_obj_cb)(rgw_obj&, off_t, off_t, off_t, bool, RGWObjState *, void *), + int (*iterate_obj_cb)(const rgw_obj& obj, const rgw_raw_obj&, off_t, off_t, off_t, bool, RGWObjState *, void *), void *arg); int flush_read_list(struct get_obj_data *d); int get_obj_iterate_cb(RGWObjectCtx *ctx, RGWObjState *astate, - rgw_obj& obj, + const rgw_obj& obj, + const rgw_raw_obj& read_obj, off_t obj_ofs, off_t read_ofs, off_t len, bool is_head_obj, void *arg); @@ -2825,34 +2875,34 @@ class RGWRados map *attrs, bufferlist *first_chunk, RGWObjVersionTracker *objv_tracker); - int obj_operate(rgw_obj& obj, librados::ObjectWriteOperation *op); - int obj_operate(rgw_obj& obj, librados::ObjectReadOperation *op); + int obj_operate(const rgw_obj& obj, librados::ObjectWriteOperation *op); + int obj_operate(const rgw_obj& obj, librados::ObjectReadOperation *op); void bucket_index_guard_olh_op(RGWObjState& olh_state, librados::ObjectOperation& op); - int olh_init_modification(RGWObjState& state, rgw_obj& olh_obj, string *op_tag); - int 
olh_init_modification_impl(RGWObjState& state, rgw_obj& olh_obj, string *op_tag); - int bucket_index_link_olh(RGWObjState& olh_state, rgw_obj& obj_instance, bool delete_marker, + int olh_init_modification(RGWObjState& state, const rgw_obj& olh_obj, string *op_tag); + int olh_init_modification_impl(RGWObjState& state, const rgw_obj& olh_obj, string *op_tag); + int bucket_index_link_olh(RGWObjState& olh_state, const rgw_obj& obj_instance, bool delete_marker, const string& op_tag, struct rgw_bucket_dir_entry_meta *meta, uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time); - int bucket_index_unlink_instance(rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch); - int bucket_index_read_olh_log(RGWObjState& state, rgw_obj& obj_instance, uint64_t ver_marker, + int bucket_index_unlink_instance(const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch); + int bucket_index_read_olh_log(RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver_marker, map > *log, bool *is_truncated); - int bucket_index_trim_olh_log(RGWObjState& obj_state, rgw_obj& obj_instance, uint64_t ver); - int bucket_index_clear_olh(RGWObjState& state, rgw_obj& obj_instance); - int apply_olh_log(RGWObjectCtx& ctx, RGWObjState& obj_state, RGWBucketInfo& bucket_info, rgw_obj& obj, + int bucket_index_trim_olh_log(RGWObjState& obj_state, const rgw_obj& obj_instance, uint64_t ver); + int bucket_index_clear_olh(RGWObjState& state, const rgw_obj& obj_instance); + int apply_olh_log(RGWObjectCtx& ctx, RGWObjState& obj_state, RGWBucketInfo& bucket_info, const rgw_obj& obj, bufferlist& obj_tag, map >& log, uint64_t *plast_ver); - int update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, RGWBucketInfo& bucket_info, rgw_obj& obj); - int set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta, + int update_olh(RGWObjectCtx& obj_ctx, RGWObjState 
*state, RGWBucketInfo& bucket_info, const rgw_obj& obj); + int set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta, uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time); - int unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, rgw_obj& target_obj, + int unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, uint64_t olh_epoch); void check_pending_olh_entries(map& pending_entries, map *rm_pending_entries); - int remove_olh_pending_entries(RGWObjState& state, rgw_obj& olh_obj, map& pending_attrs); - int follow_olh(RGWObjectCtx& ctx, RGWObjState *state, rgw_obj& olh_obj, rgw_obj *target); - int get_olh(rgw_obj& obj, RGWOLHInfo *olh); + int remove_olh_pending_entries(RGWObjState& state, const rgw_obj& olh_obj, map& pending_attrs); + int follow_olh(RGWObjectCtx& ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target); + int get_olh(const rgw_obj& obj, RGWOLHInfo *olh); void gen_rand_obj_instance_name(rgw_obj *target); @@ -3101,7 +3151,8 @@ class RGWRados librados::Rados* get_rados_handle(); - int delete_obj_aio(rgw_obj& obj, rgw_bucket& bucket, RGWBucketInfo& info, RGWObjState *astate, + int delete_raw_obj_aio(const rgw_raw_obj& obj, list& handles); + int delete_obj_aio(const rgw_obj& obj, rgw_bucket& bucket, RGWBucketInfo& info, RGWObjState *astate, list& handles, bool keep_index_consistent); private: /** @@ -3257,8 +3308,8 @@ class RGWPutObjProcessor store = _store; return 0; } - virtual int handle_data(bufferlist& bl, off_t ofs, MD5 *hash, void **phandle, rgw_obj *pobj, bool *again) = 0; - virtual int throttle_data(void *handle, const rgw_obj& obj, bool need_to_wait) = 0; + virtual int handle_data(bufferlist& bl, off_t ofs, MD5 *hash, void **phandle, rgw_raw_obj *pobj, bool *again) = 0; + virtual int throttle_data(void *handle, const rgw_raw_obj& obj, bool need_to_wait) = 0; virtual void 
complete_hash(MD5 *hash) { assert(0); } @@ -3273,7 +3324,7 @@ class RGWPutObjProcessor struct put_obj_aio_info { void *handle; - rgw_obj obj; + rgw_raw_obj obj; }; class RGWPutObjProcessor_Aio : public RGWPutObjProcessor @@ -3285,22 +3336,23 @@ class RGWPutObjProcessor_Aio : public RGWPutObjProcessor int wait_pending_front(); bool pending_has_completed(); - rgw_obj last_written_obj; + rgw_raw_obj last_written_obj; protected: uint64_t obj_len; - set written_objs; + set written_objs; + rgw_obj head_obj; - void add_written_obj(const rgw_obj& obj) { + void add_written_obj(const rgw_raw_obj& obj) { written_objs.insert(obj); } int drain_pending(); - int handle_obj_data(rgw_obj& obj, bufferlist& bl, off_t ofs, off_t abs_ofs, void **phandle, bool exclusive); + int handle_obj_data(rgw_raw_obj& obj, bufferlist& bl, off_t ofs, off_t abs_ofs, void **phandle, bool exclusive); public: - int throttle_data(void *handle, const rgw_obj& obj, bool need_to_wait); + int throttle_data(void *handle, const rgw_raw_obj& obj, bool need_to_wait); RGWPutObjProcessor_Aio(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info) : RGWPutObjProcessor(obj_ctx, bucket_info), max_chunks(RGW_MAX_PENDING_CHUNKS), obj_len(0) {} virtual ~RGWPutObjProcessor_Aio(); @@ -3330,12 +3382,11 @@ class RGWPutObjProcessor_Atomic : public RGWPutObjProcessor_Aio string unique_tag; - rgw_obj head_obj; - rgw_obj cur_obj; + rgw_raw_obj cur_obj; RGWObjManifest manifest; RGWObjManifest::generator manifest_gen; - int write_data(bufferlist& bl, off_t ofs, void **phandle, rgw_obj *pobj, bool exclusive); + int write_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool exclusive); virtual int do_complete(string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, map& attrs, ceph::real_time delete_at, const char *if_match = NULL, const char *if_nomatch = NULL); @@ -3368,7 +3419,7 @@ class RGWPutObjProcessor_Atomic : public RGWPutObjProcessor_Aio void set_extra_data_len(uint64_t len) { extra_data_len = len; } 
- virtual int handle_data(bufferlist& bl, off_t ofs, MD5 *hash, void **phandle, rgw_obj *pobj, bool *again); + virtual int handle_data(bufferlist& bl, off_t ofs, MD5 *hash, void **phandle, rgw_raw_obj *pobj, bool *again); virtual void complete_hash(MD5 *hash); bufferlist& get_extra_data() { return extra_data_bl; }