New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
cephfs-data-scan: scrub tag filtering (#12133 and #12145) #5685
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,14 +52,15 @@ bool DataScan::parse_kwarg( | |
} | ||
|
||
const std::string arg(*i); | ||
const std::string val(*(++i)); | ||
const std::string val(*(i + 1)); | ||
|
||
if (arg == std::string("--output-dir")) { | ||
if (driver != NULL) { | ||
derr << "Unexpected --output-dir: output already selected!" << dendl; | ||
*r = -EINVAL; | ||
return false; | ||
} | ||
dout(4) << "Using local file output to '" << val << "'" << dendl; | ||
driver = new LocalFileDriver(val, data_io); | ||
return true; | ||
} else if (arg == std::string("-n")) { | ||
|
@@ -80,6 +81,10 @@ bool DataScan::parse_kwarg( | |
return false; | ||
} | ||
return true; | ||
} else if (arg == std::string("--filter-tag")) { | ||
filter_tag = val; | ||
dout(10) << "Applying tag filter: '" << filter_tag << "'" << dendl; | ||
return true; | ||
} else { | ||
return false; | ||
} | ||
|
@@ -156,6 +161,7 @@ int DataScan::main(const std::vector<const char*> &args) | |
driver = new MetadataDriver(); | ||
driver->set_force_corrupt(force_corrupt); | ||
driver->set_force_init(force_init); | ||
dout(4) << "Using metadata pool output" << dendl; | ||
} | ||
|
||
dout(4) << "connecting to RADOS..." << dendl; | ||
|
@@ -447,7 +453,28 @@ int DataScan::scan_inodes() | |
float progress = 0.0; | ||
librados::NObjectIterator i = data_io.nobjects_begin(n, m); | ||
#else | ||
librados::NObjectIterator i = data_io.nobjects_begin(); | ||
librados::NObjectIterator i; | ||
bool legacy_filtering = false; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like you've broken the SHARDED_PGLS build here? legacy_filtering is referenced regardless below, and I imagine it also needs to include the filtering bits. ;) (Although I'm not sure how interested you are in maintaining the two branches for this short period of time...nor if preprocessor #ifdefs are the right way to handle that, now that I'm seeing them again...) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these #ifdefs aren't functional, just aide memoires for me when the post-infernalis sharded pgls stuff goes in. |
||
|
||
bufferlist filter_bl; | ||
ClsCephFSClient::build_tag_filter(filter_tag, &filter_bl); | ||
|
||
// try/catch to deal with older OSDs that don't support | ||
// the cephfs pgls filtering mode | ||
try { | ||
i = data_io.nobjects_begin(filter_bl); | ||
dout(4) << "OSDs accepted cephfs object filtering" << dendl; | ||
} catch (const std::runtime_error &e) { | ||
// A little unfriendly, librados raises std::runtime_error | ||
// on pretty much any unhandled I/O return value, such as | ||
// the OSD saying -EINVAL because of our use of a filter | ||
// mode that it doesn't know about. | ||
std::cerr << "OSDs do not support cephfs object filtering: using " | ||
"(slower) fallback mode" << std::endl; | ||
legacy_filtering = true; | ||
i = data_io.nobjects_begin(); | ||
} | ||
|
||
#endif | ||
librados::NObjectIterator i_end = data_io.nobjects_end(); | ||
|
||
|
@@ -484,10 +511,38 @@ int DataScan::scan_inodes() | |
continue; | ||
} | ||
|
||
// We are only interested in 0th objects during this phase: we touched | ||
// the other objects during scan_extents | ||
if (obj_name_offset != 0) { | ||
continue; | ||
if (legacy_filtering) { | ||
dout(20) << "Applying filter to " << oid << dendl; | ||
|
||
// We are only interested in 0th objects during this phase: we touched | ||
// the other objects during scan_extents | ||
if (obj_name_offset != 0) { | ||
dout(20) << "Non-zeroth object" << dendl; | ||
continue; | ||
} | ||
|
||
bufferlist scrub_tag_bl; | ||
int r = data_io.getxattr(oid, "scrub_tag", scrub_tag_bl); | ||
if (r >= 0) { | ||
std::string read_tag; | ||
bufferlist::iterator q = scrub_tag_bl.begin(); | ||
try { | ||
::decode(read_tag, q); | ||
if (read_tag == filter_tag) { | ||
dout(20) << "skipping " << oid << " because it has the filter_tag" | ||
<< dendl; | ||
continue; | ||
} | ||
} catch (const buffer::error &err) { | ||
} | ||
dout(20) << "read non-matching tag '" << read_tag << "'" << dendl; | ||
} else { | ||
dout(20) << "no tag read (" << r << ")" << dendl; | ||
} | ||
|
||
} else { | ||
assert(obj_name_offset == 0); | ||
dout(20) << "OSD matched oid " << oid << dendl; | ||
} | ||
|
||
AccumulateResult accum_res; | ||
|
@@ -496,7 +551,11 @@ int DataScan::scan_inodes() | |
int r = ClsCephFSClient::fetch_inode_accumulate_result( | ||
data_io, oid, &backtrace, &loaded_layout, &accum_res); | ||
|
||
if (r < 0) { | ||
if (r == -EINVAL) { | ||
dout(4) << "Accumulated metadata missing from '" | ||
<< oid << ", did you run scan_extents?" << dendl; | ||
continue; | ||
} else if (r < 0) { | ||
dout(4) << "Unexpected error loading accumulated metadata from '" | ||
<< oid << "': " << cpp_strerror(r) << dendl; | ||
// FIXME: this creates situation where if a client has a corrupt | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is fine, but we could also have a generic "name and xattr" filter. Require it to match the name in a (definable, or just search-based) pattern, and to either match or not match an xattr name and value.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When we're writing a plugin for the osd, I don't feel the need to make it customizable like this: it's already a special purpose call, and anyone who needs something more generic can write their own object class.
I'm going to take you at the "I think this is fine" part :-)