Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

jewel: Decode errors on backtrace will crash MDS #12836

Merged
merged 3 commits into from Jan 25, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 24 additions & 0 deletions qa/tasks/cephfs/filesystem.py
Expand Up @@ -633,6 +633,30 @@ def _read_data_xattr(self, ino_no, xattr_name, type, pool):

return json.loads(p.stdout.getvalue().strip())

def _write_data_xattr(self, ino_no, xattr_name, data, pool=None):
    """
    Set an xattr on the first (index 0) data object of an inode by
    invoking ``rados setxattr`` on the remote of the first MDS daemon.

    The original author notes that this is expected to succeed whether
    or not the object and/or the xattr already exist.

    :param ino_no: integer inode number; formatted as hex to build the
                   object name ``<ino hex>.00000000``
    :param xattr_name: string name of the xattr
    :param data: value to store in the xattr, passed to ``rados`` as a
                 command-line argument
    :param pool: name of the data pool, or None to use the pool
                 returned by ``get_data_pool_name()``
    :return: None
    """
    # Resolve the remote first, exactly as the original did, so lookup
    # errors surface before any pool query is made.
    first_mds_id = self.mds_ids[0]
    mds_remote = self.mds_daemons[first_mds_id].remote

    target_pool = self.get_data_pool_name() if pool is None else pool

    # Object 0 of an inode is named "<inode number in hex>.00000000".
    head_object = format(ino_no, "x") + ".00000000"
    rados_binary = os.path.join(self._prefix, "rados")

    mds_remote.run(
        args=[
            rados_binary, "-p", target_pool, "setxattr",
            head_object, xattr_name, data,
        ],
        stdout=StringIO())

def read_backtrace(self, ino_no, pool=None):
"""
Read the backtrace from the data pool, return a dict in the format
Expand Down
3 changes: 2 additions & 1 deletion qa/tasks/cephfs/mount.py
Expand Up @@ -136,7 +136,8 @@ def run_python(self, pyscript):

def run_shell(self, args, wait=True):
    """
    Run a command under ``sudo`` from inside the mountpoint on the
    client remote.

    Both stdout and stderr are captured into fresh ``StringIO`` buffers
    so that callers can inspect error output (for example via
    ``.stderr.getvalue()``) on the object returned by ``run``.

    :param args: list of command arguments; prefixed here with
                 ``cd <mountpoint> && sudo``
    :param wait: passed through to ``client_remote.run``
    :return: whatever ``client_remote.run`` returns
    """
    args = ["cd", self.mountpoint, run.Raw('&&'), "sudo"] + args
    # Only one return: the earlier stdout-only call made this
    # stderr-capturing variant unreachable.
    return self.client_remote.run(args=args, stdout=StringIO(),
                                  stderr=StringIO(), wait=wait)

def open_no_data(self, basename):
"""
Expand Down
46 changes: 46 additions & 0 deletions qa/tasks/cephfs/test_damage.py
Expand Up @@ -442,3 +442,49 @@ def test_damaged_dentry(self):
# Now I should be able to create a file with the same name as the
# damaged guy if I want.
self.mount_a.touch("subdir/file_to_be_damaged")

def test_corrupt_backtrace(self):
    """
    Corrupt the backtrace xattr of a hard-linked file and check that
    accessing it through the hard link reports an I/O error and that a
    single "backtrace" entry for that inode shows up in the MDS damage
    table.
    """

    # Lay out alpha/target and a hard link to it from a second directory.
    for dirname in ("alpha", "bravo"):
        self.mount_a.run_shell(["mkdir", dirname])
    self.mount_a.run_shell(["touch", "alpha/target"])
    self.mount_a.run_shell(["ln", "alpha/target", "bravo/hardlink"])

    alpha_ino = self.mount_a.path_to_ino("alpha/target")

    # Unmount and flush the journal so everything reaches the backing
    # store before we tamper with it.
    self.mount_a.umount_wait()
    self.fs.mds_asok(["flush", "journal"])

    # Sanity check: the backtrace must be readable before corruption.
    self.fs.read_backtrace(alpha_ino)
    # Overwrite the "parent" xattr (the backtrace of alpha/target, used
    # for resolving bravo/hardlink) with junk.
    self.fs._write_data_xattr(alpha_ino, "parent", "rhubarb")

    # Restart the MDS with a reset journal so nothing is served from
    # its cache.
    self.mds_cluster.mds_stop()
    self.fs.journal_tool(['journal', 'reset'])
    self.mds_cluster.mds_fail_restart()
    self.fs.wait_for_daemons()

    # Listing the hard link should fail with EIO.  Note that an `ls`
    # which succeeds is not treated as a failure by this step.
    self.mount_a.mount()
    listing = self.mount_a.run_shell(["ls", "-l", "bravo/hardlink"],
                                     wait=False)
    try:
        listing.wait()
    except CommandFailedError:
        self.assertIn("Input/output error", listing.stderr.getvalue())

    # The damage table should now hold exactly one backtrace entry for
    # the corrupted inode.
    active_mds = self.fs.get_active_names()[0]
    damage_json = self.fs.mon_manager.raw_cluster_cmd(
        'tell', 'mds.{0}'.format(active_mds),
        "damage", "ls", '--format=json-pretty')
    damage_table = json.loads(damage_json)
    self.assertEqual(len(damage_table), 1)
    entry = damage_table[0]
    self.assertEqual(entry['damage_type'], "backtrace")
    self.assertEqual(entry['ino'], alpha_ino)
9 changes: 8 additions & 1 deletion src/mds/MDCache.cc
Expand Up @@ -8241,7 +8241,14 @@ void MDCache::_open_ino_backtrace_fetched(inodeno_t ino, bufferlist& bl, int err

inode_backtrace_t backtrace;
if (err == 0) {
::decode(backtrace, bl);
try {
::decode(backtrace, bl);
} catch (const buffer::error &decode_exc) {
derr << "corrupt backtrace on ino x0" << std::hex << ino
<< std::dec << ": " << decode_exc << dendl;
open_ino_finish(ino, info, -EIO);
return;
}
if (backtrace.pool != info.pool && backtrace.pool != -1) {
dout(10) << " old object in pool " << info.pool
<< ", retrying pool " << backtrace.pool << dendl;
Expand Down