ceph · jcsp · Jan 25, 2017 · Dec 20, 2016 · Jan 5, 2017 · Jan 24, 2017
diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
@@ -633,6 +633,30 @@ def _read_data_xattr(self, ino_no, xattr_name, type, pool):
 
         return json.loads(p.stdout.getvalue().strip())
 
+    def _write_data_xattr(self, ino_no, xattr_name, data, pool=None):
+        """
+        Write to an xattr of the 0th data object of an inode.  Will
+        succeed whether the object and/or xattr already exist or not.
+
+        :param ino_no: integer inode number
+        :param xattr_name: string name of the xattr
+        :param data: byte array data to write to the xattr
+        :param pool: name of data pool or None to use primary data pool
+        :return: None
+        """
+        remote = self.mds_daemons[self.mds_ids[0]].remote
+        if pool is None:
+            pool = self.get_data_pool_name()
+
+        obj_name = "{0:x}.00000000".format(ino_no)
+        args = [
+            os.path.join(self._prefix, "rados"), "-p", pool, "setxattr",
+            obj_name, xattr_name, data
+        ]
+        remote.run(
+            args=args,
+            stdout=StringIO())
+
     def read_backtrace(self, ino_no, pool=None):
         """
         Read the backtrace from the data pool, return a dict in the format

diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py
@@ -136,7 +136,8 @@ def run_python(self, pyscript):
 
     def run_shell(self, args, wait=True):
         args = ["cd", self.mountpoint, run.Raw('&&'), "sudo"] + args
-        return self.client_remote.run(args=args, stdout=StringIO(), wait=wait)
+        return self.client_remote.run(args=args, stdout=StringIO(),
+                                      stderr=StringIO(), wait=wait)
 
     def open_no_data(self, basename):
         """

diff --git a/qa/tasks/cephfs/test_damage.py b/qa/tasks/cephfs/test_damage.py
@@ -442,3 +442,49 @@ def test_damaged_dentry(self):
         # Now I should be able to create a file with the same name as the
         # damaged guy if I want.
         self.mount_a.touch("subdir/file_to_be_damaged")
+
+    def test_corrupt_backtrace(self):
+        """
+        That an un-decodeable backtrace leads to an appropriate
+        error trying to follow the backtrace to the file.
+        """
+
+        self.mount_a.run_shell(["mkdir", "alpha"])
+        self.mount_a.run_shell(["mkdir", "bravo"])
+        self.mount_a.run_shell(["touch", "alpha/target"])
+        self.mount_a.run_shell(["ln", "alpha/target", "bravo/hardlink"])
+
+        alpha_ino = self.mount_a.path_to_ino("alpha/target")
+
+        # Ensure everything is written to backing store
+        self.mount_a.umount_wait()
+        self.fs.mds_asok(["flush", "journal"])
+
+        # Validate that the backtrace is present and decodable
+        self.fs.read_backtrace(alpha_ino)
+        # Go corrupt the backtrace of alpha/target (used for resolving
+        # bravo/hardlink).
+        self.fs._write_data_xattr(alpha_ino, "parent", "rhubarb")
+
+        # Drop everything from the MDS cache
+        self.mds_cluster.mds_stop()
+        self.fs.journal_tool(['journal', 'reset'])
+        self.mds_cluster.mds_fail_restart()
+        self.fs.wait_for_daemons()
+
+        # Check that touching the hardlink gives EIO
+        self.mount_a.mount()
+        ran = self.mount_a.run_shell(["ls", "-l", "bravo/hardlink"], wait=False)
+        try:
+            ran.wait()
+        except CommandFailedError:
+            self.assertTrue("Input/output error" in ran.stderr.getvalue())
+
+        # Check that an entry is created in the damage table
+        damage = json.loads(
+            self.fs.mon_manager.raw_cluster_cmd(
+                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+                "damage", "ls", '--format=json-pretty'))
+        self.assertEqual(len(damage), 1)
+        self.assertEqual(damage[0]['damage_type'], "backtrace")
+        self.assertEqual(damage[0]['ino'], alpha_ino)
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
@@ -8241,7 +8241,14 @@ void MDCache::_open_ino_backtrace_fetched(inodeno_t ino, bufferlist& bl, int err
 
   inode_backtrace_t backtrace;
   if (err == 0) {
-    ::decode(backtrace, bl);
+    try {
+      ::decode(backtrace, bl);
+    } catch (const buffer::error &decode_exc) {
+      derr << "corrupt backtrace on ino x0" << std::hex << ino
+           << std::dec << ": " << decode_exc << dendl;
+      open_ino_finish(ino, info, -EIO);
+      return;
+    }
     if (backtrace.pool != info.pool && backtrace.pool != -1) {
       dout(10) << " old object in pool " << info.pool
 	       << ", retrying pool " << backtrace.pool << dendl;