From 52291d6554f2275b228d7039d222bccfab164106 Mon Sep 17 00:00:00 2001 From: Edward Hope-Morley Date: Mon, 12 Aug 2013 17:46:38 +0100 Subject: [PATCH] Added copy-on-write support for all RBD cloning Up till now we only had copy-on-write for cloning from snapshot. This change optionally allows clone from volume to use copy-on-write instead of doing a full copy each time. This should increase speed and reduce near-term storage consumption but could introduce some new risks e.g. excessively long clone chains and flatten storms. To avoid this, a new config option has been provided - rbd_max_clone_depth - which allows the user to limit the depth of a chain of clones i.e. a->b->c->d as opposed to a->b ->c ->d This will avoid flatten storms by breaking chains as they are formed and at an early, predefined stage. A second option - rbd_clone_from_volume_force_copy - allows the user to use a full copy as before i.e. disable COW for volume clones. Implements: blueprint use-copy-on-write-for-all-volume-cloning Fixes: bug #1209199 Change-Id: Ia4a8a10c797cda2cf1ef3a2e9bd49f8c084ec977 --- cinder/tests/backup/fake_rados.py | 29 ++++ cinder/tests/test_rbd.py | 84 +++++++---- cinder/volume/drivers/rbd.py | 240 +++++++++++++++++++++++++++--- etc/cinder/cinder.conf.sample | 7 +- 4 files changed, 311 insertions(+), 49 deletions(-) diff --git a/cinder/tests/backup/fake_rados.py b/cinder/tests/backup/fake_rados.py index b9d3cd6a695..9839d275547 100644 --- a/cinder/tests/backup/fake_rados.py +++ b/cinder/tests/backup/fake_rados.py @@ -16,6 +16,9 @@ class mock_rados(object): + class ObjectNotFound(Exception): + pass + class ioctx(object): def __init__(self, *args, **kwargs): pass @@ -23,6 +26,20 @@ def __init__(self, *args, **kwargs): def close(self, *args, **kwargs): pass + class Object(object): + + def __init__(self, *args, **kwargs): + pass + + def read(self, *args): + raise NotImplementedError() + + def write(self, *args): + raise NotImplementedError() + + def 
seek(self, *args): + raise NotImplementedError() + class Rados(object): def __init__(self, *args, **kwargs): @@ -63,6 +80,12 @@ def create_snap(self, *args, **kwargs): def remove_snap(self, *args, **kwargs): pass + def protect_snap(self, *args, **kwargs): + pass + + def unprotect_snap(self, *args, **kwargs): + pass + def read(self, *args, **kwargs): raise NotImplementedError() @@ -78,6 +101,9 @@ def close(self): def list_snaps(self): raise NotImplementedError() + def parent_info(self): + raise NotImplementedError() + def size(self): raise NotImplementedError() @@ -94,3 +120,6 @@ def remove(self, *args, **kwargs): def list(self, *args, **kwargs): raise NotImplementedError() + + def clone(self, *args, **kwargs): + raise NotImplementedError() diff --git a/cinder/tests/test_rbd.py b/cinder/tests/test_rbd.py index 20a8e81a02f..cbb7ceb9bd5 100644 --- a/cinder/tests/test_rbd.py +++ b/cinder/tests/test_rbd.py @@ -27,6 +27,8 @@ from cinder.openstack.common import log as logging from cinder.openstack.common import timeutils from cinder import test +from cinder.tests.backup.fake_rados import mock_rados +from cinder.tests.backup.fake_rados import mock_rbd from cinder.tests.image import fake as fake_image from cinder.tests.test_volume import DriverTestCase from cinder import units @@ -111,11 +113,11 @@ def test_create_volume(self): driver.RADOSClient(self.driver).AndReturn(mock_client) mock_client.__enter__().AndReturn(mock_client) self.rbd.RBD_FEATURE_LAYERING = 1 - mock_rbd = self.mox.CreateMockAnything() - self.rbd.RBD().AndReturn(mock_rbd) - mock_rbd.create(mox.IgnoreArg(), str(name), size * 1024 ** 3, - old_format=False, - features=self.rbd.RBD_FEATURE_LAYERING) + _mock_rbd = self.mox.CreateMockAnything() + self.rbd.RBD().AndReturn(_mock_rbd) + _mock_rbd.create(mox.IgnoreArg(), str(name), size * 1024 ** 3, + old_format=False, + features=self.rbd.RBD_FEATURE_LAYERING) mock_client.__exit__(None, None, None).AndReturn(None) self.mox.ReplayAll() @@ -125,21 +127,31 @@ def 
test_create_volume(self): def test_delete_volume(self): name = u'volume-00000001' volume = dict(name=name) - mock_client = self.mox.CreateMockAnything() - self.mox.StubOutWithMock(driver, 'RADOSClient') - self.stubs.Set(self.driver, '_get_backup_snaps', lambda *args: None) - driver.RADOSClient(self.driver).AndReturn(mock_client) - mock_client.__enter__().AndReturn(mock_client) - mock_image = self.mox.CreateMockAnything() - self.rbd.Image(mox.IgnoreArg(), str(name)).AndReturn(mock_image) - mock_image.close() - mock_rbd = self.mox.CreateMockAnything() - self.rbd.RBD().AndReturn(mock_rbd) - mock_rbd.remove(mox.IgnoreArg(), str(name)) - mock_client.__exit__(None, None, None).AndReturn(None) + # Setup librbd stubs + self.stubs.Set(self.driver, 'rados', mock_rados) + self.stubs.Set(self.driver, 'rbd', mock_rbd) - self.mox.ReplayAll() + class mock_client(object): + def __init__(self, *args, **kwargs): + self.ioctx = None + + def __enter__(self, *args, **kwargs): + return self + + def __exit__(self, type_, value, traceback): + pass + + self.stubs.Set(driver, 'RADOSClient', mock_client) + + self.stubs.Set(self.driver, '_get_backup_snaps', + lambda *args: None) + self.stubs.Set(self.driver.rbd.Image, 'list_snaps', + lambda *args: []) + self.stubs.Set(self.driver.rbd.Image, 'parent_info', + lambda *args: (None, None, None)) + self.stubs.Set(self.driver.rbd.Image, 'unprotect_snap', + lambda *args: None) self.driver.delete_volume(volume) @@ -184,17 +196,35 @@ def test_delete_snapshot(self): def test_create_cloned_volume(self): src_name = u'volume-00000001' dst_name = u'volume-00000002' - mock_proxy = self.mox.CreateMockAnything() - mock_proxy.ioctx = self.mox.CreateMockAnything() - self.mox.StubOutWithMock(driver, 'RBDVolumeProxy') - driver.RBDVolumeProxy(self.driver, src_name, read_only=True) \ - .AndReturn(mock_proxy) - mock_proxy.__enter__().AndReturn(mock_proxy) - mock_proxy.copy(mock_proxy.ioctx, str(dst_name)) - mock_proxy.__exit__(None, None, None).AndReturn(None) + # 
Setup librbd stubs + self.stubs.Set(self.driver, 'rados', mock_rados) + self.stubs.Set(self.driver, 'rbd', mock_rbd) - self.mox.ReplayAll() + self.driver.rbd.RBD_FEATURE_LAYERING = 1 + + class mock_client(object): + def __init__(self, *args, **kwargs): + self.ioctx = None + + def __enter__(self, *args, **kwargs): + return self + + def __exit__(self, type_, value, traceback): + pass + + self.stubs.Set(driver, 'RADOSClient', mock_client) + + def mock_clone(*args, **kwargs): + pass + + self.stubs.Set(self.driver.rbd.RBD, 'clone', mock_clone) + self.stubs.Set(self.driver.rbd.Image, 'list_snaps', + lambda *args: [{'name': 'snap1'}, {'name': 'snap2'}]) + self.stubs.Set(self.driver.rbd.Image, 'parent_info', + lambda *args: (None, None, None)) + self.stubs.Set(self.driver.rbd.Image, 'protect_snap', + lambda *args: None) self.driver.create_cloned_volume(dict(name=dst_name), dict(name=src_name)) diff --git a/cinder/volume/drivers/rbd.py b/cinder/volume/drivers/rbd.py index 3bc580bff05..8c0478fc1a8 100644 --- a/cinder/volume/drivers/rbd.py +++ b/cinder/volume/drivers/rbd.py @@ -62,7 +62,15 @@ cfg.StrOpt('volume_tmp_dir', default=None, help='where to store temporary image files if the volume ' - 'driver does not write them directly to the volume'), ] + 'driver does not write them directly to the volume'), + cfg.IntOpt('rbd_max_clone_depth', + default=5, + help='maximum number of nested clones that can be taken of a ' + 'volume before enforcing a flatten prior to next clone. 
' + 'A value of zero disables cloning')] + +CONF = cfg.CONF +CONF.register_opts(rbd_opts) def ascii_str(string): @@ -150,11 +158,11 @@ def seek(self, offset, whence=0): new_offset = self.volume.size() - 1 new_offset += offset else: - raise IOError("Invalid argument - whence=%s not supported" % + raise IOError(_("Invalid argument - whence=%s not supported") % (whence)) if (new_offset < 0): - raise IOError("Invalid argument") + raise IOError(_("Invalid argument")) self._offset = new_offset @@ -173,7 +181,7 @@ def fileno(self): Raising IOError is recommended way to notify caller that interface is not supported - see http://docs.python.org/2/library/io.html#io.IOBase """ - raise IOError("fileno() not supported by RBD()") + raise IOError(_("fileno() not supported by RBD()")) # NOTE(dosaboy): if IO object is not closed explicitly, Python auto closes # it which, if this is not overridden, calls flush() prior to close which @@ -232,9 +240,6 @@ def __enter__(self): def __exit__(self, type_, value, traceback): self.driver._disconnect_from_rados(self.cluster, self.ioctx) -CONF = cfg.CONF -CONF.register_opts(rbd_opts) - class RBDDriver(driver.VolumeDriver): """Implements RADOS block device (RBD) volume commands.""" @@ -349,10 +354,105 @@ def get_volume_stats(self, refresh=False): def _supports_layering(self): return hasattr(self.rbd, 'RBD_FEATURE_LAYERING') + def _get_clone_depth(self, client, volume_name, depth=0): + """Returns the number of ancestral clones (if any) of the given volume. + """ + parent_volume = self.rbd.Image(client.ioctx, volume_name) + try: + pool, parent, snap = self._get_clone_info(parent_volume, + volume_name) + finally: + parent_volume.close() + + if not parent: + return depth + + # If clone depth was reached, flatten should have occurred so if it has + # been exceeded then something has gone wrong. 
+ if depth > CONF.rbd_max_clone_depth: + raise Exception(_("clone depth exceeds limit of %s") % + (CONF.rbd_max_clone_depth)) + + return self._get_clone_depth(client, parent, depth + 1) + def create_cloned_volume(self, volume, src_vref): - """Clone a logical volume.""" - with RBDVolumeProxy(self, src_vref['name'], read_only=True) as vol: - vol.copy(vol.ioctx, str(volume['name'])) + """Create a cloned volume from another volume. + + Since we are cloning from a volume and not a snapshot, we must first + create a snapshot of the source volume. + + The user has the option to limit how long a volume's clone chain can be + by setting rbd_max_clone_depth. If a clone is made of another clone + and that clone has rbd_max_clone_depth clones behind it, the source + volume will be flattened. + """ + src_name = str(src_vref['name']) + dest_name = str(volume['name']) + flatten_parent = False + + # Do full copy if requested + if CONF.rbd_max_clone_depth <= 0: + with RBDVolumeProxy(self, src_name, read_only=True) as vol: + vol.copy(vol.ioctx, dest_name) + + return + + # Otherwise do COW clone. + with RADOSClient(self) as client: + depth = self._get_clone_depth(client, src_name) + # If source volume is a clone and rbd_max_clone_depth reached, + # flatten the source before cloning. A zero (or negative) + # rbd_max_clone_depth never gets here: it forces a full copy above. + if depth == CONF.rbd_max_clone_depth: + LOG.debug(_("maximum clone depth (%d) has been reached - " + "flattening source volume") % + (CONF.rbd_max_clone_depth)) + flatten_parent = True + + src_volume = self.rbd.Image(client.ioctx, src_name) + try: + # First flatten source volume if required. 
+ if flatten_parent: + pool, parent, snap = self._get_clone_info(src_volume, + src_name) + # Flatten source volume + LOG.debug(_("flattening source volume %s") % (src_name)) + src_volume.flatten() + # Delete parent clone snap + parent_volume = self.rbd.Image(client.ioctx, parent) + try: + parent_volume.unprotect_snap(snap) + parent_volume.remove_snap(snap) + finally: + parent_volume.close() + + # Create new snapshot of source volume + clone_snap = "%s.clone_snap" % dest_name + LOG.debug(_("creating snapshot='%s'") % (clone_snap)) + src_volume.create_snap(clone_snap) + src_volume.protect_snap(clone_snap) + except Exception as exc: + # Only close if exception since we still need it. + src_volume.close() + raise exc + + # Now clone source volume snapshot + try: + LOG.debug(_("cloning '%(src_vol)s@%(src_snap)s' to " + "'%(dest)s'") % + {'src_vol': src_name, 'src_snap': clone_snap, + 'dest': dest_name}) + self.rbd.RBD().clone(client.ioctx, src_name, clone_snap, + client.ioctx, dest_name, + features=self.rbd.RBD_FEATURE_LAYERING) + except Exception as exc: + src_volume.unprotect_snap(clone_snap) + src_volume.remove_snap(clone_snap) + raise exc + finally: + src_volume.close() + + LOG.debug(_("clone created successfully")) def create_volume(self, volume): """Creates a logical volume.""" @@ -361,6 +461,8 @@ def create_volume(self, volume): else: size = int(volume['size']) * 1024 ** 3 + LOG.debug(_("creating volume '%s'") % (volume['name'])) + old_format = True features = 0 if self._supports_layering(): @@ -410,23 +512,119 @@ def create_volume_from_snapshot(self, volume, snapshot): if int(volume['size']): self._resize(volume) + def _delete_backup_snaps(self, client, volume_name): + rbd_image = self.rbd.Image(client.ioctx, volume_name) + try: + backup_snaps = self._get_backup_snaps(rbd_image) + if backup_snaps: + for snap in backup_snaps: + rbd_image.remove_snap(snap['name']) + else: + LOG.debug(_("volume has no backup snaps")) + finally: + rbd_image.close() + + def 
_get_clone_info(self, volume, volume_name, snap=None): + """If volume is a clone, return its parent info. + + Returns a tuple of (pool, parent, snap). A snapshot may optionally be + provided for the case where a cloned volume has been flattened but its + snapshot still depends on the parent. + """ + try: + snap and volume.set_snap(snap) + pool, parent, parent_snap = tuple(volume.parent_info()) + snap and volume.set_snap(None) + # Strip the tag off the end of the volume name since it will not be + # in the snap name. + if volume_name.endswith('.deleted'): + volume_name = volume_name[:-len('.deleted')] + # Now check the snap name matches. + if parent_snap == "%s.clone_snap" % volume_name: + return pool, parent, parent_snap + except self.rbd.ImageNotFound: + LOG.debug(_("volume %s is not a clone") % volume_name) + volume.set_snap(None) + + return (None, None, None) + + def _delete_clone_parent_refs(self, client, parent_name, parent_snap): + """Walk back up the clone chain and delete references. + + Deletes references i.e. deleted parent volumes and snapshots. + """ + parent_rbd = self.rbd.Image(client.ioctx, parent_name) + parent_has_snaps = False + try: + # Check for grandparent + _pool, g_parent, g_parent_snap = self._get_clone_info(parent_rbd, + parent_name, + parent_snap) + + LOG.debug(_("deleting parent snapshot %s") % (parent_snap)) + parent_rbd.unprotect_snap(parent_snap) + parent_rbd.remove_snap(parent_snap) + + parent_has_snaps = bool(list(parent_rbd.list_snaps())) + finally: + parent_rbd.close() + + # If parent has been deleted in Cinder, delete the silent reference and + # keep walking up the chain if it is itself a clone. 
+ if (not parent_has_snaps) and parent_name.endswith('.deleted'): + LOG.debug(_("deleting parent %s") % (parent_name)) + self.rbd.RBD().remove(client.ioctx, parent_name) + + # Now move up to grandparent if there is one + if g_parent: + self._delete_clone_parent_refs(client, g_parent, g_parent_snap) + def delete_volume(self, volume): """Deletes a logical volume.""" + volume_name = str(volume['name']) with RADOSClient(self) as client: # Ensure any backup snapshots are deleted - rbd_image = self.rbd.Image(client.ioctx, str(volume['name'])) + self._delete_backup_snaps(client, volume_name) + + # If the volume has non-clone snapshots this delete is expected to + # raise VolumeIsBusy so do so straight away. + rbd_image = self.rbd.Image(client.ioctx, volume_name) + clone_snap = None + parent = None try: - backup_snaps = self._get_backup_snaps(rbd_image) - if backup_snaps: - for snap in backup_snaps: - rbd_image.remove_snap(snap['name']) + snaps = rbd_image.list_snaps() + for snap in snaps: + if snap['name'].endswith('.clone_snap'): + LOG.debug(_("volume has clone snapshot(s)")) + # We grab one of these and use it when fetching parent + # info in case this volume has been flattened. + clone_snap = snap['name'] + break + + raise exception.VolumeIsBusy(volume_name=volume_name) + + # Determine if this volume is itself a clone + pool, parent, parent_snap = self._get_clone_info(rbd_image, + volume_name, + clone_snap) finally: rbd_image.close() - try: - self.rbd.RBD().remove(client.ioctx, str(volume['name'])) - except self.rbd.ImageHasSnapshots: - raise exception.VolumeIsBusy(volume_name=volume['name']) + if clone_snap is None: + LOG.debug(_("deleting rbd volume %s") % (volume_name)) + self.rbd.RBD().remove(client.ioctx, volume_name) + + # If it is a clone, walk back up the parent chain deleting + # references. 
+ if parent: + LOG.debug(_("volume is a clone so cleaning references")) + self._delete_clone_parent_refs(client, parent, parent_snap) + else: + # If the volume has copy-on-write clones we will not be able to + # delete it. Instead we will keep it as a silent volume which + # will be deleted when its snapshot and clones are deleted. + new_name = "%s.deleted" % (volume_name) + self.rbd.RBD().rename(client.ioctx, volume_name, new_name) def create_snapshot(self, snapshot): """Creates an rbd snapshot.""" @@ -584,7 +782,7 @@ def backup_volume(self, context, backup, backup_service): rbd_fd = RBDImageIOWrapper(rbd_meta) backup_service.backup(backup, rbd_fd) - LOG.debug("volume backup complete.") + LOG.debug(_("volume backup complete.")) def restore_backup(self, context, backup, volume, backup_service): """Restore an existing backup to a new or existing volume.""" @@ -597,7 +795,7 @@ def restore_backup(self, context, backup, volume, backup_service): rbd_fd = RBDImageIOWrapper(rbd_meta) backup_service.restore(backup, volume['id'], rbd_fd) - LOG.debug("volume restore complete.") + LOG.debug(_("volume restore complete.")) def extend_volume(self, volume, new_size): """Extend an existing volume.""" diff --git a/etc/cinder/cinder.conf.sample b/etc/cinder/cinder.conf.sample index 36d6de7a6f3..4c839da3d66 100644 --- a/etc/cinder/cinder.conf.sample +++ b/etc/cinder/cinder.conf.sample @@ -1393,6 +1393,11 @@ # does not write them directly to the volume (string value) #volume_tmp_dir= +# maximum number of nested clones that can be taken of a +# volume before enforcing a flatten prior to next clone. A +# value of zero disables cloning (integer value) +#rbd_max_clone_depth=5 + # # Options defined in cinder.volume.drivers.san.hp.hp_3par_common @@ -1714,4 +1719,4 @@ #volume_dd_blocksize=1M -# Total option count: 366 +# Total option count: 367