Skip to content

Commit

Permalink
Clean unused kernels and ramdisks from image cache
Browse files Browse the repository at this point in the history
Fixes bug #1132138

Only unused disk images are currently cleaned up by the image cache
manager but it seems logical to clean up unused kernels and ramdisks
too.

Achieve that by writing kernels and ramdisks to disk using the sha1
sum of their ID as the filename. This is the same scheme as used for
disk image filenames and causes the image cache manager to consider
them for cleanup. We also make the cache manager take note of in use
kernels and ramdisks when iterating over the list of instances.

A nasty upgrade concern is that if we immediately switch to writing
kernels to disk using this scheme then, where shared storage is used,
we can have older image cache managers on remote compute nodes cleaning
up kernels because they appear unused. To mitigate that, turn off this
behaviour by default and allow it to be enabled using a new config
option. This option will be removed in future and the behaviour enabled
by default.

DocImpact: new remove_unused_kernels option

Change-Id: I56bba9fa6596601104498e262c2e657f0eae2fa0
  • Loading branch information
markmc committed Feb 23, 2013
1 parent a42845e commit 38997fc
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 22 deletions.
27 changes: 22 additions & 5 deletions nova/tests/test_imagecache.py
Expand Up @@ -163,6 +163,8 @@ def test_list_running_instances(self):
'vm_state': '',
'task_state': ''},
{'image_ref': '2',
'kernel_id': '21',
'ramdisk_id': '22',
'host': 'remotehost',
'name': 'inst-3',
'uuid': '789',
Expand All @@ -174,18 +176,24 @@ def test_list_running_instances(self):
# The argument here should be a context, but it's mocked out
image_cache_manager._list_running_instances(None, all_instances)

self.assertEqual(len(image_cache_manager.used_images), 2)
self.assertEqual(len(image_cache_manager.used_images), 4)
self.assertTrue(image_cache_manager.used_images['1'] ==
(1, 0, ['inst-1']))
self.assertTrue(image_cache_manager.used_images['2'] ==
(1, 1, ['inst-2', 'inst-3']))
self.assertTrue(image_cache_manager.used_images['21'] ==
(0, 1, ['inst-3']))
self.assertTrue(image_cache_manager.used_images['22'] ==
(0, 1, ['inst-3']))

self.assertTrue('inst-1' in image_cache_manager.instance_names)
self.assertTrue('123' in image_cache_manager.instance_names)

self.assertEqual(len(image_cache_manager.image_popularity), 2)
self.assertEqual(len(image_cache_manager.image_popularity), 4)
self.assertEqual(image_cache_manager.image_popularity['1'], 1)
self.assertEqual(image_cache_manager.image_popularity['2'], 2)
self.assertEqual(image_cache_manager.image_popularity['21'], 1)
self.assertEqual(image_cache_manager.image_popularity['22'], 1)

def test_list_resizing_instances(self):
all_instances = [{'image_ref': '1',
Expand Down Expand Up @@ -703,6 +711,8 @@ def test_handle_base_image_checksum_fails(self):

def test_verify_base_images(self):
hashed_1 = '356a192b7913b04c54574d18c28d46e6395428ab'
hashed_21 = '472b07b9fcf2c2451e8781e944bf5f77cd8457c8'
hashed_22 = '12c6fc06c99a462375eeb3f43dfd832b08ca9e17'
hashed_42 = '92cfceb39d57d914ed8b14d0e37643de0797ae56'

self.flags(instances_path='/instance_path')
Expand All @@ -715,6 +725,8 @@ def test_verify_base_images(self):
'e09c675c2d1cfac32dae3c2d83689c8c94bc693b_sm',
hashed_42,
hashed_1,
hashed_21,
hashed_22,
'%s_5368709120' % hashed_1,
'%s_10737418240' % hashed_1,
'00000004']
Expand Down Expand Up @@ -744,8 +756,10 @@ def exists(path):
if path == fq_path(p) + '.info':
return False

if path in ['/instance_path/_base/%s_sm' % hashed_1,
'/instance_path/_base/%s_sm' % hashed_42]:
if path in ['/instance_path/_base/%s_sm' % i for i in [hashed_1,
hashed_21,
hashed_22,
hashed_42]]:
return False

self.fail('Unexpected path existence check: %s' % path)
Expand Down Expand Up @@ -800,6 +814,8 @@ def isfile(path):
'vm_state': '',
'task_state': ''},
{'image_ref': '1',
'kernel_id': '21',
'ramdisk_id': '22',
'host': CONF.host,
'name': 'instance-2',
'uuid': '456',
Expand Down Expand Up @@ -850,7 +866,8 @@ def remove(path):
image_cache_manager.verify_base_images(None, all_instances)

# Verify
active = [fq_path(hashed_1), fq_path('%s_5368709120' % hashed_1)]
active = [fq_path(hashed_1), fq_path('%s_5368709120' % hashed_1),
fq_path(hashed_21), fq_path(hashed_22)]
self.assertEquals(image_cache_manager.active_base_files, active)

for rem in [fq_path('e97222e91fc4241f49a7f520d1dcf446751129b3_sm'),
Expand Down
7 changes: 3 additions & 4 deletions nova/virt/libvirt/driver.py
Expand Up @@ -44,7 +44,6 @@
import eventlet
import functools
import glob
import hashlib
import os
import shutil
import socket
Expand Down Expand Up @@ -1718,23 +1717,23 @@ def raw(fname):
'ramdisk_id': instance['ramdisk_id']}

if disk_images['kernel_id']:
fname = disk_images['kernel_id']
fname = imagecache.get_cache_fname(disk_images, 'kernel_id')
raw('kernel').cache(fetch_func=libvirt_utils.fetch_image,
context=context,
filename=fname,
image_id=disk_images['kernel_id'],
user_id=instance['user_id'],
project_id=instance['project_id'])
if disk_images['ramdisk_id']:
fname = disk_images['ramdisk_id']
fname = imagecache.get_cache_fname(disk_images, 'ramdisk_id')
raw('ramdisk').cache(fetch_func=libvirt_utils.fetch_image,
context=context,
filename=fname,
image_id=disk_images['ramdisk_id'],
user_id=instance['user_id'],
project_id=instance['project_id'])

root_fname = hashlib.sha1(str(disk_images['image_id'])).hexdigest()
root_fname = imagecache.get_cache_fname(disk_images, 'image_id')
size = instance['root_gb'] * 1024 * 1024 * 1024

inst_type = instance['instance_type']
Expand Down
59 changes: 46 additions & 13 deletions nova/virt/libvirt/imagecache.py
Expand Up @@ -54,6 +54,12 @@
cfg.BoolOpt('remove_unused_base_images',
default=True,
help='Should unused base images be removed?'),
cfg.BoolOpt('remove_unused_kernels',
default=False,
help='Should unused kernel images be removed? This is only '
'safe to enable if all compute nodes have been updated '
'to support this option. This will enabled by default '
'in future.'),
cfg.IntOpt('remove_unused_resized_minimum_age_seconds',
default=3600,
help='Unused resized base images younger than this will not be '
Expand All @@ -76,6 +82,29 @@
CONF.import_opt('instances_path', 'nova.compute.manager')


def get_cache_fname(images, key):
"""Return a filename based on the SHA1 hash of a given image ID.
Image files stored in the _base directory that match this pattern
are considered for cleanup by the image cache manager. The cache
manager considers the file to be in use if it matches an instance's
image_ref, kernel_id or ramdisk_id property.
However, in grizzly-3 and before, only the image_ref property was
considered. This means that it's unsafe to store kernel and ramdisk
images using this pattern until we're sure that all compute nodes
are running a cache manager newer than grizzly-3. For now, we
require admins to confirm that by setting the remove_unused_kernels
boolean but, at some point in the future, we'll be safely able to
assume this.
"""
image_id = str(images[key])
if not CONF.remove_unused_kernels and key in ['kernel_id', 'ramdisk_id']:
return image_id
else:
return hashlib.sha1(image_id).hexdigest()


def get_info_filename(base_path):
"""Construct a filename for storing additional information about a base
image.
Expand Down Expand Up @@ -240,8 +269,8 @@ def _list_base_images(self, base_dir):
"""Return a list of the images present in _base.
Determine what images we have on disk. There will be other files in
this directory (for example kernels) so we only grab the ones which
are the right length to be disk images.
this directory so we only grab the ones which are the right length
to be disk images.
Note that this does not return a value. It instead populates a class
variable with a list of images that we need to try and explain.
Expand Down Expand Up @@ -278,18 +307,22 @@ def _list_running_instances(self, context, all_instances):
self.instance_names.add(instance['name'] + '_resize')
self.instance_names.add(instance['uuid'] + '_resize')

image_ref_str = str(instance['image_ref'])
local, remote, insts = self.used_images.get(image_ref_str,
(0, 0, []))
if instance['host'] == CONF.host:
local += 1
else:
remote += 1
insts.append(instance['name'])
self.used_images[image_ref_str] = (local, remote, insts)
for image_key in ['image_ref', 'kernel_id', 'ramdisk_id']:
try:
image_ref_str = str(instance[image_key])
except KeyError:
continue
local, remote, insts = self.used_images.get(image_ref_str,
(0, 0, []))
if instance['host'] == CONF.host:
local += 1
else:
remote += 1
insts.append(instance['name'])
self.used_images[image_ref_str] = (local, remote, insts)

self.image_popularity.setdefault(image_ref_str, 0)
self.image_popularity[image_ref_str] += 1
self.image_popularity.setdefault(image_ref_str, 0)
self.image_popularity[image_ref_str] += 1

def _list_backing_images(self):
"""List the backing images currently in use."""
Expand Down

0 comments on commit 38997fc

Please sign in to comment.