Skip to content

Commit

Permalink
Obj replicator cleans up files where part dirs should be.
Browse files Browse the repository at this point in the history
If a partition directory was a file instead of a directory, the
object-replicator would attempt to listdir() it, raise an exception, and
try again next iteration.  This condition could arise after running
xfs_repair.

Now, collect_jobs() will reap any partition directories which are
actually files.  Fixes bug 1045954.

Change-Id: Id65d3eab2effd61c3f6b25250611c88c907b2a16
  • Loading branch information
dbishop authored and openstack-gerrit committed Sep 5, 2012
1 parent 2b23eb3 commit 46a093f
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 3 deletions.
14 changes: 11 additions & 3 deletions swift/obj/replicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# limitations under the License.

import os
from os.path import basename, dirname, isdir, join
from os.path import basename, dirname, isdir, isfile, join
import random
import shutil
import time
Expand Down Expand Up @@ -543,16 +543,24 @@ def collect_jobs(self):
continue
for partition in os.listdir(obj_path):
try:
job_path = join(obj_path, partition)
if isfile(job_path):
# Clean up any (probably zero-byte) files where a
# partition should be.
self.logger.warning('Removing partition directory '
'which was a file: %s', job_path)
os.remove(job_path)
continue
part_nodes = \
self.object_ring.get_part_nodes(int(partition))
nodes = [node for node in part_nodes
if node['id'] != local_dev['id']]
jobs.append(dict(path=join(obj_path, partition),
jobs.append(dict(path=job_path,
device=local_dev['device'],
nodes=nodes,
delete=len(nodes) > len(part_nodes) - 1,
partition=partition))
except ValueError:
except ValueError, OSError:
continue
random.shuffle(jobs)
# Partititons that need to be deleted take priority
Expand Down
39 changes: 39 additions & 0 deletions test/unit/obj/test_replicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def setUp(self):
timeout='300', stats_interval='1')
self.replicator = object_replicator.ObjectReplicator(
self.conf)
self.replicator.logger = FakeLogger()

def tearDown(self):
process_errors = []
Expand Down Expand Up @@ -388,6 +389,44 @@ def test_collect_jobs(self):
self.assertEquals(jobs_by_part[part]['path'],
os.path.join(self.objects, part))

def test_collect_jobs_removes_zbf(self):
"""
After running xfs_repair, a partition directory could become a
zero-byte file. If this happens, collect_jobs() should clean it up and
*not* create a job which will hit an exception as it tries to listdir()
a file.
"""
# Surprise! Partition dir 1 is actually a zero-byte-file
part_1_path = os.path.join(self.objects, '1')
rmtree(part_1_path)
with open(part_1_path, 'w'):
pass
self.assertTrue(os.path.isfile(part_1_path)) # sanity check
jobs = self.replicator.collect_jobs()
jobs_to_delete = [j for j in jobs if j['delete']]
jobs_to_keep = [j for j in jobs if not j['delete']]
jobs_by_part = {}
for job in jobs:
jobs_by_part[job['partition']] = job
self.assertEquals(len(jobs_to_delete), 0)
self.assertEquals(
[node['id'] for node in jobs_by_part['0']['nodes']], [1, 2])
self.assertFalse('1' in jobs_by_part)
self.assertEquals(
[node['id'] for node in jobs_by_part['2']['nodes']], [2, 3])
self.assertEquals(
[node['id'] for node in jobs_by_part['3']['nodes']], [3, 1])
for part in ['0', '2', '3']:
for node in jobs_by_part[part]['nodes']:
self.assertEquals(node['device'], 'sda')
self.assertEquals(jobs_by_part[part]['path'],
os.path.join(self.objects, part))
self.assertFalse(os.path.exists(part_1_path))
self.assertEquals(
[(('Removing partition directory which was a file: %s',
part_1_path), {})],
self.replicator.logger.log_dict['warning'])

def test_delete_partition(self):
df = DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o', FakeLogger())
mkdirs(df.datadir)
Expand Down

0 comments on commit 46a093f

Please sign in to comment.