Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge tag 'md-3.5' of git://neil.brown.name/md

Pull md updates from NeilBrown:
 "It's been a busy cycle for md - lots of fun stuff here..  if you like
  this kind of thing :-)

  Main features:
   - RAID10 arrays can be reshaped - adding and removing devices and
     changing chunks (not 'far' array though)
   - allow RAID5 arrays to be reshaped with a backup file (not tested
     yet, but the priciple works fine for RAID10).
   - arrays can be reshaped while a bitmap is present - you no longer
     need to remove it first
   - SSSE3 support for RAID6 syndrome calculations

  and of course a number of minor fixes etc."

* tag 'md-3.5' of git://neil.brown.name/md: (56 commits)
  md/bitmap: record the space available for the bitmap in the superblock.
  md/raid10: Remove extras after reshape to smaller number of devices.
  md/raid5: improve removal of extra devices after reshape.
  md: check the return of mddev_find()
  MD RAID1: Further conditionalize 'fullsync'
  DM RAID: Use md_error() in place of simply setting Faulty bit
  DM RAID: Record and handle missing devices
  DM RAID: Set recovery flags on resume
  md/raid5: Allow reshape while a bitmap is present.
  md/raid10: resize bitmap when required during reshape.
  md: allow array to be resized while bitmap is present.
  md/bitmap: make sure reshape request are reflected in superblock.
  md/bitmap: add bitmap_resize function to allow bitmap resizing.
  md/bitmap: use DIV_ROUND_UP instead of open-code
  md/bitmap: create a 'struct bitmap_counts' substructure of 'struct bitmap'
  md/bitmap: make bitmap bitops atomic.
  md/bitmap: make _page_attr bitops atomic.
  md/bitmap: merge bitmap_file_unmap and bitmap_file_put.
  md/bitmap: remove async freeing of bitmap file.
  md/bitmap: convert some spin_lock_irqsave to spin_lock_irq
  ...
  • Loading branch information...
commit c80ddb526331a72c9e9d1480f85f6fd7c74e3d2d 2 parents 2c13bc0 + 1dff2b8
@torvalds torvalds authored
View
5 arch/x86/Makefile
@@ -115,9 +115,10 @@ cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTI
# does binutils support specific instructions?
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
+avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
LDFLAGS := -m elf_$(UTS_MACHINE)
View
6 arch/x86/include/asm/xor_32.h
@@ -861,6 +861,9 @@ static struct xor_block_template xor_block_pIII_sse = {
.do_5 = xor_sse_5,
};
+/* Also try the AVX routines */
+#include "xor_avx.h"
+
/* Also try the generic routines. */
#include <asm-generic/xor.h>
@@ -871,6 +874,7 @@ do { \
xor_speed(&xor_block_8regs_p); \
xor_speed(&xor_block_32regs); \
xor_speed(&xor_block_32regs_p); \
+ AVX_XOR_SPEED; \
if (cpu_has_xmm) \
xor_speed(&xor_block_pIII_sse); \
if (cpu_has_mmx) { \
@@ -883,6 +887,6 @@ do { \
We may also be able to load into the L1 only depending on how the cpu
deals with a load to a line that is being prefetched. */
#define XOR_SELECT_TEMPLATE(FASTEST) \
- (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
+ AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
#endif /* _ASM_X86_XOR_32_H */
View
8 arch/x86/include/asm/xor_64.h
@@ -347,15 +347,21 @@ static struct xor_block_template xor_block_sse = {
.do_5 = xor_sse_5,
};
+
+/* Also try the AVX routines */
+#include "xor_avx.h"
+
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES \
do { \
+ AVX_XOR_SPEED; \
xor_speed(&xor_block_sse); \
} while (0)
/* We force the use of the SSE xor block because it can write around L2.
We may also be able to load into the L1 only depending on how the cpu
deals with a load to a line that is being prefetched. */
-#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse)
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+ AVX_SELECT(&xor_block_sse)
#endif /* _ASM_X86_XOR_64_H */
View
214 arch/x86/include/asm/xor_avx.h
@@ -0,0 +1,214 @@
+#ifndef _ASM_X86_XOR_AVX_H
+#define _ASM_X86_XOR_AVX_H
+
+/*
+ * Optimized RAID-5 checksumming functions for AVX
+ *
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
+ *
+ * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#ifdef CONFIG_AS_AVX
+
+#include <linux/compiler.h>
+#include <asm/i387.h>
+
+#define ALIGN32 __aligned(32)
+
+#define YMM_SAVED_REGS 4
+
+#define YMMS_SAVE \
+do { \
+ preempt_disable(); \
+ cr0 = read_cr0(); \
+ clts(); \
+ asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \
+ asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \
+ asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \
+ asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \
+} while (0);
+
+#define YMMS_RESTORE \
+do { \
+ asm volatile("sfence" : : : "memory"); \
+ asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \
+ asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \
+ asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \
+ asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \
+ write_cr0(cr0); \
+ preempt_enable(); \
+} while (0);
+
+#define BLOCK4(i) \
+ BLOCK(32 * i, 0) \
+ BLOCK(32 * (i + 1), 1) \
+ BLOCK(32 * (i + 2), 2) \
+ BLOCK(32 * (i + 3), 3)
+
+#define BLOCK16() \
+ BLOCK4(0) \
+ BLOCK4(4) \
+ BLOCK4(8) \
+ BLOCK4(12)
+
+static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
+{
+ unsigned long cr0, lines = bytes >> 9;
+ char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+
+ YMMS_SAVE
+
+ while (lines--) {
+#undef BLOCK
+#define BLOCK(i, reg) \
+do { \
+ asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p0[i / sizeof(*p0)])); \
+ asm volatile("vmovdqa %%ymm" #reg ", %0" : \
+ "=m" (p0[i / sizeof(*p0)])); \
+} while (0);
+
+ BLOCK16()
+
+ p0 = (unsigned long *)((uintptr_t)p0 + 512);
+ p1 = (unsigned long *)((uintptr_t)p1 + 512);
+ }
+
+ YMMS_RESTORE
+}
+
+static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
+ unsigned long *p2)
+{
+ unsigned long cr0, lines = bytes >> 9;
+ char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+
+ YMMS_SAVE
+
+ while (lines--) {
+#undef BLOCK
+#define BLOCK(i, reg) \
+do { \
+ asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p1[i / sizeof(*p1)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p0[i / sizeof(*p0)])); \
+ asm volatile("vmovdqa %%ymm" #reg ", %0" : \
+ "=m" (p0[i / sizeof(*p0)])); \
+} while (0);
+
+ BLOCK16()
+
+ p0 = (unsigned long *)((uintptr_t)p0 + 512);
+ p1 = (unsigned long *)((uintptr_t)p1 + 512);
+ p2 = (unsigned long *)((uintptr_t)p2 + 512);
+ }
+
+ YMMS_RESTORE
+}
+
+static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
+ unsigned long *p2, unsigned long *p3)
+{
+ unsigned long cr0, lines = bytes >> 9;
+ char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+
+ YMMS_SAVE
+
+ while (lines--) {
+#undef BLOCK
+#define BLOCK(i, reg) \
+do { \
+ asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p2[i / sizeof(*p2)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p1[i / sizeof(*p1)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p0[i / sizeof(*p0)])); \
+ asm volatile("vmovdqa %%ymm" #reg ", %0" : \
+ "=m" (p0[i / sizeof(*p0)])); \
+} while (0);
+
+ BLOCK16();
+
+ p0 = (unsigned long *)((uintptr_t)p0 + 512);
+ p1 = (unsigned long *)((uintptr_t)p1 + 512);
+ p2 = (unsigned long *)((uintptr_t)p2 + 512);
+ p3 = (unsigned long *)((uintptr_t)p3 + 512);
+ }
+
+ YMMS_RESTORE
+}
+
+static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
+ unsigned long *p2, unsigned long *p3, unsigned long *p4)
+{
+ unsigned long cr0, lines = bytes >> 9;
+ char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+
+ YMMS_SAVE
+
+ while (lines--) {
+#undef BLOCK
+#define BLOCK(i, reg) \
+do { \
+ asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p3[i / sizeof(*p3)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p2[i / sizeof(*p2)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p1[i / sizeof(*p1)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p0[i / sizeof(*p0)])); \
+ asm volatile("vmovdqa %%ymm" #reg ", %0" : \
+ "=m" (p0[i / sizeof(*p0)])); \
+} while (0);
+
+ BLOCK16()
+
+ p0 = (unsigned long *)((uintptr_t)p0 + 512);
+ p1 = (unsigned long *)((uintptr_t)p1 + 512);
+ p2 = (unsigned long *)((uintptr_t)p2 + 512);
+ p3 = (unsigned long *)((uintptr_t)p3 + 512);
+ p4 = (unsigned long *)((uintptr_t)p4 + 512);
+ }
+
+ YMMS_RESTORE
+}
+
+static struct xor_block_template xor_block_avx = {
+ .name = "avx",
+ .do_2 = xor_avx_2,
+ .do_3 = xor_avx_3,
+ .do_4 = xor_avx_4,
+ .do_5 = xor_avx_5,
+};
+
+#define AVX_XOR_SPEED \
+do { \
+ if (cpu_has_avx) \
+ xor_speed(&xor_block_avx); \
+} while (0)
+
+#define AVX_SELECT(FASTEST) \
+ (cpu_has_avx ? &xor_block_avx : FASTEST)
+
+#else
+
+#define AVX_XOR_SPEED {}
+
+#define AVX_SELECT(FASTEST) (FASTEST)
+
+#endif
+#endif
View
13 crypto/xor.c
@@ -21,6 +21,7 @@
#include <linux/gfp.h>
#include <linux/raid/xor.h>
#include <linux/jiffies.h>
+#include <linux/preempt.h>
#include <asm/xor.h>
/* The xor routines to use. */
@@ -63,12 +64,14 @@ static void
do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
{
int speed;
- unsigned long now;
+ unsigned long now, j;
int i, count, max;
tmpl->next = template_list;
template_list = tmpl;
+ preempt_disable();
+
/*
* Count the number of XORs done during a whole jiffy, and use
* this to calculate the speed of checksumming. We use a 2-page
@@ -76,9 +79,11 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
*/
max = 0;
for (i = 0; i < 5; i++) {
- now = jiffies;
+ j = jiffies;
count = 0;
- while (jiffies == now) {
+ while ((now = jiffies) == j)
+ cpu_relax();
+ while (time_before(jiffies, now + 1)) {
mb(); /* prevent loop optimzation */
tmpl->do_2(BENCH_SIZE, b1, b2);
mb();
@@ -89,6 +94,8 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
max = count;
}
+ preempt_enable();
+
speed = max * (HZ * BENCH_SIZE / 1024);
tmpl->speed = speed;
View
1,100 drivers/md/bitmap.c
@@ -45,7 +45,7 @@ static inline char *bmname(struct bitmap *bitmap)
* if we find our page, we increment the page's refcount so that it stays
* allocated while we're using it
*/
-static int bitmap_checkpage(struct bitmap *bitmap,
+static int bitmap_checkpage(struct bitmap_counts *bitmap,
unsigned long page, int create)
__releases(bitmap->lock)
__acquires(bitmap->lock)
@@ -76,8 +76,7 @@ __acquires(bitmap->lock)
spin_lock_irq(&bitmap->lock);
if (mappage == NULL) {
- pr_debug("%s: bitmap map page allocation failed, hijacking\n",
- bmname(bitmap));
+ pr_debug("md/bitmap: map page allocation failed, hijacking\n");
/* failed - set the hijacked flag so that we can use the
* pointer as a counter */
if (!bitmap->bp[page].map)
@@ -100,7 +99,7 @@ __acquires(bitmap->lock)
/* if page is completely empty, put it back on the free list, or dealloc it */
/* if page was hijacked, unmark the flag so it might get alloced next time */
/* Note: lock should be held when calling this */
-static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
+static void bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page)
{
char *ptr;
@@ -130,22 +129,14 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
*/
/* IO operations when bitmap is stored near all superblocks */
-static struct page *read_sb_page(struct mddev *mddev, loff_t offset,
- struct page *page,
- unsigned long index, int size)
+static int read_sb_page(struct mddev *mddev, loff_t offset,
+ struct page *page,
+ unsigned long index, int size)
{
/* choose a good rdev and read the page from there */
struct md_rdev *rdev;
sector_t target;
- int did_alloc = 0;
-
- if (!page) {
- page = alloc_page(GFP_KERNEL);
- if (!page)
- return ERR_PTR(-ENOMEM);
- did_alloc = 1;
- }
rdev_for_each(rdev, mddev) {
if (! test_bit(In_sync, &rdev->flags)
@@ -158,15 +149,10 @@ static struct page *read_sb_page(struct mddev *mddev, loff_t offset,
roundup(size, bdev_logical_block_size(rdev->bdev)),
page, READ, true)) {
page->index = index;
- attach_page_buffers(page, NULL); /* so that free_buffer will
- * quietly no-op */
- return page;
+ return 0;
}
}
- if (did_alloc)
- put_page(page);
- return ERR_PTR(-EIO);
-
+ return -EIO;
}
static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
@@ -208,6 +194,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
struct md_rdev *rdev = NULL;
struct block_device *bdev;
struct mddev *mddev = bitmap->mddev;
+ struct bitmap_storage *store = &bitmap->storage;
while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
int size = PAGE_SIZE;
@@ -215,9 +202,13 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
- if (page->index == bitmap->file_pages-1)
- size = roundup(bitmap->last_page_size,
+ if (page->index == store->file_pages-1) {
+ int last_page_size = store->bytes & (PAGE_SIZE-1);
+ if (last_page_size == 0)
+ last_page_size = PAGE_SIZE;
+ size = roundup(last_page_size,
bdev_logical_block_size(bdev));
+ }
/* Just make sure we aren't corrupting data or
* metadata
*/
@@ -276,10 +267,10 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait)
{
struct buffer_head *bh;
- if (bitmap->file == NULL) {
+ if (bitmap->storage.file == NULL) {
switch (write_sb_page(bitmap, page, wait)) {
case -EINVAL:
- bitmap->flags |= BITMAP_WRITE_ERROR;
+ set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
}
} else {
@@ -297,20 +288,16 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait)
wait_event(bitmap->write_wait,
atomic_read(&bitmap->pending_writes)==0);
}
- if (bitmap->flags & BITMAP_WRITE_ERROR)
+ if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
bitmap_file_kick(bitmap);
}
static void end_bitmap_write(struct buffer_head *bh, int uptodate)
{
struct bitmap *bitmap = bh->b_private;
- unsigned long flags;
- if (!uptodate) {
- spin_lock_irqsave(&bitmap->lock, flags);
- bitmap->flags |= BITMAP_WRITE_ERROR;
- spin_unlock_irqrestore(&bitmap->lock, flags);
- }
+ if (!uptodate)
+ set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
if (atomic_dec_and_test(&bitmap->pending_writes))
wake_up(&bitmap->write_wait);
}
@@ -325,8 +312,12 @@ __clear_page_buffers(struct page *page)
}
static void free_buffers(struct page *page)
{
- struct buffer_head *bh = page_buffers(page);
+ struct buffer_head *bh;
+ if (!PagePrivate(page))
+ return;
+
+ bh = page_buffers(page);
while (bh) {
struct buffer_head *next = bh->b_this_page;
free_buffer_head(bh);
@@ -343,11 +334,12 @@ static void free_buffers(struct page *page)
* This usage is similar to how swap files are handled, and allows us
* to write to a file with no concerns of memory allocation failing.
*/
-static struct page *read_page(struct file *file, unsigned long index,
- struct bitmap *bitmap,
- unsigned long count)
+static int read_page(struct file *file, unsigned long index,
+ struct bitmap *bitmap,
+ unsigned long count,
+ struct page *page)
{
- struct page *page = NULL;
+ int ret = 0;
struct inode *inode = file->f_path.dentry->d_inode;
struct buffer_head *bh;
sector_t block;
@@ -355,16 +347,9 @@ static struct page *read_page(struct file *file, unsigned long index,
pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
(unsigned long long)index << PAGE_SHIFT);
- page = alloc_page(GFP_KERNEL);
- if (!page)
- page = ERR_PTR(-ENOMEM);
- if (IS_ERR(page))
- goto out;
-
bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
if (!bh) {
- put_page(page);
- page = ERR_PTR(-ENOMEM);
+ ret = -ENOMEM;
goto out;
}
attach_page_buffers(page, bh);
@@ -376,8 +361,7 @@ static struct page *read_page(struct file *file, unsigned long index,
bh->b_blocknr = bmap(inode, block);
if (bh->b_blocknr == 0) {
/* Cannot use this file! */
- free_buffers(page);
- page = ERR_PTR(-EINVAL);
+ ret = -EINVAL;
goto out;
}
bh->b_bdev = inode->i_sb->s_bdev;
@@ -400,17 +384,15 @@ static struct page *read_page(struct file *file, unsigned long index,
wait_event(bitmap->write_wait,
atomic_read(&bitmap->pending_writes)==0);
- if (bitmap->flags & BITMAP_WRITE_ERROR) {
- free_buffers(page);
- page = ERR_PTR(-EIO);
- }
+ if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
+ ret = -EIO;
out:
- if (IS_ERR(page))
- printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %ld\n",
+ if (ret)
+ printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %d\n",
(int)PAGE_SIZE,
(unsigned long long)index << PAGE_SHIFT,
- PTR_ERR(page));
- return page;
+ ret);
+ return ret;
}
/*
@@ -426,9 +408,9 @@ void bitmap_update_sb(struct bitmap *bitmap)
return;
if (bitmap->mddev->bitmap_info.external)
return;
- if (!bitmap->sb_page) /* no superblock */
+ if (!bitmap->storage.sb_page) /* no superblock */
return;
- sb = kmap_atomic(bitmap->sb_page);
+ sb = kmap_atomic(bitmap->storage.sb_page);
sb->events = cpu_to_le64(bitmap->mddev->events);
if (bitmap->mddev->events < bitmap->events_cleared)
/* rocking back to read-only */
@@ -438,8 +420,13 @@ void bitmap_update_sb(struct bitmap *bitmap)
/* Just in case these have been changed via sysfs: */
sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
+ /* This might have been changed by a reshape */
+ sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
+ sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
+ sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
+ bitmap_info.space);
kunmap_atomic(sb);
- write_page(bitmap, bitmap->sb_page, 1);
+ write_page(bitmap, bitmap->storage.sb_page, 1);
}
/* print out the bitmap file superblock */
@@ -447,9 +434,9 @@ void bitmap_print_sb(struct bitmap *bitmap)
{
bitmap_super_t *sb;
- if (!bitmap || !bitmap->sb_page)
+ if (!bitmap || !bitmap->storage.sb_page)
return;
- sb = kmap_atomic(bitmap->sb_page);
+ sb = kmap_atomic(bitmap->storage.sb_page);
printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
printk(KERN_DEBUG " magic: %08x\n", le32_to_cpu(sb->magic));
printk(KERN_DEBUG " version: %d\n", le32_to_cpu(sb->version));
@@ -488,15 +475,15 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
unsigned long chunksize, daemon_sleep, write_behind;
int err = -EINVAL;
- bitmap->sb_page = alloc_page(GFP_KERNEL);
- if (IS_ERR(bitmap->sb_page)) {
- err = PTR_ERR(bitmap->sb_page);
- bitmap->sb_page = NULL;
+ bitmap->storage.sb_page = alloc_page(GFP_KERNEL);
+ if (IS_ERR(bitmap->storage.sb_page)) {
+ err = PTR_ERR(bitmap->storage.sb_page);
+ bitmap->storage.sb_page = NULL;
return err;
}
- bitmap->sb_page->index = 0;
+ bitmap->storage.sb_page->index = 0;
- sb = kmap_atomic(bitmap->sb_page);
+ sb = kmap_atomic(bitmap->storage.sb_page);
sb->magic = cpu_to_le32(BITMAP_MAGIC);
sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
@@ -534,8 +521,8 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
memcpy(sb->uuid, bitmap->mddev->uuid, 16);
- bitmap->flags |= BITMAP_STALE;
- sb->state |= cpu_to_le32(BITMAP_STALE);
+ set_bit(BITMAP_STALE, &bitmap->flags);
+ sb->state = cpu_to_le32(bitmap->flags);
bitmap->events_cleared = bitmap->mddev->events;
sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
@@ -551,31 +538,45 @@ static int bitmap_read_sb(struct bitmap *bitmap)
bitmap_super_t *sb;
unsigned long chunksize, daemon_sleep, write_behind;
unsigned long long events;
+ unsigned long sectors_reserved = 0;
int err = -EINVAL;
+ struct page *sb_page;
+ if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
+ chunksize = 128 * 1024 * 1024;
+ daemon_sleep = 5 * HZ;
+ write_behind = 0;
+ set_bit(BITMAP_STALE, &bitmap->flags);
+ err = 0;
+ goto out_no_sb;
+ }
/* page 0 is the superblock, read it... */
- if (bitmap->file) {
- loff_t isize = i_size_read(bitmap->file->f_mapping->host);
+ sb_page = alloc_page(GFP_KERNEL);
+ if (!sb_page)
+ return -ENOMEM;
+ bitmap->storage.sb_page = sb_page;
+
+ if (bitmap->storage.file) {
+ loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
- bitmap->sb_page = read_page(bitmap->file, 0, bitmap, bytes);
+ err = read_page(bitmap->storage.file, 0,
+ bitmap, bytes, sb_page);
} else {
- bitmap->sb_page = read_sb_page(bitmap->mddev,
- bitmap->mddev->bitmap_info.offset,
- NULL,
- 0, sizeof(bitmap_super_t));
+ err = read_sb_page(bitmap->mddev,
+ bitmap->mddev->bitmap_info.offset,
+ sb_page,
+ 0, sizeof(bitmap_super_t));
}
- if (IS_ERR(bitmap->sb_page)) {
- err = PTR_ERR(bitmap->sb_page);
- bitmap->sb_page = NULL;
+ if (err)
return err;
- }
- sb = kmap_atomic(bitmap->sb_page);
+ sb = kmap_atomic(sb_page);
chunksize = le32_to_cpu(sb->chunksize);
daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
write_behind = le32_to_cpu(sb->write_behind);
+ sectors_reserved = le32_to_cpu(sb->sectors_reserved);
/* verify that the bitmap-specific fields are valid */
if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
@@ -618,60 +619,32 @@ static int bitmap_read_sb(struct bitmap *bitmap)
"-- forcing full recovery\n",
bmname(bitmap), events,
(unsigned long long) bitmap->mddev->events);
- sb->state |= cpu_to_le32(BITMAP_STALE);
+ set_bit(BITMAP_STALE, &bitmap->flags);
}
}
/* assign fields using values from superblock */
- bitmap->mddev->bitmap_info.chunksize = chunksize;
- bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
- bitmap->mddev->bitmap_info.max_write_behind = write_behind;
bitmap->flags |= le32_to_cpu(sb->state);
if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
- bitmap->flags |= BITMAP_HOSTENDIAN;
+ set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
- if (bitmap->flags & BITMAP_STALE)
- bitmap->events_cleared = bitmap->mddev->events;
err = 0;
out:
kunmap_atomic(sb);
+out_no_sb:
+ if (test_bit(BITMAP_STALE, &bitmap->flags))
+ bitmap->events_cleared = bitmap->mddev->events;
+ bitmap->mddev->bitmap_info.chunksize = chunksize;
+ bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
+ bitmap->mddev->bitmap_info.max_write_behind = write_behind;
+ if (bitmap->mddev->bitmap_info.space == 0 ||
+ bitmap->mddev->bitmap_info.space > sectors_reserved)
+ bitmap->mddev->bitmap_info.space = sectors_reserved;
if (err)
bitmap_print_sb(bitmap);
return err;
}
-enum bitmap_mask_op {
- MASK_SET,
- MASK_UNSET
-};
-
-/* record the state of the bitmap in the superblock. Return the old value */
-static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
- enum bitmap_mask_op op)
-{
- bitmap_super_t *sb;
- int old;
-
- if (!bitmap->sb_page) /* can't set the state */
- return 0;
- sb = kmap_atomic(bitmap->sb_page);
- old = le32_to_cpu(sb->state) & bits;
- switch (op) {
- case MASK_SET:
- sb->state |= cpu_to_le32(bits);
- bitmap->flags |= bits;
- break;
- case MASK_UNSET:
- sb->state &= cpu_to_le32(~bits);
- bitmap->flags &= ~bits;
- break;
- default:
- BUG();
- }
- kunmap_atomic(sb);
- return old;
-}
-
/*
* general bitmap file operations
*/
@@ -683,17 +656,19 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
* file a page at a time. There's a superblock at the start of the file.
*/
/* calculate the index of the page that contains this bit */
-static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk)
+static inline unsigned long file_page_index(struct bitmap_storage *store,
+ unsigned long chunk)
{
- if (!bitmap->mddev->bitmap_info.external)
+ if (store->sb_page)
chunk += sizeof(bitmap_super_t) << 3;
return chunk >> PAGE_BIT_SHIFT;
}
/* calculate the (bit) offset of this bit within a page */
-static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk)
+static inline unsigned long file_page_offset(struct bitmap_storage *store,
+ unsigned long chunk)
{
- if (!bitmap->mddev->bitmap_info.external)
+ if (store->sb_page)
chunk += sizeof(bitmap_super_t) << 3;
return chunk & (PAGE_BITS - 1);
}
@@ -705,57 +680,86 @@ static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned lon
* 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
* 0 or page 1
*/
-static inline struct page *filemap_get_page(struct bitmap *bitmap,
+static inline struct page *filemap_get_page(struct bitmap_storage *store,
unsigned long chunk)
{
- if (file_page_index(bitmap, chunk) >= bitmap->file_pages)
+ if (file_page_index(store, chunk) >= store->file_pages)
return NULL;
- return bitmap->filemap[file_page_index(bitmap, chunk)
- - file_page_index(bitmap, 0)];
+ return store->filemap[file_page_index(store, chunk)
+ - file_page_index(store, 0)];
}
-static void bitmap_file_unmap(struct bitmap *bitmap)
+static int bitmap_storage_alloc(struct bitmap_storage *store,
+ unsigned long chunks, int with_super)
+{
+ int pnum;
+ unsigned long num_pages;
+ unsigned long bytes;
+
+ bytes = DIV_ROUND_UP(chunks, 8);
+ if (with_super)
+ bytes += sizeof(bitmap_super_t);
+
+ num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
+
+ store->filemap = kmalloc(sizeof(struct page *)
+ * num_pages, GFP_KERNEL);
+ if (!store->filemap)
+ return -ENOMEM;
+
+ if (with_super && !store->sb_page) {
+ store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
+ if (store->sb_page == NULL)
+ return -ENOMEM;
+ store->sb_page->index = 0;
+ }
+ pnum = 0;
+ if (store->sb_page) {
+ store->filemap[0] = store->sb_page;
+ pnum = 1;
+ }
+ for ( ; pnum < num_pages; pnum++) {
+ store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
+ if (!store->filemap[pnum]) {
+ store->file_pages = pnum;
+ return -ENOMEM;
+ }
+ store->filemap[pnum]->index = pnum;
+ }
+ store->file_pages = pnum;
+
+ /* We need 4 bits per page, rounded up to a multiple
+ * of sizeof(unsigned long) */
+ store->filemap_attr = kzalloc(
+ roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
+ GFP_KERNEL);
+ if (!store->filemap_attr)
+ return -ENOMEM;
+
+ store->bytes = bytes;
+
+ return 0;
+}
+
+static void bitmap_file_unmap(struct bitmap_storage *store)
{
struct page **map, *sb_page;
- unsigned long *attr;
int pages;
- unsigned long flags;
+ struct file *file;
- spin_lock_irqsave(&bitmap->lock, flags);
- map = bitmap->filemap;
- bitmap->filemap = NULL;
- attr = bitmap->filemap_attr;
- bitmap->filemap_attr = NULL;
- pages = bitmap->file_pages;
- bitmap->file_pages = 0;
- sb_page = bitmap->sb_page;
- bitmap->sb_page = NULL;
- spin_unlock_irqrestore(&bitmap->lock, flags);
+ file = store->file;
+ map = store->filemap;
+ pages = store->file_pages;
+ sb_page = store->sb_page;
while (pages--)
if (map[pages] != sb_page) /* 0 is sb_page, release it below */
free_buffers(map[pages]);
kfree(map);
- kfree(attr);
+ kfree(store->filemap_attr);
if (sb_page)
free_buffers(sb_page);
-}
-
-static void bitmap_file_put(struct bitmap *bitmap)
-{
- struct file *file;
- unsigned long flags;
-
- spin_lock_irqsave(&bitmap->lock, flags);
- file = bitmap->file;
- bitmap->file = NULL;
- spin_unlock_irqrestore(&bitmap->lock, flags);
-
- if (file)
- wait_event(bitmap->write_wait,
- atomic_read(&bitmap->pending_writes)==0);
- bitmap_file_unmap(bitmap);
if (file) {
struct inode *inode = file->f_path.dentry->d_inode;
@@ -773,14 +777,14 @@ static void bitmap_file_kick(struct bitmap *bitmap)
{
char *path, *ptr = NULL;
- if (bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET) == 0) {
+ if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
bitmap_update_sb(bitmap);
- if (bitmap->file) {
+ if (bitmap->storage.file) {
path = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (path)
- ptr = d_path(&bitmap->file->f_path, path,
- PAGE_SIZE);
+ ptr = d_path(&bitmap->storage.file->f_path,
+ path, PAGE_SIZE);
printk(KERN_ALERT
"%s: kicking failed bitmap file %s from array!\n",
@@ -792,10 +796,6 @@ static void bitmap_file_kick(struct bitmap *bitmap)
"%s: disabling internal bitmap due to errors\n",
bmname(bitmap));
}
-
- bitmap_file_put(bitmap);
-
- return;
}
enum bitmap_page_attr {
@@ -805,24 +805,30 @@ enum bitmap_page_attr {
BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */
};
-static inline void set_page_attr(struct bitmap *bitmap, struct page *page,
- enum bitmap_page_attr attr)
+static inline void set_page_attr(struct bitmap *bitmap, int pnum,
+ enum bitmap_page_attr attr)
{
- __set_bit((page->index<<2) + attr, bitmap->filemap_attr);
+ set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
}
-static inline void clear_page_attr(struct bitmap *bitmap, struct page *page,
- enum bitmap_page_attr attr)
+static inline void clear_page_attr(struct bitmap *bitmap, int pnum,
+ enum bitmap_page_attr attr)
{
- __clear_bit((page->index<<2) + attr, bitmap->filemap_attr);
+ clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
}
-static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page,
- enum bitmap_page_attr attr)
+static inline int test_page_attr(struct bitmap *bitmap, int pnum,
+ enum bitmap_page_attr attr)
{
- return test_bit((page->index<<2) + attr, bitmap->filemap_attr);
+ return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
}
+static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum,
+ enum bitmap_page_attr attr)
+{
+ return test_and_clear_bit((pnum<<2) + attr,
+ bitmap->storage.filemap_attr);
+}
/*
* bitmap_file_set_bit -- called before performing a write to the md device
* to set (and eventually sync) a particular bit in the bitmap file
@@ -835,26 +841,46 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
unsigned long bit;
struct page *page;
void *kaddr;
- unsigned long chunk = block >> bitmap->chunkshift;
+ unsigned long chunk = block >> bitmap->counts.chunkshift;
- if (!bitmap->filemap)
- return;
-
- page = filemap_get_page(bitmap, chunk);
+ page = filemap_get_page(&bitmap->storage, chunk);
if (!page)
return;
- bit = file_page_offset(bitmap, chunk);
+ bit = file_page_offset(&bitmap->storage, chunk);
/* set the bit */
kaddr = kmap_atomic(page);
- if (bitmap->flags & BITMAP_HOSTENDIAN)
+ if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
set_bit(bit, kaddr);
else
- __set_bit_le(bit, kaddr);
+ test_and_set_bit_le(bit, kaddr);
kunmap_atomic(kaddr);
pr_debug("set file bit %lu page %lu\n", bit, page->index);
/* record page number so it gets flushed to disk when unplug occurs */
- set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
+ set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
+}
+
+static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
+{
+ unsigned long bit;
+ struct page *page;
+ void *paddr;
+ unsigned long chunk = block >> bitmap->counts.chunkshift;
+
+ page = filemap_get_page(&bitmap->storage, chunk);
+ if (!page)
+ return;
+ bit = file_page_offset(&bitmap->storage, chunk);
+ paddr = kmap_atomic(page);
+ if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
+ clear_bit(bit, paddr);
+ else
+ test_and_clear_bit_le(bit, paddr);
+ kunmap_atomic(paddr);
+ if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
+ set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
+ bitmap->allclean = 0;
+ }
}
/* this gets called when the md device is ready to unplug its underlying
@@ -862,42 +888,37 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
* sync the dirty pages of the bitmap file to disk */
void bitmap_unplug(struct bitmap *bitmap)
{
- unsigned long i, flags;
+ unsigned long i;
int dirty, need_write;
- struct page *page;
int wait = 0;
- if (!bitmap)
+ if (!bitmap || !bitmap->storage.filemap ||
+ test_bit(BITMAP_STALE, &bitmap->flags))
return;
/* look at each page to see if there are any set bits that need to be
* flushed out to disk */
- for (i = 0; i < bitmap->file_pages; i++) {
- spin_lock_irqsave(&bitmap->lock, flags);
- if (!bitmap->filemap) {
- spin_unlock_irqrestore(&bitmap->lock, flags);
+ for (i = 0; i < bitmap->storage.file_pages; i++) {
+ if (!bitmap->storage.filemap)
return;
+ dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
+ need_write = test_and_clear_page_attr(bitmap, i,
+ BITMAP_PAGE_NEEDWRITE);
+ if (dirty || need_write) {
+ clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
+ write_page(bitmap, bitmap->storage.filemap[i], 0);
}
- page = bitmap->filemap[i];
- dirty = test_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
- need_write = test_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
- clear_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
- clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
if (dirty)
wait = 1;
- spin_unlock_irqrestore(&bitmap->lock, flags);
-
- if (dirty || need_write)
- write_page(bitmap, page, 0);
}
if (wait) { /* if any writes were performed, we need to wait on them */
- if (bitmap->file)
+ if (bitmap->storage.file)
wait_event(bitmap->write_wait,
atomic_read(&bitmap->pending_writes)==0);
else
md_super_wait(bitmap->mddev);
}
- if (bitmap->flags & BITMAP_WRITE_ERROR)
+ if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
bitmap_file_kick(bitmap);
}
EXPORT_SYMBOL(bitmap_unplug);
@@ -917,98 +938,77 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n
static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
{
unsigned long i, chunks, index, oldindex, bit;
- struct page *page = NULL, *oldpage = NULL;
- unsigned long num_pages, bit_cnt = 0;
+ struct page *page = NULL;
+ unsigned long bit_cnt = 0;
struct file *file;
- unsigned long bytes, offset;
+ unsigned long offset;
int outofdate;
int ret = -ENOSPC;
void *paddr;
+ struct bitmap_storage *store = &bitmap->storage;
- chunks = bitmap->chunks;
- file = bitmap->file;
+ chunks = bitmap->counts.chunks;
+ file = store->file;
- BUG_ON(!file && !bitmap->mddev->bitmap_info.offset);
+ if (!file && !bitmap->mddev->bitmap_info.offset) {
+ /* No permanent bitmap - fill with '1s'. */
+ store->filemap = NULL;
+ store->file_pages = 0;
+ for (i = 0; i < chunks ; i++) {
+ /* if the disk bit is set, set the memory bit */
+ int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift)
+ >= start);
+ bitmap_set_memory_bits(bitmap,
+ (sector_t)i << bitmap->counts.chunkshift,
+ needed);
+ }
+ return 0;
+ }
- outofdate = bitmap->flags & BITMAP_STALE;
+ outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
if (outofdate)
printk(KERN_INFO "%s: bitmap file is out of date, doing full "
"recovery\n", bmname(bitmap));
- bytes = DIV_ROUND_UP(bitmap->chunks, 8);
- if (!bitmap->mddev->bitmap_info.external)
- bytes += sizeof(bitmap_super_t);
-
- num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
-
- if (file && i_size_read(file->f_mapping->host) < bytes) {
+ if (file && i_size_read(file->f_mapping->host) < store->bytes) {
printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
- bmname(bitmap),
- (unsigned long) i_size_read(file->f_mapping->host),
- bytes);
+ bmname(bitmap),
+ (unsigned long) i_size_read(file->f_mapping->host),
+ store->bytes);
goto err;
}
- ret = -ENOMEM;
-
- bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
- if (!bitmap->filemap)
- goto err;
-
- /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */
- bitmap->filemap_attr = kzalloc(
- roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
- GFP_KERNEL);
- if (!bitmap->filemap_attr)
- goto err;
-
oldindex = ~0L;
+ offset = 0;
+ if (!bitmap->mddev->bitmap_info.external)
+ offset = sizeof(bitmap_super_t);
for (i = 0; i < chunks; i++) {
int b;
- index = file_page_index(bitmap, i);
- bit = file_page_offset(bitmap, i);
+ index = file_page_index(&bitmap->storage, i);
+ bit = file_page_offset(&bitmap->storage, i);
if (index != oldindex) { /* this is a new page, read it in */
int count;
/* unmap the old page, we're done with it */
- if (index == num_pages-1)
- count = bytes - index * PAGE_SIZE;
+ if (index == store->file_pages-1)
+ count = store->bytes - index * PAGE_SIZE;
else
count = PAGE_SIZE;
- if (index == 0 && bitmap->sb_page) {
- /*
- * if we're here then the superblock page
- * contains some bits (PAGE_SIZE != sizeof sb)
- * we've already read it in, so just use it
- */
- page = bitmap->sb_page;
- offset = sizeof(bitmap_super_t);
- if (!file)
- page = read_sb_page(
- bitmap->mddev,
- bitmap->mddev->bitmap_info.offset,
- page,
- index, count);
- } else if (file) {
- page = read_page(file, index, bitmap, count);
- offset = 0;
- } else {
- page = read_sb_page(bitmap->mddev,
- bitmap->mddev->bitmap_info.offset,
- NULL,
- index, count);
- offset = 0;
- }
- if (IS_ERR(page)) { /* read error */
- ret = PTR_ERR(page);
+ page = store->filemap[index];
+ if (file)
+ ret = read_page(file, index, bitmap,
+ count, page);
+ else
+ ret = read_sb_page(
+ bitmap->mddev,
+ bitmap->mddev->bitmap_info.offset,
+ page,
+ index, count);
+
+ if (ret)
goto err;
- }
oldindex = index;
- oldpage = page;
-
- bitmap->filemap[bitmap->file_pages++] = page;
- bitmap->last_page_size = count;
if (outofdate) {
/*
@@ -1022,39 +1022,33 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
write_page(bitmap, page, 1);
ret = -EIO;
- if (bitmap->flags & BITMAP_WRITE_ERROR)
+ if (test_bit(BITMAP_WRITE_ERROR,
+ &bitmap->flags))
goto err;
}
}
paddr = kmap_atomic(page);
- if (bitmap->flags & BITMAP_HOSTENDIAN)
+ if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
b = test_bit(bit, paddr);
else
b = test_bit_le(bit, paddr);
kunmap_atomic(paddr);
if (b) {
/* if the disk bit is set, set the memory bit */
- int needed = ((sector_t)(i+1) << bitmap->chunkshift
+ int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
>= start);
bitmap_set_memory_bits(bitmap,
- (sector_t)i << bitmap->chunkshift,
+ (sector_t)i << bitmap->counts.chunkshift,
needed);
bit_cnt++;
}
- }
-
- /* everything went OK */
- ret = 0;
- bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET);
-
- if (bit_cnt) { /* Kick recovery if any bits were set */
- set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
- md_wakeup_thread(bitmap->mddev->thread);
+ offset = 0;
}
printk(KERN_INFO "%s: bitmap initialized from disk: "
- "read %lu/%lu pages, set %lu of %lu bits\n",
- bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt, chunks);
+ "read %lu pages, set %lu of %lu bits\n",
+ bmname(bitmap), store->file_pages,
+ bit_cnt, chunks);
return 0;
@@ -1071,22 +1065,38 @@ void bitmap_write_all(struct bitmap *bitmap)
*/
int i;
- spin_lock_irq(&bitmap->lock);
- for (i = 0; i < bitmap->file_pages; i++)
- set_page_attr(bitmap, bitmap->filemap[i],
+ if (!bitmap || !bitmap->storage.filemap)
+ return;
+ if (bitmap->storage.file)
+ /* Only one copy, so nothing needed */
+ return;
+
+ for (i = 0; i < bitmap->storage.file_pages; i++)
+ set_page_attr(bitmap, i,
BITMAP_PAGE_NEEDWRITE);
bitmap->allclean = 0;
- spin_unlock_irq(&bitmap->lock);
}
-static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc)
+static void bitmap_count_page(struct bitmap_counts *bitmap,
+ sector_t offset, int inc)
{
sector_t chunk = offset >> bitmap->chunkshift;
unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
bitmap->bp[page].count += inc;
bitmap_checkfree(bitmap, page);
}
-static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
+
+static void bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
+{
+ sector_t chunk = offset >> bitmap->chunkshift;
+ unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
+ struct bitmap_page *bp = &bitmap->bp[page];
+
+ if (!bp->pending)
+ bp->pending = 1;
+}
+
+static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
sector_t offset, sector_t *blocks,
int create);
@@ -1099,10 +1109,9 @@ void bitmap_daemon_work(struct mddev *mddev)
{
struct bitmap *bitmap;
unsigned long j;
- unsigned long flags;
- struct page *page = NULL, *lastpage = NULL;
+ unsigned long nextpage;
sector_t blocks;
- void *paddr;
+ struct bitmap_counts *counts;
/* Use a mutex to guard daemon_work against
* bitmap_destroy.
@@ -1124,112 +1133,90 @@ void bitmap_daemon_work(struct mddev *mddev)
}
bitmap->allclean = 1;
- spin_lock_irqsave(&bitmap->lock, flags);
- for (j = 0; j < bitmap->chunks; j++) {
+ /* Any file-page which is PENDING now needs to be written.
+ * So set NEEDWRITE now, then after we make any last-minute changes
+ * we will write it.
+ */
+ for (j = 0; j < bitmap->storage.file_pages; j++)
+ if (test_and_clear_page_attr(bitmap, j,
+ BITMAP_PAGE_PENDING))
+ set_page_attr(bitmap, j,
+ BITMAP_PAGE_NEEDWRITE);
+
+ if (bitmap->need_sync &&
+ mddev->bitmap_info.external == 0) {
+ /* Arrange for superblock update as well as
+ * other changes */
+ bitmap_super_t *sb;
+ bitmap->need_sync = 0;
+ if (bitmap->storage.filemap) {
+ sb = kmap_atomic(bitmap->storage.sb_page);
+ sb->events_cleared =
+ cpu_to_le64(bitmap->events_cleared);
+ kunmap_atomic(sb);
+ set_page_attr(bitmap, 0,
+ BITMAP_PAGE_NEEDWRITE);
+ }
+ }
+ /* Now look at the bitmap counters and if any are '2' or '1',
+ * decrement and handle accordingly.
+ */
+ counts = &bitmap->counts;
+ spin_lock_irq(&counts->lock);
+ nextpage = 0;
+ for (j = 0; j < counts->chunks; j++) {
bitmap_counter_t *bmc;
- if (!bitmap->filemap)
- /* error or shutdown */
- break;
+ sector_t block = (sector_t)j << counts->chunkshift;
- page = filemap_get_page(bitmap, j);
-
- if (page != lastpage) {
- /* skip this page unless it's marked as needing cleaning */
- if (!test_page_attr(bitmap, page, BITMAP_PAGE_PENDING)) {
- int need_write = test_page_attr(bitmap, page,
- BITMAP_PAGE_NEEDWRITE);
- if (need_write)
- clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
-
- spin_unlock_irqrestore(&bitmap->lock, flags);
- if (need_write)
- write_page(bitmap, page, 0);
- spin_lock_irqsave(&bitmap->lock, flags);
- j |= (PAGE_BITS - 1);
+ if (j == nextpage) {
+ nextpage += PAGE_COUNTER_RATIO;
+ if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) {
+ j |= PAGE_COUNTER_MASK;
continue;
}
-
- /* grab the new page, sync and release the old */
- if (lastpage != NULL) {
- if (test_page_attr(bitmap, lastpage,
- BITMAP_PAGE_NEEDWRITE)) {
- clear_page_attr(bitmap, lastpage,
- BITMAP_PAGE_NEEDWRITE);
- spin_unlock_irqrestore(&bitmap->lock, flags);
- write_page(bitmap, lastpage, 0);
- } else {
- set_page_attr(bitmap, lastpage,
- BITMAP_PAGE_NEEDWRITE);
- bitmap->allclean = 0;
- spin_unlock_irqrestore(&bitmap->lock, flags);
- }
- } else
- spin_unlock_irqrestore(&bitmap->lock, flags);
- lastpage = page;
-
- /* We are possibly going to clear some bits, so make
- * sure that events_cleared is up-to-date.
- */
- if (bitmap->need_sync &&
- mddev->bitmap_info.external == 0) {
- bitmap_super_t *sb;
- bitmap->need_sync = 0;
- sb = kmap_atomic(bitmap->sb_page);
- sb->events_cleared =
- cpu_to_le64(bitmap->events_cleared);
- kunmap_atomic(sb);
- write_page(bitmap, bitmap->sb_page, 1);
- }
- spin_lock_irqsave(&bitmap->lock, flags);
- if (!bitmap->need_sync)
- clear_page_attr(bitmap, page, BITMAP_PAGE_PENDING);
- else
- bitmap->allclean = 0;
+ counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0;
}
- bmc = bitmap_get_counter(bitmap,
- (sector_t)j << bitmap->chunkshift,
+ bmc = bitmap_get_counter(counts,
+ block,
&blocks, 0);
- if (!bmc)
+
+ if (!bmc) {
j |= PAGE_COUNTER_MASK;
- else if (*bmc) {
- if (*bmc == 1 && !bitmap->need_sync) {
- /* we can clear the bit */
- *bmc = 0;
- bitmap_count_page(bitmap,
- (sector_t)j << bitmap->chunkshift,
- -1);
-
- /* clear the bit */
- paddr = kmap_atomic(page);
- if (bitmap->flags & BITMAP_HOSTENDIAN)
- clear_bit(file_page_offset(bitmap, j),
- paddr);
- else
- __clear_bit_le(
- file_page_offset(bitmap,
- j),
- paddr);
- kunmap_atomic(paddr);
- } else if (*bmc <= 2) {
- *bmc = 1; /* maybe clear the bit next time */
- set_page_attr(bitmap, page, BITMAP_PAGE_PENDING);
- bitmap->allclean = 0;
- }
+ continue;
}
- }
- spin_unlock_irqrestore(&bitmap->lock, flags);
-
- /* now sync the final page */
- if (lastpage != NULL) {
- spin_lock_irqsave(&bitmap->lock, flags);
- if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
- clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
- spin_unlock_irqrestore(&bitmap->lock, flags);
- write_page(bitmap, lastpage, 0);
- } else {
- set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
+ if (*bmc == 1 && !bitmap->need_sync) {
+ /* We can clear the bit */
+ *bmc = 0;
+ bitmap_count_page(counts, block, -1);
+ bitmap_file_clear_bit(bitmap, block);
+ } else if (*bmc && *bmc <= 2) {
+ *bmc = 1;
+ bitmap_set_pending(counts, block);
bitmap->allclean = 0;
- spin_unlock_irqrestore(&bitmap->lock, flags);
+ }
+ }
+ spin_unlock_irq(&counts->lock);
+
+ /* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
+ * DIRTY pages need to be written by bitmap_unplug so it can wait
+ * for them.
+ * If we find any DIRTY page we stop there and let bitmap_unplug
+ * handle all the rest. This is important in the case where
+ * the first blocking holds the superblock and it has been updated.
+ * We mustn't write any other blocks before the superblock.
+ */
+ for (j = 0;
+ j < bitmap->storage.file_pages
+ && !test_bit(BITMAP_STALE, &bitmap->flags);
+ j++) {
+
+ if (test_page_attr(bitmap, j,
+ BITMAP_PAGE_DIRTY))
+ /* bitmap_unplug will handle the rest */
+ break;
+ if (test_and_clear_page_attr(bitmap, j,
+ BITMAP_PAGE_NEEDWRITE)) {
+ write_page(bitmap, bitmap->storage.filemap[j], 0);
}
}
@@ -1240,7 +1227,7 @@ void bitmap_daemon_work(struct mddev *mddev)
mutex_unlock(&mddev->bitmap_info.mutex);
}
-static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
+static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
sector_t offset, sector_t *blocks,
int create)
__releases(bitmap->lock)
@@ -1302,10 +1289,10 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
sector_t blocks;
bitmap_counter_t *bmc;
- spin_lock_irq(&bitmap->lock);
- bmc = bitmap_get_counter(bitmap, offset, &blocks, 1);
+ spin_lock_irq(&bitmap->counts.lock);
+ bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
if (!bmc) {
- spin_unlock_irq(&bitmap->lock);
+ spin_unlock_irq(&bitmap->counts.lock);
return 0;
}
@@ -1317,7 +1304,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
*/
prepare_to_wait(&bitmap->overflow_wait, &__wait,
TASK_UNINTERRUPTIBLE);
- spin_unlock_irq(&bitmap->lock);
+ spin_unlock_irq(&bitmap->counts.lock);
io_schedule();
finish_wait(&bitmap->overflow_wait, &__wait);
continue;
@@ -1326,7 +1313,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
switch (*bmc) {
case 0:
bitmap_file_set_bit(bitmap, offset);
- bitmap_count_page(bitmap, offset, 1);
+ bitmap_count_page(&bitmap->counts, offset, 1);
/* fall through */
case 1:
*bmc = 2;
@@ -1334,7 +1321,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
(*bmc)++;
- spin_unlock_irq(&bitmap->lock);
+ spin_unlock_irq(&bitmap->counts.lock);
offset += blocks;
if (sectors > blocks)
@@ -1364,10 +1351,10 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
unsigned long flags;
bitmap_counter_t *bmc;
- spin_lock_irqsave(&bitmap->lock, flags);
- bmc = bitmap_get_counter(bitmap, offset, &blocks, 0);
+ spin_lock_irqsave(&bitmap->counts.lock, flags);
+ bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 0);
if (!bmc) {
- spin_unlock_irqrestore(&bitmap->lock, flags);
+ spin_unlock_irqrestore(&bitmap->counts.lock, flags);
return;
}
@@ -1386,14 +1373,10 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
(*bmc)--;
if (*bmc <= 2) {
- set_page_attr(bitmap,
- filemap_get_page(
- bitmap,
- offset >> bitmap->chunkshift),
- BITMAP_PAGE_PENDING);
+ bitmap_set_pending(&bitmap->counts, offset);
bitmap->allclean = 0;
}
- spin_unlock_irqrestore(&bitmap->lock, flags);
+ spin_unlock_irqrestore(&bitmap->counts.lock, flags);
offset += blocks;
if (sectors > blocks)
sectors -= blocks;
@@ -1412,8 +1395,8 @@ static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t
*blocks = 1024;
return 1; /* always resync if no bitmap */
}
- spin_lock_irq(&bitmap->lock);
- bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
+ spin_lock_irq(&bitmap->counts.lock);
+ bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
rv = 0;
if (bmc) {
/* locked */
@@ -1427,7 +1410,7 @@ static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t
}
}
}
- spin_unlock_irq(&bitmap->lock);
+ spin_unlock_irq(&bitmap->counts.lock);
return rv;
}
@@ -1464,8 +1447,8 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, i
*blocks = 1024;
return;
}
- spin_lock_irqsave(&bitmap->lock, flags);
- bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
+ spin_lock_irqsave(&bitmap->counts.lock, flags);
+ bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
if (bmc == NULL)
goto unlock;
/* locked */
@@ -1476,15 +1459,13 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, i
*bmc |= NEEDED_MASK;
else {
if (*bmc <= 2) {
- set_page_attr(bitmap,
- filemap_get_page(bitmap, offset >> bitmap->chunkshift),
- BITMAP_PAGE_PENDING);
+ bitmap_set_pending(&bitmap->counts, offset);
bitmap->allclean = 0;
}
}
}
unlock:
- spin_unlock_irqrestore(&bitmap->lock, flags);
+ spin_unlock_irqrestore(&bitmap->counts.lock, flags);
}
EXPORT_SYMBOL(bitmap_end_sync);
@@ -1524,7 +1505,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
bitmap->mddev->curr_resync_completed = sector;
set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
- sector &= ~((1ULL << bitmap->chunkshift) - 1);
+ sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
s = 0;
while (s < sector && s < bitmap->mddev->resync_max_sectors) {
bitmap_end_sync(bitmap, s, &blocks, 0);
@@ -1538,27 +1519,25 @@ EXPORT_SYMBOL(bitmap_cond_end_sync);
static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
{
/* For each chunk covered by any of these sectors, set the
- * counter to 1 and set resync_needed. They should all
+ * counter to 2 and possibly set resync_needed. They should all
* be 0 at this point
*/
sector_t secs;
bitmap_counter_t *bmc;
- spin_lock_irq(&bitmap->lock);
- bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
+ spin_lock_irq(&bitmap->counts.lock);
+ bmc = bitmap_get_counter(&bitmap->counts, offset, &secs, 1);
if (!bmc) {
- spin_unlock_irq(&bitmap->lock);
+ spin_unlock_irq(&bitmap->counts.lock);
return;
}
if (!*bmc) {
- struct page *page;
*bmc = 2 | (needed ? NEEDED_MASK : 0);
- bitmap_count_page(bitmap, offset, 1);
- page = filemap_get_page(bitmap, offset >> bitmap->chunkshift);
- set_page_attr(bitmap, page, BITMAP_PAGE_PENDING);
+ bitmap_count_page(&bitmap->counts, offset, 1);
+ bitmap_set_pending(&bitmap->counts, offset);
bitmap->allclean = 0;
}
- spin_unlock_irq(&bitmap->lock);
+ spin_unlock_irq(&bitmap->counts.lock);
}
/* dirty the memory and file bits for bitmap chunks "s" to "e" */
@@ -1567,11 +1546,9 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
unsigned long chunk;
for (chunk = s; chunk <= e; chunk++) {
- sector_t sec = (sector_t)chunk << bitmap->chunkshift;
+ sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
bitmap_set_memory_bits(bitmap, sec, 1);
- spin_lock_irq(&bitmap->lock);
bitmap_file_set_bit(bitmap, sec);
- spin_unlock_irq(&bitmap->lock);
if (sec < bitmap->mddev->recovery_cp)
/* We are asserting that the array is dirty,
* so move the recovery_cp address back so
@@ -1616,11 +1593,15 @@ static void bitmap_free(struct bitmap *bitmap)
if (!bitmap) /* there was no bitmap */
return;
- /* release the bitmap file and kill the daemon */
- bitmap_file_put(bitmap);
+ /* Shouldn't be needed - but just in case.... */
+ wait_event(bitmap->write_wait,
+ atomic_read(&bitmap->pending_writes) == 0);
+
+ /* release the bitmap file */
+ bitmap_file_unmap(&bitmap->storage);
- bp = bitmap->bp;
- pages = bitmap->pages;
+ bp = bitmap->counts.bp;
+ pages = bitmap->counts.pages;
/* free all allocated memory */
@@ -1659,25 +1640,19 @@ int bitmap_create(struct mddev *mddev)
{
struct bitmap *bitmap;
sector_t blocks = mddev->resync_max_sectors;
- unsigned long chunks;
- unsigned long pages;
struct file *file = mddev->bitmap_info.file;
int err;
struct sysfs_dirent *bm = NULL;
BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
- if (!file
- && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */
- return 0;
-
BUG_ON(file && mddev->bitmap_info.offset);
bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
if (!bitmap)
return -ENOMEM;
- spin_lock_init(&bitmap->lock);
+ spin_lock_init(&bitmap->counts.lock);
atomic_set(&bitmap->pending_writes, 0);
init_waitqueue_head(&bitmap->write_wait);
init_waitqueue_head(&bitmap->overflow_wait);
@@ -1693,7 +1668,7 @@ int bitmap_create(struct mddev *mddev)
} else
bitmap->sysfs_can_clear = NULL;
- bitmap->file = file;
+ bitmap->storage.file = file;
if (file) {
get_file(file);
/* As future accesses to this file will use bmap,
@@ -1724,32 +1699,15 @@ int bitmap_create(struct mddev *mddev)
goto error;
bitmap->daemon_lastrun = jiffies;
- bitmap->chunkshift = (ffz(~mddev->bitmap_info.chunksize)
- - BITMAP_BLOCK_SHIFT);
-
- chunks = (blocks + (1 << bitmap->chunkshift) - 1) >>
- bitmap->chunkshift;
- pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO;
-
- BUG_ON(!pages);
-
- bitmap->chunks = chunks;
- bitmap->pages = pages;
- bitmap->missing_pages = pages;
-
- bitmap->bp = kzalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL);
-
- err = -ENOMEM;
- if (!bitmap->bp)
+ err = bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1);
+ if (err)
goto error;
printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
- pages, bmname(bitmap));
+ bitmap->counts.pages, bmname(bitmap));
mddev->bitmap = bitmap;
-
-
- return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0;
+ return test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
error:
bitmap_free(bitmap);
@@ -1790,13 +1748,17 @@ int bitmap_load(struct mddev *mddev)
if (err)
goto out;
+ clear_bit(BITMAP_STALE, &bitmap->flags);
+
+ /* Kick recovery in case any bits were set */
+ set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
md_wakeup_thread(mddev->thread);
bitmap_update_sb(bitmap);
- if (bitmap->flags & BITMAP_WRITE_ERROR)
+ if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
err = -EIO;
out:
return err;
@@ -1806,30 +1768,194 @@ EXPORT_SYMBOL_GPL(bitmap_load);
void bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
{
unsigned long chunk_kb;
- unsigned long flags;
+ struct bitmap_counts *counts;
if (!bitmap)
return;
- spin_lock_irqsave(&bitmap->lock, flags);
+ counts = &bitmap->counts;
+
chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10;
seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
"%lu%s chunk",
- bitmap->pages - bitmap->missing_pages,
- bitmap->pages,
- (bitmap->pages - bitmap->missing_pages)
+ counts->pages - counts->missing_pages,
+ counts->pages,
+ (counts->pages - counts->missing_pages)
<< (PAGE_SHIFT - 10),
chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize,
chunk_kb ? "KB" : "B");
- if (bitmap->file) {
+ if (bitmap->storage.file) {
seq_printf(seq, ", file: ");
- seq_path(seq, &bitmap->file->f_path, " \t\n");
+ seq_path(seq, &bitmap->storage.file->f_path, " \t\n");
}
seq_printf(seq, "\n");
- spin_unlock_irqrestore(&bitmap->lock, flags);
}
+int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
+ int chunksize, int init)
+{
+ /* If chunk_size is 0, choose an appropriate chunk size.
+ * Then possibly allocate new storage space.
+ * Then quiesce, copy bits, replace bitmap, and re-start
+ *
+ * This function is called both to set up the initial bitmap
+ * and to resize the bitmap while the array is active.
+ * If this happens as a result of the array being resized,
+ * chunksize will be zero, and we need to choose a suitable
+ * chunksize, otherwise we use what we are given.
+ */
+ struct bitmap_storage store;
+ struct bitmap_counts old_counts;
+ unsigned long chunks;
+ sector_t block;
+ sector_t old_blocks, new_blocks;
+ int chunkshift;
+ int ret = 0;
+ long pages;
+ struct bitmap_page *new_bp;
+
+ if (chunksize == 0) {
+ /* If there is enough space, leave the chunk size unchanged,
+ * else increase by factor of two until there is enough space.
+ */
+ long bytes;
+ long space = bitmap->mddev->bitmap_info.space;
+
+ if (space == 0) {
+ /* We don't know how much space there is, so limit
+ * to current size - in sectors.
+ */
+ bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
+ if (!bitmap->mddev->bitmap_info.external)
+ bytes += sizeof(bitmap_super_t);
+ space = DIV_ROUND_UP(bytes, 512);
+ bitmap->mddev->bitmap_info.space = space;
+ }
+ chunkshift = bitmap->counts.chunkshift;
+ chunkshift--;
+ do {
+ /* 'chunkshift' is shift from block size to chunk size */
+ chunkshift++;
+ chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
+ bytes = DIV_ROUND_UP(chunks, 8);
+ if (!bitmap->mddev->bitmap_info.external)
+ bytes += sizeof(bitmap_super_t);
+ } while (bytes > (space << 9));
+ } else
+ chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;
+
+ chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
+ memset(&store, 0, sizeof(store));
+ if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
+ ret = bitmap_storage_alloc(&store, chunks,
+ !bitmap->mddev->bitmap_info.external);
+ if (ret)
+ goto err;
+
+ pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);
+
+ new_bp = kzalloc(pages * sizeof(*new_bp), GFP_KERNEL);
+ ret = -ENOMEM;
+ if (!new_bp) {
+ bitmap_file_unmap(&store);
+ goto err;
+ }
+
+ if (!init)
+ bitmap->mddev->pers->quiesce(bitmap->mddev, 1);
+
+ store.file = bitmap->storage.file;
+ bitmap->storage.file = NULL;
+
+ if (store.sb_page && bitmap->storage.sb_page)
+ memcpy(page_address(store.sb_page),
+ page_address(bitmap->storage.sb_page),
+ sizeof(bitmap_super_t));
+ bitmap_file_unmap(&bitmap->storage);
+ bitmap->storage = store;
+
+ old_counts = bitmap->counts;
+ bitmap->counts.bp = new_bp;
+ bitmap->counts.pages = pages;
+ bitmap->counts.missing_pages = pages;
+ bitmap->counts.chunkshift = chunkshift;
+ bitmap->counts.chunks = chunks;
+ bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift +
+ BITMAP_BLOCK_SHIFT);
+
+ blocks = min(old_counts.chunks << old_counts.chunkshift,
+ chunks << chunkshift);
+
+ spin_lock_irq(&bitmap->counts.lock);
+ for (block = 0; block < blocks; ) {
+ bitmap_counter_t *bmc_old, *bmc_new;
+ int set;
+
+ bmc_old = bitmap_get_counter(&old_counts, block,
+ &old_blocks, 0);
+ set = bmc_old && NEEDED(*bmc_old);
+
+ if (set) {
+ bmc_new = bitmap_get_counter(&bitmap->counts, block,
+ &new_blocks, 1);
+ if (*bmc_new == 0) {
+ /* need to set on-disk bits too. */
+ sector_t end = block + new_blocks;
+ sector_t start = block >> chunkshift;
+ start <<= chunkshift;
+ while (start < end) {
+ bitmap_file_set_bit(bitmap, block);
+ start += 1 << chunkshift;
+ }
+ *bmc_new = 2;
+ bitmap_count_page(&bitmap->counts,
+ block, 1);
+ bitmap_set_pending(&bitmap->counts,
+ block);
+ }
+ *bmc_new |= NEEDED_MASK;
+ if (new_blocks < old_blocks)
+ old_blocks = new_blocks;
+ }
+ block += old_blocks;
+ }
+
+ if (!init) {
+ int i;
+ while (block < (chunks << chunkshift)) {
+ bitmap_counter_t *bmc;
+ bmc = bitmap_get_counter(&bitmap->counts, block,
+ &new_blocks, 1);
+ if (bmc) {
+ /* new space. It needs to be resynced, so
+ * we set NEEDED_MASK.
+ */
+ if (*bmc == 0) {
+ *bmc = NEEDED_MASK | 2;
+ bitmap_count_page(&bitmap->counts,
+ block, 1);
+ bitmap_set_pending(&bitmap->counts,
+ block);
+ }
+ }
+ block += new_blocks;
+ }
+ for (i = 0; i < bitmap->storage.file_pages; i++)
+ set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
+ }
+ spin_unlock_irq(&bitmap->counts.lock);
+
+ if (!init) {
+ bitmap_unplug(bitmap);
+ bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
+ }
+ ret = 0;
+err:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(bitmap_resize);
+
static ssize_t
location_show(struct mddev *mddev, char *page)
{
@@ -1923,6 +2049,43 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
static struct md_sysfs_entry bitmap_location =
__ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
+/* 'bitmap/space' is the space available at 'location' for the
+ * bitmap. This allows the kernel to know when it is safe to
+ * resize the bitmap to match a resized array.
+ */
+static ssize_t
+space_show(struct mddev *mddev, char *page)
+{
+ return sprintf(page, "%lu\n", mddev->bitmap_info.space);
+}
+
+static ssize_t
+space_store(struct mddev *mddev, const char *buf, size_t len)
+{
+ unsigned long sectors;
+ int rv;
+
+ rv = kstrtoul(buf, 10, &sectors);
+ if (rv)
+ return rv;
+
+ if (sectors == 0)
+ return -EINVAL;
+
+ if (mddev->bitmap &&
+ sectors < (mddev->bitmap->storage.bytes + 511) >> 9)
+ return -EFBIG; /* Bitmap is too big for this small space */
+
+ /* could make sure it isn't too big, but that isn't really
+ * needed - user-space should be careful.
+ */
+ mddev->bitmap_info.space = sectors;
+ return len;
+}
+
+static struct md_sysfs_entry bitmap_space =
+__ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store);
+
static ssize_t
timeout_show(struct mddev *mddev, char *page)
{
@@ -2098,6 +2261,7 @@ __ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
static struct attribute *md_bitmap_attrs[] = {
&bitmap_location.attr,
+ &bitmap_space.attr,
&bitmap_timeout.attr,
&bitmap_backlog.attr,
&bitmap_chunksize.attr,
View
60 drivers/md/bitmap.h
@@ -111,9 +111,9 @@ typedef __u16 bitmap_counter_t;
/* use these for bitmap->flags and bitmap->sb->state bit-fields */
enum bitmap_state {
- BITMAP_STALE = 0x002, /* the bitmap file is out of date or had -EIO */
- BITMAP_WRITE_ERROR = 0x004, /* A write error has occurred */
- BITMAP_HOSTENDIAN = 0x8000,
+ BITMAP_STALE = 1, /* the bitmap file is out of date or had -EIO */
+ BITMAP_WRITE_ERROR = 2, /* A write error has occurred */
+ BITMAP_HOSTENDIAN =15,
};
/* the superblock at the front of the bitmap file -- little endian */
@@ -128,8 +128,10 @@ typedef struct bitmap_super_s {
__le32 chunksize; /* 52 the bitmap chunk size in bytes */
__le32 daemon_sleep; /* 56 seconds between disk flushes */
__le32 write_behind; /* 60 number of outstanding write-behind writes */
+ __le32 sectors_reserved; /* 64 number of 512-byte sectors that are
+ * reserved for the bitmap. */
- __u8 pad[256 - 64]; /* set to zero */
+ __u8 pad[256 - 68]; /* set to zero */
} bitmap_super_t;
/* notes:
@@ -160,35 +162,48 @@ struct bitmap_page {
*/
unsigned int hijacked:1;
/*
+ * If any counter in this page is '1' or '2' - and so could be
+ * cleared then that page is marked as 'pending'
+ */
+ unsigned int pending:1;
+ /*
* count of dirty bits on the page
*/
- unsigned int count:31;
+ unsigned int count:30;
};
/* the main bitmap structure - one per mddev */
struct bitmap {
- struct bitmap_page *bp;
- unsigned long pages; /* total number of pages in the bitmap */
- unsigned long missing_pages; /* number of pages not yet allocated */
- struct mddev *mddev; /* the md device that the bitmap is for */
+ struct bitmap_counts {
+ spinlock_t lock;
+ struct bitmap_page *bp;
+ unsigned long pages; /* total number of pages
+ * in the bitmap */
+ unsigned long missing_pages; /* number of pages
+ * not yet allocated */
+ unsigned long chunkshift; /* chunksize = 2^chunkshift
+ * (for bitops) */
+ unsigned long chunks; /* Total number of data
+ * chunks for the array */
+ } counts;
- /* bitmap chunksize -- how much data does each bit represent? */
- unsigned long chunkshift; /* chunksize = 2^(chunkshift+9) (for bitops) */
- unsigned long chunks; /* total number of data chunks for the array */
+ struct mddev *mddev; /* the md device that the bitmap is for */
__u64 events_cleared;
int need_sync;
- /* bitmap spinlock */
- spinlock_t lock;
-
- struct file *file; /* backing disk file */
- struct page *sb_page; /* cached copy of the bitmap file superblock */
- struct page **filemap; /* list of cache pages for the file */
- unsigned long *filemap_attr; /* attributes associated w/ filemap pages */
- unsigned long file_pages; /* number of pages in the file */
- int last_page_size; /* bytes in the last page */
+ struct bitmap_storage {
+ struct file *file; /* backing disk file */
+ struct page *sb_page; /* cached copy of the bitmap
+ * file superblock */
+ struct page **filemap; /* list of cache pages for
+ * the file */
+ unsigned long *filemap_attr; /* attributes associated
+ * w/ filemap pages */
+ unsigned long file_pages; /* number of pages in the file*/
+ unsigned long bytes; /* total bytes in the bitmap */
+ } storage;
unsigned long flags;
@@ -242,6 +257,9 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector);
void bitmap_unplug(struct bitmap *bitmap);
void bitmap_daemon_work(struct mddev *mddev);
+
+int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
+ int chunksize, int init);
#endif
#endif
View
22 drivers/md/dm-raid.c
@@ -155,10 +155,7 @@ static void context_free(struct raid_set *rs)
for (i = 0; i < rs->md.raid_disks; i++) {
if (rs->dev[i].meta_dev)
dm_put_device(rs->ti, rs->dev[i].meta_dev);
- if (rs->dev[i].rdev.sb_page)
- put_page(rs->dev[i].rdev.sb_page);
- rs->dev[i].rdev.sb_page = NULL;
- rs->dev[i].rdev.sb_loaded = 0;
+ md_rdev_clear(&rs->dev[i].rdev);
if (rs->dev[i].data_dev)
dm_put_device(rs->ti, rs->dev[i].data_dev);
}
@@ -606,7 +603,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) {
DMERR("Failed to read superblock of device at position %d",
rdev->raid_disk);
- set_bit(Faulty, &rdev->flags);
+ md_error(rdev->mddev, rdev);
return -EINVAL;
}
@@ -617,16 +614,18 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
{
- struct md_rdev *r;
+ int i;
uint64_t failed_devices;
struct dm_raid_superblock *sb;
+ struct raid_set *rs = container_of(mddev, struct raid_set, md);
sb = page_address(rdev->sb_page);
failed_devices = le64_to_cpu(sb->failed_devices);
- rdev_for_each(r, mddev)
- if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags))
- failed_devices |= (1ULL << r->raid_disk);
+ for (i = 0; i < mddev->raid_disks; i++)
+ if (!rs->dev[i].data_dev ||
+ test_bit(Faulty, &(rs->dev[i].rdev.flags)))
+ failed_devices |= (1ULL << i);
memset(sb, 0, sizeof(*sb));
@@ -1252,12 +1251,13 @@ static void raid_resume(struct dm_target *ti)
{
struct raid_set *rs = ti->private;
+ set_bit(MD_CHANGE_DEVS, &rs->md.flags);
if (!rs->bitmap_loaded) {
bitmap_load(&rs->md);
rs->bitmap_loaded = 1;
- } else
- md_wakeup_thread(rs->md.thread);
+ }
+ clear_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
mddev_resume(&rs->md);
}
View
370 drivers/md/md.c
@@ -402,6 +402,7 @@ void mddev_resume(struct mddev *mddev)
wake_up(&mddev->sb_wait);
mddev->pers->quiesce(mddev, 0);
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
}
@@ -452,7 +453,7 @@ static void submit_flushes(struct work_struct *ws)
atomic_inc(&rdev->nr_pending);
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- bi = bio_alloc_mddev(GFP_KERNEL, 0, mddev);
+ bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
bi->bi_end_io = md_end_flush;
bi->bi_private = rdev;
bi->bi_bdev = rdev->bdev;
@@ -607,6 +608,7 @@ void mddev_init(struct mddev *mddev)