Skip to content

Commit

Permalink
Fix unlink bug, which was partially caused by fragmentation simulation
Browse files Browse the repository at this point in the history
and partially caused by dirty buffers that were overwriting the contents
of data blocks on digest (the allocation of freed blocks happens before
sync_all_buffers in handle_digest_request, which overwrites file
contents).

There's still a problem with the fcache (segfault) and with inode allocation
(inums are never reused), but I'll fix those later.
  • Loading branch information
iangneal committed Jan 28, 2019
1 parent a345011 commit dc60d68
Show file tree
Hide file tree
Showing 10 changed files with 121 additions and 299 deletions.
73 changes: 27 additions & 46 deletions kernfs/balloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#define SHARED_PARTITION (65536)
#define HASHTABLE_ALIGNMENT_HACK
//#define NEVER_REUSE_BLOCKS

uint64_t size_of_bitmap(mlfs_fsblk_t nrblocks)
{
Expand Down Expand Up @@ -343,7 +344,7 @@ void balloc_init(uint8_t dev, struct super_block *_sb)
_sb->used_blocks = bitmap_weight((uint64_t *)_sb->s_blk_bitmap->bitmap,
_sb->ondisk->ndatablocks);

mlfs_info("[dev %u] used blocks %lu\n", dev, _sb->used_blocks);
mlfs_debug("[dev %u] used blocks %lu\n", dev, _sb->used_blocks);
#if 0
{
mlfs_fsblk_t a;
Expand Down Expand Up @@ -647,6 +648,11 @@ int mlfs_free_blocks_node(struct super_block *sb, unsigned long blocknr,
int new_node_used = 0;
int ret;

#ifdef NEVER_REUSE_BLOCKS
return 0;
#endif


if (num <= 0) {
mlfs_info("ERROR: free %d\n", num);
return -EINVAL;
Expand Down Expand Up @@ -756,6 +762,8 @@ static unsigned long mlfs_alloc_blocks_in_free_list(struct super_block *sb,
curr = container_of(temp, struct mlfs_range_node, node);

curr_blocks = curr->range_high - curr->range_low + 1;
mlfs_debug("low = %llu, high = %llu, num = %llu\n", curr->range_low,
curr->range_high, curr_blocks);

if (num_blocks >= curr_blocks) {
if (btype > 0 && num_blocks > curr_blocks) {
Expand Down Expand Up @@ -791,9 +799,26 @@ static unsigned long mlfs_alloc_blocks_in_free_list(struct super_block *sb,

free_list->num_free_blocks -= num_blocks;


if (found == 0)
return -ENOSPC;

#ifdef NEVER_REUSE_BLOCKS
static unsigned long last_blk_num = 0;
last_blk_num = max(last_blk_num, *new_blocknr);
*new_blocknr = last_blk_num;

last_blk_num += num_blocks;
#endif

#if 0
for (unsigned long i = 0; i < num_blocks; ++i) {
ensure_block_is_clear(sb->s_bdev, (*new_blocknr) + i);
}
#elif 1
sync_all_buffers(sb->s_bdev);
#endif

return num_blocks;
}

Expand Down Expand Up @@ -903,49 +928,7 @@ int mlfs_new_blocks(struct super_block *sb, unsigned long *blocknr,
}
}

#ifdef SIMULATE_FRAGMENTATION
#if 0
static int layout_score_percent = 0;
static bool init_layout_score = false;
static int skip = 0;
static int cur = 0;
if (!init_layout_score) {
const char *mlfs_layout_score = getenv("MLFS_LAYOUT_SCORE");
if (NULL != mlfs_layout_score) {
layout_score_percent = atoi(mlfs_layout_score);
} else {
layout_score_percent = 100;
}
init_layout_score = true;
printf("Simulating fragmentation: '%s' => layout score of %f\n",
mlfs_layout_score, layout_score_percent / 100.0);

skip = layout_score_percent == 100 ? 0 : 100 / (100 - layout_score_percent);
printf("\tSkip size = %d\n", skip);
#ifdef HASHTABLE_ALIGNMENT_HACK
#ifdef HASHTABLE
unsigned long dummy_block;
int junk_block = mlfs_alloc_blocks_in_free_list(sb, free_list, btype,
1, &dummy_block);
#endif
#endif
}

int ncontiguous = skip ? min(skip - cur, num_blocks) : num_blocks;
ret_blocks = mlfs_alloc_blocks_in_free_list(sb, free_list, btype,
ncontiguous, &new_blocknr);

cur += ncontiguous;

// junk block
if (skip && cur == skip) {
unsigned long dummy_block;
int junk_block = mlfs_alloc_blocks_in_free_list(sb, free_list, btype,
1, &dummy_block);
free_list->alloc_data_pages += junk_block;
cur = 0;
}
#else
#if defined(SIMULATE_FRAGMENTATION) && 0
static int layout_score_percent = 0;
static bool init_layout_score = false;
if (!init_layout_score) {
Expand Down Expand Up @@ -990,8 +973,6 @@ int mlfs_new_blocks(struct super_block *sb, unsigned long *blocknr,
}
}

#endif

#else
ret_blocks = mlfs_alloc_blocks_in_free_list(sb, free_list, btype,
num_blocks, &new_blocknr);
Expand Down
25 changes: 25 additions & 0 deletions kernfs/extents.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@

#define BUG_ON(x) mlfs_assert((x) == 0)

//#define ZERO_FREED_BLOCKS

pthread_mutex_t block_bitmap_mutex;

static struct inode *__buffer_search(struct rb_root *root,
Expand Down Expand Up @@ -130,6 +132,9 @@ int mlfs_ext_alloc_blocks(handle_t *handle, struct inode *inode,
ret = mlfs_new_blocks(get_inode_sb(handle->dev, inode), blockp,
*count, 0, 0, a_type, goal);

mlfs_debug("[dev %d] [inum %d] ret = %d, pblk = %llu, count = %lu\n",
handle->dev, inode->inum, ret, *blockp, *count);

if (ret > 0) {
//mlfs_assert(*blockp >= disk_sb[handle->dev].datablock_start);
*count = ret;
Expand Down Expand Up @@ -216,6 +221,7 @@ static mlfs_fsblk_t mlfs_new_data_blocks(handle_t *handle,
mlfs_debug("[dev %u] used blocks %d\n", inode->dev,
bitmap_weight((uint64_t *)inode->i_sb[handle->dev]->s_blk_bitmap->bitmap,
inode->i_sb[handle->dev]->ondisk->ndatablocks));
mlfs_debug("DATA alloc: %llu (%lu)\n", block, *count);
#endif

return block;
Expand All @@ -232,10 +238,25 @@ mlfs_fsblk_t mlfs_new_meta_blocks(handle_t *handle,
flags |= MLFS_GET_BLOCKS_CREATE_META;

*errp = mlfs_ext_alloc_blocks(handle, inode, goal, flags, &block, count);
#ifdef ZERO_FREED_BLOCKS
char zero_buf[g_block_size_bytes];
memset(zero_buf, 0, g_block_size_bytes);
for (mlfs_fsblk_t i = 0; i < *count; ++i) {
mlfs_debug("Zero: %lu\n", block + i);
struct buffer_head *bh = bh_get_sync_IO(handle->dev,
block + i, BH_NO_DATA_ALLOC);
bh->b_data = zero_buf;
bh->b_size = g_block_size_bytes;
bh->b_offset = 0;
mlfs_write(bh);
}
#endif
#ifdef KERNFS
mlfs_debug("[dev %u] used blocks %d\n", inode->dev,
bitmap_weight((uint64_t *)inode->i_sb[handle->dev]->s_blk_bitmap->bitmap,
inode->i_sb[handle->dev]->ondisk->ndatablocks));

mlfs_debug("META alloc: %llu (%lu)\n", block, *count);
#endif

return block;
Expand All @@ -252,6 +273,7 @@ void mlfs_free_blocks(handle_t *handle, struct inode *inode,
UNUSED(fake);

#ifdef BALLOC
mlfs_debug("freeing %llu (%d)\n", block, count);
ret = mlfs_free_blocks_node(get_inode_sb(handle->dev, inode),
block, count, 0, 0);
mlfs_assert(ret == 0);
Expand Down Expand Up @@ -409,6 +431,7 @@ static struct buffer_head *read_extent_tree_block(handle_t *handle,
uint64_t tsc_begin = asm_rdtscp();
#endif
bh = fs_bread(handle->dev, pblk, &err);
//mlfs_info("inode inum=%llu has extent block %llu\n", inode->inum, pblk);
#ifdef STORAGE_PERF
g_perf_stats.path_storage_tsc += asm_rdtscp() - tsc_begin;
g_perf_stats.path_storage_nr++;
Expand Down Expand Up @@ -2238,6 +2261,8 @@ static int mlfs_ext_rm_leaf(handle_t *handle, struct inode *inode,
if (ex == EXT_FIRST_EXTENT(eh))
correct_index = 1;

mlfs_debug("[dev %d] [inum %d] truncate from %u to %u\n",
handle->dev, inode->inum, a, b);
err = mlfs_remove_blocks(handle, inode, ex, a, b);
if (err)
goto out;
Expand Down
5 changes: 4 additions & 1 deletion kernfs/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1047,7 +1047,8 @@ int digest_unlink(uint8_t from_dev, uint8_t to_dev, uint32_t inum)
handle_t handle = {.dev = to_dev};
mlfs_lblk_t end = (inode->size) >> g_block_size_shift;

ret = mlfs_ext_truncate(&handle, inode, 0, end == 0 ? end : end - 1);
//ret = mlfs_ext_truncate(&handle, inode, 0, end == 0 ? end : end - 1);
ret = mlfs_ext_truncate(&handle, inode, 0, end);
mlfs_assert(!ret);
}
} else if (inode->itype == T_DIR) {
Expand Down Expand Up @@ -1736,6 +1737,8 @@ static int persist_dirty_objects_nvm(void)

if (ip->itype == T_DIR)
persist_dirty_dirent_block(ip);

mlfs_debug("[dev %d] write dirty inode complete\n", ip->dev);
}

// save block allocation bitmap
Expand Down
13 changes: 11 additions & 2 deletions libfs/src/filesystem/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,8 @@ int read_ondisk_inode(uint8_t dev, uint32_t inum, struct dinode *dip)
mlfs_free(bh->b_data);
}

bh_release(bh);

return 0;
}

Expand Down Expand Up @@ -704,6 +706,7 @@ int idealloc(struct inode *inode)
struct inode *_inode;
lru_node_t *l, *tmp;

mlfs_assert(inode);
mlfs_assert(inode->i_ref < 2);

if (inode->i_ref == 1 &&
Expand All @@ -728,7 +731,10 @@ int idealloc(struct inode *inode)
inode->itype = 0;

/* delete inode data (log) pointers */
fcache_del_all(inode);
//printf("fcache del?\n");
//fcache_del_all(inode);
//inode->fcache_hash = kh_init(fcache);
//printf("fcache del!\n");

pthread_spin_destroy(&inode->de_cache_spinlock);
pthread_mutex_destroy(&inode->i_mutex);
Expand Down Expand Up @@ -893,6 +899,7 @@ int bmap(struct inode *ip, struct bmap_request *bmap_req)
bmap_req->blk_count_found = ret;
bmap_req->dev = g_root_dev;
bmap_req->block_no = map.m_pblk;
mlfs_debug("physical block: %llu -> %llu\n", map.m_lblk, map.m_pblk);

if (ret == bmap_req->blk_count) {
mlfs_debug("[dev %d] Get all offset %lx: blockno %lx from NVM\n",
Expand Down Expand Up @@ -1228,6 +1235,7 @@ int do_unaligned_read(struct inode *ip, uint8_t *dst, offset_t off, uint32_t io_
}
// continue read either patched or already complete log
bh = bh_get_sync_IO(g_fs_log->dev, block_no, BH_NO_DATA_ALLOC);
mlfs_debug("physical block (log): %llu, %llu bytes\n", block_no, io_size);
bh->b_offset = off - off_aligned;
bh->b_data = dst;
bh->b_size = io_size;
Expand Down Expand Up @@ -1274,6 +1282,7 @@ int do_unaligned_read(struct inode *ip, uint8_t *dst, offset_t off, uint32_t io_
bh->b_offset = off - off_aligned;
bh->b_data = dst;
bh->b_size = io_size;
mlfs_debug("shared io size: %llu\n", io_size);

bh_submit_read_sync_IO(bh);
bh_release(bh);
Expand Down Expand Up @@ -1428,7 +1437,7 @@ int do_aligned_read(struct inode *ip, uint8_t *dst, offset_t off, uint32_t io_si
mlfs_write(bh);
bh_release(bh);
_fcache_block->start_offset = 0;
mlfs_info("patch log %lu with start_offset %u\n", block_no, fc_off);
mlfs_debug("patch log %lu with start_offset %u\n", block_no, fc_off);
}
}
}
Expand Down
Loading

0 comments on commit dc60d68

Please sign in to comment.