Skip to content

Commit

Permalink
Fix unlink bug, which was partially caused by fragmentation simulation
Browse files Browse the repository at this point in the history
and partially caused by dirty buffers that were overwriting the contents
of data blocks on digest (the allocation of freed blocks happens before
sync_all_buffers in handle_digest_request, which overwrites file
contents).

There's still a problem with the fcache (segfault) and with inode allocation
(inums are never reused), but I'll fix those later.
  • Loading branch information
iangneal committed Jan 28, 2019
1 parent a345011 commit dc60d68
Show file tree
Hide file tree
Showing 10 changed files with 121 additions and 299 deletions.
73 changes: 27 additions & 46 deletions kernfs/balloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#define SHARED_PARTITION (65536)
#define HASHTABLE_ALIGNMENT_HACK
//#define NEVER_REUSE_BLOCKS

uint64_t size_of_bitmap(mlfs_fsblk_t nrblocks)
{
Expand Down Expand Up @@ -343,7 +344,7 @@ void balloc_init(uint8_t dev, struct super_block *_sb)
_sb->used_blocks = bitmap_weight((uint64_t *)_sb->s_blk_bitmap->bitmap,
_sb->ondisk->ndatablocks);

mlfs_info("[dev %u] used blocks %lu\n", dev, _sb->used_blocks);
mlfs_debug("[dev %u] used blocks %lu\n", dev, _sb->used_blocks);
#if 0
{
mlfs_fsblk_t a;
Expand Down Expand Up @@ -647,6 +648,11 @@ int mlfs_free_blocks_node(struct super_block *sb, unsigned long blocknr,
int new_node_used = 0;
int ret;

#ifdef NEVER_REUSE_BLOCKS
return 0;
#endif


if (num <= 0) {
mlfs_info("ERROR: free %d\n", num);
return -EINVAL;
Expand Down Expand Up @@ -756,6 +762,8 @@ static unsigned long mlfs_alloc_blocks_in_free_list(struct super_block *sb,
curr = container_of(temp, struct mlfs_range_node, node);

curr_blocks = curr->range_high - curr->range_low + 1;
mlfs_debug("low = %llu, high = %llu, num = %llu\n", curr->range_low,
curr->range_high, curr_blocks);

if (num_blocks >= curr_blocks) {
if (btype > 0 && num_blocks > curr_blocks) {
Expand Down Expand Up @@ -791,9 +799,26 @@ static unsigned long mlfs_alloc_blocks_in_free_list(struct super_block *sb,

free_list->num_free_blocks -= num_blocks;


if (found == 0)
return -ENOSPC;

#ifdef NEVER_REUSE_BLOCKS
static unsigned long last_blk_num = 0;
last_blk_num = max(last_blk_num, *new_blocknr);
*new_blocknr = last_blk_num;

last_blk_num += num_blocks;
#endif

#if 0
for (unsigned long i = 0; i < num_blocks; ++i) {
ensure_block_is_clear(sb->s_bdev, (*new_blocknr) + i);
}
#elif 1
sync_all_buffers(sb->s_bdev);
#endif

return num_blocks;
}

Expand Down Expand Up @@ -903,49 +928,7 @@ int mlfs_new_blocks(struct super_block *sb, unsigned long *blocknr,
}
}

#ifdef SIMULATE_FRAGMENTATION
#if 0
static int layout_score_percent = 0;
static bool init_layout_score = false;
static int skip = 0;
static int cur = 0;
if (!init_layout_score) {
const char *mlfs_layout_score = getenv("MLFS_LAYOUT_SCORE");
if (NULL != mlfs_layout_score) {
layout_score_percent = atoi(mlfs_layout_score);
} else {
layout_score_percent = 100;
}
init_layout_score = true;
printf("Simulating fragmentation: '%s' => layout score of %f\n",
mlfs_layout_score, layout_score_percent / 100.0);

skip = layout_score_percent == 100 ? 0 : 100 / (100 - layout_score_percent);
printf("\tSkip size = %d\n", skip);
#ifdef HASHTABLE_ALIGNMENT_HACK
#ifdef HASHTABLE
unsigned long dummy_block;
int junk_block = mlfs_alloc_blocks_in_free_list(sb, free_list, btype,
1, &dummy_block);
#endif
#endif
}

int ncontiguous = skip ? min(skip - cur, num_blocks) : num_blocks;
ret_blocks = mlfs_alloc_blocks_in_free_list(sb, free_list, btype,
ncontiguous, &new_blocknr);

cur += ncontiguous;

// junk block
if (skip && cur == skip) {
unsigned long dummy_block;
int junk_block = mlfs_alloc_blocks_in_free_list(sb, free_list, btype,
1, &dummy_block);
free_list->alloc_data_pages += junk_block;
cur = 0;
}
#else
#if defined(SIMULATE_FRAGMENTATION) && 0
static int layout_score_percent = 0;
static bool init_layout_score = false;
if (!init_layout_score) {
Expand Down Expand Up @@ -990,8 +973,6 @@ int mlfs_new_blocks(struct super_block *sb, unsigned long *blocknr,
}
}

#endif

#else
ret_blocks = mlfs_alloc_blocks_in_free_list(sb, free_list, btype,
num_blocks, &new_blocknr);
Expand Down
25 changes: 25 additions & 0 deletions kernfs/extents.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@

#define BUG_ON(x) mlfs_assert((x) == 0)

//#define ZERO_FREED_BLOCKS

pthread_mutex_t block_bitmap_mutex;

static struct inode *__buffer_search(struct rb_root *root,
Expand Down Expand Up @@ -130,6 +132,9 @@ int mlfs_ext_alloc_blocks(handle_t *handle, struct inode *inode,
ret = mlfs_new_blocks(get_inode_sb(handle->dev, inode), blockp,
*count, 0, 0, a_type, goal);

mlfs_debug("[dev %d] [inum %d] ret = %d, pblk = %llu, count = %lu\n",
handle->dev, inode->inum, ret, *blockp, *count);

if (ret > 0) {
//mlfs_assert(*blockp >= disk_sb[handle->dev].datablock_start);
*count = ret;
Expand Down Expand Up @@ -216,6 +221,7 @@ static mlfs_fsblk_t mlfs_new_data_blocks(handle_t *handle,
mlfs_debug("[dev %u] used blocks %d\n", inode->dev,
bitmap_weight((uint64_t *)inode->i_sb[handle->dev]->s_blk_bitmap->bitmap,
inode->i_sb[handle->dev]->ondisk->ndatablocks));
mlfs_debug("DATA alloc: %llu (%lu)\n", block, *count);
#endif

return block;
Expand All @@ -232,10 +238,25 @@ mlfs_fsblk_t mlfs_new_meta_blocks(handle_t *handle,
flags |= MLFS_GET_BLOCKS_CREATE_META;

*errp = mlfs_ext_alloc_blocks(handle, inode, goal, flags, &block, count);
#ifdef ZERO_FREED_BLOCKS
char zero_buf[g_block_size_bytes];
memset(zero_buf, 0, g_block_size_bytes);
for (mlfs_fsblk_t i = 0; i < *count; ++i) {
mlfs_debug("Zero: %lu\n", block + i);
struct buffer_head *bh = bh_get_sync_IO(handle->dev,
block + i, BH_NO_DATA_ALLOC);
bh->b_data = zero_buf;
bh->b_size = g_block_size_bytes;
bh->b_offset = 0;
mlfs_write(bh);
}
#endif
#ifdef KERNFS
mlfs_debug("[dev %u] used blocks %d\n", inode->dev,
bitmap_weight((uint64_t *)inode->i_sb[handle->dev]->s_blk_bitmap->bitmap,
inode->i_sb[handle->dev]->ondisk->ndatablocks));

mlfs_debug("META alloc: %llu (%lu)\n", block, *count);
#endif

return block;
Expand All @@ -252,6 +273,7 @@ void mlfs_free_blocks(handle_t *handle, struct inode *inode,
UNUSED(fake);

#ifdef BALLOC
mlfs_debug("freeing %llu (%d)\n", block, count);
ret = mlfs_free_blocks_node(get_inode_sb(handle->dev, inode),
block, count, 0, 0);
mlfs_assert(ret == 0);
Expand Down Expand Up @@ -409,6 +431,7 @@ static struct buffer_head *read_extent_tree_block(handle_t *handle,
uint64_t tsc_begin = asm_rdtscp();
#endif
bh = fs_bread(handle->dev, pblk, &err);
//mlfs_info("inode inum=%llu has extent block %llu\n", inode->inum, pblk);
#ifdef STORAGE_PERF
g_perf_stats.path_storage_tsc += asm_rdtscp() - tsc_begin;
g_perf_stats.path_storage_nr++;
Expand Down Expand Up @@ -2238,6 +2261,8 @@ static int mlfs_ext_rm_leaf(handle_t *handle, struct inode *inode,
if (ex == EXT_FIRST_EXTENT(eh))
correct_index = 1;

mlfs_debug("[dev %d] [inum %d] truncate from %u to %u\n",
handle->dev, inode->inum, a, b);
err = mlfs_remove_blocks(handle, inode, ex, a, b);
if (err)
goto out;
Expand Down
5 changes: 4 additions & 1 deletion kernfs/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1047,7 +1047,8 @@ int digest_unlink(uint8_t from_dev, uint8_t to_dev, uint32_t inum)
handle_t handle = {.dev = to_dev};
mlfs_lblk_t end = (inode->size) >> g_block_size_shift;

ret = mlfs_ext_truncate(&handle, inode, 0, end == 0 ? end : end - 1);
//ret = mlfs_ext_truncate(&handle, inode, 0, end == 0 ? end : end - 1);
ret = mlfs_ext_truncate(&handle, inode, 0, end);
mlfs_assert(!ret);
}
} else if (inode->itype == T_DIR) {
Expand Down Expand Up @@ -1736,6 +1737,8 @@ static int persist_dirty_objects_nvm(void)

if (ip->itype == T_DIR)
persist_dirty_dirent_block(ip);

mlfs_debug("[dev %d] write dirty inode complete\n", ip->dev);
}

// save block allocation bitmap
Expand Down
13 changes: 11 additions & 2 deletions libfs/src/filesystem/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,8 @@ int read_ondisk_inode(uint8_t dev, uint32_t inum, struct dinode *dip)
mlfs_free(bh->b_data);
}

bh_release(bh);

return 0;
}

Expand Down Expand Up @@ -704,6 +706,7 @@ int idealloc(struct inode *inode)
struct inode *_inode;
lru_node_t *l, *tmp;

mlfs_assert(inode);
mlfs_assert(inode->i_ref < 2);

if (inode->i_ref == 1 &&
Expand All @@ -728,7 +731,10 @@ int idealloc(struct inode *inode)
inode->itype = 0;

/* delete inode data (log) pointers */
fcache_del_all(inode);
//printf("fcache del?\n");
//fcache_del_all(inode);
//inode->fcache_hash = kh_init(fcache);
//printf("fcache del!\n");

pthread_spin_destroy(&inode->de_cache_spinlock);
pthread_mutex_destroy(&inode->i_mutex);
Expand Down Expand Up @@ -893,6 +899,7 @@ int bmap(struct inode *ip, struct bmap_request *bmap_req)
bmap_req->blk_count_found = ret;
bmap_req->dev = g_root_dev;
bmap_req->block_no = map.m_pblk;
mlfs_debug("physical block: %llu -> %llu\n", map.m_lblk, map.m_pblk);

if (ret == bmap_req->blk_count) {
mlfs_debug("[dev %d] Get all offset %lx: blockno %lx from NVM\n",
Expand Down Expand Up @@ -1228,6 +1235,7 @@ int do_unaligned_read(struct inode *ip, uint8_t *dst, offset_t off, uint32_t io_
}
// continue read either patched or already complete log
bh = bh_get_sync_IO(g_fs_log->dev, block_no, BH_NO_DATA_ALLOC);
mlfs_debug("physical block (log): %llu, %llu bytes\n", block_no, io_size);
bh->b_offset = off - off_aligned;
bh->b_data = dst;
bh->b_size = io_size;
Expand Down Expand Up @@ -1274,6 +1282,7 @@ int do_unaligned_read(struct inode *ip, uint8_t *dst, offset_t off, uint32_t io_
bh->b_offset = off - off_aligned;
bh->b_data = dst;
bh->b_size = io_size;
mlfs_debug("shared io size: %llu\n", io_size);

bh_submit_read_sync_IO(bh);
bh_release(bh);
Expand Down Expand Up @@ -1428,7 +1437,7 @@ int do_aligned_read(struct inode *ip, uint8_t *dst, offset_t off, uint32_t io_si
mlfs_write(bh);
bh_release(bh);
_fcache_block->start_offset = 0;
mlfs_info("patch log %lu with start_offset %u\n", block_no, fc_off);
mlfs_debug("patch log %lu with start_offset %u\n", block_no, fc_off);
}
}
}
Expand Down
Loading

0 comments on commit dc60d68

Please sign in to comment.