Skip to content
Permalink
Browse files
ext4: main fast-commit commit path
This patch adds the main commit-path handlers for fast commits. The
overall patch can be divided into two inter-related parts:

(A) Metadata updates tracking

    This part consists of helper functions to track changes that need
    to be committed during a commit operation. These updates are
    maintained by Ext4 in different in-memory queues. Following are
    the APIs and their short description that are implemented in this
    patch:

    - ext4_fc_track_link/unlink/create() - Track link, unlink and
      create operations
    - ext4_fc_track_range() - Track changed logical block offsets of
      inodes
    - ext4_fc_track_inode() - Track inodes
    - ext4_fc_mark_ineligible() - Mark the file system as ineligible
      for fast commit
    - ext4_fc_start_update() / ext4_fc_stop_update() /
      ext4_fc_start_ineligible() / ext4_fc_stop_ineligible() - These
      functions are used to coordinate inode updates with
      commits.

(B) Main commit Path

    This part consists of functions to convert updates tracked in
    in-memory data structures into on-disk commits. Function
    ext4_fc_commit() is the main entry point to commit path.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
  • Loading branch information
harshadjs authored and intel-lab-lkp committed Sep 19, 2020
1 parent 398751e commit 2384cbfbcf98b789d426c39b458c52adbb36d4f9
Show file tree
Hide file tree
Showing 16 changed files with 1,882 additions and 27 deletions.
@@ -242,6 +242,7 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
if (IS_ERR(handle))
return PTR_ERR(handle);
ext4_fc_start_update(inode);

if ((type == ACL_TYPE_ACCESS) && acl) {
error = posix_acl_update_mode(inode, &mode, &acl);
@@ -259,6 +260,7 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
}
out_stop:
ext4_journal_stop(handle);
ext4_fc_stop_update(inode);
if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
return error;
@@ -1022,6 +1022,27 @@ struct ext4_inode_info {

struct list_head i_orphan; /* unlinked but open inodes */

/* Fast commit related info */

struct list_head i_fc_list; /*
* inodes that need fast commit
* protected by sbi->s_fc_lock.
*/

/* Start of lblk range that needs to be committed in this fast commit */
ext4_lblk_t i_fc_lblk_start;

/* Length of lblk range that needs to be committed in this fast commit */
ext4_lblk_t i_fc_lblk_len;

/* Number of ongoing updates on this inode */
atomic_t i_fc_updates;

/* Fast commit wait queue for this inode */
wait_queue_head_t i_fc_wait;

struct mutex i_fc_lock;

/*
* i_disksize keeps track of what the inode size is ON DISK, not
* in memory. During truncate, i_size is set to the new size by
@@ -1142,6 +1163,10 @@ struct ext4_inode_info {
#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */
#define EXT4_ERROR_FS 0x0002 /* Errors detected */
#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */
#define EXT4_FC_INELIGIBLE 0x0008 /* Fast commit ineligible */
#define EXT4_FC_COMMITTING 0x0010 /* File system undergoing a fast
* commit.
*/

/*
* Misc. filesystem flags
@@ -1614,6 +1639,25 @@ struct ext4_sb_info {
/* Record the errseq of the backing block device */
errseq_t s_bdev_wb_err;
spinlock_t s_bdev_wb_lock;

/* Ext4 fast commit stuff */
atomic_t s_fc_subtid;
atomic_t s_fc_ineligible_updates;
/*
* After commit starts, the main queue gets locked, and further
* updates get added to the staging queue.
*/
#define FC_Q_MAIN 0
#define FC_Q_STAGING 1
struct list_head s_fc_q[2]; /* Inodes staged for fast commit
* that have data changes in them.
*/
struct list_head s_fc_dentry_q[2]; /* directory entry updates */
int s_fc_bytes;
spinlock_t s_fc_lock;
struct buffer_head *s_fc_bh;
struct ext4_fc_stats s_fc_stats;
u64 s_fc_avg_commit_time;
};

static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1724,6 +1768,7 @@ enum {
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */
EXT4_STATE_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
EXT4_STATE_FC_COMMITTING, /* Fast commit ongoing */
};

#define EXT4_INODE_BIT_FNS(name, field, offset) \
@@ -2683,6 +2728,22 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
/* fast_commit.c */

/*
 * Fast commit interface. The tracking helpers record metadata updates that
 * a later fast commit must write out; ext4_fc_commit() is the main entry
 * point of the commit path.
 */
void ext4_fc_init(struct super_block *sb, journal_t *journal);
void ext4_fc_init_inode(struct inode *inode);
/*
 * Track changed logical block offsets of @inode.
 * NOTE(review): most visible callers pass the inclusive last block as @end
 * (e.g. ee_block + ee_len - 1); confirm against the definition.
 */
void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
ext4_lblk_t end);
/* Track unlink, link and create operations respectively. */
void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry);
void ext4_fc_track_link(struct inode *inode, struct dentry *dentry);
void ext4_fc_track_create(struct inode *inode, struct dentry *dentry);
/* Track @inode. */
void ext4_fc_track_inode(struct inode *inode);
/* Mark the file system as ineligible for fast commit; @reason says why. */
void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
/*
 * ext4_fc_start_ineligible()/ext4_fc_stop_ineligible() and
 * ext4_fc_start_update()/ext4_fc_stop_update() are used in pairs around an
 * operation to coordinate inode updates with commits.
 */
void ext4_fc_start_ineligible(struct super_block *sb, int reason);
void ext4_fc_stop_ineligible(struct super_block *sb);
void ext4_fc_start_update(struct inode *inode);
void ext4_fc_stop_update(struct inode *inode);
void ext4_fc_del(struct inode *inode);
/*
 * Main entry point of the fast commit path. Callers pass the journal and a
 * transaction id (e.g. the inode's i_sync_tid) and use the int result as
 * their return status.
 */
int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
int __init ext4_fc_init_dentry_cache(void);

/* mballoc.c */
extern const struct seq_operations ext4_mb_seq_groups_ops;
extern long ext4_mb_stats;
@@ -3723,6 +3723,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
out:
ext4_ext_show_leaf(inode, path);
ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
return err;
}

@@ -3794,6 +3795,7 @@ convert_initialized_extent(handle_t *handle, struct inode *inode,
if (*allocated > map->m_len)
*allocated = map->m_len;
map->m_len = *allocated;
ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
return 0;
}

@@ -4327,7 +4329,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
map->m_len = ar.len;
allocated = map->m_len;
ext4_ext_show_leaf(inode, path);

ext4_fc_track_range(inode, map->m_lblk, map->m_len);
out:
ext4_ext_drop_refs(path);
kfree(path);
@@ -4600,7 +4602,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
ret = ext4_mark_inode_dirty(handle, inode);
if (unlikely(ret))
goto out_handle;

ext4_fc_track_range(inode, offset >> inode->i_sb->s_blocksize_bits,
(offset + len - 1) >> inode->i_sb->s_blocksize_bits);
/* Zero out partial block at the edges of the range */
ret = ext4_zero_partial_blocks(handle, inode, offset, len);
if (ret >= 0)
@@ -4648,23 +4651,34 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
FALLOC_FL_INSERT_RANGE))
return -EOPNOTSUPP;
ext4_fc_track_range(inode, offset >> blkbits,
(offset + len - 1) >> blkbits);

if (mode & FALLOC_FL_PUNCH_HOLE)
return ext4_punch_hole(inode, offset, len);
ext4_fc_start_update(inode);

if (mode & FALLOC_FL_PUNCH_HOLE) {
ret = ext4_punch_hole(inode, offset, len);
goto exit;
}

ret = ext4_convert_inline_data(inode);
if (ret)
return ret;
goto exit;

if (mode & FALLOC_FL_COLLAPSE_RANGE)
return ext4_collapse_range(inode, offset, len);

if (mode & FALLOC_FL_INSERT_RANGE)
return ext4_insert_range(inode, offset, len);
if (mode & FALLOC_FL_COLLAPSE_RANGE) {
ret = ext4_collapse_range(inode, offset, len);
goto exit;
}

if (mode & FALLOC_FL_ZERO_RANGE)
return ext4_zero_range(file, offset, len, mode);
if (mode & FALLOC_FL_INSERT_RANGE) {
ret = ext4_insert_range(inode, offset, len);
goto exit;
}

if (mode & FALLOC_FL_ZERO_RANGE) {
ret = ext4_zero_range(file, offset, len, mode);
goto exit;
}
trace_ext4_fallocate_enter(inode, offset, len, mode);
lblk = offset >> blkbits;

@@ -4698,12 +4712,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
goto out;

if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
EXT4_I(inode)->i_sync_tid);
ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
EXT4_I(inode)->i_sync_tid);
}
out:
inode_unlock(inode);
trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
exit:
ext4_fc_stop_update(inode);
return ret;
}

@@ -5291,6 +5307,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ret = PTR_ERR(handle);
goto out_mmap;
}
ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);

down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode, 0);
@@ -5329,6 +5346,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)

out_stop:
ext4_journal_stop(handle);
ext4_fc_stop_ineligible(sb);
out_mmap:
up_write(&EXT4_I(inode)->i_mmap_sem);
out_mutex:
@@ -5429,6 +5447,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
ret = PTR_ERR(handle);
goto out_mmap;
}
ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);

/* Expand file to avoid data loss if there is error while shifting */
inode->i_size += len;
@@ -5503,6 +5522,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)

out_stop:
ext4_journal_stop(handle);
ext4_fc_stop_ineligible(sb);
out_mmap:
up_write(&EXT4_I(inode)->i_mmap_sem);
out_mutex:

0 comments on commit 2384cbf

Please sign in to comment.