diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 591ca539e44494..df85d8dea37c9e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2158,6 +2158,86 @@ static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio, return btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0); } +int extract_ordered_extent(struct inode *inode, struct bio *bio, + loff_t file_offset) +{ + struct btrfs_ordered_extent *ordered; + struct extent_map *em = NULL, *em_new = NULL; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + u64 start = (u64)bio->bi_iter.bi_sector << SECTOR_SHIFT; + u64 len = bio->bi_iter.bi_size; + u64 end = start + len; + u64 ordered_end; + u64 pre, post; + int ret = 0; + + ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), file_offset); + if (WARN_ON_ONCE(!ordered)) + return -EIO; + + /* No need to split */ + if (ordered->disk_num_bytes == len) + goto out; + + /* We cannot split once end_bio'd ordered extent */ + if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes)) { + ret = -EINVAL; + goto out; + } + + /* We cannot split a compressed ordered extent */ + if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes)) { + ret = -EINVAL; + goto out; + } + + /* We cannot split a waited ordered extent */ + if (WARN_ON_ONCE(wq_has_sleeper(&ordered->wait))) { + ret = -EINVAL; + goto out; + } + + ordered_end = ordered->disk_bytenr + ordered->disk_num_bytes; + /* bio must be in one ordered extent */ + if (WARN_ON_ONCE(start < ordered->disk_bytenr || end > ordered_end)) { + ret = -EINVAL; + goto out; + } + + /* Checksum list should be empty */ + if (WARN_ON_ONCE(!list_empty(&ordered->list))) { + ret = -EINVAL; + goto out; + } + + pre = start - ordered->disk_bytenr; + post = ordered_end - end; + + btrfs_split_ordered_extent(ordered, pre, post); + + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, ordered->file_offset, len); + if (!em) { + read_unlock(&em_tree->lock); + ret = -EIO; + goto out; + } + read_unlock(&em_tree->lock); + + ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)); + em_new = create_io_em(BTRFS_I(inode), em->start + pre, len, + em->start + pre, em->block_start + pre, len, + len, len, BTRFS_COMPRESS_NONE, + BTRFS_ORDERED_REGULAR); + free_extent_map(em_new); + +out: + free_extent_map(em); + btrfs_put_ordered_extent(ordered); + + return ret; +} + /* * extent_io.c submission hook. This does the right thing for csum calculation * on write, or reading the csums from the tree before a read. @@ -2192,6 +2272,15 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio, if (btrfs_is_free_space_inode(BTRFS_I(inode))) metadata = BTRFS_WQ_ENDIO_FREE_SPACE; + if (bio_op(bio) == REQ_OP_ZONE_APPEND) { + struct page *page = bio_first_bvec_all(bio)->bv_page; + loff_t file_offset = page_offset(page); + + ret = extract_ordered_extent(inode, bio, file_offset); + if (ret) + goto out; + } + if (btrfs_op(bio) != BTRFS_MAP_WRITE) { ret = btrfs_bio_wq_end_io(fs_info, bio, metadata); if (ret) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 87bac9ecdf4c8d..35ef25e39561f1 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -943,6 +943,82 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start, } } +static void clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos, + u64 len) +{ + struct inode *inode = ordered->inode; + u64 file_offset = ordered->file_offset + pos; + u64 disk_bytenr = ordered->disk_bytenr + pos; + u64 num_bytes = len; + u64 disk_num_bytes = len; + int type; + unsigned long flags_masked = + ordered->flags & ~(1 << BTRFS_ORDERED_DIRECT); + int compress_type = ordered->compress_type; + unsigned long weight; + + weight = hweight_long(flags_masked); + WARN_ON_ONCE(weight > 1); + if (!weight) + type = 0; + else + type = __ffs(flags_masked); + + if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered->flags)) { + WARN_ON_ONCE(1); + btrfs_add_ordered_extent_compress(BTRFS_I(inode), file_offset, + disk_bytenr, num_bytes, + disk_num_bytes, type, + compress_type); + } else if (test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) { + btrfs_add_ordered_extent_dio(BTRFS_I(inode), file_offset, + disk_bytenr, num_bytes, + disk_num_bytes, type); + } else { + btrfs_add_ordered_extent(BTRFS_I(inode), file_offset, + disk_bytenr, num_bytes, disk_num_bytes, + type); + } +} + +void btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre, + u64 post) +{ + struct inode *inode = ordered->inode; + struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; + struct rb_node *node; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + + spin_lock_irq(&tree->lock); + /* Remove from tree once */ + node = &ordered->rb_node; + rb_erase(node, &tree->tree); + RB_CLEAR_NODE(node); + if (tree->last == node) + tree->last = NULL; + + ordered->file_offset += pre; + ordered->disk_bytenr += pre; + ordered->num_bytes -= (pre + post); + ordered->disk_num_bytes -= (pre + post); + ordered->bytes_left -= (pre + post); + + /* Re-insert the node */ + node = tree_insert(&tree->tree, ordered->file_offset, + &ordered->rb_node); + if (node) + btrfs_panic(fs_info, -EEXIST, + "zoned: inconsistency in ordered tree at offset %llu", + ordered->file_offset); + + spin_unlock_irq(&tree->lock); + + if (pre) + clone_ordered_extent(ordered, 0, pre); + if (post) + clone_ordered_extent(ordered, pre + ordered->disk_num_bytes, post); +} + int __init ordered_data_init(void) { btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent", diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index c3a2325e64a416..e346b03bd66af4 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -193,6 +193,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr, void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start, u64 end, struct extent_state **cached_state); +void btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre, + u64 post); int __init ordered_data_init(void); void __cold ordered_data_exit(void);