Skip to content

Commit

Permalink
Merge tag 'vfs-6.10.misc' of git://git.kernel.org/pub/scm/linux/kerne…
Browse files Browse the repository at this point in the history
…l/git/vfs/vfs

Pull misc vfs updates from Christian Brauner:
 "This contains the usual miscellaneous features, cleanups, and fixes
  for vfs and individual fses.

  Features:

   - Free up FMODE_* bits. I've freed up bits 6, 7, 8, and 24. That
     means we now have six free FMODE_* bits in total (but bit #6
     already got used for FMODE_WRITE_RESTRICTED)

   - Add FOP_HUGE_PAGES flag (follow-up to FMODE_* cleanup)

   - Add fd_raw cleanup class so we can make use of automatic cleanup
     provided by CLASS(fd_raw, f)(fd) for O_PATH fds as well

   - Optimize seq_puts()

   - Simplify __seq_puts()

   - Add new anon_inode_getfile_fmode() api to allow specifying f_mode
     instead of open-coding it in multiple places

   - Annotate struct file_handle with __counted_by() and use
     struct_size()

   - Warn in get_file() whether f_count resurrection from zero is
     attempted (epoll/drm discussion)

   - Folio-sophize aio

   - Export the subvolume id in statx() for both btrfs and bcachefs

   - Relax linkat(AT_EMPTY_PATH) requirements

   - Add F_DUPFD_QUERY fcntl() allowing to compare two file descriptors
     for dup*() equality replacing kcmp()

  Cleanups:

   - Compile out swapfile inode checks when swap isn't enabled

   - Use (1 << n) notation for FMODE_* bitshifts for clarity

   - Remove redundant variable assignment in fs/direct-io

   - Cleanup uses of strncpy in orangefs

   - Speed up and cleanup writeback

   - Move fsparam_string_empty() helper into header since it's currently
     open-coded in multiple places

   - Add kernel-doc comments to proc_create_net_data_write()

   - Don't needlessly read dentry->d_flags twice

  Fixes:

   - Fix out-of-range warning in nilfs2

   - Fix ecryptfs overflow due to wrong encryption packet size
     calculation

   - Fix overly long line in xfs file_operations (follow-up to FMODE_*
     cleanup)

   - Don't raise FOP_BUFFER_{R,W}ASYNC for directories in xfs (follow-up
     to FMODE_* cleanup)

   - Don't call xfs_file_open from xfs_dir_open (follow-up to FMODE_*
     cleanup)

   - Fix stable offset api to prevent endless loops

   - Fix afs file server rotations

   - Prevent xattr node from overflowing the eraseblock in jffs2

   - Move fdinfo PTRACE_MODE_READ procfs check into the .permission()
     operation instead of .open() operation since this caused userspace
     regressions"

* tag 'vfs-6.10.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (39 commits)
  afs: Fix fileserver rotation getting stuck
  selftests: add F_DUPDFD_QUERY selftests
  fcntl: add F_DUPFD_QUERY fcntl()
  file: add fd_raw cleanup class
  fs: WARN when f_count resurrection is attempted
  seq_file: Simplify __seq_puts()
  seq_file: Optimize seq_puts()
  proc: Move fdinfo PTRACE_MODE_READ check into the inode .permission operation
  fs: Create anon_inode_getfile_fmode()
  xfs: don't call xfs_file_open from xfs_dir_open
  xfs: drop fop_flags for directories
  xfs: fix overly long line in the file_operations
  shmem: Fix shmem_rename2()
  libfs: Add simple_offset_rename() API
  libfs: Fix simple_offset_rename_exchange()
  jffs2: prevent xattr node from overflowing the eraseblock
  vfs, swap: compile out IS_SWAPFILE() on swapless configs
  vfs: relax linkat() AT_EMPTY_PATH - aka flink() - requirements
  fs/direct-io: remove redundant assignment to variable retval
  fs/dcache: Re-use value stored to dentry->d_flags instead of re-reading
  ...
  • Loading branch information
torvalds committed May 13, 2024
2 parents c117a43 + da0e01c commit 1b0aabc
Show file tree
Hide file tree
Showing 50 changed files with 438 additions and 249 deletions.
2 changes: 1 addition & 1 deletion block/bdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -912,7 +912,7 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
disk_unblock_events(disk);

bdev_file->f_flags |= O_LARGEFILE;
bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
bdev_file->f_mode |= FMODE_CAN_ODIRECT;
if (bdev_nowait(bdev))
bdev_file->f_mode |= FMODE_NOWAIT;
if (mode & BLK_OPEN_RESTRICT_WRITES)
Expand Down
1 change: 1 addition & 0 deletions block/fops.c
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,7 @@ const struct file_operations def_blk_fops = {
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = blkdev_fallocate,
.fop_flags = FOP_BUFFER_RASYNC,
};

static __init int blkdev_init(void)
Expand Down
2 changes: 1 addition & 1 deletion drivers/dax/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ static const struct file_operations dax_fops = {
.release = dax_release,
.get_unmapped_area = dax_get_unmapped_area,
.mmap = dax_mmap,
.mmap_supported_flags = MAP_SYNC,
.fop_flags = FOP_MMAP_SYNC,
};

static void dev_dax_cdev_del(void *cdev)
Expand Down
8 changes: 6 additions & 2 deletions fs/afs/rotate.c
Original file line number Diff line number Diff line change
Expand Up @@ -541,11 +541,13 @@ bool afs_select_fileserver(struct afs_operation *op)
test_bit(AFS_SE_EXCLUDED, &se->flags) ||
!test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
continue;
es = op->server_states->endpoint_state;
es = op->server_states[i].endpoint_state;
sal = es->addresses;

afs_get_address_preferences_rcu(op->net, sal);
for (j = 0; j < sal->nr_addrs; j++) {
if (es->failed_set & (1 << j))
continue;
if (!sal->addrs[j].peer)
continue;
if (sal->addrs[j].prio > best_prio) {
Expand Down Expand Up @@ -605,6 +607,8 @@ bool afs_select_fileserver(struct afs_operation *op)
best_prio = -1;
addr_index = 0;
for (i = 0; i < alist->nr_addrs; i++) {
if (!(set & (1 << i)))
continue;
if (alist->addrs[i].prio > best_prio) {
addr_index = i;
best_prio = alist->addrs[i].prio;
Expand Down Expand Up @@ -674,7 +678,7 @@ bool afs_select_fileserver(struct afs_operation *op)
for (i = 0; i < op->server_list->nr_servers; i++) {
struct afs_endpoint_state *estate;

estate = op->server_states->endpoint_state;
estate = op->server_states[i].endpoint_state;
error = READ_ONCE(estate->error);
if (error < 0)
afs_op_accumulate_error(op, error, estate->abort_code);
Expand Down
91 changes: 47 additions & 44 deletions fs/aio.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ struct kioctx {
unsigned long mmap_base;
unsigned long mmap_size;

struct page **ring_pages;
struct folio **ring_folios;
long nr_pages;

struct rcu_work free_rwork; /* see free_ioctx() */
Expand Down Expand Up @@ -160,7 +160,7 @@ struct kioctx {
spinlock_t completion_lock;
} ____cacheline_aligned_in_smp;

struct page *internal_pages[AIO_RING_PAGES];
struct folio *internal_folios[AIO_RING_PAGES];
struct file *aio_ring_file;

unsigned id;
Expand Down Expand Up @@ -334,19 +334,20 @@ static void aio_free_ring(struct kioctx *ctx)
put_aio_ring_file(ctx);

for (i = 0; i < ctx->nr_pages; i++) {
struct page *page;
pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
page_count(ctx->ring_pages[i]));
page = ctx->ring_pages[i];
if (!page)
struct folio *folio = ctx->ring_folios[i];

if (!folio)
continue;
ctx->ring_pages[i] = NULL;
put_page(page);

pr_debug("pid(%d) [%d] folio->count=%d\n", current->pid, i,
folio_ref_count(folio));
ctx->ring_folios[i] = NULL;
folio_put(folio);
}

if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
kfree(ctx->ring_pages);
ctx->ring_pages = NULL;
if (ctx->ring_folios && ctx->ring_folios != ctx->internal_folios) {
kfree(ctx->ring_folios);
ctx->ring_folios = NULL;
}
}

Expand Down Expand Up @@ -441,7 +442,7 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
idx = src->index;
if (idx < (pgoff_t)ctx->nr_pages) {
/* Make sure the old folio hasn't already been changed */
if (ctx->ring_pages[idx] != &src->page)
if (ctx->ring_folios[idx] != src)
rc = -EAGAIN;
} else
rc = -EINVAL;
Expand All @@ -465,8 +466,8 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
*/
spin_lock_irqsave(&ctx->completion_lock, flags);
folio_migrate_copy(dst, src);
BUG_ON(ctx->ring_pages[idx] != &src->page);
ctx->ring_pages[idx] = &dst->page;
BUG_ON(ctx->ring_folios[idx] != src);
ctx->ring_folios[idx] = dst;
spin_unlock_irqrestore(&ctx->completion_lock, flags);

/* The old folio is no longer accessible. */
Expand Down Expand Up @@ -516,28 +517,30 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
/ sizeof(struct io_event);

ctx->ring_pages = ctx->internal_pages;
ctx->ring_folios = ctx->internal_folios;
if (nr_pages > AIO_RING_PAGES) {
ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
GFP_KERNEL);
if (!ctx->ring_pages) {
ctx->ring_folios = kcalloc(nr_pages, sizeof(struct folio *),
GFP_KERNEL);
if (!ctx->ring_folios) {
put_aio_ring_file(ctx);
return -ENOMEM;
}
}

for (i = 0; i < nr_pages; i++) {
struct page *page;
page = find_or_create_page(file->f_mapping,
i, GFP_USER | __GFP_ZERO);
if (!page)
struct folio *folio;

folio = __filemap_get_folio(file->f_mapping, i,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
GFP_USER | __GFP_ZERO);
if (IS_ERR(folio))
break;
pr_debug("pid(%d) page[%d]->count=%d\n",
current->pid, i, page_count(page));
SetPageUptodate(page);
unlock_page(page);

ctx->ring_pages[i] = page;
pr_debug("pid(%d) [%d] folio->count=%d\n", current->pid, i,
folio_ref_count(folio));
folio_end_read(folio, true);

ctx->ring_folios[i] = folio;
}
ctx->nr_pages = i;

Expand Down Expand Up @@ -570,15 +573,15 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
ctx->user_id = ctx->mmap_base;
ctx->nr_events = nr_events; /* trusted copy */

ring = page_address(ctx->ring_pages[0]);
ring = folio_address(ctx->ring_folios[0]);
ring->nr = nr_events; /* user copy */
ring->id = ~0U;
ring->head = ring->tail = 0;
ring->magic = AIO_RING_MAGIC;
ring->compat_features = AIO_RING_COMPAT_FEATURES;
ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
ring->header_length = sizeof(struct aio_ring);
flush_dcache_page(ctx->ring_pages[0]);
flush_dcache_folio(ctx->ring_folios[0]);

return 0;
}
Expand Down Expand Up @@ -689,9 +692,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)

/* While kioctx setup is in progress,
* we are protected from page migration
* changes ring_pages by ->ring_lock.
* changes ring_folios by ->ring_lock.
*/
ring = page_address(ctx->ring_pages[0]);
ring = folio_address(ctx->ring_folios[0]);
ring->id = ctx->id;
return 0;
}
Expand Down Expand Up @@ -1033,7 +1036,7 @@ static void user_refill_reqs_available(struct kioctx *ctx)
* against ctx->completed_events below will make sure we do the
* safe/right thing.
*/
ring = page_address(ctx->ring_pages[0]);
ring = folio_address(ctx->ring_folios[0]);
head = ring->head;

refill_reqs_available(ctx, head, ctx->tail);
Expand Down Expand Up @@ -1145,12 +1148,12 @@ static void aio_complete(struct aio_kiocb *iocb)
if (++tail >= ctx->nr_events)
tail = 0;

ev_page = page_address(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
ev_page = folio_address(ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;

*event = iocb->ki_res;

flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
flush_dcache_folio(ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]);

pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
(void __user *)(unsigned long)iocb->ki_res.obj,
Expand All @@ -1163,10 +1166,10 @@ static void aio_complete(struct aio_kiocb *iocb)

ctx->tail = tail;

ring = page_address(ctx->ring_pages[0]);
ring = folio_address(ctx->ring_folios[0]);
head = ring->head;
ring->tail = tail;
flush_dcache_page(ctx->ring_pages[0]);
flush_dcache_folio(ctx->ring_folios[0]);

ctx->completed_events++;
if (ctx->completed_events > 1)
Expand Down Expand Up @@ -1238,8 +1241,8 @@ static long aio_read_events_ring(struct kioctx *ctx,
sched_annotate_sleep();
mutex_lock(&ctx->ring_lock);

/* Access to ->ring_pages here is protected by ctx->ring_lock. */
ring = page_address(ctx->ring_pages[0]);
/* Access to ->ring_folios here is protected by ctx->ring_lock. */
ring = folio_address(ctx->ring_folios[0]);
head = ring->head;
tail = ring->tail;

Expand All @@ -1260,20 +1263,20 @@ static long aio_read_events_ring(struct kioctx *ctx,
while (ret < nr) {
long avail;
struct io_event *ev;
struct page *page;
struct folio *folio;

avail = (head <= tail ? tail : ctx->nr_events) - head;
if (head == tail)
break;

pos = head + AIO_EVENTS_OFFSET;
page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
folio = ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE];
pos %= AIO_EVENTS_PER_PAGE;

avail = min(avail, nr - ret);
avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);

ev = page_address(page);
ev = folio_address(folio);
copy_ret = copy_to_user(event + ret, ev + pos,
sizeof(*ev) * avail);

Expand All @@ -1287,9 +1290,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
head %= ctx->nr_events;
}

ring = page_address(ctx->ring_pages[0]);
ring = folio_address(ctx->ring_folios[0]);
ring->head = head;
flush_dcache_page(ctx->ring_pages[0]);
flush_dcache_folio(ctx->ring_folios[0]);

pr_debug("%li h%u t%u\n", ret, head, tail);
out:
Expand Down
33 changes: 33 additions & 0 deletions fs/anon_inodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,38 @@ struct file *anon_inode_getfile(const char *name,
}
EXPORT_SYMBOL_GPL(anon_inode_getfile);

/**
* anon_inode_getfile_fmode - creates a new file instance by hooking it up to an
* anonymous inode, and a dentry that describe the "class"
* of the file
*
* @name: [in] name of the "class" of the new file
* @fops: [in] file operations for the new file
* @priv: [in] private data for the new file (will be file's private_data)
* @flags: [in] flags
* @f_mode: [in] fmode
*
* Creates a new file by hooking it on a single inode. This is useful for files
* that do not need to have a full-fledged inode in order to operate correctly.
* All the files created with anon_inode_getfile() will share a single inode,
* hence saving memory and avoiding code duplication for the file/inode/dentry
* setup. Allows setting the fmode. Returns the newly created file* or an error
* pointer.
*/
struct file *anon_inode_getfile_fmode(const char *name,
const struct file_operations *fops,
void *priv, int flags, fmode_t f_mode)
{
struct file *file;

file = __anon_inode_getfile(name, fops, priv, flags, NULL, false);
if (!IS_ERR(file))
file->f_mode |= f_mode;

return file;
}
EXPORT_SYMBOL_GPL(anon_inode_getfile_fmode);

/**
* anon_inode_create_getfile - Like anon_inode_getfile(), but creates a new
* !S_PRIVATE anon inode rather than reuse the
Expand Down Expand Up @@ -271,6 +303,7 @@ int anon_inode_create_getfd(const char *name, const struct file_operations *fops
return __anon_inode_getfd(name, fops, priv, flags, context_inode, true);
}


static int __init anon_inode_init(void)
{
anon_inode_mnt = kern_mount(&anon_inode_fs_type);
Expand Down
3 changes: 3 additions & 0 deletions fs/bcachefs/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,9 @@ static int bch2_getattr(struct mnt_idmap *idmap,
stat->blksize = block_bytes(c);
stat->blocks = inode->v.i_blocks;

stat->subvol = inode->ei_subvol;
stat->result_mask |= STATX_SUBVOL;

if (request_mask & STATX_BTIME) {
stat->result_mask |= STATX_BTIME;
stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
Expand Down
4 changes: 2 additions & 2 deletions fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -3719,8 +3719,7 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
{
int ret;

filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
FMODE_CAN_ODIRECT;
filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;

ret = fsverity_file_open(inode, filp);
if (ret)
Expand Down Expand Up @@ -3850,6 +3849,7 @@ const struct file_operations btrfs_file_operations = {
.compat_ioctl = btrfs_compat_ioctl,
#endif
.remap_file_range = btrfs_remap_file_range,
.fop_flags = FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC,
};

int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)
Expand Down
3 changes: 3 additions & 0 deletions fs/btrfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -8789,6 +8789,9 @@ static int btrfs_getattr(struct mnt_idmap *idmap,
generic_fillattr(idmap, request_mask, inode, stat);
stat->dev = BTRFS_I(inode)->root->anon_dev;

stat->subvol = BTRFS_I(inode)->root->root_key.objectid;
stat->result_mask |= STATX_SUBVOL;

spin_lock(&BTRFS_I(inode)->lock);
delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
inode_bytes = inode_get_bytes(inode);
Expand Down
2 changes: 1 addition & 1 deletion fs/dcache.c
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
flags &= ~DCACHE_ENTRY_TYPE;
WRITE_ONCE(dentry->d_flags, flags);
dentry->d_inode = NULL;
if (dentry->d_flags & DCACHE_LRU_LIST)
if (flags & DCACHE_LRU_LIST)
this_cpu_inc(nr_dentry_negative);
}

Expand Down
1 change: 0 additions & 1 deletion fs/direct-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1217,7 +1217,6 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
*/
inode_dio_begin(inode);

retval = 0;
sdio.blkbits = blkbits;
sdio.blkfactor = i_blkbits - blkbits;
sdio.block_in_file = offset >> blkbits;
Expand Down
Loading

0 comments on commit 1b0aabc

Please sign in to comment.