From 5fd0d9ca93d294e971761349b186a8a0acaaf0b8 Mon Sep 17 00:00:00 2001 From: yuming Date: Tue, 11 Nov 2025 19:34:33 +0800 Subject: [PATCH 1/7] =?UTF-8?q?fix(mm/vfs):=20=E4=BF=AE=E5=A4=8D=20writev?= =?UTF-8?q?=20=E6=97=A0=E6=B3=95=E6=AD=A3=E7=A1=AE=E5=A4=84=E7=90=86?= =?UTF-8?q?=E9=83=A8=E5=88=86=20iov=20=E4=B8=BA=E5=8F=97=E4=BF=9D=E6=8A=A4?= =?UTF-8?q?=E5=86=85=E5=AD=98=E7=A9=BA=E9=97=B4=E7=9A=84=E6=83=85=E5=86=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 引入 user_accessible_len() 以测量从给定地址 `address` 开始,能够被拷贝的最长连续字节长度(注:使用vma进行校验) - 使 IoVecs::gather 返回 Result 并仅聚合可以被读取的 `buf` 部分(注意,一旦碰到不可访问的 iov,后面的iov都会被抛弃) - 在 writev/pwritev 中传播新的 Result 以支持 gVisor 下的部分写入 --- kernel/src/filesystem/vfs/iov.rs | 64 ++++++++--- .../src/filesystem/vfs/syscall/sys_pwritev.rs | 2 +- .../src/filesystem/vfs/syscall/sys_writev.rs | 2 +- kernel/src/syscall/user_access.rs | 108 +++++++++++++++++- 4 files changed, 158 insertions(+), 18 deletions(-) diff --git a/kernel/src/filesystem/vfs/iov.rs b/kernel/src/filesystem/vfs/iov.rs index 771bc2e5b..5d095f1c2 100644 --- a/kernel/src/filesystem/vfs/iov.rs +++ b/kernel/src/filesystem/vfs/iov.rs @@ -1,7 +1,10 @@ use alloc::vec::Vec; use system_error::SystemError; -use crate::syscall::user_access::{UserBufferReader, UserBufferWriter}; +use crate::{ + mm::VirtAddr, + syscall::user_access::{user_accessible_len, UserBufferReader, UserBufferWriter}, +}; #[repr(C)] #[derive(Debug, Clone, Copy)] pub struct IoVec { @@ -73,24 +76,55 @@ impl IoVecs { /// This function reads data from each IoVec in sequence and combines them into /// a single contiguous buffer. /// - /// # Returns + /// **Returns:** /// - /// Returns a [`Vec`] containing all the data from the IoVecs. + /// Returns a [`Vec`] containing the data copied from the IoVecs. /// - /// # Examples + /// **To Be patient:** /// - /// ```rust - /// let iovecs = IoVecs::from_user(/* ... */)?; - /// let buffer = iovecs.gather(); - /// ``` - pub fn gather(&self) -> Vec { - let mut buf = Vec::new(); - for slice in self.0.iter() { - let buf_reader = UserBufferReader::new(slice.iov_base, slice.iov_len, true).unwrap(); - let slice = buf_reader.buffer::(0).unwrap(); - buf.extend_from_slice(slice); + /// If a buffer is only partially accessible, data is copied up to **the first + /// inaccessible byte** and the remaining iovecs are ignored. If no data can be + /// read at all, `Err(SystemError::EFAULT)` is returned. + pub fn gather(&self) -> Result, SystemError> { + let mut buf = Vec::with_capacity(self.total_len()); + + for iov in self.0.iter() { + // 检查从 iov_base 开始有多少 bytes 在 vma 内部且实际可以访问 + let accessible = + user_accessible_len(VirtAddr::new(iov.iov_base as usize), iov.iov_len, false); + + // log::debug!( + // "iov is {:?}. iov_len: {}; accessible len:{}", + // iov, + // iov.iov_len, + // accessible + // ); + + // 如果一个字节都不能访问 + if accessible == 0 { + if buf.is_empty() { + // log::error!( + // "The first iov is empty, returning EFAULT. iov shape: {:?}", + // iov + // ); + return Err(SystemError::EFAULT); + } + return Ok(buf); + } + + // 复制可访问的部分 + unsafe { + let src = core::slice::from_raw_parts(iov.iov_base as *const u8, accessible); + buf.extend_from_slice(src); + } + + // 如果没有读取完整个 iov,说明遇到了不可访问的区域 + if accessible < iov.iov_len { + return Ok(buf); + } } - return buf; + + Ok(buf) } /// Scatters the given data into the IoVecs. diff --git a/kernel/src/filesystem/vfs/syscall/sys_pwritev.rs b/kernel/src/filesystem/vfs/syscall/sys_pwritev.rs index 32269a2d9..8a6a7aefd 100644 --- a/kernel/src/filesystem/vfs/syscall/sys_pwritev.rs +++ b/kernel/src/filesystem/vfs/syscall/sys_pwritev.rs @@ -40,7 +40,7 @@ impl Syscall for SysPwriteVHandle { // 将用户态传入的指向用户态应用的数据结构重新在内核栈上构造 let iovecs = unsafe { IoVecs::from_user(iov, iov_count, false) }?; - let data = iovecs.gather(); + let data = iovecs.gather()?; do_pwritev(fd, &data, offset) } diff --git a/kernel/src/filesystem/vfs/syscall/sys_writev.rs b/kernel/src/filesystem/vfs/syscall/sys_writev.rs index beba0b320..bb2c9299e 100644 --- a/kernel/src/filesystem/vfs/syscall/sys_writev.rs +++ b/kernel/src/filesystem/vfs/syscall/sys_writev.rs @@ -45,7 +45,7 @@ impl Syscall for SysWriteVHandle { // IoVecs会进行用户态检验 let iovecs = unsafe { IoVecs::from_user(iov, count, false) }?; - let data = iovecs.gather(); + let data = iovecs.gather()?; do_write(fd, &data) } diff --git a/kernel/src/syscall/user_access.rs b/kernel/src/syscall/user_access.rs index ad816e62f..da48b886c 100644 --- a/kernel/src/syscall/user_access.rs +++ b/kernel/src/syscall/user_access.rs @@ -1,6 +1,7 @@ //! This file contains functions for kernel-space access to user-space data use core::{ + cmp::min, mem::size_of, num::NonZero, slice::{from_raw_parts, from_raw_parts_mut}, @@ -12,7 +13,7 @@ use defer::defer; use crate::{ arch::MMArch, - mm::{verify_area, MemoryManagementArch, VirtAddr}, + mm::{verify_area, MemoryManagementArch, VirtAddr, VmFlags}, process::ProcessManager, }; @@ -919,3 +920,108 @@ pub unsafe fn copy_to_user_protected(dest: VirtAddr, src: &[u8]) -> Result Err(SystemError::EFAULT), } } + +/// Compute the contiguous accessible length starting at `addr`. +/// +/// Returns the number of bytes that can be accessed before hitting an unmapped +/// page or a page that lacks the requested permissions. +pub fn user_accessible_len(addr: VirtAddr, size: usize, check_write: bool) -> usize { + // log::error!( + // "user_accessible_len(addr: {:?}, size:{:?}, check_write:{:?}", + // addr, + // size, + // check_write + // ); + if size == 0 || addr.is_null() { + return 0; + } + + // 获取当前进程的 VMA (可访问的地址空间) + let vm = match ProcessManager::current_pcb().basic().user_vm() { + Some(vm) => vm, + None => return 0, + }; + + let vma_read_guard = vm.read_irqsave(); + let mappings = &vma_read_guard.mappings; + + let mut checked = 0usize; + let mut current = addr; + + while checked < size { + // 判断当前地址是否落在一个有效 VMA 中 + let Some(vma) = mappings.contains(current) else { + break; + }; + + // 获取地址所在内存页的起始地址 和结束地址,以及访问权限标志 和文件后备长度 + let (region_start, region_end, vm_flags, file_backed_len) = { + let guard = vma.lock_irqsave(); + let region_start = guard.region().start().data(); + let region_end = guard.region().end().data(); + let vm_flags = *guard.vm_flags(); + let vma_size = region_end.saturating_sub(region_start); + + let file_backed_len = guard.vm_file().and_then(|file| { + let file_offset_pages = guard.file_page_offset().unwrap_or(0); + let file_offset_bytes = file_offset_pages.saturating_mul(MMArch::PAGE_SIZE); + let file_size = match file.metadata() { + Ok(md) if md.size > 0 => { + let capped = core::cmp::min(md.size as u128, usize::MAX as u128); + capped as usize + } + Ok(_) => 0, + Err(_) => return None, + }; + + let backed = file_size.saturating_sub(file_offset_bytes); + Some(core::cmp::min(backed, vma_size)) + }); + + (region_start, region_end, vm_flags, file_backed_len) + }; + + // 根据 vm_flags 判断是否具备访问权限 + let has_permission = if check_write { + vm_flags.contains(VmFlags::VM_WRITE) + } else { + vm_flags.contains(VmFlags::VM_READ) + }; + + if !has_permission { + break; + } + + let current_addr = current.data(); + let mut available = region_end.saturating_sub(current_addr); + + if let Some(backed_len) = file_backed_len { + let offset_in_vma = current_addr.saturating_sub(region_start); + let backed_available = backed_len.saturating_sub(offset_in_vma); + // Clamp to the range actually backed by the file to avoid walking into holes. + available = min(available, backed_available); + } + if available == 0 { + break; + } + + // 这里的 `step` 要区分两种情况 + // - 第一种情况:`available`(当前 VMA 剩余长度)已经覆盖了 `size - checked`,说明 + // 本次检查的剩余数据全部落在这个 VMA 内,`step` 直接等于 `size - checked`。 + // - 第二种情况:`available` 比 `size - checked` 小,意味着我们会在这个 VMA 的末尾停下, + // 需要等下一次循环再确认后续地址是否仍有 VMA 覆盖。 + // - 例如 (addr = 0x1, size = 10),若某个 VMA 只覆盖 [0x0, 0x5),则第一轮只能推进 4 个字节, + // 后续是否继续完全取决于下一个 VMA 是否与 0x5 处相接且具有相同访问权限。 + // 若下一轮 VMA 覆盖 [0x5, 0xf),虽然这块 VMA 可访问空间 available == 10 ,但是我们需要检查的部分就只剩 10 - 4 = 6 bytes。 + // 所以 `step` 选择为 size - checked + let step = min(available, size - checked); + checked += step; + + let Some(next) = current_addr.checked_add(step) else { + break; + }; + current = VirtAddr::new(next); + } + + checked +} From 9dc2604648b236809468d067b8747cf5696d831b Mon Sep 17 00:00:00 2001 From: yuming Date: Wed, 19 Nov 2025 17:15:55 +0800 Subject: [PATCH 2/7] =?UTF-8?q?docs(vfs/syscall):=20=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E7=B3=BB=E7=BB=9F=E5=AE=9E=E7=8E=B0=E5=88=86?= =?UTF-8?q?=E6=95=A3=E5=86=99=E5=85=A5=E7=9A=84=20TODO=20=E6=B3=A8?= =?UTF-8?q?=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 添加了对于 IoVecs 发起的分散写入的处理方式目前处理方式的注释。 - 添加了实现分散写入依赖于文件系统对于 IoVecs 写入支持的注释。 --- kernel/src/filesystem/vfs/syscall/sys_pwritev.rs | 9 +++++++-- kernel/src/filesystem/vfs/syscall/sys_writev.rs | 9 ++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/kernel/src/filesystem/vfs/syscall/sys_pwritev.rs b/kernel/src/filesystem/vfs/syscall/sys_pwritev.rs index 8a6a7aefd..537bb6dde 100644 --- a/kernel/src/filesystem/vfs/syscall/sys_pwritev.rs +++ b/kernel/src/filesystem/vfs/syscall/sys_pwritev.rs @@ -38,10 +38,15 @@ impl Syscall for SysPwriteVHandle { let iov_count = Self::iov_count(args); let offset = Self::offset(args); - // 将用户态传入的指向用户态应用的数据结构重新在内核栈上构造 + // 将用户态传入的数据结构 `IoVecs` 重新在内核上构造 let iovecs = unsafe { IoVecs::from_user(iov, iov_count, false) }?; let data = iovecs.gather()?; - + + // TODO: 支持零内核拷贝的分散写 (需要文件系统底层支持分散写) + // - 直接将传入的用户态 IoVec 使用 vma 做校验以后传入底层文件系统进行分散写,避免内核拷贝 + // - 实现路径(linux):wirtev --> vfs_writev --> do_iter_write --> do_loop_readv_writev/do_iter_readv_writev + // - 目前内核文件子系统尚未实现分散写功能,即无法直接使用用户态的 IoVec 进行写操作 + // - 目前先将用户态的 IoVec 聚合成一个连续的内核缓冲区 `data`,然后进行写操作,避免多次发起写操作的开销。 do_pwritev(fd, &data, offset) } diff --git a/kernel/src/filesystem/vfs/syscall/sys_writev.rs b/kernel/src/filesystem/vfs/syscall/sys_writev.rs index bb2c9299e..b95ba40eb 100644 --- a/kernel/src/filesystem/vfs/syscall/sys_writev.rs +++ b/kernel/src/filesystem/vfs/syscall/sys_writev.rs @@ -43,9 +43,16 @@ impl Syscall for SysWriteVHandle { let iov = Self::iov(args); let count = Self::count(args); - // IoVecs会进行用户态检验 + + // 将用户态传入的数据结构 `IoVecs` 重新在内核上构造 let iovecs = unsafe { IoVecs::from_user(iov, count, false) }?; let data = iovecs.gather()?; + + // TODO: 支持零内核拷贝的分散写 (需要文件系统底层支持分散写) + // - 直接将传入的用户态 IoVec 使用 vma 做校验以后传入底层文件系统进行分散写,避免内核拷贝 + // - 实现路径(linux):wirtev --> vfs_writev --> do_iter_write --> do_loop_readv_writev/do_iter_readv_writev + // - 目前内核文件子系统尚未实现分散写功能,即无法直接使用用户态的 IoVec 进行写操作 + // - 目前先将用户态的 IoVec 聚合成一个连续的内核缓冲区 `data`,然后进行写操作,避免多次发起写操作的开销。 do_write(fd, &data) } From af3cd0d61d7aff2b5fe560cc913fa65d38ad8896 Mon Sep 17 00:00:00 2001 From: yuming Date: Wed, 19 Nov 2025 23:00:22 +0800 Subject: [PATCH 3/7] =?UTF-8?q?feat(test):=20=E6=B7=BB=E5=8A=A0=20pwritev?= =?UTF-8?q?=20=E6=80=A7=E8=83=BD=E6=B5=8B=E8=AF=95=E7=A8=8B=E5=BA=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- user/apps/c_unitest/test_pwritev_perf.c | 177 ++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 user/apps/c_unitest/test_pwritev_perf.c diff --git a/user/apps/c_unitest/test_pwritev_perf.c b/user/apps/c_unitest/test_pwritev_perf.c new file mode 100644 index 000000000..28655dbf8 --- /dev/null +++ b/user/apps/c_unitest/test_pwritev_perf.c @@ -0,0 +1,177 @@ +// ============================================== +// +// 本文件用于测试系统调用 pwritev 在使用 +// 大量小块数据写入时的性能表现。 +// 重点测试 user_access_len() 函数的开销 +// +// ============================================== + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TEST_FILE "pwritev_test.dat" +#define NUM_IOV 1000 +#define SMALL_DATA_SIZE 64 +#define TOTAL_ITERATIONS 100 + +// 测试用的小数据块 +struct test_iovec { + struct iovec iov[NUM_IOV]; + char data[NUM_IOV][SMALL_DATA_SIZE]; +}; + +// 初始化测试数据 +void init_test_data(struct test_iovec *test_vec) { + for (int i = 0; i < NUM_IOV; i++) { + // 填充每个小块数据 + snprintf(test_vec->data[i], SMALL_DATA_SIZE, "Block_%04d:abcdefghijklmnopqrstuvwxyz", i); + test_vec->iov[i].iov_base = test_vec->data[i]; + test_vec->iov[i].iov_len = strlen(test_vec->data[i]); + } +} + +// 性能测试函数 +double test_pwritev_performance(int fd, struct test_iovec *test_vec, int iterations) { + struct timespec start, end; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (int i = 0; i < iterations; i++) { + ssize_t written = pwritev(fd, test_vec->iov, NUM_IOV, 0); + if (written == -1) { + perror("pwritev failed"); + exit(EXIT_FAILURE); + } + + // 计算总写入字节数 + size_t total_bytes = 0; + for (int j = 0; j < NUM_IOV; j++) { + total_bytes += test_vec->iov[j].iov_len; + } + + if (written != total_bytes) { + fprintf(stderr, "Partial write: expected %zu, got %zd\n", total_bytes, written); + exit(EXIT_FAILURE); + } + } + + clock_gettime(CLOCK_MONOTONIC, &end); + + double elapsed = (end.tv_sec - start.tv_sec) + + (end.tv_nsec - start.tv_nsec) / 1e9; + + return elapsed; +} + +// 对比测试:使用单独的 write 调用 +double test_individual_writes_performance(int fd, struct test_iovec *test_vec, int iterations) { + struct timespec start, end; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (int i = 0; i < iterations; i++) { + off_t offset = 0; + for (int j = 0; j < NUM_IOV; j++) { + ssize_t written = pwrite(fd, test_vec->iov[j].iov_base, + test_vec->iov[j].iov_len, offset); + if (written == -1) { + perror("pwrite failed"); + exit(EXIT_FAILURE); + } + if (written != test_vec->iov[j].iov_len) { + fprintf(stderr, "Partial write in individual test\n"); + exit(EXIT_FAILURE); + } + offset += written; + } + } + + clock_gettime(CLOCK_MONOTONIC, &end); + + double elapsed = (end.tv_sec - start.tv_sec) + + (end.tv_nsec - start.tv_nsec) / 1e9; + + return elapsed; +} + + +int main(void) { + struct test_iovec test_vec; + + printf("=== pwritev Performance Test ===\n"); + printf("IOV count: %d\n", NUM_IOV); + printf("Small data size: %d bytes\n", SMALL_DATA_SIZE); + printf("Iterations: %d\n", TOTAL_ITERATIONS); + printf("\n"); + + // 初始化测试数据 + init_test_data(&test_vec); + + // 计算总数据大小 + size_t total_data_size = 0; + for (int i = 0; i < NUM_IOV; i++) { + total_data_size += test_vec.iov[i].iov_len; + } + printf("Total data per pwritev call: %zu bytes\n", total_data_size); + printf("Total data to write: %zu KB\n", + (total_data_size * TOTAL_ITERATIONS) / 1024); + printf("\n"); + + // 创建测试文件 + int fd = open(TEST_FILE, O_CREAT | O_WRONLY | O_TRUNC, 0644); + if (fd == -1) { + perror("Failed to create test file"); + exit(EXIT_FAILURE); + } + + // 性能测试:pwritev + printf("Testing pwritev performance...\n"); + double pwritev_time = test_pwritev_performance(fd, &test_vec, TOTAL_ITERATIONS); + printf("pwritev total time: %.4f seconds\n", pwritev_time); + printf("pwritev average time per call: %.6f ms\n", + (pwritev_time * 1000) / TOTAL_ITERATIONS); + printf("pwritev throughput: %.2f MB/s\n", + (total_data_size * TOTAL_ITERATIONS) / (pwritev_time * 1024 * 1024)); + printf("\n"); + + // 重置文件位置 + if (ftruncate(fd, 0) == -1) { + perror("Failed to truncate file"); + close(fd); + exit(EXIT_FAILURE); + } + + // 性能测试:单独的 write 调用对比 + printf("Testing individual pwrite performance (baseline)...\n"); + double individual_time = test_individual_writes_performance(fd, &test_vec, TOTAL_ITERATIONS); + printf("Individual pwrite total time: %.4f seconds\n", individual_time); + printf("Individual pwrite average time per call: %.6f ms\n", + (individual_time * 1000) / TOTAL_ITERATIONS); + printf("Individual pwrite throughput: %.2f MB/s\n", + (total_data_size * TOTAL_ITERATIONS) / (individual_time * 1024 * 1024)); + printf("\n"); + + // 性能对比 + double speedup = individual_time / pwritev_time; + printf("Performance comparison:\n"); + printf("pwritev is %.2fx faster than individual writes\n", speedup); + printf("pwritev saves %.2f%% time\n", (1 - pwritev_time / individual_time) * 100); + printf("\n"); + + // 清理 + close(fd); + if (unlink(TEST_FILE) == -1) { + perror("Failed to remove test file"); + } + + printf("test_pwritev_perf ok\n"); + return 0; +} \ No newline at end of file From 8fb3f6d12d4f52388f33377d529a7e75fa1666ef Mon Sep 17 00:00:00 2001 From: yuming Date: Wed, 19 Nov 2025 23:16:40 +0800 Subject: [PATCH 4/7] =?UTF-8?q?style:=20=E6=A0=BC=E5=BC=8F=E5=8C=96?= =?UTF-8?q?=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kernel/src/filesystem/vfs/syscall/sys_pwritev.rs | 2 +- kernel/src/filesystem/vfs/syscall/sys_writev.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/src/filesystem/vfs/syscall/sys_pwritev.rs b/kernel/src/filesystem/vfs/syscall/sys_pwritev.rs index 537bb6dde..04d4e3c9b 100644 --- a/kernel/src/filesystem/vfs/syscall/sys_pwritev.rs +++ b/kernel/src/filesystem/vfs/syscall/sys_pwritev.rs @@ -41,7 +41,7 @@ impl Syscall for SysPwriteVHandle { // 将用户态传入的数据结构 `IoVecs` 重新在内核上构造 let iovecs = unsafe { IoVecs::from_user(iov, iov_count, false) }?; let data = iovecs.gather()?; - + // TODO: 支持零内核拷贝的分散写 (需要文件系统底层支持分散写) // - 直接将传入的用户态 IoVec 使用 vma 做校验以后传入底层文件系统进行分散写,避免内核拷贝 // - 实现路径(linux):wirtev --> vfs_writev --> do_iter_write --> do_loop_readv_writev/do_iter_readv_writev diff --git a/kernel/src/filesystem/vfs/syscall/sys_writev.rs b/kernel/src/filesystem/vfs/syscall/sys_writev.rs index b95ba40eb..8a9e4bde5 100644 --- a/kernel/src/filesystem/vfs/syscall/sys_writev.rs +++ b/kernel/src/filesystem/vfs/syscall/sys_writev.rs @@ -43,11 +43,10 @@ impl Syscall for SysWriteVHandle { let iov = Self::iov(args); let count = Self::count(args); - // 将用户态传入的数据结构 `IoVecs` 重新在内核上构造 let iovecs = unsafe { IoVecs::from_user(iov, count, false) }?; let data = iovecs.gather()?; - + // TODO: 支持零内核拷贝的分散写 (需要文件系统底层支持分散写) // - 直接将传入的用户态 IoVec 使用 vma 做校验以后传入底层文件系统进行分散写,避免内核拷贝 // - 实现路径(linux):wirtev --> vfs_writev --> do_iter_write --> do_loop_readv_writev/do_iter_readv_writev From 10c624a4c98ea565f779907ffacffd1770414108 Mon Sep 17 00:00:00 2001 From: yuming Date: Fri, 21 Nov 2025 16:11:59 +0800 Subject: [PATCH 5/7] =?UTF-8?q?fix(syscall/user=5Faccess):=20=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=20=E6=BD=9C=E5=9C=A8=E7=9A=84=E6=AD=BB=E9=94=81?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kernel/src/syscall/user_access.rs | 46 +++++++++++++++++-------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/kernel/src/syscall/user_access.rs b/kernel/src/syscall/user_access.rs index da48b886c..84824e285 100644 --- a/kernel/src/syscall/user_access.rs +++ b/kernel/src/syscall/user_access.rs @@ -936,7 +936,7 @@ pub fn user_accessible_len(addr: VirtAddr, size: usize, check_write: bool) -> us return 0; } - // 获取当前进程的 VMA (可访问的地址空间) + // 获取当前进程的 vm (可访问的地址空间) let vm = match ProcessManager::current_pcb().basic().user_vm() { Some(vm) => vm, None => return 0, @@ -954,44 +954,48 @@ pub fn user_accessible_len(addr: VirtAddr, size: usize, check_write: bool) -> us break; }; - // 获取地址所在内存页的起始地址 和结束地址,以及访问权限标志 和文件后备长度 - let (region_start, region_end, vm_flags, file_backed_len) = { + // 获取地址所在 VMA 的起始地址 和结束地址,访问权限标志,后备的文件和当前VMA第一页映射到文件的哪一页 + let (region_start, region_end, vm_flags, vma_size, file, file_page_offset) = { let guard = vma.lock_irqsave(); let region_start = guard.region().start().data(); let region_end = guard.region().end().data(); let vm_flags = *guard.vm_flags(); let vma_size = region_end.saturating_sub(region_start); + let file = guard.vm_file(); + let file_page_offset = guard.file_page_offset(); - let file_backed_len = guard.vm_file().and_then(|file| { - let file_offset_pages = guard.file_page_offset().unwrap_or(0); - let file_offset_bytes = file_offset_pages.saturating_mul(MMArch::PAGE_SIZE); - let file_size = match file.metadata() { - Ok(md) if md.size > 0 => { - let capped = core::cmp::min(md.size as u128, usize::MAX as u128); - capped as usize - } - Ok(_) => 0, - Err(_) => return None, - }; - - let backed = file_size.saturating_sub(file_offset_bytes); - Some(core::cmp::min(backed, vma_size)) - }); - - (region_start, region_end, vm_flags, file_backed_len) + drop(guard); + (region_start, region_end, vm_flags, vma_size, file, file_page_offset) }; + // 根据 vm_flags 判断是否具备访问权限 let has_permission = if check_write { vm_flags.contains(VmFlags::VM_WRITE) } else { vm_flags.contains(VmFlags::VM_READ) }; - if !has_permission { break; } + let file_backed_len= file.and_then(|file| { + let file_offset_pages = file_page_offset.unwrap_or(0); + let file_offset_bytes = file_offset_pages.saturating_mul(MMArch::PAGE_SIZE); + let file_size = match file.metadata() { + Ok(md) if md.size > 0 => { + let capped = core::cmp::min(md.size as u128, usize::MAX as u128); + capped as usize + } + Ok(_) => 0, + Err(_) => return None, + }; + + let backed = file_size.saturating_sub(file_offset_bytes); + Some(core::cmp::min(backed, vma_size)) + }); + + // 计算当前 VMA 内从 current 地址开始的可用长度 let current_addr = current.data(); let mut available = region_end.saturating_sub(current_addr); From 50b60cbf06b78e7d40b86e0c9ea4d3dfe872a31c Mon Sep 17 00:00:00 2001 From: yuming Date: Fri, 21 Nov 2025 16:16:03 +0800 Subject: [PATCH 6/7] =?UTF-8?q?style:=20=E6=A0=BC=E5=BC=8F=E5=8C=96?= =?UTF-8?q?=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kernel/src/syscall/user_access.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/src/syscall/user_access.rs b/kernel/src/syscall/user_access.rs index 84824e285..b6a933eb8 100644 --- a/kernel/src/syscall/user_access.rs +++ b/kernel/src/syscall/user_access.rs @@ -965,10 +965,16 @@ pub fn user_accessible_len(addr: VirtAddr, size: usize, check_write: bool) -> us let file_page_offset = guard.file_page_offset(); drop(guard); - (region_start, region_end, vm_flags, vma_size, file, file_page_offset) + ( + region_start, + region_end, + vm_flags, + vma_size, + file, + file_page_offset, + ) }; - // 根据 vm_flags 判断是否具备访问权限 let has_permission = if check_write { vm_flags.contains(VmFlags::VM_WRITE) @@ -979,7 +985,7 @@ pub fn user_accessible_len(addr: VirtAddr, size: usize, check_write: bool) -> us break; } - let file_backed_len= file.and_then(|file| { + let file_backed_len = file.and_then(|file| { let file_offset_pages = file_page_offset.unwrap_or(0); let file_offset_bytes = file_offset_pages.saturating_mul(MMArch::PAGE_SIZE); let file_size = match file.metadata() { From 94bb0fb95911a2020e345aafa6f3dc24a1c0c0da Mon Sep 17 00:00:00 2001 From: yuming Date: Fri, 21 Nov 2025 16:28:14 +0800 Subject: [PATCH 7/7] =?UTF-8?q?test(gvisor):=20=E5=9C=A8=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=B5=8B=E8=AF=95=E4=B8=AD=E5=90=AF=E7=94=A8=E6=89=80=E6=9C=89?= =?UTF-8?q?=E7=9A=84=20writev=20=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../apps/tests/syscall/gvisor/blocklists/write_test | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 user/apps/tests/syscall/gvisor/blocklists/write_test diff --git a/user/apps/tests/syscall/gvisor/blocklists/write_test b/user/apps/tests/syscall/gvisor/blocklists/write_test deleted file mode 100644 index 29d9762b6..000000000 --- a/user/apps/tests/syscall/gvisor/blocklists/write_test +++ /dev/null @@ -1,13 +0,0 @@ -# 由于缺少SYS_RT_SIGTIMEDWAIT而失败 -WriteTest.WriteExceedsRLimit - -# 内核会报如下错误 -# src/arch/x86_64/mm/fault.rs:321 vma_access_error -WriteTest.PartialWriteSIGSEGV -# 内核会报如下错误 -# Location: -# File: src/mm/fault.rs -# Line: 675, Column: 24 -# Message: -# no cache_page in PageFaultMessage -WriteTest.PartialWriteSIGBUS