Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kernel: purge() incorrectly clears dirty pages #15951

Open
gunnarbeutner opened this issue Nov 6, 2022 · 1 comment
Open

Kernel: purge() incorrectly clears dirty pages #15951

gunnarbeutner opened this issue Nov 6, 2022 · 1 comment
Labels
bug Something isn't working

Comments

@gunnarbeutner
Copy link
Member

When clearing pages purge() drops dirty pages for inode VM objects because we're not tracking whether a page was dirtied. This can be observed by calling purge -c when the following test program is running:

#include <LibMain/Main.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

ErrorOr<int> serenity_main(Main::Arguments)
{
    int fd = open("/etc/fstab", O_RDONLY);
    char* p = (char*)mmap(nullptr, 4096, PROT_READ | PROT_WRITE, MAP_FILE | MAP_PRIVATE, fd, 0);
    close(fd);
    uintptr_t zero { 0 };
    memcpy(p, &zero, sizeof(zero));
    for (;;) {
        sleep(1);
        VERIFY(memcmp(p, &zero, sizeof(zero)) == 0);
    }
    return 0;
}
@gunnarbeutner gunnarbeutner added the bug Something isn't working label Nov 6, 2022
@gunnarbeutner
Copy link
Member Author

I'm not particularly happy with this patch but here's a general idea of what's necessary:

commit e9e2fe4a952f29dfe4ba4ac3a3c4ca5381be1b91
Author: Gunnar Beutner <gbeutner@serenityos.org>
Date:   Sat Nov 5 12:46:30 2022 +0100

    WIP+Kernel: Track dirty pages for inode VM objects

diff --git a/Kernel/Memory/InodeVMObject.cpp b/Kernel/Memory/InodeVMObject.cpp
index d0fbd558b1..80edce8add 100644
--- a/Kernel/Memory/InodeVMObject.cpp
+++ b/Kernel/Memory/InodeVMObject.cpp
@@ -48,6 +48,16 @@ size_t InodeVMObject::amount_dirty() const
     return count * PAGE_SIZE;
 }
 
+bool InodeVMObject::is_page_dirty(size_t page_index) const
+{
+    return m_dirty_pages.get(page_index);
+}
+
+void InodeVMObject::mark_page_dirty(size_t page_index)
+{
+    m_dirty_pages.set(page_index, true);
+}
+
 int InodeVMObject::release_all_clean_pages()
 {
     SpinlockLocker locker(m_lock);
diff --git a/Kernel/Memory/InodeVMObject.h b/Kernel/Memory/InodeVMObject.h
index 2b439b32fd..d796333096 100644
--- a/Kernel/Memory/InodeVMObject.h
+++ b/Kernel/Memory/InodeVMObject.h
@@ -22,6 +22,9 @@ public:
     size_t amount_dirty() const;
     size_t amount_clean() const;
 
+    bool is_page_dirty(size_t) const;
+    void mark_page_dirty(size_t);
+
     int release_all_clean_pages();
     int try_release_clean_pages(int page_amount);
 
diff --git a/Kernel/Memory/Region.cpp b/Kernel/Memory/Region.cpp
index 4dd3e6c102..19c72fbb13 100644
--- a/Kernel/Memory/Region.cpp
+++ b/Kernel/Memory/Region.cpp
@@ -189,6 +189,8 @@ ErrorOr<NonnullOwnPtr<Region>> Region::try_create_user_accessible(VirtualRange c
 
 bool Region::should_cow(size_t page_index) const
 {
+    if (vmobject().is_inode())
+        return !static_cast<InodeVMObject const&>(vmobject()).is_page_dirty(first_page_index() + page_index);
     if (!vmobject().is_anonymous())
         return false;
     return static_cast<AnonymousVMObject const&>(vmobject()).should_cow(first_page_index() + page_index, m_shared);
@@ -387,10 +389,12 @@ PageFaultResponse Region::handle_fault(PageFault const& fault)
     VERIFY(fault.type() == PageFault::Type::ProtectionViolation);
     if (fault.access() == PageFault::Access::Write && is_writable() && should_cow(page_index_in_region)) {
         dbgln_if(PAGE_FAULT_DEBUG, "PV(cow) fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
-        auto phys_page = physical_page(page_index_in_region);
-        if (phys_page->is_shared_zero_page() || phys_page->is_lazy_committed_page()) {
-            dbgln_if(PAGE_FAULT_DEBUG, "NP(zero) fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
-            return handle_zero_fault(page_index_in_region, *phys_page);
+        if (!vmobject().is_inode()) {
+            auto phys_page = physical_page(page_index_in_region);
+            if (phys_page->is_shared_zero_page() || phys_page->is_lazy_committed_page()) {
+                dbgln_if(PAGE_FAULT_DEBUG, "NP(zero) fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
+                return handle_zero_fault(page_index_in_region, *phys_page);
+            }
         }
         return handle_cow_fault(page_index_in_region);
     }
@@ -452,11 +456,18 @@ PageFaultResponse Region::handle_cow_fault(size_t page_index_in_region)
     if (current_thread)
         current_thread->did_cow_fault();
 
-    if (!vmobject().is_anonymous())
+    if (!vmobject().is_anonymous() && !vmobject().is_inode())
         return PageFaultResponse::ShouldCrash;
 
+    PageFaultResponse response;
     auto page_index_in_vmobject = translate_to_vmobject_page(page_index_in_region);
-    auto response = reinterpret_cast<AnonymousVMObject&>(vmobject()).handle_cow_fault(page_index_in_vmobject, vaddr().offset(page_index_in_region * PAGE_SIZE));
+    if (vmobject().is_inode()) {
+        reinterpret_cast<InodeVMObject&>(vmobject()).mark_page_dirty(page_index_in_vmobject);
+        dbgln("mark_page_dirty");
+        response = PageFaultResponse::Continue;
+    } else {
+        response = reinterpret_cast<AnonymousVMObject&>(vmobject()).handle_cow_fault(page_index_in_vmobject, vaddr().offset(page_index_in_region * PAGE_SIZE));
+    }
     if (!remap_vmobject_page(page_index_in_vmobject, *vmobject().physical_pages()[page_index_in_vmobject]))
         return PageFaultResponse::OutOfMemory;
     return response;
diff --git a/Kernel/Memory/SharedInodeVMObject.cpp b/Kernel/Memory/SharedInodeVMObject.cpp
index 29b2095d13..1eda04383d 100644
--- a/Kernel/Memory/SharedInodeVMObject.cpp
+++ b/Kernel/Memory/SharedInodeVMObject.cpp
@@ -64,6 +64,7 @@ ErrorOr<void> SharedInodeVMObject::sync(off_t offset_in_pages, size_t pages)
         MM.copy_physical_page(*physical_page, page_buffer);
 
         TRY(m_inode->write_bytes(page_index * PAGE_SIZE, PAGE_SIZE, UserOrKernelBuffer::for_kernel_buffer(page_buffer), nullptr));
+        m_dirty_pages.set(page_index, false);
     }
 
     return {};

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

1 participant