diff --git a/Code/EntryPoint.cpp b/Code/EntryPoint.cpp index 6b35dc5..fdf5063 100644 --- a/Code/EntryPoint.cpp +++ b/Code/EntryPoint.cpp @@ -14,7 +14,7 @@ int main(int argc, const char * argv[]) { return -1; } - auto processor_information = GetProcessorInformation(); + auto processor_information = FileReadSpeedTest::GetProcessorInformation(); if (!processor_information.has_value()) { std::cerr << "Error: Could not find processor information." << std::endl; return -1; @@ -26,7 +26,7 @@ int main(int argc, const char * argv[]) { } std::cout << std::endl; - auto overlapped_io_file_read = PrepareToReadFile(argv[1], processor_information->actual_cores_); + auto overlapped_io_file_read = FileReadSpeedTest::PrepareToReadFile(argv[1], processor_information->actual_cores_); if (!overlapped_io_file_read.has_value()) { std::cerr << "Error"; return -1; diff --git a/Code/GetProcessorInformation.cpp b/Code/GetProcessorInformation.cpp index 2295e72..6f9fc04 100644 --- a/Code/GetProcessorInformation.cpp +++ b/Code/GetProcessorInformation.cpp @@ -13,82 +13,86 @@ //#include -/*struct ProcessorInformation { - int actual_cores_; - int hyperthreading_cores_; -};*/ +namespace FileReadSpeedTest { -std::optional GetProcessorInformation() noexcept { - DWORD required_buffer_size = 0; - BOOL result = GetLogicalProcessorInformation(nullptr, &required_buffer_size); - if (result == FALSE) { - DWORD error_code = GetLastError(); - if (error_code != ERROR_INSUFFICIENT_BUFFER) { + /*struct ProcessorInformation { + int actual_cores_; + int hyperthreading_cores_; + };*/ + + std::optional GetProcessorInformation() noexcept { + DWORD required_buffer_size = 0; + BOOL result = GetLogicalProcessorInformation(nullptr, &required_buffer_size); + if (result == FALSE) { + DWORD error_code = GetLastError(); + if (error_code != ERROR_INSUFFICIENT_BUFFER) { + return std::nullopt; + } + } + size_t processor_information_count = required_buffer_size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); + auto processor_information = std::make_unique(processor_information_count); + result = GetLogicalProcessorInformation(processor_information.get(), &required_buffer_size); + if (result == FALSE) { return std::nullopt; } - } - size_t processor_information_count = required_buffer_size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); - auto processor_information = std::make_unique(processor_information_count); - result = GetLogicalProcessorInformation(processor_information.get(), &required_buffer_size); - if (result == FALSE) { - return std::nullopt; - } - ProcessorCoreInformation processor_core_information = {}; + ProcessorCoreInformation processor_core_information = {}; - for(DWORD i = 0; i < processor_information_count; i++) { - //std::cout << "Core " << i << ": " << processor_information[i].ProcessorMask << std::endl; - switch (processor_information[i].Relationship) { - /* - case RelationProcessorPackage: - std::cout << "\tPhysical package" << std::endl; - break; - case RelationNumaNode: - std::cout << "\tNUMA node: " << processor_information[i].NumaNode.NodeNumber << std::endl; - break; - */ - case RelationProcessorCore: - { - processor_core_information.actual_cores_++; - if (processor_information[i].ProcessorCore.Flags == 1) { - processor_core_information.hyperthreading_cores_++; - } - } - break; - /* - case RelationCache: - std::cout << "\tL"; - switch (processor_information[i].Cache.Level) { - case 1: - std::cout << "1 "; - break; - case 2: - std::cout << "2 "; + for(DWORD i = 0; i < processor_information_count; i++) { + //std::cout << "Core " << i << ": " << processor_information[i].ProcessorMask << std::endl; + switch (processor_information[i].Relationship) { + /* + case RelationProcessorPackage: + std::cout << "\tPhysical package" << std::endl; break; - case 3: - std::cout << "3 "; + case RelationNumaNode: + std::cout << "\tNUMA node: " << processor_information[i].NumaNode.NodeNumber << std::endl; break; + */ + case RelationProcessorCore: + { + processor_core_information.actual_cores_++; + if (processor_information[i].ProcessorCore.Flags == 1) { + processor_core_information.hyperthreading_cores_++; + } } - switch (processor_information[i].Cache.Type) { - case CacheUnified: - std::cout << "unified"; - break; - case CacheInstruction: - std::cout << "instuction"; - break; - case CacheData: - std::cout << "data"; - break; - case CacheTrace: - std::cout << "trace"; + break; + /* + case RelationCache: + std::cout << "\tL"; + switch (processor_information[i].Cache.Level) { + case 1: + std::cout << "1 "; + break; + case 2: + std::cout << "2 "; + break; + case 3: + std::cout << "3 "; + break; + } + switch (processor_information[i].Cache.Type) { + case CacheUnified: + std::cout << "unified"; + break; + case CacheInstruction: + std::cout << "instuction"; + break; + case CacheData: + std::cout << "data"; + break; + case CacheTrace: + std::cout << "trace"; + break; + } + std::cout << " cache, " << processor_information[i].Cache.Size << " KiB" << std::endl; + std::cout << "\tCache line size: " << processor_information[i].Cache.LineSize << " KiB" << std::endl; break; + */ } - std::cout << " cache, " << processor_information[i].Cache.Size << " KiB" << std::endl; - std::cout << "\tCache line size: " << processor_information[i].Cache.LineSize << " KiB" << std::endl; - break; - */ } + + return processor_core_information; } - return processor_core_information; -} \ No newline at end of file +} // namespace FileReadSpeedTest \ No newline at end of file diff --git a/Code/GetProcessorInformation.hpp b/Code/GetProcessorInformation.hpp index de2b826..05dac23 100644 --- a/Code/GetProcessorInformation.hpp +++ b/Code/GetProcessorInformation.hpp @@ -2,17 +2,21 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef GETPROCESSORINFORMATION_HPP -#define GETPROCESSORINFORMATION_HPP +#ifndef FILEREADSPEEDTEST_GETPROCESSORINFORMATION_HPP +#define FILEREADSPEEDTEST_GETPROCESSORINFORMATION_HPP #include -class ProcessorCoreInformation { -public: - int actual_cores_; - int hyperthreading_cores_; -}; +namespace FileReadSpeedTest { -std::optional GetProcessorInformation() noexcept; + class ProcessorCoreInformation { + public: + int actual_cores_; + int hyperthreading_cores_; + }; -#endif // #ifndef GETPROCESSORINFORMATION_HPP \ No newline at end of file + std::optional GetProcessorInformation() noexcept; + +} // namespace FileReadSpeedTest + +#endif // #ifndef FILEREADSPEEDTEST_GETPROCESSORINFORMATION_HPP \ No newline at end of file diff --git a/Code/OSAllocator.cpp b/Code/OSAllocator.cpp index 12b5793..94749cc 100644 --- a/Code/OSAllocator.cpp +++ b/Code/OSAllocator.cpp @@ -11,28 +11,32 @@ #endif #include -OSAllocation::OSAllocation(void* memory) noexcept - : memory_(std::move(memory)) -{} +namespace FileReadSpeedTest { + OSAllocation::OSAllocation(void* memory) noexcept + : memory_(std::move(memory)) + {} -std::optional OSAllocator::allocate(size_t size) noexcept { - void* pointer = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); - if (pointer == NULL) { - // GetLastError() - return std::nullopt; - } - - return OSAllocation{pointer}; -} -void OSAllocator::deallocate(OSAllocation& allocation) noexcept { - if (allocation.memory_) { - BOOL result = VirtualFree(allocation.memory_, 0, MEM_RELEASE); - if (result == 0) { + std::optional OSAllocator::allocate(size_t size) noexcept { + void* pointer = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + if (pointer == NULL) { // GetLastError() + return std::nullopt; } - allocation.memory_ = nullptr; + return OSAllocation{pointer}; } -} \ No newline at end of file + + void OSAllocator::deallocate(OSAllocation& allocation) noexcept { + if (allocation.memory_) { + BOOL result = VirtualFree(allocation.memory_, 0, MEM_RELEASE); + if (result == 0) { + // GetLastError() + } + + allocation.memory_ = nullptr; + } + } + +} // namespace FileReadSpeedTest \ No newline at end of file diff --git a/Code/OSAllocator.hpp b/Code/OSAllocator.hpp index 9852a77..06a03d7 100644 --- a/Code/OSAllocator.hpp +++ b/Code/OSAllocator.hpp @@ -2,26 +2,30 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef OSALLOCATOR_HPP -#define OSALLOCATOR_HPP +#ifndef FILEREADSPEEDTEST_OSALLOCATOR_HPP +#define FILEREADSPEEDTEST_OSALLOCATOR_HPP #include -class OSAllocation { -public: +namespace FileReadSpeedTest { - explicit OSAllocation(void* memory) noexcept; + class OSAllocation { + public: - void* memory_; + explicit OSAllocation(void* memory) noexcept; -}; + void* memory_; -class OSAllocator { -public: + }; - static std::optional allocate(size_t size) noexcept; - static void deallocate(OSAllocation& allocation) noexcept; + class OSAllocator { + public: -}; + static std::optional allocate(size_t size) noexcept; + static void deallocate(OSAllocation& allocation) noexcept; -#endif // #ifndef OSALLOCATOR_HPP \ No newline at end of file + }; + +} // namespace FileReadSpeedTest + +#endif // #ifndef FILEREADSPEEDTEST_OSALLOCATOR_HPP \ No newline at end of file diff --git a/Code/OverlappedIOFileRead.cpp b/Code/OverlappedIOFileRead.cpp index aae1d48..716b8e5 100644 --- a/Code/OverlappedIOFileRead.cpp +++ b/Code/OverlappedIOFileRead.cpp @@ -13,7 +13,9 @@ #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif +#ifndef NOMINMAX #define NOMINMAX +#endif #include namespace { @@ -21,7 +23,7 @@ namespace { std::chrono::time_point pre_open_time_; - std::optional CreateOverlappedIOFile(LPCSTR file_name) noexcept { + std::optional CreateOverlappedIOFile(LPCSTR file_name) noexcept { pre_open_time_ = std::chrono::high_resolution_clock::now(); // Use FILE_FLAG_RANDOM_ACCESS instead of FILE_FLAG_SEQUENTIAL_SCAN for file formats where that is a better fit // FILE_FLAG_NO_BUFFERING @@ -31,31 +33,31 @@ namespace { return std::nullopt; } - return OverlappedIOFile{std::move(file_handle)}; + return FileReadSpeedTest::OverlappedIOFile{std::move(file_handle)}; } - std::optional CreateCompletionPort(OverlappedIOFile& overlapped_io_file, DWORD worker_thread_count) noexcept { + std::optional CreateCompletionPort(FileReadSpeedTest::OverlappedIOFile& overlapped_io_file, DWORD worker_thread_count) noexcept { HANDLE completion_port_handle = CreateIoCompletionPort(overlapped_io_file.handle_, NULL, 0, worker_thread_count); if (completion_port_handle == NULL) { // GetLastError return std::nullopt; } - return CompletionPort{std::move(completion_port_handle)}; + return FileReadSpeedTest::CompletionPort{std::move(completion_port_handle)}; } - std::optional CreateThread(LPTHREAD_START_ROUTINE thread_entry_point, LPVOID parameter) noexcept { + std::optional CreateThread(LPTHREAD_START_ROUTINE thread_entry_point, LPVOID parameter) noexcept { HANDLE thread_handle = ::CreateThread(nullptr, 0, thread_entry_point, parameter, 0, nullptr); if (thread_handle == NULL) { // GetLastError return std::nullopt; } - return Thread(std::move(thread_handle)); + return FileReadSpeedTest::Thread(std::move(thread_handle)); } - std::optional CreateThreadPool(LPTHREAD_START_ROUTINE thread_entry_point, LPVOID parameter, DWORD worker_thread_count) noexcept { - std::vector threads; + std::optional CreateThreadPool(LPTHREAD_START_ROUTINE thread_entry_point, LPVOID parameter, DWORD worker_thread_count) noexcept { + std::vector threads; for (size_t i = worker_thread_count; i > 0; i--) { auto thread = CreateThread(thread_entry_point, parameter); if (!thread.has_value()) { @@ -64,13 +66,13 @@ namespace { threads.emplace_back(std::move(*thread)); } - return ThreadPool{std::move(threads)}; + return FileReadSpeedTest::ThreadPool{std::move(threads)}; } - std::optional> CreateIOContexts(LARGE_INTEGER file_size, unsigned long buffer_size, OverlappedIOFile& overlapped_io_file) noexcept { + std::optional> CreateIOContexts(LARGE_INTEGER file_size, unsigned long buffer_size, FileReadSpeedTest::OverlappedIOFile& overlapped_io_file) noexcept { DWORD operation_count = (DWORD)((file_size.QuadPart + buffer_size - 1) / buffer_size); - std::vector contexts; + std::vector contexts; for (DWORD i = 0; i < operation_count; ++i) { DWORD file_offset = i * buffer_size; @@ -79,14 +81,14 @@ namespace { overlapped.Offset = file_offset; overlapped.OffsetHigh = 0; - auto allocation = OSAllocator::allocate(buffer_size); + auto allocation = FileReadSpeedTest::OSAllocator::allocate(buffer_size); if (!allocation.has_value()) { return std::nullopt; } // TODO: Confirm the allocation is aligned to volume sector size // TODO: Confirm buffer_size is a multiple of volume sector size - auto io_context = IOContext{std::move(overlapped), overlapped_io_file.handle_, std::move(*allocation), buffer_size, file_offset}; + auto io_context = FileReadSpeedTest::IOContext{std::move(overlapped), overlapped_io_file.handle_, std::move(*allocation), buffer_size, file_offset}; contexts.emplace_back(std::move(io_context)); } @@ -98,7 +100,7 @@ namespace { HANDLE completion_port = static_cast(param); DWORD bytes_transferred; ULONG_PTR completion_key; - IOContext* context = nullptr; + FileReadSpeedTest::IOContext* context = nullptr; while (true) { BOOL result = GetQueuedCompletionStatus(completion_port, &bytes_transferred, &completion_key, reinterpret_cast(&context), INFINITE); @@ -126,159 +128,163 @@ namespace { } // anonymous namespace -ThreadPool::ThreadPool(std::vector threads) noexcept - : threads_(std::move(threads)) -{} - -IOContext::IOContext(OVERLAPPED overlapped, HANDLE file_handle, OSAllocation buffer, DWORD bytes_to_read, DWORD file_offset) noexcept - : overlapped_(std::move(overlapped)) - , file_handle_(std::move(file_handle)) - , buffer_(std::move(buffer)) - , bytes_to_read_(std::move(bytes_to_read)) - , file_offset_(std::move(file_offset)) - , is_complete_(false) -{} - -OverlappedIOFileRead::OverlappedIOFileRead(OverlappedIOFile overlapped_io_file, CompletionPort completion_port, ThreadPool thread_pool, std::vector contexts) noexcept - : overlapped_io_file_(std::move(overlapped_io_file)) - , completion_port_(std::move(completion_port)) - , thread_pool_(std::move(thread_pool)) - , contexts_(std::move(contexts)) -{} - -void OverlappedIOFileRead::Read() noexcept { - read_issue_time_ = std::chrono::high_resolution_clock::now(); - - // TODO: Only have a certain number of IOPS in flight at a time? - for (DWORD i = 0; i < contexts_.size(); ++i) { - contexts_[i].request_start_time_ = std::chrono::high_resolution_clock::now(); - BOOL result = ReadFile(overlapped_io_file_.handle_, contexts_[i].buffer_.memory_, contexts_[i].bytes_to_read_, nullptr, &contexts_[i].overlapped_); - if (!result) { - DWORD error = GetLastError(); - // TODO: ERROR_INVALID_USER_BUFFER or ERROR_NOT_ENOUGH_MEMORY if too many outstanding asynchronous IO operations - if (error != ERROR_IO_PENDING) { - // TODO: error - continue; +namespace FileReadSpeedTest { + + ThreadPool::ThreadPool(std::vector threads) noexcept + : threads_(std::move(threads)) + {} + + IOContext::IOContext(OVERLAPPED overlapped, HANDLE file_handle, OSAllocation buffer, DWORD bytes_to_read, DWORD file_offset) noexcept + : overlapped_(std::move(overlapped)) + , file_handle_(std::move(file_handle)) + , buffer_(std::move(buffer)) + , bytes_to_read_(std::move(bytes_to_read)) + , file_offset_(std::move(file_offset)) + , is_complete_(false) + {} + + OverlappedIOFileRead::OverlappedIOFileRead(OverlappedIOFile overlapped_io_file, CompletionPort completion_port, ThreadPool thread_pool, std::vector contexts) noexcept + : overlapped_io_file_(std::move(overlapped_io_file)) + , completion_port_(std::move(completion_port)) + , thread_pool_(std::move(thread_pool)) + , contexts_(std::move(contexts)) + {} + + void OverlappedIOFileRead::Read() noexcept { + read_issue_time_ = std::chrono::high_resolution_clock::now(); + + // TODO: Only have a certain number of IOPS in flight at a time? + for (DWORD i = 0; i < contexts_.size(); ++i) { + contexts_[i].request_start_time_ = std::chrono::high_resolution_clock::now(); + BOOL result = ReadFile(overlapped_io_file_.handle_, contexts_[i].buffer_.memory_, contexts_[i].bytes_to_read_, nullptr, &contexts_[i].overlapped_); + if (!result) { + DWORD error = GetLastError(); + // TODO: ERROR_INVALID_USER_BUFFER or ERROR_NOT_ENOUGH_MEMORY if too many outstanding asynchronous IO operations + if (error != ERROR_IO_PENDING) { + // TODO: error + continue; + } } - } - // TODO: Handle when the OS makes the operation synchronous: - // https://learn.microsoft.com/en-us/previous-versions/troubleshoot/windows/win32/asynchronous-disk-io-synchronous - // This can happen when FS compression or encryption is enabled. + // TODO: Handle when the OS makes the operation synchronous: + // https://learn.microsoft.com/en-us/previous-versions/troubleshoot/windows/win32/asynchronous-disk-io-synchronous + // This can happen when FS compression or encryption is enabled. + } } -} - -void OverlappedIOFileRead::WaitForThreadsToFinish() noexcept { - // Wait for all operations to complete - bool all_complete = false; - while (!all_complete) { - all_complete = true; - for (const auto& context : contexts_) { - if (!context.is_complete_) { - all_complete = false; - break; + + void OverlappedIOFileRead::WaitForThreadsToFinish() noexcept { + // Wait for all operations to complete + bool all_complete = false; + while (!all_complete) { + all_complete = true; + for (const auto& context : contexts_) { + if (!context.is_complete_) { + all_complete = false; + break; + } } + // There is a much better way to handle this. But for now, whatever. + Sleep(100); } - // There is a much better way to handle this. But for now, whatever. - Sleep(100); - } - // Signal worker threads to shutdown - for (size_t i = 0; i < thread_pool_.threads_.size(); ++i) { - PostQueuedCompletionStatus(completion_port_.handle_, 0, 0, nullptr); - } + // Signal worker threads to shutdown + for (size_t i = 0; i < thread_pool_.threads_.size(); ++i) { + PostQueuedCompletionStatus(completion_port_.handle_, 0, 0, nullptr); + } - // Wait for worker threads to finish - WaitForMultipleObjects(static_cast(thread_pool_.threads_.size()), (const HANDLE*)thread_pool_.threads_.data(), TRUE, INFINITE); + // Wait for worker threads to finish + WaitForMultipleObjects(static_cast(thread_pool_.threads_.size()), (const HANDLE*)thread_pool_.threads_.data(), TRUE, INFINITE); - auto open_delay = std::chrono::duration_cast(overlapped_io_file_.file_open_time_ - pre_open_time_); - auto open_to_read_delay = std::chrono::duration_cast(read_issue_time_ - overlapped_io_file_.file_open_time_); - std::cout << "Open delay: " << open_delay << std::endl; - std::cout << "Open to read delay: " << open_to_read_delay << std::endl; + auto open_delay = std::chrono::duration_cast(overlapped_io_file_.file_open_time_ - pre_open_time_); + auto open_to_read_delay = std::chrono::duration_cast(read_issue_time_ - overlapped_io_file_.file_open_time_); + std::cout << "Open delay: " << open_delay << std::endl; + std::cout << "Open to read delay: " << open_to_read_delay << std::endl; - size_t i = 0; - for (auto& context : contexts_) { - auto read_issue_delay = std::chrono::duration_cast(context.request_start_time_ - read_issue_time_); - auto duration = std::chrono::duration_cast(context.request_complete_time_ - context.request_start_time_); + size_t i = 0; + for (auto& context : contexts_) { + auto read_issue_delay = std::chrono::duration_cast(context.request_start_time_ - read_issue_time_); + auto duration = std::chrono::duration_cast(context.request_complete_time_ - context.request_start_time_); - std::cout << "Buffer " << i++ << " - Read issue delay: " << read_issue_delay << " - Issue to completion delay: " << duration << std::endl; + std::cout << "Buffer " << i++ << " - Read issue delay: " << read_issue_delay << " - Issue to completion delay: " << duration << std::endl; - OSAllocator::deallocate(context.buffer_); + OSAllocator::deallocate(context.buffer_); + } } -} -std::expected PrepareToReadFile(LPCSTR file_name, DWORD worker_thread_count) noexcept { - std::cout << "Worker thread count: " << worker_thread_count << std::endl; + std::expected PrepareToReadFile(LPCSTR file_name, DWORD worker_thread_count) noexcept { + std::cout << "Worker thread count: " << worker_thread_count << std::endl; - if (std::chrono::high_resolution_clock::is_steady) { - std::cout << "Clock is steady" << std::endl; - } else { - std::cout << "Clock is not steady" << std::endl; - } + if (std::chrono::high_resolution_clock::is_steady) { + std::cout << "Clock is steady" << std::endl; + } else { + std::cout << "Clock is not steady" << std::endl; + } - auto file = CreateOverlappedIOFile(file_name); - if (!file.has_value()) { - return std::unexpected{PrepareToReadFileError::CouldNotOpenFile}; - } + auto file = CreateOverlappedIOFile(file_name); + if (!file.has_value()) { + return std::unexpected{PrepareToReadFileError::CouldNotOpenFile}; + } - // TODO: Print drive make & model - // TODO: Print filesystem info (format, block size) + // TODO: Print drive make & model + // TODO: Print filesystem info (format, block size) - LARGE_INTEGER file_size; - BOOL result = GetFileSizeEx(file->handle_, &file_size); - if (result == 0) { - // GetLastError - return std::unexpected{PrepareToReadFileError::CouldNotGetFileSize}; - } + LARGE_INTEGER file_size; + BOOL result = GetFileSizeEx(file->handle_, &file_size); + if (result == 0) { + // GetLastError + return std::unexpected{PrepareToReadFileError::CouldNotGetFileSize}; + } - std::cout << "File size: " << file_size.QuadPart << std::endl; + std::cout << "File size: " << file_size.QuadPart << std::endl; - FILE_STORAGE_INFO file_storage_info; - result = GetFileInformationByHandleEx(file->handle_, FileStorageInfo, &file_storage_info, sizeof(file_storage_info)); - if (result == FALSE) { - return std::unexpected{PrepareToReadFileError::CouldNotGetFileSize}; - } - unsigned long partition_block_size = file_storage_info.LogicalBytesPerSector; - // When doing writes instead of reads, prefer file_storage_info.PhysicalBytesPerSectorForPerformance; - - // Get the page size - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - /*if (compiling for x86 or arm64) { - SYSTEM_INFO native_system_info; - GetNativeSystemInfo(&native_system_info); - }*/ - partition_block_size = system_info.dwPageSize; - // TODO: Confirm the page size is a multiple of the logical bytes per sector - - // On Windows, VirtualAlloc has 64 KiB granularity: - // https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 - // TODO: Should we push the buffer size to 64 KiB? - - std::cout << "Buffer size: " << partition_block_size << std::endl; - - auto completion_port = CreateCompletionPort(*file, worker_thread_count); - if (!completion_port.has_value()) { - return std::unexpected{PrepareToReadFileError::CouldNotCreateCompletionPort}; - } + FILE_STORAGE_INFO file_storage_info; + result = GetFileInformationByHandleEx(file->handle_, FileStorageInfo, &file_storage_info, sizeof(file_storage_info)); + if (result == FALSE) { + return std::unexpected{PrepareToReadFileError::CouldNotGetFileSize}; + } + unsigned long partition_block_size = file_storage_info.LogicalBytesPerSector; + // When doing writes instead of reads, prefer file_storage_info.PhysicalBytesPerSectorForPerformance; + + // Get the page size + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + /*if (compiling for x86 or arm64) { + SYSTEM_INFO native_system_info; + GetNativeSystemInfo(&native_system_info); + }*/ + partition_block_size = system_info.dwPageSize; + // TODO: Confirm the page size is a multiple of the logical bytes per sector + + // On Windows, VirtualAlloc has 64 KiB granularity: + // https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 + // TODO: Should we push the buffer size to 64 KiB? + + std::cout << "Buffer size: " << partition_block_size << std::endl; + + auto completion_port = CreateCompletionPort(*file, worker_thread_count); + if (!completion_port.has_value()) { + return std::unexpected{PrepareToReadFileError::CouldNotCreateCompletionPort}; + } - // TODO: Creating the threads takes a significant amount of time. - // This happens after the file is opened and the completion port is created. - // The OS *could* begin prefetching the data. I don't know if it does or not. - // But if it does, this gives it ample opportunity to impact the measurements. - auto thread_pool = CreateThreadPool(WorkerThread, completion_port->handle_, worker_thread_count); - if (!thread_pool.has_value()) { - return std::unexpected{PrepareToReadFileError::CouldNotCreateThread}; - } + // TODO: Creating the threads takes a significant amount of time. + // This happens after the file is opened and the completion port is created. + // The OS *could* begin prefetching the data. I don't know if it does or not. + // But if it does, this gives it ample opportunity to impact the measurements. + auto thread_pool = CreateThreadPool(WorkerThread, completion_port->handle_, worker_thread_count); + if (!thread_pool.has_value()) { + return std::unexpected{PrepareToReadFileError::CouldNotCreateThread}; + } + + auto io_contexts = CreateIOContexts(file_size, partition_block_size, *file); + if (!io_contexts.has_value()) { + return std::unexpected{PrepareToReadFileError::CouldNotCreateIOContexts}; + } - auto io_contexts = CreateIOContexts(file_size, partition_block_size, *file); - if (!io_contexts.has_value()) { - return std::unexpected{PrepareToReadFileError::CouldNotCreateIOContexts}; + return OverlappedIOFileRead(std::move(*file), + std::move(*completion_port), + std::move(*thread_pool), + std::move(*io_contexts)); } - return OverlappedIOFileRead(std::move(*file), - std::move(*completion_port), - std::move(*thread_pool), - std::move(*io_contexts)); -} \ No newline at end of file +} // namespace FileReadSpeedTest \ No newline at end of file diff --git a/Code/OverlappedIOFileRead.hpp b/Code/OverlappedIOFileRead.hpp index b832be9..7f87d23 100644 --- a/Code/OverlappedIOFileRead.hpp +++ b/Code/OverlappedIOFileRead.hpp @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef OVERLAPPEDIOFILEREAD_HPP -#define OVERLAPPEDIOFILEREAD_HPP +#ifndef FILEREADSPEEDTEST_OVERLAPPEDIOFILEREAD_HPP +#define FILEREADSPEEDTEST_OVERLAPPEDIOFILEREAD_HPP #include #include @@ -15,72 +15,81 @@ #include "OSAllocator.hpp" #include "Win32Types.hpp" +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#ifndef NOMINMAX #define NOMINMAX +#endif #include -//using OverlappedIOFile = SpecificHandleObject; -using CompletionPort = SpecificHandleObject; -using Thread = SpecificHandleObject; +namespace FileReadSpeedTest { -struct OverlappedIOFile : public SpecificHandleObject { + //using OverlappedIOFile = SpecificHandleObject; + using CompletionPort = SpecificHandleObject; + using Thread = SpecificHandleObject; - explicit OverlappedIOFile(HANDLE handle) noexcept - : SpecificHandleObject(std::move(handle)) - , file_open_time_(std::chrono::high_resolution_clock::now()) - {} + struct OverlappedIOFile : public SpecificHandleObject { - std::chrono::time_point file_open_time_; + explicit OverlappedIOFile(HANDLE handle) noexcept + : SpecificHandleObject(std::move(handle)) + , file_open_time_(std::chrono::high_resolution_clock::now()) + {} -}; + std::chrono::time_point file_open_time_; -class ThreadPool { -public: + }; - explicit ThreadPool(std::vector threads) noexcept; + class ThreadPool { + public: - std::vector threads_; + explicit ThreadPool(std::vector threads) noexcept; -}; + std::vector threads_; -struct IOContext { - IOContext(OVERLAPPED overlapped, HANDLE file_handle, OSAllocation buffer, DWORD bytes_to_read, DWORD file_offset) noexcept; + }; - OVERLAPPED overlapped_; - HANDLE file_handle_; - OSAllocation buffer_; - //std::unique_ptr buffer_; - DWORD bytes_to_read_; - DWORD file_offset_; - bool is_complete_; - std::chrono::time_point request_start_time_; - std::chrono::time_point request_complete_time_; + struct IOContext { + IOContext(OVERLAPPED overlapped, HANDLE file_handle, OSAllocation buffer, DWORD bytes_to_read, DWORD file_offset) noexcept; -}; + OVERLAPPED overlapped_; + HANDLE file_handle_; + OSAllocation buffer_; + //std::unique_ptr buffer_; + DWORD bytes_to_read_; + DWORD file_offset_; + bool is_complete_; + std::chrono::time_point request_start_time_; + std::chrono::time_point request_complete_time_; -class OverlappedIOFileRead { -public: + }; - explicit OverlappedIOFileRead(OverlappedIOFile overlapped_io_file, CompletionPort completion_port, ThreadPool thread_pool, std::vector contexts) noexcept; + class OverlappedIOFileRead { + public: - void Read() noexcept; - void WaitForThreadsToFinish() noexcept; + explicit OverlappedIOFileRead(OverlappedIOFile overlapped_io_file, CompletionPort completion_port, ThreadPool thread_pool, std::vector contexts) noexcept; - OverlappedIOFile overlapped_io_file_; - CompletionPort completion_port_; - ThreadPool thread_pool_; - // TODO: Put contexts in their own cache line so there isn't cache contention - std::vector contexts_; - std::chrono::time_point read_issue_time_; + void Read() noexcept; + void WaitForThreadsToFinish() noexcept; + + OverlappedIOFile overlapped_io_file_; + CompletionPort completion_port_; + ThreadPool thread_pool_; + // TODO: Put contexts in their own cache line so there isn't cache contention + std::vector contexts_; + std::chrono::time_point read_issue_time_; -}; - -enum PrepareToReadFileError { - CouldNotOpenFile, - CouldNotCreateCompletionPort, - CouldNotCreateThread, - CouldNotGetFileSize, - CouldNotCreateIOContexts, -}; -std::expected PrepareToReadFile(LPCSTR file_name, DWORD worker_thread_count) noexcept; - -#endif // #ifndef OVERLAPPEDIOFILEREAD_HPP \ No newline at end of file + }; + + enum PrepareToReadFileError { + CouldNotOpenFile, + CouldNotCreateCompletionPort, + CouldNotCreateThread, + CouldNotGetFileSize, + CouldNotCreateIOContexts, + }; + std::expected PrepareToReadFile(LPCSTR file_name, DWORD worker_thread_count) noexcept; + +} // namespace FileReadSpeedTest + +#endif // #ifndef FILEREADSPEEDTEST_OVERLAPPEDIOFILEREAD_HPP \ No newline at end of file diff --git a/Code/Win32Types.cpp b/Code/Win32Types.cpp index e533d1f..28d657e 100644 --- a/Code/Win32Types.cpp +++ b/Code/Win32Types.cpp @@ -6,31 +6,35 @@ #include -GenericHandleObject::GenericHandleObject(HANDLE handle) noexcept - : handle_(std::move(handle)) -{} - -GenericHandleObject::GenericHandleObject(GenericHandleObject&& rhs) noexcept - : handle_(std::move(rhs.handle_)) -{ - rhs.handle_ = INVALID_HANDLE_VALUE; -} - -GenericHandleObject::~GenericHandleObject() noexcept { - if (handle_ != INVALID_HANDLE_VALUE) { - BOOL result = CloseHandle(handle_); - if (result == 0) { - // GetLastError +namespace FileReadSpeedTest { + + GenericHandleObject::GenericHandleObject(HANDLE handle) noexcept + : handle_(std::move(handle)) + {} + + GenericHandleObject::GenericHandleObject(GenericHandleObject&& rhs) noexcept + : handle_(std::move(rhs.handle_)) + { + rhs.handle_ = INVALID_HANDLE_VALUE; + } + + GenericHandleObject::~GenericHandleObject() noexcept { + if (handle_ != INVALID_HANDLE_VALUE) { + BOOL result = CloseHandle(handle_); + if (result == 0) { + // GetLastError + } } } -} -GenericHandleObject& GenericHandleObject::operator =(GenericHandleObject&& rhs) noexcept { - handle_ = std::move(rhs.handle_); - rhs.handle_ = INVALID_HANDLE_VALUE; - return *this; -} + GenericHandleObject& GenericHandleObject::operator =(GenericHandleObject&& rhs) noexcept { + handle_ = std::move(rhs.handle_); + rhs.handle_ = INVALID_HANDLE_VALUE; + return *this; + } + + SpecificHandleObject::SpecificHandleObject(HANDLE handle) noexcept + : GenericHandleObject(std::move(handle)) + {} -SpecificHandleObject::SpecificHandleObject(HANDLE handle) noexcept - : GenericHandleObject(std::move(handle)) -{} \ No newline at end of file +} // namespace FileReadSpeedTest \ No newline at end of file diff --git a/Code/Win32Types.hpp b/Code/Win32Types.hpp index 6fb5076..448823a 100644 --- a/Code/Win32Types.hpp +++ b/Code/Win32Types.hpp @@ -2,32 +2,41 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef WIN32TYPES_HPP -#define WIN32TYPES_HPP +#ifndef FILEREADSPEEDTEST_WIN32TYPES_HPP +#define FILEREADSPEEDTEST_WIN32TYPES_HPP +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#ifndef NOMINMAX #define NOMINMAX +#endif #include -class GenericHandleObject { -public: +namespace FileReadSpeedTest { - explicit GenericHandleObject(HANDLE handle) noexcept; - GenericHandleObject(const GenericHandleObject& rhs) = delete; - GenericHandleObject(GenericHandleObject&& rhs) noexcept; - ~GenericHandleObject() noexcept; + class GenericHandleObject { + public: - GenericHandleObject& operator =(const GenericHandleObject& rhs) = delete; - GenericHandleObject& operator =(GenericHandleObject&& rhs) noexcept; + explicit GenericHandleObject(HANDLE handle) noexcept; + GenericHandleObject(const GenericHandleObject& rhs) = delete; + GenericHandleObject(GenericHandleObject&& rhs) noexcept; + ~GenericHandleObject() noexcept; - HANDLE handle_ = INVALID_HANDLE_VALUE; + GenericHandleObject& operator =(const GenericHandleObject& rhs) = delete; + GenericHandleObject& operator =(GenericHandleObject&& rhs) noexcept; -}; + HANDLE handle_ = INVALID_HANDLE_VALUE; -class SpecificHandleObject : public GenericHandleObject { -public: + }; - explicit SpecificHandleObject(HANDLE handle) noexcept; + class SpecificHandleObject : public GenericHandleObject { + public: -}; + explicit SpecificHandleObject(HANDLE handle) noexcept; -#endif // #ifndef WIN32TYPES_HPP \ No newline at end of file + }; + +} // namespace FileReadSpeedTest + +#endif // #ifndef FILEREADSPEEDTEST_WIN32TYPES_HPP \ No newline at end of file