diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8e79e7c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+build*/
+old/tmp/
+*.csv
\ No newline at end of file
diff --git a/.idea/.gitignore b/.idea/.gitignore
deleted file mode 100644
index 13566b8..0000000
--- a/.idea/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-# Default ignored files
-/shelf/
-/workspace.xml
-# Editor-based HTTP Client requests
-/httpRequests/
-# Datasource local storage ignored files
-/dataSources/
-/dataSources.local.xml
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..6134542
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,6 @@
+cmake_minimum_required(VERSION 3.28)
+project(parbfs CXX)
+set(CMAKE_CXX_STANDARD 23)
+
+add_executable(parbfs main.cpp bfs.cpp)
+target_link_libraries(parbfs fmt)
\ No newline at end of file
diff --git a/CMakePresets.json b/CMakePresets.json
new file mode 100644
index 0000000..f147cef
--- /dev/null
+++ b/CMakePresets.json
@@ -0,0 +1,27 @@
+{
+  "version": 3,
+  "cmakeMinimumRequired": {
+    "major": 3,
+    "minor": 28,
+    "patch": 0
+  },
+  "configurePresets": [
+    {
+      "name": "default",
+      "generator": "Ninja Multi-Config",
+      "binaryDir": "${sourceDir}/build",
+      "cacheVariables": {
+        "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
+        "CMAKE_CXX_FLAGS": "-Wall -Wextra -Wattributes"
+      }
+    },
+    {
+      "name": "san",
+      "inherits": "default",
+      "binaryDir": "${sourceDir}/build-san",
+      "cacheVariables": {
+        "CMAKE_CXX_FLAGS": "-fsanitize=address"
+      }
+    }
+  ]
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index 3887682..d757b35 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+[![Review Assignment Due Date](https://classroom.github.com/assets/deadline-readme-button-22041afd0340ce965d47ae6ef1cefeee28c7c493a6346c4f15d667ab976d596c.svg)](https://classroom.github.com/a/urO9t9_z)
 # Lab 1: determining the achievable parallelism and implementing parallel algorithms.
 Steps:
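Note on the build files above: with the Ninja Multi-Config generator the configuration is chosen at build time, so a typical invocation is cmake --preset default followed by cmake --build build --config Release (or --preset san and cmake --build build-san for the AddressSanitizer build). Two assumptions are baked in: target_link_libraries(parbfs fmt) relies on a system-installed libfmt that the linker finds as -lfmt, and because the san preset redefines CMAKE_CXX_FLAGS, it replaces rather than appends to the warning flags inherited from default.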
diff --git a/bfs.cpp b/bfs.cpp
new file mode 100644
index 0000000..b852668
--- /dev/null
+++ b/bfs.cpp
@@ -0,0 +1,158 @@
+#include "digraph.hpp"
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <thread>
+
+static void reset_depths(const digraph& g [[maybe_unused]],
+                         std::span<int> depths,
+                         int start_vert) {
+    assert(std::ssize(depths) == g.num_verts());
+    std::fill_n(depths.begin(), start_vert, -1);
+    depths[start_vert] = 0;
+    std::fill(depths.begin() + start_vert + 1, depths.end(), -1);
+}
+
+void bfs(const digraph& g, std::span<int> depths) {
+    reset_depths(g, depths, 0);
+    std::queue<int> q;
+    q.push(0);
+    do {
+        int v = q.front();
+        q.pop();
+        for (int n: g.adj[v]) {
+            if (depths[n] == -1) {
+                depths[n] = depths[v] + 1;
+                q.push(n);
+            }
+        }
+    } while (!q.empty());
+}
+
+struct parbfs {
+    const digraph& g;
+    std::span<int> depths;
+
+    struct block {
+        constexpr static int max_size = 256;
+        int verts[max_size];
+    };
+
+    int load_depth(int vert) {
+        return __atomic_load_n(&depths[vert], __ATOMIC_SEQ_CST);
+    }
+
+    bool weak_cas_depth(int vert, int* expected, int desired) {
+        return __atomic_compare_exchange_n(
+            &depths[vert], expected, desired,
+            true, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+    }
+
+    struct {
+        std::mutex mutex;
+        std::condition_variable more;
+        std::queue<std::unique_ptr<block>> queue;
+        int idle = 0;
+        bool done = false;
+    } q;
+
+    std::vector<std::jthread> workers;
+
+    explicit parbfs(int n_threads, const digraph& g, std::span<int> depths):
+        g(g),
+        depths(depths),
+        workers(n_threads)
+    {
+        auto initial = std::make_unique<block>();
+        initial->verts[0] = 0;
+        initial->verts[1] = -1;
+        q.queue.push(std::move(initial));
+
+        for (int i = 0; i < n_threads; ++i) {
+            workers[i] = std::jthread(&parbfs::worker, this);
+        }
+    }
+
+    std::unique_ptr<block> pop_block() {
+        std::unique_lock lk(q.mutex);
+        if (++q.idle == std::ssize(workers) && q.queue.empty()) {
+            q.done = true;
+            q.more.notify_all();
+            return nullptr;
+        }
+        q.more.wait(lk, [&] { return !q.queue.empty() || q.done; });
+        if (q.done) {
+            assert(q.queue.empty());
+            return nullptr;
+        }
+        auto result = std::move(q.queue.front());
+        q.queue.pop();
+        --q.idle;
+        return result;
+    }
+
+    void push_block(std::unique_ptr<block> block) {
+        std::unique_lock lk(q.mutex);
+        q.queue.push(std::move(block));
+        q.more.notify_one();
+    }
+
+    void worker() {
+        int out_size = 0;
+        std::unique_ptr<block> out = nullptr;
+
+        auto push_out = [&] {
+            if (out) {
+                if (out_size != block::max_size) {
+                    out->verts[out_size] = -1;
+                }
+                push_block(std::move(out));
+                out_size = 0;
+            }
+        };
+
+        auto push_vert = [&](int vert) {
+            if (!out) {
+                assert(out_size == 0);
+                out = std::make_unique<block>();
+            }
+            assert(out_size < block::max_size);
+            out->verts[out_size++] = vert;
+            if (out_size == block::max_size) {
+                push_out();
+            }
+        };
+
+        auto process_vert = [&](int src) {
+            const int src_depth = load_depth(src);
+            const int new_depth = src_depth + 1;
+            for (int dst: g.adj[src]) {
+                int dst_depth = load_depth(dst);
+                if (dst_depth != -1 && dst_depth <= new_depth) {
+                    continue;
+                }
+                do {
+                    if (weak_cas_depth(dst, &dst_depth, new_depth)) {
+                        push_vert(dst);
+                        break;
+                    }
+                    assert(dst_depth != -1);
+                } while (dst_depth > new_depth);
+            }
+        };
+
+        while (auto in = pop_block()) {
+            for (int src: in->verts) {
+                if (src == -1) { break; }
+                process_vert(src);
+            }
+            push_out();
+        }
+    }
+};
+
+void parallel_bfs(int n_threads, const digraph& g, std::span<int> depths) {
+    reset_depths(g, depths, 0);
+    parbfs parbfs(n_threads, g, depths);
+}
\ No newline at end of file
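Note on bfs.cpp above: the per-vertex depth updates go through GCC's __atomic builtins applied to plain ints inside the span, which keeps the depth array an ordinary std::vector<int> in the caller. A portable C++20 formulation of the same two operations would use std::atomic_ref; this is only an illustrative sketch of the idea, not part of the patch:

```cpp
// Sketch: portable equivalents of parbfs::load_depth / parbfs::weak_cas_depth
// using std::atomic_ref (C++20). Assumes the same convention as bfs.cpp:
// depth == -1 means "not visited yet".
#include <atomic>
#include <span>

inline int load_depth(std::span<int> depths, int vert) {
    return std::atomic_ref<int>(depths[vert]).load(std::memory_order_seq_cst);
}

inline bool weak_cas_depth(std::span<int> depths, int vert,
                           int& expected, int desired) {
    return std::atomic_ref<int>(depths[vert])
        .compare_exchange_weak(expected, desired,
                               std::memory_order_seq_cst,
                               std::memory_order_seq_cst);
}
```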
diff --git a/digraph.hpp b/digraph.hpp
new file mode 100644
index 0000000..4ecd21c
--- /dev/null
+++ b/digraph.hpp
@@ -0,0 +1,31 @@
+#pragma once
+#include "svo.hpp"
+#include <algorithm>
+#include <cassert>
+#include <span>
+
+struct digraph {
+    std::vector<svo_vector<int>> adj;
+    int num_edges = 0;
+
+    explicit digraph(int verts): adj(verts) {}
+
+    int num_verts() const { return std::ssize(adj); }
+
+    bool maybe_add_edge(int from, int to) {
+        assert(from >= 0 && from < num_verts());
+        assert(to >= 0 && to < num_verts());
+        if (from != to) {
+            auto& v = adj[from];
+            if (std::find(v.begin(), v.end(), to) == v.end()) {
+                v.emplace_back(to);
+                ++num_edges;
+                return true;
+            }
+        }
+        return false;
+    }
+};
+
+void bfs(const digraph&, std::span<int> depths);
+void parallel_bfs(int n_threads, const digraph&, std::span<int> depths);
\ No newline at end of file
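Note on digraph.hpp above: a minimal driver showing how the pieces fit together (a sketch with hand-picked edges, compiled together with bfs.cpp and not with the real main.cpp; not part of the patch):

```cpp
// Sketch: tiny smoke test for digraph, bfs and parallel_bfs.
#include "digraph.hpp"
#include <cassert>
#include <vector>

int main() {
    digraph g(4);
    g.maybe_add_edge(0, 1);
    g.maybe_add_edge(0, 2);
    g.maybe_add_edge(1, 2);
    g.maybe_add_edge(2, 3);
    g.maybe_add_edge(0, 1);   // duplicate: rejected, num_edges stays at 4
    assert(g.num_edges == 4);

    std::vector<int> seq(g.num_verts()), par(g.num_verts());
    bfs(g, seq);              // expected depths from vertex 0: 0 1 1 2
    parallel_bfs(2, g, par);
    assert(seq == par);
}
```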
diff --git a/main.cpp b/main.cpp
index a4c0368..bbf0bb2 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,60 +1,146 @@
+#include "digraph.hpp"
+#include <fmt/chrono.h>
+#include <fmt/color.h>
 #include <chrono>
-#include <filesystem>
+#include <cstdint>
+#include <numeric>
+#include <random>
 #include <fstream>
-#include <iostream>
-#include <random>
-#include "Graph.h"
-#include "RandomGraphGenerator.h"
-
-static long long executeSerialBfsAndGetTime(Graph& g) {
-    auto start = std::chrono::steady_clock::now();
-    g.bfs(0);
-    auto end = std::chrono::steady_clock::now();
-    return std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
-}
+
+namespace {
+#if 1
+struct xorshift128 {
+    using result_type = uint64_t;
+    constexpr static result_type max() { return UINT64_MAX; }
+    constexpr static result_type min() { return 0; }
+
+    explicit xorshift128() = default;
+    explicit xorshift128(uint64_t s): a(s), b(s) {}
+    explicit xorshift128(uint64_t a, uint64_t b): a(a), b(b) {}
+
+    uint64_t a = 0xfe48ec23c5fb18e0;
+    uint64_t b = 0xac5f64acb55eda12;
+
+    result_type operator()() {
+        uint64_t x = a, y = b;
+        a = b;
+        x ^= x << 23;
+        b = x ^ y ^ (x >> 17) ^ (y >> 26);
+        return b + y;
+    }
+};
+using rng = xorshift128;
+#else
+#include <random>
+using rng = std::mt19937_64;
+#endif
+
+template <typename Rng>
+[[gnu::noinline]]
+digraph make_random_digraph(Rng& rng, int n_verts, int n_edges) {
+    assert(n_verts > 1);
+    assert(n_edges >= n_verts-1);
+    assert(n_edges <= long(n_verts) * (n_verts - 1));
+
+    digraph g(n_verts);
+
+    {
+        std::vector<int> perm(n_verts);
+        std::iota(perm.begin(), perm.end(), 0);
+        std::shuffle(perm.begin(), perm.end(), rng);
+        for (int i = 1; i < n_verts; ++i) {
+            g.maybe_add_edge(perm[i-1], perm[i]);
+        }
+        if (n_edges >= n_verts) {
+            g.maybe_add_edge(perm[n_verts-1], perm[0]);
+        }
+    }
+
+    std::uniform_int_distribution<int> dist(0, n_verts-1);
+
+#if 1
+    for (int from = 0; from < n_verts; ++from) {
+        int wantout = std::min(n_verts-1, n_edges / n_verts);
+        while (std::ssize(g.adj[from]) < wantout) {
+            g.maybe_add_edge(from, dist(rng));
+        }
+    }
+#endif
+
+    while (g.num_edges < n_edges) {
+        g.maybe_add_edge(dist(rng), dist(rng));
+    }
 
-static long long executeParallelBfsAndGetTime(Graph& g) {
-    auto start = std::chrono::steady_clock::now();
-    g.parallelBFS(0); // stub
-    auto end = std::chrono::steady_clock::now();
-    return std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
+    assert(g.num_edges == n_edges);
+    return g;
 }
 
+struct timer {
+    using clock = std::chrono::steady_clock;
+    clock::time_point started = clock::now();
+
+    using dmilliseconds = std::chrono::duration<double, std::milli>;
+
+    dmilliseconds measure() const {
+        return std::chrono::duration_cast<dmilliseconds>(clock::now() - started);
+    }
+};
+} // namespace
+
 int main() {
-    try {
-        std::vector<int> sizes = {10, 100, 1000, 10000, 10000, 50000, 100000, 1000000, 2000000, 20000000};
-        std::vector<int> connections = {50, 500, 5000, 50000, 100000, 1000000, 1000000, 10000000, 10000000, 50000000};
-
-        std::mt19937_64 r(42);
-
-        std::filesystem::create_directories("tmp");
-        std::ofstream fw("tmp/results.txt");
-        if (!fw) {
-            std::cerr << "Failed to open tmp/results.txt for writing\n";
-            return 1;
-        }
-
-        RandomGraphGenerator gen;
-
-        for (size_t i = 0; i < sizes.size(); ++i) {
-            std::cout << "--------------------------\n";
-            std::cout << "Generating graph of size " << sizes[i] << " ... wait\n";
-            Graph g = gen.generateGraph(r, sizes[i], connections[i]);
-            std::cout << "Generation completed!\nStarting bfs\n";
-            long long serialTime = executeSerialBfsAndGetTime(g);
-            long long parallelTime = executeParallelBfsAndGetTime(g);
-
-            fw << "Times for " << sizes[i] << " vertices and " << connections[i] << " connections: ";
-            fw << "\nSerial: " << serialTime;
-            fw << "\nParallel: " << parallelTime;
-            fw << "\n--------\n";
-            fw.flush();
-        }
-
-        std::cout << "Done. Results in tmp/results.txt\n";
-    } catch (const std::exception& ex) {
-        std::cerr << "Exception: " << ex.what() << "\n";
-        return 2;
-    }
-    return 0;
+    constexpr static struct { int v, e; } configs[] = {
+        { 10, 50 },
+        { 100, 500 },
+        { 1000, 5000 },
+        { 10'000, 50'000 },
+        { 50'000, 1'000'000 },
+        { 100'000, 1'000'000 },
+        { 250'000, 250'000 },
+        { 2'000'000, 10'000'000 },
+        { 20'000'000, 50'000'000 },
+        { 20'000'000, 100'000'000 },
+        { 20'000'000, 500'000'000 },
+    };
+
+    std::vector<int> depths_seq(20'000'000);
+    std::vector<int> depths_par(20'000'000);
+    constexpr int n_threads = 4;
+
+    std::ofstream csv("out.csv");
+    csv << "v,e,buildtime,seqtime,partime,threads\n";
+
+    for (auto [v, e]: configs) {
+        rng rng;
+        timer build_timer;
+        digraph g = make_random_digraph(rng, v, e);
+        auto build_time = build_timer.measure();
+
+        auto seq_span = std::span(depths_seq).subspan(0, v);
+        auto par_span = std::span(depths_par).subspan(0, v);
+
+        timer seq_timer;
+        bfs(g, seq_span);
+        auto seq_time = seq_timer.measure();
+
+        timer par_timer;
+        parallel_bfs(n_threads, g, par_span);
+        auto par_time = par_timer.measure();
+
+        bool equal = std::ranges::equal(seq_span, par_span);
+
+        constexpr auto green = fg(fmt::color::green);
+        constexpr auto red = fg(fmt::color::red);
+        using namespace std::literals;
+
+        fmt::print(
+            "{}v / {}e\tseq bfs: {}\tpar bfs ({} threads): {}.\tresult {}\n",
+            v, e,
+            styled(seq_time, seq_time < par_time ? green : red),
+            n_threads,
+            styled(par_time, par_time < seq_time ? green : red),
+            equal ? styled("matches"sv, green) : styled("mismatch"sv, red));
+        csv << fmt::format("{},{},{},{},{},{}\n",
+                           v, e, build_time.count(), seq_time.count(), par_time.count(), n_threads);
+    }
 }
\ No newline at end of file
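Note on main.cpp above: xorshift128 is handed directly to std::shuffle and std::uniform_int_distribution, which is only valid because it models the standard uniform_random_bit_generator requirements (an unsigned result_type, constexpr min()/max() with min() < max(), and operator()). A compile-time check along these lines could sit next to the engine (illustrative only; it would have to live in the same translation unit, since the type sits in an anonymous namespace):

```cpp
// Sketch: assert that the hand-rolled engine satisfies the generator
// requirements std::shuffle and std::uniform_int_distribution rely on.
#include <random>

static_assert(std::uniform_random_bit_generator<xorshift128>);
static_assert(xorshift128::min() < xorshift128::max());
```

Also note that each config constructs rng with the default seed, so the benchmark is reproducible but every graph of a given size is built from the same random stream.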
styled("matches"sv, green) : styled("mismatch"sv, red)); + csv << fmt::format("{},{},{},{},{},{}\n", + v, e, build_time.count(), seq_time.count(), par_time.count(), n_threads); + } } \ No newline at end of file diff --git a/Graph.cpp b/old/Graph.cpp similarity index 100% rename from Graph.cpp rename to old/Graph.cpp diff --git a/Graph.h b/old/Graph.h similarity index 100% rename from Graph.h rename to old/Graph.h diff --git a/RandomGraphGenerator.cpp b/old/RandomGraphGenerator.cpp similarity index 100% rename from RandomGraphGenerator.cpp rename to old/RandomGraphGenerator.cpp diff --git a/RandomGraphGenerator.h b/old/RandomGraphGenerator.h similarity index 100% rename from RandomGraphGenerator.h rename to old/RandomGraphGenerator.h diff --git a/old/main.cpp b/old/main.cpp new file mode 100644 index 0000000..033b329 --- /dev/null +++ b/old/main.cpp @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include +#include "Graph.h" +#include "RandomGraphGenerator.h" + +static long long executeSerialBfsAndGetTime(Graph& g) { + auto start = std::chrono::steady_clock::now(); + g.bfs(0); + auto end = std::chrono::steady_clock::now(); + return std::chrono::duration_cast(end - start).count(); +} + +static long long executeParallelBfsAndGetTime(Graph& g) { + auto start = std::chrono::steady_clock::now(); + g.parallelBFS(0); // заглушка + auto end = std::chrono::steady_clock::now(); + return std::chrono::duration_cast(end - start).count(); +} + +int main() { + try { + std::vector sizes = {10, 100, 1000, 10000, 10000, 50000, 100000, 1000000, 2000000, 20000000}; + std::vector connections = {50, 500, 5000, 50000, 100000, 1000000, 1000000, 10000000, 10000000, 50000000}; + + std::mt19937_64 r(42); + + std::filesystem::create_directories("tmp"); + std::ofstream fw("tmp/results.txt"); + if (!fw) { + std::cerr << "Failed to open tmp/results.txt for writing\n"; + return 1; + } + + RandomGraphGenerator gen; + + for (size_t i = 0; i < sizes.size(); ++i) { + std::cout << "--------------------------\n"; + std::cout << "Generating graph of size " << sizes[i] << " ... wait\n"; + auto started = std::chrono::steady_clock::now(); + Graph g = gen.generateGraph(r, sizes[i], connections[i]); + auto done = std::chrono::steady_clock::now(); + auto elapsed = std::chrono::duration_cast(done - started); + std::cout << "Generation completed - " << elapsed.count() << "ms\nStarting bfs\n"; + long long serialTime = executeSerialBfsAndGetTime(g); + long long parallelTime = executeParallelBfsAndGetTime(g); + + fw << "Times for " << sizes[i] << " vertices and " << connections[i] << " connections: "; + fw << "\nSerial: " << serialTime; + fw << "\nParallel: " << parallelTime; + fw << "\n--------\n"; + fw.flush(); + } + + std::cout << "Done. 
diff --git a/result.ipynb b/result.ipynb
new file mode 100644
index 0000000..ecbb5e7
--- /dev/null
+++ b/result.ipynb
@@ -0,0 +1,52 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9d7ced3e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import plotly.graph_objects as go\n",
+    "import plotly.express as px\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "380f54e7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = pd.read_csv('out.csv')\n",
+    "data['ve'] = data['e']\n",
+    "data['ratio'] = data['seqtime'] / data['partime']\n",
+    "display(data)\n",
+    "px.line(data, x='ve', y=['seqtime', 'partime'])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/svo.hpp b/svo.hpp
new file mode 100644
index 0000000..41827ae
--- /dev/null
+++ b/svo.hpp
@@ -0,0 +1,99 @@
+#pragma once
+#include <algorithm>
+#include <cstddef>
+#include <iterator>
+#include <memory>
+#include <vector>
+
+// Non-feature-complete small vector. It is more compact than the one in boost.
+// It stores inline as many elements as will fit within the size
+// of std::vector, minus four bytes for the size/flag header.
+template <typename T>
+class svo_vector {
+    using large_vector = std::vector<T>;
+    constexpr static size_t max_small
+        = std::max(size_t(1), (sizeof(large_vector) - 4) / sizeof(T));
+
+    union repr {
+        struct {
+            // is_small is the LSB of the data pointer of the std::vector stored in large_storage.
+            // It will always be 0 when std::vector is live, due to allocator alignment.
+            // So in the small case, we set it to 1.
+            bool is_small: 1;
+            unsigned size: 31;
+            union {
+                alignas(T) char storage[sizeof(T) * max_small];
+                T elems[max_small];
+            };
+        } small;
+        alignas(large_vector) char large_storage[sizeof(large_vector)];
+    } repr;
+    static_assert(sizeof(repr) == sizeof(large_vector));
+
+    bool is_small() const {
+        return repr.small.is_small;
+    }
+
+    T* small_storage() {
+        return reinterpret_cast<T*>(repr.small.storage);
+    }
+
+    const T* small_storage() const {
+        return reinterpret_cast<const T*>(repr.small.storage);
+    }
+
+    large_vector& large() {
+        return reinterpret_cast<large_vector&>(repr.large_storage);
+    }
+
+    const large_vector& large() const {
+        return reinterpret_cast<const large_vector&>(repr.large_storage);
+    }
+
+public:
+    using value_type = T;
+    using reference = T&;
+    using iterator = T*;
+
+    svo_vector() {
+        repr.small.is_small = true;
+        repr.small.size = 0;
+    }
+
+    ~svo_vector() {
+        if (is_small()) {
+            std::destroy_n(small_storage(), repr.small.size);
+        } else {
+            large().~vector();
+        }
+    }
+
+    svo_vector(svo_vector&&) = delete;
+    svo_vector(const svo_vector&) = delete;
+    svo_vector& operator=(svo_vector&&) = delete;
+    svo_vector& operator=(const svo_vector&) = delete;
+
+    size_t size() const {
+        return is_small() ? repr.small.size : large().size();
+    }
+
+    template <typename... Args>
+    reference emplace_back(Args&&... args) {
+        if (!is_small()) {
+            return large().emplace_back(std::forward<Args>(args)...);
+        }
+        if (repr.small.size < max_small) {
+            return *new(small_storage() + repr.small.size++)
+                value_type(std::forward<Args>(args)...);
+        }
+        large_vector tmp;
+        tmp.reserve(max_small + 1);
+        std::move(small_storage(),
+                  small_storage() + repr.small.size,
+                  std::back_inserter(tmp));
+        tmp.emplace_back(std::forward<Args>(args)...);
+        std::destroy_n(small_storage(), repr.small.size);
+        new(repr.large_storage) large_vector(std::move(tmp));
+        return large().back();
+    }
+
+    auto begin(this auto& v) { return v.is_small() ? v.small_storage() : v.large().data(); }
+    auto end(this auto& v) { return v.begin() + v.size(); }
+};
\ No newline at end of file
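Note on svo.hpp above: the point of the layout is that an adjacency list costs exactly one std::vector's worth of bytes whether it is inline or heap-backed. A small usage sketch (illustrative only; the inline capacity of 5 ints assumes the usual 24-byte std::vector of a 64-bit libstdc++/libc++ build):

```cpp
// Sketch: footprint and spill behaviour of svo_vector under the assumed layout.
#include "svo.hpp"
#include <vector>

static_assert(sizeof(svo_vector<int>) == sizeof(std::vector<int>));

int spill_demo() {
    svo_vector<int> v;
    for (int i = 0; i < 5; ++i) v.emplace_back(i);  // stays in the inline buffer
    v.emplace_back(5);                              // sixth element spills into a heap-backed std::vector
    int sum = 0;
    for (int x : v) sum += x;                       // begin()/end() work in either mode
    return sum;                                     // 0+1+...+5 = 15
}
```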
diff --git a/tmp/results.txt b/tmp/results.txt
deleted file mode 100644
index e3145c0..0000000
--- a/tmp/results.txt
+++ /dev/null
@@ -1,40 +0,0 @@
-Times for 10 vertices and 50 connections: 
-Serial: 0
-Parallel: 0
---------
-Times for 100 vertices and 500 connections: 
-Serial: 0
-Parallel: 0
---------
-Times for 1000 vertices and 5000 connections: 
-Serial: 0
-Parallel: 0
---------
-Times for 10000 vertices and 50000 connections: 
-Serial: 0
-Parallel: 0
---------
-Times for 10000 vertices and 100000 connections: 
-Serial: 0
-Parallel: 0
---------
-Times for 50000 vertices and 1000000 connections: 
-Serial: 2
-Parallel: 0
---------
-Times for 100000 vertices and 1000000 connections: 
-Serial: 3
-Parallel: 0
---------
-Times for 1000000 vertices and 10000000 connections: 
-Serial: 90
-Parallel: 0
---------
-Times for 2000000 vertices and 10000000 connections: 
-Serial: 153
-Parallel: 0
---------
-Times for 20000000 vertices and 50000000 connections: 
-Serial: 1261
-Parallel: 0
---------