Skip to content

Commit

Permalink
12ms -> 7ms. Enable full optimisation mode in gcc
Browse files Browse the repository at this point in the history
  • Loading branch information
Jumbub committed Oct 3, 2021
1 parent 4dfdf82 commit 21761f9
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 45 deletions.
6 changes: 3 additions & 3 deletions makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
CC = g++

COMPILER_FLAGS = -Wall -std=c++2a -lpthread
COMPILER_FLAGS_PROFILE = $(COMPILER_FLAGS) -pg
COMPILER_FLAGS_DEBUG = $(COMPILER_FLAGS) -ggdb
COMPILER_FLAGS = -Wall -W -pedantic -Werror -std=c++2a -lpthread -Ofast
COMPILER_FLAGS_PROFILE = $(COMPILER_FLAGS) -O0 -pg
COMPILER_FLAGS_DEBUG = $(COMPILER_FLAGS) -O0 -ggdb

LINKER_FLAGS_BENCHMARK = -isystem benchmark/include -Lbenchmark/build/src -lbenchmark
LINKER_FLAGS_GRAPHICS = -lSDL2
Expand Down
4 changes: 2 additions & 2 deletions results/benchmark.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
--------------------------------------------------------------------------------
Benchmark Time CPU Iterations
--------------------------------------------------------------------------------
BM_NextBoard/process_time/real_time 12.2 ms 45.2 ms 54
BM_RenderBoard/process_time/real_time 7.13 ms 7.04 ms 90
BM_NextBoard/process_time/real_time 7.08 ms 24.7 ms 94
BM_RenderBoard/process_time/real_time 7.13 ms 7.11 ms 88
4 changes: 0 additions & 4 deletions src/board/generate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ Board randomBoard(int width, int height) {
* - bottom: breeder
*/
Board benchmarkBoard(int width, int height) {
if (height < BREEDER_HEIGHT * 2)
throw std::underflow_error(
"Did not meet minimum height required for the benchmark board");

srand(0);
auto board = new Cell[width * height];
for (int y = 0; y < height; ++y) {
Expand Down
48 changes: 24 additions & 24 deletions src/board/next.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,20 @@
#include <tuple>
#include <vector>

// Number of worker threads used to compute the next board.
// std::thread::hardware_concurrency() may return 0 when the value cannot be
// determined, so the initial value is bounded below by 1.
// NOTE(review): this is a plain int with no synchronisation. If setThreads()
// can run while another thread calls getThreads(), that is a data race --
// confirm against the callers and consider std::atomic<int>.
int THREAD_COUNT =
    static_cast<int>(std::max(std::thread::hardware_concurrency(), 1u));

// Returns the currently configured worker thread count.
int getThreads() { return THREAD_COUNT; }

// Sets the worker thread count; values below 1 are clamped to 1.
void setThreads(int n) { THREAD_COUNT = std::max(n, 1); }

void nextBoardSection(const int startY, const int endY, const Board &board,
Cell *output) {
Cell *output) {
const auto &[input, width, height] = board;

int neighbours[3];
int yAboveBase;
int yBelowBase;
int yBase;
int neighbours[3] = {0,0,0};
int yAboveBase = 0;
int yBelowBase = 0;
int yBase = 0;

const auto endI = endY * width;
for (int i = startY * width; i < endI; i++) {
Expand Down Expand Up @@ -70,36 +76,30 @@ void nextBoardSection(const int startY, const int endY, const Board &board,
}
}

// Number of worker threads used to compute the next board.
// std::thread::hardware_concurrency() may return 0 when the value cannot be
// determined, so the initial value is bounded below by 1.
// NOTE(review): this is a plain int with no synchronisation. If setThreads()
// can run while another thread calls getThreads(), that is a data race --
// confirm against the callers and consider std::atomic<int>.
int THREAD_COUNT =
    static_cast<int>(std::max(std::thread::hardware_concurrency(), 1u));

// Returns the currently configured worker thread count.
int getThreads() { return THREAD_COUNT; }

// Sets the worker thread count; values below 1 are clamped to 1.
void setThreads(int n) { THREAD_COUNT = std::max(n, 1); }

Board nextBoard(const Board &board) {
const auto &[input, width, height] = board;
auto output = new Cell[width * height];

auto threads = std::min(getThreads(), height);
auto split = height / threads;
auto remainder = height % threads;
auto totalThreads = std::min(getThreads(), height);
auto threadLines = height / totalThreads;
auto threadLinesRemaining = height % totalThreads;

std::thread nextBoardSegments[threads];
for (int thread = 0; thread < threads; thread++) {
std::vector<std::thread> threads;
for (int t = 0; t < totalThreads; t++) {
// Compute start and end indexes for threads
const auto startY = thread * split;
auto endY = (thread + 1) * split;
const auto startY = t * threadLines;
auto endY = (t + 1) * threadLines;

// In the case of an uneven divide, the last thread gets the left-overs
if (thread == threads - 1)
endY += remainder;
if (t == totalThreads - 1)
endY += threadLinesRemaining;

nextBoardSegments[thread] =
std::thread(&nextBoardSection, startY, endY, board, output);
threads.push_back(
std::thread(&nextBoardSection, startY, endY, board, output));
}

for (int i = 0; i < threads; i++) {
nextBoardSegments[i].join();
for (auto &thread : threads) {
thread.join();
}

free(input);
Expand Down
32 changes: 20 additions & 12 deletions src/entrypoints/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,11 @@ int main() {
SDL_Event event;

// Create window
SDL_Renderer *renderer;
SDL_Window *window;
SDL_CreateWindowAndRenderer(2560, 1440, SDL_WINDOW_RESIZABLE, &window,
&renderer);
SDL_Window *window = SDL_CreateWindow(
"Game of Speed", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 2560,
1440, SDL_WINDOW_RESIZABLE);
SDL_Renderer *renderer =
SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED);

// Window texture
int width, height;
Expand All @@ -40,8 +41,10 @@ int main() {
bool running = true;
bool recreateBoard = false;
while (running) {
/* auto loopTimer = startProfiling(); */
auto loopTimer = startProfiling();
auto sdlTimer = startProfiling();

// Start computing next board
#ifdef ENABLE_THREADING
std::promise<Board> nextBoardPromise;
auto nextBoardFuture = nextBoardPromise.get_future();
Expand All @@ -58,28 +61,33 @@ int main() {
running = false;
// Re-create board when Enter is pressed, or window is resized
else if ((event.type == SDL_KEYDOWN &&
event.key.keysym.scancode == SDL_SCANCODE_RETURN) || (event.type == SDL_WINDOWEVENT &&
event.window.event == SDL_WINDOWEVENT_RESIZED)) {
event.key.keysym.scancode == SDL_SCANCODE_RETURN) ||
(event.type == SDL_WINDOWEVENT &&
event.window.event == SDL_WINDOWEVENT_RESIZED)) {
recreateBoard = true;
} else if (event.type == SDL_KEYDOWN &&
event.key.keysym.scancode == SDL_SCANCODE_J) {
setThreads(getThreads()-1);
event.key.keysym.scancode == SDL_SCANCODE_J) {
setThreads(getThreads() - 1);
std::cout << "Setting thread count: " << getThreads() << std::endl;
} else if (event.type == SDL_KEYDOWN &&
event.key.keysym.scancode == SDL_SCANCODE_K) {
setThreads(getThreads()+1);
event.key.keysym.scancode == SDL_SCANCODE_K) {
setThreads(getThreads() + 1);
std::cout << "Setting thread count: " << getThreads() << std::endl;
}
}

renderBoardSdl(board, renderer, texture);
stopProfiling(sdlTimer, " sdl");

// Wait for the board computation thread to complete
auto joiningTimer = startProfiling();
#ifdef ENABLE_THREADING
nextBoardThread.join();
board = nextBoardFuture.get();
#else
board = nextBoard(board);
#endif
stopProfiling(joiningTimer, " nextBoard.join");

// Re-create board when computation is complete
if (recreateBoard) {
Expand All @@ -93,7 +101,7 @@ int main() {
std::cout << "Re-created board: " << width << "x" << height << std::endl;
}

/* stopProfiling(loopTimer, "Done loop"); */
stopProfiling(loopTimer, "main");
}

free(get<0>(board));
Expand Down

0 comments on commit 21761f9

Please sign in to comment.