From 4eecc272a30b9651b980c83c868ffe73c16e79fd Mon Sep 17 00:00:00 2001 From: Matthew Watson Date: Tue, 19 May 2015 10:24:12 +1000 Subject: [PATCH 1/3] New mandelbrot.rs --- src/mandelbrot.rs | 68 ++++++++++++++++ src/mandelbrot.rs.broken | 164 --------------------------------------- 2 files changed, 68 insertions(+), 164 deletions(-) create mode 100644 src/mandelbrot.rs delete mode 100644 src/mandelbrot.rs.broken diff --git a/src/mandelbrot.rs b/src/mandelbrot.rs new file mode 100644 index 0000000..8d0f2c2 --- /dev/null +++ b/src/mandelbrot.rs @@ -0,0 +1,68 @@ +// The Computer Language Benchmarks Game +// http://benchmarksgame.alioth.debian.org/ +// +// contributed by the Rust Project Developers +// contributed by TeXitoi +// contributed by Matt Watson +use std::io::Write; +use std::io; +use std::thread; +const THREADS: usize = 8; +const MAX_ITER: usize = 50; +const DX: f64 = -1.5; +const DY: f64 = -1.0; +pub fn mbrotpt(x: f64, y: f64) -> usize { + let mut z = (0.0, 0.0); + for _ in 0..MAX_ITER { + z = (z.0 * z.0 - z.1 * z.1 + x, + 2.0 * z.0 * z.1 + y); + if z.0 * z.0 + z.1 * z.1 >= 4.0 { + return 0; + } + } + return 1; +} + +fn mbrot8(x: usize, y: usize, inv: f64) -> u8 { + let mut result = 0 as usize; + let mut i = 0; + while i < 8 { + result = result << 1; + result = result | mbrotpt((x + i) as f64 * inv + DX, + y as f64 * inv + DY); + i += 1; + } + result as u8 +} + +fn main() { + let size = std::env::args_os().nth(1) + .and_then(|s| s.into_string().ok()) + .and_then(|n| n.parse().ok()) + .unwrap_or(200); + let inv = 2.0 / size as f64; + println!("P4"); + println!("{} {}",size, size); + let workers: Vec = (0..THREADS).collect();; + let handles: Vec<_> = workers.into_iter().map(|t| { + thread::spawn(move || { + let mut rows = vec![vec![0 as u8; 8 * size / 64]; size / THREADS]; + for z in 0..size / THREADS { + let mut row = vec![0; size / 8]; + for x in 0..size / 8 { + row[x] = mbrot8(x * 8,t * (size / THREADS) + z, inv); + } + rows[z] = row.to_vec(); + } + rows + }) + }).collect(); + + for h in handles { + let rows = h.join().unwrap(); + for i in 0..size / THREADS { + std::io::stdout().write(&rows[i]).ok().expect("Could not write to stdout"); + } + } + io::stdout().flush().ok().expect("Could not flush stdout"); +} diff --git a/src/mandelbrot.rs.broken b/src/mandelbrot.rs.broken deleted file mode 100644 index b639ff8..0000000 --- a/src/mandelbrot.rs.broken +++ /dev/null @@ -1,164 +0,0 @@ -// The Computer Language Benchmarks Game -// http://benchmarksgame.alioth.debian.org/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi - -#![feature(core)] - -use std::io::Write; -use std::simd::f64x2; -use std::thread::scoped; - -const ITER: i32 = 50; -const LIMIT: f64 = 2.0; -const WORKERS: usize = 16; - -#[inline(always)] -fn mandelbrot(w: usize, mut out: W) -> std::io::Result<()> { - assert!(WORKERS % 2 == 0); - - // Ensure w and h are multiples of 8. - let w = (w + 7) / 8 * 8; - let h = w; - - let chunk_size = h / WORKERS; - - // Account for remainders in workload division, e.g. 1000 / 16 = 62.5 - let last_chunk_size = if h % WORKERS != 0 { - chunk_size + h % WORKERS - } else { - chunk_size - }; - - // precalc values - let inverse_w_doubled = 2.0 / w as f64; - let inverse_h_doubled = 2.0 / h as f64; - let v_inverses = f64x2(inverse_w_doubled, inverse_h_doubled); - let v_consts = f64x2(1.5, 1.0); - - // A lot of this code assumes this (so do other lang benchmarks) - assert!(w == h); - let mut precalc_r = Vec::with_capacity(w); - let mut precalc_i = Vec::with_capacity(h); - - let precalc_futures = (0..WORKERS).map(|i| { - scoped(move|| { - let mut rs = Vec::with_capacity(w / WORKERS); - let mut is = Vec::with_capacity(w / WORKERS); - - let start = i * chunk_size; - let end = if i == (WORKERS - 1) { - start + last_chunk_size - } else { - (i + 1) * chunk_size - }; - - // This assumes w == h - for x in start..end { - let xf = x as f64; - let xy = f64x2(xf, xf); - - let f64x2(r, i) = xy * v_inverses - v_consts; - rs.push(r); - is.push(i); - } - - (rs, is) - }) - }).collect::>(); - - for res in precalc_futures.into_iter() { - let (rs, is) = res.join(); - precalc_r.extend(rs.into_iter()); - precalc_i.extend(is.into_iter()); - } - - assert_eq!(precalc_r.len(), w); - assert_eq!(precalc_i.len(), h); - - let vec_init_r = &precalc_r; - let vec_init_i = &precalc_i; - - let data = (0..WORKERS).map(|i| { - scoped(move|| { - let mut res: Vec = Vec::with_capacity((chunk_size * w) / 8); - - let start = i * chunk_size; - let end = if i == (WORKERS - 1) { - start + last_chunk_size - } else { - (i + 1) * chunk_size - }; - - for &init_i in vec_init_i[start..end].iter() { - write_line(init_i, &vec_init_r, &mut res); - } - - res - }) - }).collect::>(); - - try!(writeln!(&mut out, "P4\n{} {}", w, h)); - for res in data.into_iter() { - try!(out.write_all(&res.join())); - } - out.flush() -} - -fn write_line(init_i: f64, vec_init_r: &[f64], res: &mut Vec) { - let v_init_i : f64x2 = f64x2(init_i, init_i); - let v_2 : f64x2 = f64x2(2.0, 2.0); - const LIMIT_SQUARED: f64 = LIMIT * LIMIT; - - for chunk_init_r in vec_init_r.chunks(8) { - let mut cur_byte = 0xff; - let mut i = 0; - - while i < 8 { - let v_init_r = f64x2(chunk_init_r[i], chunk_init_r[i + 1]); - let mut cur_r = v_init_r; - let mut cur_i = v_init_i; - let mut r_sq = v_init_r * v_init_r; - let mut i_sq = v_init_i * v_init_i; - - let mut b = 0; - for _ in 0..ITER { - let r = cur_r; - let i = cur_i; - - cur_i = v_2 * r * i + v_init_i; - cur_r = r_sq - i_sq + v_init_r; - - let f64x2(bit1, bit2) = r_sq + i_sq; - - if bit1 > LIMIT_SQUARED { - b |= 2; - if b == 3 { break; } - } - - if bit2 > LIMIT_SQUARED { - b |= 1; - if b == 3 { break; } - } - - r_sq = cur_r * cur_r; - i_sq = cur_i * cur_i; - } - - cur_byte = (cur_byte << 2) + b; - i += 2; - } - - res.push(cur_byte^-1); - } -} - -fn main() { - let n = std::env::args_os().nth(1) - .and_then(|s| s.into_string().ok()) - .and_then(|n| n.parse().ok()) - .unwrap_or(200); - let stdout = std::io::stdout(); - mandelbrot(n, stdout.lock()).unwrap(); -} From 10bf179835a62a59be792872964c947f1bebb2aa Mon Sep 17 00:00:00 2001 From: Matthew Watson Date: Wed, 20 May 2015 06:50:03 +1000 Subject: [PATCH 2/3] Another near-complete rewrite. Now with easy-to-vectorise code. --- src/mandelbrot.rs | 118 ++++++++++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 41 deletions(-) diff --git a/src/mandelbrot.rs b/src/mandelbrot.rs index 8d0f2c2..f9c4085 100644 --- a/src/mandelbrot.rs +++ b/src/mandelbrot.rs @@ -1,68 +1,104 @@ // The Computer Language Benchmarks Game // http://benchmarksgame.alioth.debian.org/ // -// contributed by the Rust Project Developers -// contributed by TeXitoi // contributed by Matt Watson +// contributed by TeXitoi + use std::io::Write; -use std::io; use std::thread; const THREADS: usize = 8; const MAX_ITER: usize = 50; -const DX: f64 = -1.5; -const DY: f64 = -1.0; -pub fn mbrotpt(x: f64, y: f64) -> usize { - let mut z = (0.0, 0.0); - for _ in 0..MAX_ITER { - z = (z.0 * z.0 - z.1 * z.1 + x, - 2.0 * z.0 * z.1 + y); - if z.0 * z.0 + z.1 * z.1 >= 4.0 { - return 0; - } - } - return 1; +const VLEN: usize = 8; +const ZEROS: Vecf64 = [0.0; VLEN]; + +pub type Vecf64 = [f64; VLEN]; + +fn mul2 (x: Vecf64, y: Vecf64) -> Vecf64 { + let mut res = ZEROS; + for i in 0..VLEN {res[i] = x[i] * y[i];} + res +} +fn add2 (x: Vecf64, y: Vecf64) -> Vecf64 { + let mut res = ZEROS; + for i in 0..VLEN {res[i] = x[i] + y[i];} + res +} +fn sub2 (x: Vecf64, y: Vecf64) -> Vecf64 { + let mut res = ZEROS; + for i in 0..VLEN { res[i] = x[i] - y[i]; } + res } -fn mbrot8(x: usize, y: usize, inv: f64) -> u8 { - let mut result = 0 as usize; - let mut i = 0; - while i < 8 { - result = result << 1; - result = result | mbrotpt((x + i) as f64 * inv + DX, - y as f64 * inv + DY); - i += 1; +pub fn mbrot8(cr: Vecf64, ci: Vecf64) -> u8 { + let mut zr = cr; + let mut zi = ci; + let mut esc_bits = 0; + for _ in 0..MAX_ITER { + // Find Re(z)^2 and Im(z)^2 + let rr = mul2(zr,zr); + let ii = mul2(zi,zi); + // Check if we escape + // May as well store this info in + // same byte as output + let mag = add2(rr, ii); + for i in 0..VLEN { + if mag[i] > 4.0 {esc_bits |= 128 >> i} + } + // If no more work, break early + if esc_bits == 0xff {break} + // Find Im(z^2) + let ir = mul2(zr, zi); + // Set Re(z^2) + zr = sub2(rr, ii); + // Set Im(z^2) + zi = add2(ir, ir); + // Add c + zr = add2(zr, cr); + zi = add2(zi, ci); } - result as u8 + !esc_bits } fn main() { + let size = std::env::args_os().nth(1) .and_then(|s| s.into_string().ok()) .and_then(|n| n.parse().ok()) .unwrap_or(200); let inv = 2.0 / size as f64; - println!("P4"); - println!("{} {}",size, size); - let workers: Vec = (0..THREADS).collect();; - let handles: Vec<_> = workers.into_iter().map(|t| { + let mut xvals = vec![0.0; size]; + let mut yvals = vec![0.0; size]; + for i in 0..size { + xvals[i] = i as f64 * inv - 1.5; + yvals[i] = i as f64 * inv - 1.0; + } + let xloc = &xvals; + let yloc = &yvals; + + let handles: Vec<_> = (0..THREADS).map(|e| { + let xloc = xloc.to_vec(); + let yloc = yloc.to_vec(); thread::spawn(move || { - let mut rows = vec![vec![0 as u8; 8 * size / 64]; size / THREADS]; - for z in 0..size / THREADS { - let mut row = vec![0; size / 8]; - for x in 0..size / 8 { - row[x] = mbrot8(x * 8,t * (size / THREADS) + z, inv); + let mut rows = vec![vec![0 as u8; size / 8]; size / THREADS]; + for y in 0..size / THREADS { + for x in 0..size / 8 { + let mut cr = ZEROS; + let ci = [yloc[y + e * size / THREADS]; VLEN]; + for i in 0..VLEN { + cr[i] = xloc[8 * x + i]; } - rows[z] = row.to_vec(); + rows[y][x] = mbrot8(cr, ci); } - rows + } + rows }) }).collect(); - for h in handles { - let rows = h.join().unwrap(); - for i in 0..size / THREADS { - std::io::stdout().write(&rows[i]).ok().expect("Could not write to stdout"); - } + println!("P4\n{} {}", size, size); + let stdout_unlocked = std::io::stdout(); + let mut stdout = stdout_unlocked.lock(); + for row in handles.into_iter().flat_map(|h| h.join().unwrap().into_iter()) { + stdout.write_all(&row).unwrap(); } - io::stdout().flush().ok().expect("Could not flush stdout"); + stdout.flush().unwrap(); } From 01bdd341f037999125ad567ab35a7bd2e9c16ae5 Mon Sep 17 00:00:00 2001 From: Matthew Watson Date: Wed, 20 May 2015 07:11:59 +1000 Subject: [PATCH 3/3] Thread count was too low. --- src/mandelbrot.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mandelbrot.rs b/src/mandelbrot.rs index f9c4085..37e944c 100644 --- a/src/mandelbrot.rs +++ b/src/mandelbrot.rs @@ -6,7 +6,7 @@ use std::io::Write; use std::thread; -const THREADS: usize = 8; +const THREADS: usize = 20; const MAX_ITER: usize = 50; const VLEN: usize = 8; const ZEROS: Vecf64 = [0.0; VLEN];