From 5c55728cbc13c699435d357b8bc3c75eeb435176 Mon Sep 17 00:00:00 2001 From: "Christopher H. Jordan" Date: Wed, 9 Nov 2022 07:51:54 +0800 Subject: [PATCH] Allow benchmarks to run by making relevant types public. --- CHANGELOG.md | 1 + Cargo.lock | 8 +- benches/bench.rs | 1100 +++++++++++-------------- src/averaging/mod.rs | 18 +- src/beam/error.rs | 2 +- src/beam/fee.rs | 2 +- src/beam/mod.rs | 10 +- src/di_calibrate/mod.rs | 6 +- src/lib.rs | 10 +- src/model/mod.rs | 6 +- src/srclist/mod.rs | 2 +- src/srclist/types/components/mod.rs | 20 +- src/srclist/types/flux_density/mod.rs | 14 +- src/srclist/types/mod.rs | 8 +- src/srclist/types/source/mod.rs | 4 +- src/srclist/types/source_list/mod.rs | 2 +- 16 files changed, 534 insertions(+), 679 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d5c4dbc..03255023 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ Versioning](https://semver.org/spec/v2.0.0.html). this report. - If all sources are vetoed from a source list, then the program exits with this report. +- More CUDA benchmarks for the modelling code. ## [0.2.1] - 2022-10-20 ### Added diff --git a/Cargo.lock b/Cargo.lock index eb2b1c26..7a8ef69b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1262,9 +1262,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.132" +version = "0.2.144" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5" +checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" [[package]] name = "libgit2-sys" @@ -1578,9 +1578,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.13.1" +version = "1.17.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "074864da206b4973b84eb91683020dbefd6a8c3f0f38e054d93954e891935e4e" +checksum = "9670a07f94779e00908f3e686eab508878ebb390ba6e604d3a284c00e8d0487b" [[package]] name = "oorandom" diff --git a/benches/bench.rs b/benches/bench.rs index adb4b9fe..b2aa4495 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -2,638 +2,492 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. +use std::collections::HashSet; + use criterion::*; +use hifitime::{Duration, Epoch}; +use marlu::{ + constants::{MWA_LAT_RAD, MWA_LONG_RAD}, + Jones, RADec, XyzGeodetic, +}; +use ndarray::prelude::*; +use std::ops::Deref; +use vec1::{vec1, Vec1}; + +use mwa_hyperdrive::{ + averaging::{Chanblock, Timeblock}, + beam::{create_fee_beam_object, Delays}, + di_calibrate::calibrate_timeblocks, + model, + srclist::{ + get_instrumental_flux_densities, ComponentType, FluxDensity, FluxDensityType, + ShapeletCoeff, Source, SourceComponent, SourceList, + }, +}; fn model_benchmarks(c: &mut Criterion) { - // These "nothing" benchmarks should be commented out when real benchmarking - // is desired. - c.bench_function("nothing", |b| b.iter(marlu::Jones::::identity)); - - // use hifitime::{Duration, Epoch}; - // use marlu::{ - // constants::{MWA_LAT_RAD, MWA_LONG_RAD}, - // Jones, RADec, XyzGeodetic, - // }; - // use mwa_hyperdrive::{ - // beam::{create_fee_beam_object, Delays}, - // model, - // srclist::{ - // ComponentType, FluxDensity, FluxDensityType, ShapeletCoeff, Source, SourceComponent, - // SourceList, - // }, - // }; - // use ndarray::prelude::*; - // use std::ops::Deref; - // use vec1::vec1; - - // let num_tiles = 128; - // let num_bls = (num_tiles * (num_tiles - 1)) / 2; - // let phase_centre = RADec::new_degrees(0.0, -27.0); - // let dut1 = Duration::from_total_nanoseconds(0); - // let apply_precession = true; - // let timestamp = Epoch::from_gpst_seconds(1065880128.0); - // let xyzs = vec![XyzGeodetic::default(); num_tiles]; - // let flagged_tiles = []; - // let beam_file: Option<&str> = None; // Assume the env. variable is set. - // let beam = - // create_fee_beam_object(beam_file, num_tiles, Delays::Partial(vec![0; 16]), None).unwrap(); - - // let mut points = c.benchmark_group("model points"); - // points.bench_function("100 with CPU, 128 tiles, 2 channels", |b| { - // let num_points = 100; - // let num_chans = 2; - // let freqs = Array1::linspace(150e6, 200e6, num_chans as usize).to_vec(); - // let mut vis = Array2::from_elem((num_bls, num_chans), Jones::default()); - // let mut source_list = SourceList::new(); - // for i in 0..num_points { - // source_list.insert( - // format!("source{i}"), - // Source { - // components: vec1![SourceComponent { - // radec: RADec::new_degrees(0.0, -27.0), - // comp_type: ComponentType::Point, - // flux_type: FluxDensityType::PowerLaw { - // si: -0.7, - // fd: FluxDensity { - // freq: 150e6, - // i: 1.0, - // q: 0.0, - // u: 0.0, - // v: 0.0, - // }, - // }, - // }], - // }, - // ); - // } - // let modeller = model::new_cpu_sky_modeller( - // beam.deref(), - // &source_list, - // &xyzs, - // &freqs, - // &flagged_tiles, - // phase_centre, - // MWA_LONG_RAD, - // MWA_LAT_RAD, - // dut1, - // apply_precession, - // ); - - // b.iter(|| { - // modeller.model_points(vis.view_mut(), timestamp).unwrap(); - // }) - // }); - - // #[cfg(feature = "cuda")] - // points.bench_function("100 with GPU, 128 tiles, 2 channels", |b| { - // let num_points = 100; - // let num_chans = 2; - // let freqs = Array1::linspace(150e6, 200e6, num_chans as usize).to_vec(); - // let mut vis = Array2::from_elem((num_bls, num_chans), Jones::default()); - // let mut source_list = SourceList::new(); - // for i in 0..num_points { - // source_list.insert( - // format!("source{i}"), - // Source { - // components: vec1![SourceComponent { - // radec: RADec::new_degrees(0.0, -27.0), - // comp_type: ComponentType::Point, - // flux_type: FluxDensityType::PowerLaw { - // si: -0.7, - // fd: FluxDensity { - // freq: 150e6, - // i: 1.0, - // q: 0.0, - // u: 0.0, - // v: 0.0, - // }, - // }, - // }], - // }, - // ); - // } - // let modeller = unsafe { - // model::new_cuda_sky_modeller( - // beam.deref(), - // &source_list, - // &xyzs, - // &freqs, - // &flagged_tiles, - // phase_centre, - // MWA_LONG_RAD, - // MWA_LAT_RAD, - // dut1, - // apply_precession, - // ) - // .unwrap() - // }; - - // b.iter(|| modeller.model_points(vis.view_mut(), timestamp).unwrap()); - // }); - - // #[cfg(feature = "cuda")] - // points.bench_function("1000 with GPU, 128 tiles, 20 channels", |b| { - // let num_points = 1000; - // let num_chans = 20; - // let freqs = Array1::linspace(150e6, 200e6, num_chans as usize).to_vec(); - // let mut vis = Array2::from_elem((num_bls, num_chans), Jones::default()); - // let mut source_list = SourceList::new(); - // for i in 0..num_points { - // source_list.insert( - // format!("source{i}"), - // Source { - // components: vec1![SourceComponent { - // radec: RADec::new_degrees(0.0, -27.0), - // comp_type: ComponentType::Point, - // flux_type: FluxDensityType::PowerLaw { - // si: -0.7, - // fd: FluxDensity { - // freq: 150e6, - // i: 1.0, - // q: 0.0, - // u: 0.0, - // v: 0.0, - // }, - // }, - // }], - // }, - // ); - // } - // let modeller = unsafe { - // model::new_cuda_sky_modeller( - // beam.deref(), - // &source_list, - // &xyzs, - // &freqs, - // &flagged_tiles, - // phase_centre, - // MWA_LONG_RAD, - // MWA_LAT_RAD, - // dut1, - // apply_precession, - // ) - // .unwrap() - // }; - - // b.iter(|| modeller.model_points(vis.view_mut(), timestamp).unwrap()); - // }); - // points.finish(); - - // let mut gaussians = c.benchmark_group("model gaussians"); - // gaussians.bench_function("100 with CPU, 128 tiles, 2 channels", |b| { - // let num_gaussians = 100; - // let num_chans = 2; - // let freqs = Array1::linspace(150e6, 200e6, num_chans as usize).to_vec(); - // let mut vis = Array2::from_elem((num_bls, num_chans), Jones::default()); - // let mut source_list = SourceList::new(); - // for i in 0..num_gaussians { - // source_list.insert( - // format!("source{i}"), - // Source { - // components: vec1![SourceComponent { - // radec: RADec::new_degrees(0.0, -27.0), - // comp_type: ComponentType::Gaussian { - // maj: 1.0, - // min: 0.5, - // pa: 0.0, - // }, - // flux_type: FluxDensityType::PowerLaw { - // si: -0.7, - // fd: FluxDensity { - // freq: 150e6, - // i: 1.0, - // q: 0.0, - // u: 0.0, - // v: 0.0, - // }, - // }, - // }], - // }, - // ); - // } - // let modeller = model::new_cpu_sky_modeller( - // beam.deref(), - // &source_list, - // &xyzs, - // &freqs, - // &flagged_tiles, - // phase_centre, - // MWA_LONG_RAD, - // MWA_LAT_RAD, - // dut1, - // apply_precession, - // ); - - // b.iter(|| { - // modeller.model_gaussians(vis.view_mut(), timestamp).unwrap(); - // }) - // }); - - // #[cfg(feature = "cuda")] - // gaussians.bench_function("100 with GPU, 128 tiles, 2 channels", |b| { - // let num_gaussians = 100; - // let num_chans = 2; - // let freqs = Array1::linspace(150e6, 200e6, num_chans as usize).to_vec(); - // let mut vis = Array2::from_elem((num_bls, num_chans), Jones::default()); - // let mut source_list = SourceList::new(); - // for i in 0..num_gaussians { - // source_list.insert( - // format!("source{i}"), - // Source { - // components: vec1![SourceComponent { - // radec: RADec::new_degrees(0.0, -27.0), - // comp_type: ComponentType::Gaussian { - // maj: 1.0, - // min: 0.5, - // pa: 0.0, - // }, - // flux_type: FluxDensityType::PowerLaw { - // si: -0.7, - // fd: FluxDensity { - // freq: 150e6, - // i: 1.0, - // q: 0.0, - // u: 0.0, - // v: 0.0, - // }, - // }, - // }], - // }, - // ); - // } - // let modeller = unsafe { - // model::new_cuda_sky_modeller( - // beam.deref(), - // &source_list, - // &xyzs, - // &freqs, - // &flagged_tiles, - // phase_centre, - // MWA_LONG_RAD, - // MWA_LAT_RAD, - // dut1, - // apply_precession, - // ) - // .unwrap() - // }; - - // b.iter(|| modeller.model_gaussians(vis.view_mut(), timestamp).unwrap()); - // }); - - // #[cfg(feature = "cuda")] - // gaussians.bench_function("1000 with GPU, 128 tiles, 20 channels", |b| { - // let num_gaussians = 1000; - // let num_chans = 20; - // let freqs = Array1::linspace(150e6, 200e6, num_chans as usize).to_vec(); - // let mut vis = Array2::from_elem((num_bls, num_chans), Jones::default()); - // let mut source_list = SourceList::new(); - // for i in 0..num_gaussians { - // source_list.insert( - // format!("source{i}"), - // Source { - // components: vec1![SourceComponent { - // radec: RADec::new_degrees(0.0, -27.0), - // comp_type: ComponentType::Gaussian { - // maj: 1.0, - // min: 0.5, - // pa: 0.0, - // }, - // flux_type: FluxDensityType::PowerLaw { - // si: -0.7, - // fd: FluxDensity { - // freq: 150e6, - // i: 1.0, - // q: 0.0, - // u: 0.0, - // v: 0.0, - // }, - // }, - // }], - // }, - // ); - // } - // let modeller = unsafe { - // model::new_cuda_sky_modeller( - // beam.deref(), - // &source_list, - // &xyzs, - // &freqs, - // &flagged_tiles, - // phase_centre, - // MWA_LONG_RAD, - // MWA_LAT_RAD, - // dut1, - // apply_precession, - // ) - // .unwrap() - // }; - - // b.iter(|| modeller.model_gaussians(vis.view_mut(), timestamp).unwrap()); - // }); - // gaussians.finish(); - - // let mut shapelets = c.benchmark_group("model shapelets"); - // shapelets.bench_function( - // "100 with CPU (10 coeffs each), 128 tiles, 2 channels", - // |b| { - // let num_shapelets = 100; - // let num_chans = 2; - // let freqs = Array1::linspace(150e6, 200e6, num_chans as usize).to_vec(); - // let mut vis = Array2::from_elem((num_bls, num_chans), Jones::default()); - // let mut source_list = SourceList::new(); - // for i in 0..num_shapelets { - // source_list.insert( - // format!("source{i}"), - // Source { - // components: vec1![SourceComponent { - // radec: RADec::new_degrees(0.0, -27.0), - // comp_type: ComponentType::Shapelet { - // maj: 1.0, - // min: 0.5, - // pa: 0.0, - // coeffs: vec![ - // ShapeletCoeff { - // n1: 0, - // n2: 1, - // value: 1.0, - // }; - // 10 - // ], - // }, - // flux_type: FluxDensityType::PowerLaw { - // si: -0.7, - // fd: FluxDensity { - // freq: 150e6, - // i: 1.0, - // q: 0.0, - // u: 0.0, - // v: 0.0, - // }, - // }, - // }], - // }, - // ); - // } - // let modeller = model::new_cpu_sky_modeller( - // beam.deref(), - // &source_list, - // &xyzs, - // &freqs, - // &flagged_tiles, - // phase_centre, - // MWA_LONG_RAD, - // MWA_LAT_RAD, - // dut1, - // apply_precession, - // ); - - // b.iter(|| { - // modeller.model_shapelets(vis.view_mut(), timestamp).unwrap(); - // }) - // }, - // ); - - // #[cfg(feature = "cuda")] - // shapelets.bench_function( - // "100 with GPU (10 coeffs each), 128 tiles, 2 channels", - // |b| { - // let num_shapelets = 100; - // let num_chans = 2; - // let freqs = Array1::linspace(150e6, 200e6, num_chans as usize).to_vec(); - // let mut vis = Array2::from_elem((num_bls, num_chans), Jones::default()); - // let mut source_list = SourceList::new(); - // for i in 0..num_shapelets { - // source_list.insert( - // format!("source{i}"), - // Source { - // components: vec1![SourceComponent { - // radec: RADec::new_degrees(0.0, -27.0), - // comp_type: ComponentType::Shapelet { - // maj: 1.0, - // min: 0.5, - // pa: 0.0, - // coeffs: vec![ - // ShapeletCoeff { - // n1: 0, - // n2: 1, - // value: 1.0, - // }; - // 10 - // ], - // }, - // flux_type: FluxDensityType::PowerLaw { - // si: -0.7, - // fd: FluxDensity { - // freq: 150e6, - // i: 1.0, - // q: 0.0, - // u: 0.0, - // v: 0.0, - // }, - // }, - // }], - // }, - // ); - // } - // let modeller = unsafe { - // model::new_cuda_sky_modeller( - // beam.deref(), - // &source_list, - // &xyzs, - // &freqs, - // &flagged_tiles, - // phase_centre, - // MWA_LONG_RAD, - // MWA_LAT_RAD, - // dut1, - // apply_precession, - // ) - // .unwrap() - // }; - - // b.iter(|| modeller.model_shapelets(vis.view_mut(), timestamp).unwrap()); - // }, - // ); - - // #[cfg(feature = "cuda")] - // shapelets.bench_function( - // "1000 with GPU (10 coeffs each), 128 tiles, 20 channels", - // |b| { - // let num_shapelets = 1000; - // let num_chans = 20; - // let freqs = Array1::linspace(150e6, 200e6, num_chans as usize).to_vec(); - // let mut vis = Array2::from_elem((num_bls, num_chans), Jones::default()); - // let mut source_list = SourceList::new(); - // for i in 0..num_shapelets { - // source_list.insert( - // format!("source{i}"), - // Source { - // components: vec1![SourceComponent { - // radec: RADec::new_degrees(0.0, -27.0), - // comp_type: ComponentType::Shapelet { - // maj: 1.0, - // min: 0.5, - // pa: 0.0, - // coeffs: vec![ - // ShapeletCoeff { - // n1: 0, - // n2: 1, - // value: 1.0, - // }; - // 10 - // ], - // }, - // flux_type: FluxDensityType::PowerLaw { - // si: -0.7, - // fd: FluxDensity { - // freq: 150e6, - // i: 1.0, - // q: 0.0, - // u: 0.0, - // v: 0.0, - // }, - // }, - // }], - // }, - // ); - // } - // let modeller = unsafe { - // model::new_cuda_sky_modeller( - // beam.deref(), - // &source_list, - // &xyzs, - // &freqs, - // &flagged_tiles, - // phase_centre, - // MWA_LONG_RAD, - // MWA_LAT_RAD, - // dut1, - // apply_precession, - // ) - // .unwrap() - // }; - - // b.iter(|| modeller.model_shapelets(vis.view_mut(), timestamp).unwrap()); - // }, - // ); - // shapelets.finish(); + let num_tiles = 128; + let num_bls = (num_tiles * (num_tiles - 1)) / 2; + let phase_centre = RADec::new_degrees(0.0, -27.0); + let dut1 = Duration::from_total_nanoseconds(0); + let apply_precession = true; + let timestamp = Epoch::from_gpst_seconds(1065880128.0); + let xyzs = vec![XyzGeodetic::default(); num_tiles]; + let flagged_tiles = HashSet::new(); + let beam_file: Option<&str> = None; // Assume the env. variable is set. + let beam = + create_fee_beam_object(beam_file, num_tiles, Delays::Partial(vec![0; 16]), None).unwrap(); + + let mut points = c.benchmark_group("model FEE points"); + points.bench_function("100 with CPU, 128 tiles, 2 channels", |b| { + let num_points = 100; + let num_chans = 2; + let freqs = Array1::linspace(150e6, 200e6, num_chans).to_vec(); + let mut vis = Array2::from_elem((num_bls, num_chans), Jones::default()); + let mut source_list = SourceList::default(); + for i in 0..num_points { + source_list.insert( + format!("source{i}"), + Source { + components: vec1![SourceComponent { + radec: RADec::new_degrees(0.0, -27.0), + comp_type: ComponentType::Point, + flux_type: FluxDensityType::PowerLaw { + si: -0.7, + fd: FluxDensity { + freq: 150e6, + i: 1.0, + q: 0.0, + u: 0.0, + v: 0.0, + }, + }, + }], + }, + ); + } + let modeller = model::new_cpu_sky_modeller( + beam.deref(), + &source_list, + &xyzs, + &freqs, + &flagged_tiles, + phase_centre, + MWA_LONG_RAD, + MWA_LAT_RAD, + dut1, + apply_precession, + ); + + b.iter(|| { + modeller.model_points(vis.view_mut(), timestamp).unwrap(); + }) + }); + + #[cfg(feature = "cuda")] + for (num_sources, num_chans) in [ + (1, 768), + (32, 768), + (64, 768), + (128, 768), + (256, 768), + (512, 768), + (1024, 768), + ] { + points.bench_function( + format!("{num_sources} with GPU, 128 tiles, {num_chans} channels"), + |b| { + let freqs = Array1::linspace(150e6, 200e6, num_chans).to_vec(); + let mut vis = Array2::from_elem((num_bls, num_chans), Jones::default()); + let mut source_list = SourceList::default(); + for i in 0..num_sources { + source_list.insert( + format!("source{i}"), + Source { + components: vec1![SourceComponent { + radec: RADec::new_degrees(0.0, -27.0), + comp_type: ComponentType::Point, + flux_type: FluxDensityType::PowerLaw { + si: -0.7, + fd: FluxDensity { + freq: 150e6, + i: 1.0, + q: 0.0, + u: 0.0, + v: 0.0, + }, + }, + }], + }, + ); + } + let modeller = unsafe { + model::new_cuda_sky_modeller( + beam.deref(), + &source_list, + &xyzs, + &freqs, + &flagged_tiles, + phase_centre, + MWA_LONG_RAD, + MWA_LAT_RAD, + dut1, + apply_precession, + ) + .unwrap() + }; + + b.iter(|| modeller.model_points(vis.view_mut(), timestamp).unwrap()); + }, + ); + } + points.finish(); + + let mut gaussians = c.benchmark_group("model FEE gaussians"); + gaussians.bench_function("100 with CPU, 128 tiles, 2 channels", |b| { + let num_gaussians = 100; + let num_chans = 2; + let freqs = Array1::linspace(150e6, 200e6, num_chans).to_vec(); + let mut vis = Array2::from_elem((num_bls, num_chans), Jones::default()); + let mut source_list = SourceList::default(); + for i in 0..num_gaussians { + source_list.insert( + format!("source{i}"), + Source { + components: vec1![SourceComponent { + radec: RADec::new_degrees(0.0, -27.0), + comp_type: ComponentType::Gaussian { + maj: 1.0, + min: 0.5, + pa: 0.0, + }, + flux_type: FluxDensityType::PowerLaw { + si: -0.7, + fd: FluxDensity { + freq: 150e6, + i: 1.0, + q: 0.0, + u: 0.0, + v: 0.0, + }, + }, + }], + }, + ); + } + let modeller = model::new_cpu_sky_modeller( + beam.deref(), + &source_list, + &xyzs, + &freqs, + &flagged_tiles, + phase_centre, + MWA_LONG_RAD, + MWA_LAT_RAD, + dut1, + apply_precession, + ); + + b.iter(|| { + modeller.model_gaussians(vis.view_mut(), timestamp).unwrap(); + }) + }); + + #[cfg(feature = "cuda")] + for (num_sources, num_chans) in [ + (1, 768), + (32, 768), + (64, 768), + (128, 768), + (256, 768), + (512, 768), + (1024, 768), + ] { + gaussians.bench_function( + format!("{num_sources} with GPU, 128 tiles, {num_chans} channels"), + |b| { + let freqs = Array1::linspace(150e6, 200e6, num_chans).to_vec(); + let mut vis = Array2::from_elem((num_bls, num_chans), Jones::default()); + let mut source_list = SourceList::default(); + for i in 0..num_sources { + source_list.insert( + format!("source{i}"), + Source { + components: vec1![SourceComponent { + radec: RADec::new_degrees(0.0, -27.0), + comp_type: ComponentType::Gaussian { + maj: 1.0, + min: 0.5, + pa: 0.0, + }, + flux_type: FluxDensityType::PowerLaw { + si: -0.7, + fd: FluxDensity { + freq: 150e6, + i: 1.0, + q: 0.0, + u: 0.0, + v: 0.0, + }, + }, + }], + }, + ); + } + let modeller = unsafe { + model::new_cuda_sky_modeller( + beam.deref(), + &source_list, + &xyzs, + &freqs, + &flagged_tiles, + phase_centre, + MWA_LONG_RAD, + MWA_LAT_RAD, + dut1, + apply_precession, + ) + .unwrap() + }; + + b.iter(|| modeller.model_gaussians(vis.view_mut(), timestamp).unwrap()); + }, + ); + } + gaussians.finish(); + + let mut shapelets = c.benchmark_group("model FEE shapelets"); + shapelets.bench_function( + "100 with CPU (10 coeffs each), 128 tiles, 2 channels", + |b| { + let num_shapelets = 100; + let num_chans = 2; + let freqs = Array1::linspace(150e6, 200e6, num_chans).to_vec(); + let mut vis = Array2::from_elem((num_bls, num_chans), Jones::default()); + let mut source_list = SourceList::default(); + for i in 0..num_shapelets { + source_list.insert( + format!("source{i}"), + Source { + components: vec1![SourceComponent { + radec: RADec::new_degrees(0.0, -27.0), + comp_type: ComponentType::Shapelet { + maj: 1.0, + min: 0.5, + pa: 0.0, + coeffs: vec![ + ShapeletCoeff { + n1: 0, + n2: 1, + value: 1.0, + }; + 10 + ], + }, + flux_type: FluxDensityType::PowerLaw { + si: -0.7, + fd: FluxDensity { + freq: 150e6, + i: 1.0, + q: 0.0, + u: 0.0, + v: 0.0, + }, + }, + }], + }, + ); + } + let modeller = model::new_cpu_sky_modeller( + beam.deref(), + &source_list, + &xyzs, + &freqs, + &flagged_tiles, + phase_centre, + MWA_LONG_RAD, + MWA_LAT_RAD, + dut1, + apply_precession, + ); + + b.iter(|| { + modeller.model_shapelets(vis.view_mut(), timestamp).unwrap(); + }) + }, + ); + + #[cfg(feature = "cuda")] + for (num_sources, num_chans) in [ + (1, 768), + (32, 768), + (64, 768), + (128, 768), + (256, 768), + (512, 768), + (1024, 768), + ] { + shapelets.bench_function( + format!("{num_sources} with GPU (10 coeffs each), 128 tiles, {num_chans} channels"), + |b| { + let freqs = Array1::linspace(150e6, 200e6, num_chans).to_vec(); + let mut vis = Array2::from_elem((num_bls, num_chans), Jones::default()); + let mut source_list = SourceList::default(); + for i in 0..num_sources { + source_list.insert( + format!("source{i}"), + Source { + components: vec1![SourceComponent { + radec: RADec::new_degrees(0.0, -27.0), + comp_type: ComponentType::Shapelet { + maj: 1.0, + min: 0.5, + pa: 0.0, + coeffs: vec![ + ShapeletCoeff { + n1: 0, + n2: 1, + value: 1.0, + }; + 10 + ], + }, + flux_type: FluxDensityType::PowerLaw { + si: -0.7, + fd: FluxDensity { + freq: 150e6, + i: 1.0, + q: 0.0, + u: 0.0, + v: 0.0, + }, + }, + }], + }, + ); + } + let modeller = unsafe { + model::new_cuda_sky_modeller( + beam.deref(), + &source_list, + &xyzs, + &freqs, + &flagged_tiles, + phase_centre, + MWA_LONG_RAD, + MWA_LAT_RAD, + dut1, + apply_precession, + ) + .unwrap() + }; + + b.iter(|| modeller.model_shapelets(vis.view_mut(), timestamp).unwrap()); + }, + ); + } + shapelets.finish(); } fn calibrate_benchmarks(c: &mut Criterion) { - c.bench_function("nothing", |b| b.iter(marlu::Jones::::identity)); - - // use hifitime::Epoch; - // use marlu::Jones; - // use ndarray::prelude::*; - // use vec1::{vec1, Vec1}; - - // use mwa_hyperdrive::{ - // averaging::{Chanblock, Timeblock}, - // calibrate::di::calibrate_timeblocks, - // }; - - // let num_timesteps = 10; - // let num_timeblocks = 1; - // let mut timeblocks = Vec::with_capacity(num_timeblocks); - // timeblocks.push(Timeblock { - // index: 0, - // range: 0..num_timesteps, - // timestamps: vec1![ - // Epoch::from_gpst_seconds(1090008640.0), - // Epoch::from_gpst_seconds(1090008641.0), - // Epoch::from_gpst_seconds(1090008642.0), - // Epoch::from_gpst_seconds(1090008643.0), - // Epoch::from_gpst_seconds(1090008644.0), - // Epoch::from_gpst_seconds(1090008645.0), - // Epoch::from_gpst_seconds(1090008646.0), - // Epoch::from_gpst_seconds(1090008647.0), - // Epoch::from_gpst_seconds(1090008648.0), - // Epoch::from_gpst_seconds(1090008649.0), - // ], - // median: Epoch::from_gpst_seconds(1090008644.5), - // }); - // let timeblocks = Vec1::try_from_vec(timeblocks).unwrap(); - - // let num_chanblocks = 100; - // let mut chanblocks = Vec::with_capacity(num_chanblocks); - // for i_chanblock in 0..num_chanblocks { - // chanblocks.push(Chanblock { - // chanblock_index: i_chanblock as _, - // unflagged_index: i_chanblock as _, - // _freq: 150e6 + i_chanblock as f64, - // }) - // } - // let chanblocks = Vec1::try_from_vec(chanblocks).unwrap(); - - // let num_tiles = 128; - // let num_baselines = num_tiles * (num_tiles - 1) / 2; - - // let vis_shape = (num_timesteps, num_baselines, num_chanblocks); - // let vis_data: Array3> = Array3::from_elem(vis_shape, Jones::identity() * 4.0); - // let vis_model: Array3> = Array3::from_elem(vis_shape, Jones::identity()); - - // c.bench_function( - // &format!("calibrate with {num_timesteps} timesteps, {num_baselines} baselines, {num_chanblocks} chanblocks"), - // |b| { - // b.iter(|| { - // calibrate_timeblocks( - // vis_data.view(), - // vis_model.view(), - // &timeblocks, - // &chanblocks, - // 50, - // 1e-8, - // 1e-4, - // false, - // false, - // ); - // }); - // } - // ); + let num_timesteps = 10; + let num_timeblocks = 1; + let mut timeblocks = Vec::with_capacity(num_timeblocks); + timeblocks.push(Timeblock { + index: 0, + range: 0..num_timesteps, + timestamps: vec1![ + Epoch::from_gpst_seconds(1090008640.0), + Epoch::from_gpst_seconds(1090008641.0), + Epoch::from_gpst_seconds(1090008642.0), + Epoch::from_gpst_seconds(1090008643.0), + Epoch::from_gpst_seconds(1090008644.0), + Epoch::from_gpst_seconds(1090008645.0), + Epoch::from_gpst_seconds(1090008646.0), + Epoch::from_gpst_seconds(1090008647.0), + Epoch::from_gpst_seconds(1090008648.0), + Epoch::from_gpst_seconds(1090008649.0), + ], + median: Epoch::from_gpst_seconds(1090008644.5), + }); + let timeblocks = Vec1::try_from_vec(timeblocks).unwrap(); + + let num_chanblocks = 100; + let mut chanblocks = Vec::with_capacity(num_chanblocks); + for i_chanblock in 0..num_chanblocks { + chanblocks.push(Chanblock { + chanblock_index: i_chanblock as _, + unflagged_index: i_chanblock as _, + _freq: 150e6 + i_chanblock as f64, + }) + } + let chanblocks = Vec1::try_from_vec(chanblocks).unwrap(); + + let num_tiles = 128; + let num_baselines = num_tiles * (num_tiles - 1) / 2; + + let vis_shape = (num_timesteps, num_baselines, num_chanblocks); + let vis_data: Array3> = Array3::from_elem(vis_shape, Jones::identity() * 4.0); + let vis_model: Array3> = Array3::from_elem(vis_shape, Jones::identity()); + + c.bench_function( + &format!("calibrate with {num_timesteps} timesteps, {num_baselines} baselines, {num_chanblocks} chanblocks"), + |b| { + b.iter(|| { + calibrate_timeblocks( + vis_data.view(), + vis_model.view(), + &timeblocks, + &chanblocks, + 50, + 1e-8, + 1e-4, + false, + false, + ); + }); + } + ); } fn source_list_benchmarks(c: &mut Criterion) { - c.bench_function("nothing", |b| b.iter(marlu::Jones::::identity)); - - // use mwa_hyperdrive::srclist::get_instrumental_flux_densities; - - // let num_comps = 1000; - // let num_freqs = 400; - - // let fds = vec1![ - // FluxDensity { - // freq: 150e6, - // i: 1.0, - // q: 0.0, - // u: 0.0, - // v: 0.0, - // }, - // FluxDensity { - // freq: 175e6, - // i: 3.0, - // q: 0.0, - // u: 0.0, - // v: 0.0, - // }, - // FluxDensity { - // freq: 200e6, - // i: 2.0, - // q: 0.0, - // u: 0.0, - // v: 0.0, - // } - // ]; - // let comp_fds = vec![FluxDensityType::List { fds }; num_comps]; - // let freqs: Vec = Array1::linspace(140e6, 210e6, num_comps).to_vec(); - - // c.bench_function( - // &format!("Estimate flux densities for source list with {num_comps} 'list' components over {num_freqs} frequencies"), - // |b| { - // b.iter(|| { - // get_instrumental_flux_densities(&comp_fds, &freqs); - // }); - // } - // ); + let num_comps = 1000; + let num_freqs = 400; + + let fds = vec1![ + FluxDensity { + freq: 150e6, + i: 1.0, + q: 0.0, + u: 0.0, + v: 0.0, + }, + FluxDensity { + freq: 175e6, + i: 3.0, + q: 0.0, + u: 0.0, + v: 0.0, + }, + FluxDensity { + freq: 200e6, + i: 2.0, + q: 0.0, + u: 0.0, + v: 0.0, + } + ]; + let comp_fds = vec![FluxDensityType::List { fds }; num_comps]; + let freqs: Vec = Array1::linspace(140e6, 210e6, num_comps).to_vec(); + + c.bench_function( + &format!("Estimate flux densities for source list with {num_comps} 'list' components over {num_freqs} frequencies"), + |b| { + b.iter(|| { + get_instrumental_flux_densities(&comp_fds, &freqs); + }); + } + ); } criterion_group!( diff --git a/src/averaging/mod.rs b/src/averaging/mod.rs index e9714dbd..8e319d6a 100644 --- a/src/averaging/mod.rs +++ b/src/averaging/mod.rs @@ -20,10 +20,10 @@ use crate::unit_parsing::{parse_freq, parse_time, FreqFormat, TimeFormat}; /// A collection of timesteps. #[derive(Debug, Clone)] -pub(crate) struct Timeblock { +pub struct Timeblock { /// The timeblock index. e.g. If all observation timesteps are being used in /// a single calibration timeblock, then its index is 0. - pub(crate) index: usize, + pub index: usize, /// The range of indices into an *unflagged* array of visibilities. /// @@ -37,11 +37,11 @@ pub(crate) struct Timeblock { /// /// We can use a range because the timesteps belonging to a timeblock are /// always contiguous. - pub(crate) range: Range, + pub range: Range, /// The timestamps comprising this timeblock. These are determined by the /// timesteps into all available timestamps. - pub(crate) timestamps: Vec1, + pub timestamps: Vec1, /// The median timestamp of the *ideal* timeblock. /// @@ -57,25 +57,25 @@ pub(crate) struct Timeblock { /// /// In the first case, this `median` is [1, 4, 7] for each timeblock, [2, 5, /// 8] for the second. Note how missing timestamps don't affect it. - pub(crate) median: Epoch, + pub median: Epoch, } /// A collection of fine-frequency channels. #[derive(Debug, Clone)] -pub(crate) struct Chanblock { +pub struct Chanblock { /// The chanblock index, regardless of flagging. e.g. If the first two /// calibration chanblocks are flagged, then the first unflagged chanblock /// has a chanblock_index of 2 but an unflagged_index of 0. - pub(crate) chanblock_index: u16, + pub chanblock_index: u16, /// The index into an *unflagged* array of visibilities. Regardless of the /// first unflagged chanblock's index, its unflagged index is 0. - pub(crate) unflagged_index: u16, + pub unflagged_index: u16, // TODO: Use frequency information. May become important for calibration // solutions and what frequencies they apply to. /// The centroid frequency for this chanblock \[Hz\]. - pub(crate) _freq: f64, + pub _freq: f64, } /// A spectral windows, a.k.a. a contiguous-band of fine-frequency channels diff --git a/src/beam/error.rs b/src/beam/error.rs index 2148ced3..b8bafde5 100644 --- a/src/beam/error.rs +++ b/src/beam/error.rs @@ -7,7 +7,7 @@ use thiserror::Error; #[derive(Error, Debug)] -pub(crate) enum BeamError { +pub enum BeamError { #[error("The number of delays per tile ({delays}) didn't match the number of gains per tile ({gains})")] DelayGainsDimensionMismatch { delays: usize, gains: usize }, diff --git a/src/beam/fee.rs b/src/beam/fee.rs index 9c4f868c..930b7033 100644 --- a/src/beam/fee.rs +++ b/src/beam/fee.rs @@ -318,7 +318,7 @@ impl BeamCUDA for FEEBeamCUDA { /// `amps` *must* have 16 or 32 elements per row (each corresponds to an MWA /// dipole in a tile, in the M&C order; see /// ). -pub(crate) fn create_fee_beam_object>( +pub fn create_fee_beam_object>( beam_file: Option

, num_tiles: usize, delays: Delays, diff --git a/src/beam/mod.rs b/src/beam/mod.rs index f04cc8fe..32000cbd 100644 --- a/src/beam/mod.rs +++ b/src/beam/mod.rs @@ -18,7 +18,7 @@ mod fee; mod tests; pub(crate) use error::*; -pub(crate) use fee::*; +pub use fee::*; use std::path::Path; @@ -33,7 +33,7 @@ use crate::cuda::CudaFloat; /// Supported beam types. #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[allow(clippy::upper_case_acronyms)] -pub(crate) enum BeamType { +pub enum BeamType { /// Fully-embedded element beam. FEE, @@ -42,7 +42,7 @@ pub(crate) enum BeamType { } /// A trait abstracting beam code functions. -pub(crate) trait Beam: Sync + Send { +pub trait Beam: Sync + Send { /// Get the type of beam. fn get_beam_type(&self) -> BeamType; @@ -109,7 +109,7 @@ pub(crate) trait Beam: Sync + Send { /// A trait abstracting beam code functions on a CUDA-capable device. #[cfg(feature = "cuda")] -pub(crate) trait BeamCUDA { +pub trait BeamCUDA { /// Calculate the Jones matrices for an [AzEl] direction. The pointing /// information is not needed because it was provided when `self` was /// created. @@ -145,7 +145,7 @@ pub(crate) trait BeamCUDA { /// An enum to track whether MWA dipole delays are provided and/or necessary. #[derive(Debug, Clone)] -pub(crate) enum Delays { +pub enum Delays { /// Delays are fully specified. Full(Array2), diff --git a/src/di_calibrate/mod.rs b/src/di_calibrate/mod.rs index e5be975a..a3304453 100644 --- a/src/di_calibrate/mod.rs +++ b/src/di_calibrate/mod.rs @@ -466,7 +466,7 @@ fn model_vis( /// because all of them are necessary for `calibrate_timeblocks`, which is the /// only function to create `IncompleteSolutions`. To "complete" the solutions, /// extra metadata may be supplied. -pub(crate) struct IncompleteSolutions<'a> { +pub struct IncompleteSolutions<'a> { /// Direction-independent calibration solutions *for only unflagged data*. /// The first dimension is timeblock, the second is unflagged tile, the /// third is unflagged chanblock. @@ -708,7 +708,7 @@ impl<'a> IncompleteSolutions<'a> { /// calibrated together (as if they all belonged to a single timeblock) before /// any timeblocks are individually calibrated. This decision can be revisited. #[allow(clippy::too_many_arguments)] -pub(crate) fn calibrate_timeblocks<'a>( +pub fn calibrate_timeblocks<'a>( vis_data: ArrayView3>, vis_model: ArrayView3>, timeblocks: &'a Vec1, @@ -1091,7 +1091,7 @@ fn calibrate_timeblock( } #[derive(Debug)] -pub(crate) struct CalibrationResult { +pub struct CalibrationResult { pub(crate) num_iterations: u32, pub(crate) converged: bool, pub(crate) max_precision: f64, diff --git a/src/lib.rs b/src/lib.rs index 22776fc1..1714a874 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,12 +7,12 @@ //! //! -pub(crate) mod averaging; -pub(crate) mod beam; +pub mod averaging; +pub mod beam; mod cli; pub(crate) mod constants; pub(crate) mod context; -pub(crate) mod di_calibrate; +pub mod di_calibrate; pub(crate) mod error; pub(crate) mod filenames; pub(crate) mod flagging; @@ -22,11 +22,11 @@ pub(crate) mod math; pub(crate) mod messages; pub(crate) mod metafits; pub(crate) mod misc; -pub(crate) mod model; +pub mod model; pub(crate) mod pfb_gains; pub(crate) mod shapelets; pub(crate) mod solutions; -pub(crate) mod srclist; +pub mod srclist; pub(crate) mod unit_parsing; pub(crate) mod vis_io; diff --git a/src/model/mod.rs b/src/model/mod.rs index a43f11c4..74d14ff1 100644 --- a/src/model/mod.rs +++ b/src/model/mod.rs @@ -43,7 +43,7 @@ pub(crate) enum ModellerInfo { } /// An object that simulates sky-model visibilities. -pub(crate) trait SkyModeller<'a> { +pub trait SkyModeller<'a> { /// Generate sky-model visibilities for a single timestep. The [UVW] /// coordinates used in generating the visibilities are returned. /// @@ -145,7 +145,7 @@ pub(crate) trait SkyModeller<'a> { /// flagged tiles is the total number of tiles in the observation. The /// frequencies should have units of \[Hz\]. #[allow(clippy::too_many_arguments)] -pub(crate) fn new_cpu_sky_modeller<'a>( +pub fn new_cpu_sky_modeller<'a>( beam: &'a dyn Beam, source_list: &SourceList, unflagged_tile_xyzs: &'a [XyzGeodetic], @@ -189,7 +189,7 @@ pub(crate) fn new_cpu_sky_modeller<'a>( /// catch problems but there are no guarantees. #[cfg(feature = "cuda")] #[allow(clippy::too_many_arguments)] -pub(crate) unsafe fn new_cuda_sky_modeller<'a>( +pub unsafe fn new_cuda_sky_modeller<'a>( beam: &'a dyn Beam, source_list: &SourceList, unflagged_tile_xyzs: &'a [XyzGeodetic], diff --git a/src/srclist/mod.rs b/src/srclist/mod.rs index fc3b168c..a69711bf 100644 --- a/src/srclist/mod.rs +++ b/src/srclist/mod.rs @@ -18,7 +18,7 @@ mod general_tests; mod veto; pub(crate) use error::*; -pub(crate) use types::*; +pub use types::*; pub(crate) use veto::*; use itertools::Itertools; diff --git a/src/srclist/types/components/mod.rs b/src/srclist/types/components/mod.rs index 36beb785..795b6d3c 100644 --- a/src/srclist/types/components/mod.rs +++ b/src/srclist/types/components/mod.rs @@ -16,13 +16,13 @@ use super::{FluxDensity, FluxDensityType, SourceList}; /// Information on a source's component. #[derive(Clone, Debug, PartialEq)] -pub(crate) struct SourceComponent { +pub struct SourceComponent { /// Coordinates struct associated with the component. - pub(crate) radec: RADec, + pub radec: RADec, /// The type of component. - pub(crate) comp_type: ComponentType, + pub comp_type: ComponentType, /// The flux densities associated with this component. - pub(crate) flux_type: FluxDensityType, + pub flux_type: FluxDensityType, } impl SourceComponent { @@ -49,7 +49,7 @@ impl SourceComponent { /// Source component types supported by hyperdrive. #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub(crate) enum ComponentType { +pub enum ComponentType { #[serde(rename = "point")] Point, @@ -77,10 +77,10 @@ pub(crate) enum ComponentType { } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub(crate) struct ShapeletCoeff { - pub(crate) n1: usize, - pub(crate) n2: usize, - pub(crate) value: f64, +pub struct ShapeletCoeff { + pub n1: usize, + pub n2: usize, + pub value: f64, } impl ComponentType { @@ -275,7 +275,7 @@ impl ShapeletComponentParams { // // These don't change with time, so we can save a lot of computation by just // doing this once. -pub(crate) fn get_instrumental_flux_densities( +pub fn get_instrumental_flux_densities( comp_fds: &[FluxDensityType], unflagged_fine_chan_freqs: &[f64], ) -> Array2> { diff --git a/src/srclist/types/flux_density/mod.rs b/src/srclist/types/flux_density/mod.rs index 11d488bd..23d80026 100644 --- a/src/srclist/types/flux_density/mod.rs +++ b/src/srclist/types/flux_density/mod.rs @@ -17,27 +17,27 @@ use crate::constants::{DEFAULT_SPEC_INDEX, SPEC_INDEX_CAP}; #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] /// At a frequency, four flux densities for each Stokes parameter. // When serialising/deserialising, ignore Stokes Q U V if they are zero. -pub(crate) struct FluxDensity { +pub struct FluxDensity { /// The frequency that these flux densities apply to \[Hz\] - pub(crate) freq: f64, + pub freq: f64, /// The flux density of Stokes I \[Jy\] - pub(crate) i: f64, + pub i: f64, /// The flux density of Stokes Q \[Jy\] #[serde(default)] #[serde(skip_serializing_if = "is_zero")] - pub(crate) q: f64, + pub q: f64, /// The flux density of Stokes U \[Jy\] #[serde(default)] #[serde(skip_serializing_if = "is_zero")] - pub(crate) u: f64, + pub u: f64, /// The flux density of Stokes V \[Jy\] #[serde(default)] #[serde(skip_serializing_if = "is_zero")] - pub(crate) v: f64, + pub v: f64, } impl FluxDensity { @@ -115,7 +115,7 @@ impl std::ops::Mul for FluxDensity { } #[derive(Clone, Debug, PartialEq)] -pub(crate) enum FluxDensityType { +pub enum FluxDensityType { /// $S_\nu = a \nu^{\alpha}$ PowerLaw { /// Spectral index (alpha) diff --git a/src/srclist/types/mod.rs b/src/srclist/types/mod.rs index e19b98c2..9242ca4d 100644 --- a/src/srclist/types/mod.rs +++ b/src/srclist/types/mod.rs @@ -9,7 +9,7 @@ mod flux_density; mod source; mod source_list; -pub(crate) use components::*; -pub(crate) use flux_density::*; -pub(crate) use source::*; -pub(crate) use source_list::*; +pub use components::*; +pub use flux_density::*; +pub use source::*; +pub use source_list::*; diff --git a/src/srclist/types/source/mod.rs b/src/srclist/types/source/mod.rs index feb8813f..d55afda4 100644 --- a/src/srclist/types/source/mod.rs +++ b/src/srclist/types/source/mod.rs @@ -13,9 +13,9 @@ use super::{FluxDensity, SourceComponent}; /// A collection of components. #[derive(Clone, Debug, PartialEq)] -pub(crate) struct Source { +pub struct Source { /// The components associated with the source. - pub(crate) components: Vec1, + pub components: Vec1, } impl Source { diff --git a/src/srclist/types/source_list/mod.rs b/src/srclist/types/source_list/mod.rs index 77f9fb93..b9639b00 100644 --- a/src/srclist/types/source_list/mod.rs +++ b/src/srclist/types/source_list/mod.rs @@ -19,7 +19,7 @@ use super::*; /// By making [SourceList] a new type (specifically, an anonymous struct), /// useful methods can be put onto it. #[derive(Debug, Clone, Default)] -pub(crate) struct SourceList(IndexMap); +pub struct SourceList(IndexMap); impl SourceList { /// Create an empty [SourceList].