Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: template classes for specifying floating-point precision at runtime #1560

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 3 additions & 2 deletions python/meep.i
Expand Up @@ -1864,8 +1864,9 @@ PyObject *_get_array_slice_dimensions(meep::fields *f, const meep::volume &where
%newobject create_structure_and_set_materials;
%inline %{

size_t get_realnum_size() {
return sizeof(meep::realnum);
template <class T>
size_t get_T_size() {
return sizeof(T);
}

meep::structure *create_structure_and_set_materials(vector3 cell_size,
Expand Down
12 changes: 7 additions & 5 deletions src/anisotropic_averaging.cpp
Expand Up @@ -188,6 +188,7 @@ done : {
}
}

template <class T>
void structure_chunk::set_chi1inv(component c, material_function &medium,
bool use_anisotropic_averaging, double tol, int maxeval) {
if (!is_mine() || !gv.has_field(c)) return;
Expand All @@ -212,7 +213,7 @@ void structure_chunk::set_chi1inv(component c, material_function &medium,

FOR_FT_COMPONENTS(ft, c2) if (gv.has_field(c2)) {
direction d = component_direction(c2);
if (!chi1inv[c][d]) chi1inv[c][d] = new realnum[gv.ntot()];
if (!chi1inv[c][d]) chi1inv[c][d] = new T[gv.ntot()];
if (!chi1inv[c][d]) abort("Memory allocation error.\n");
}
direction dc = component_direction(c);
Expand Down Expand Up @@ -274,6 +275,7 @@ void structure_chunk::set_chi1inv(component c, material_function &medium,
medium.unset_volume();
}

template <class T>
void structure_chunk::add_susceptibility(material_function &sigma, field_type ft,
const susceptibility &sus) {
if (ft != E_stuff && ft != H_stuff) abort("susceptibilities must be for E or H fields");
Expand All @@ -294,7 +296,7 @@ void structure_chunk::add_susceptibility(material_function &sigma, field_type ft
if (is_mine()) FOR_FT_COMPONENTS(ft, c) if (gv.has_field(c)) {
FOR_FT_COMPONENTS(ft, c2) if (gv.has_field(c2)) {
direction d = component_direction(c2);
if (!newsus->sigma[c][d]) newsus->sigma[c][d] = new realnum[gv.ntot()];
if (!newsus->sigma[c][d]) newsus->sigma[c][d] = new T[gv.ntot()];
if (!newsus->sigma[c][d]) abort("Memory allocation error.\n");
}
bool trivial[3] = {true, true, true};
Expand All @@ -305,9 +307,9 @@ void structure_chunk::add_susceptibility(material_function &sigma, field_type ft
d1 = P;
}
int idiag = component_index(c);
realnum *s0 = newsus->sigma[c][d0];
realnum *s1 = newsus->sigma[c][d1];
realnum *s2 = newsus->sigma[c][d2];
T *s0 = newsus->sigma[c][d0];
T *s1 = newsus->sigma[c][d1];
T *s2 = newsus->sigma[c][d2];
vec shift1(gv[unit_ivec(gv.dim, component_direction(c)) * (ft == E_stuff ? 1 : -1)]);
LOOP_OVER_VOL(gv, c, i) {
double sigrow[3], sigrow_offdiag[3];
Expand Down
15 changes: 9 additions & 6 deletions src/boundaries.cpp
Expand Up @@ -260,6 +260,7 @@ void fields_chunk::zero_metal(field_type ft) {
*(zeroes[ft][i]) = 0.0;
}

template <class T>
void fields::find_metals() {
for (int i = 0; i < num_chunks; i++)
if (chunks[i]->is_mine()) {
Expand All @@ -276,8 +277,8 @@ void fields::find_metals() {
}
}
}
typedef realnum *realnum_ptr;
chunks[i]->zeroes[ft] = new realnum_ptr[chunks[i]->num_zeroes[ft]];
typedef T *T_ptr;
chunks[i]->zeroes[ft] = new T_ptr[chunks[i]->num_zeroes[ft]];
size_t num = 0;
DOCMP FOR_COMPONENTS(c) {
if (type(c) == ft && chunks[i]->f[c][cmp]) LOOP_OVER_VOL_OWNED(vi, c, n) {
Expand All @@ -298,6 +299,7 @@ bool fields_chunk::needs_W_notowned(component c) {
return false;
}

template <class T>
void fields::connect_the_chunks() {
size_t *nc[NUM_FIELD_TYPES][3][2];
FOR_FIELD_TYPES(f) {
Expand Down Expand Up @@ -416,7 +418,7 @@ void fields::connect_the_chunks() {
FOR_FIELD_TYPES(ft) {
for (int j = 0; j < num_chunks; j++) {
delete[] comm_blocks[ft][j + i * num_chunks];
comm_blocks[ft][j + i * num_chunks] = new realnum[comm_size_tot(ft, j + i * num_chunks)];
comm_blocks[ft][j + i * num_chunks] = new T[comm_size_tot(ft, j + i * num_chunks)];
}
}
} // loop over i chunks
Expand Down Expand Up @@ -553,16 +555,17 @@ void fields::connect_the_chunks() {
delete[] B_redundant;
}

template <class T>
void fields_chunk::alloc_extra_connections(field_type f, connect_phase ip, in_or_out io,
size_t num) {
if (num == 0) return; // No need to go to any bother...
const size_t tot = num_connections[f][ip][io] + num;
if (io == Incoming && ip == CONNECT_PHASE) {
delete[] connection_phases[f];
connection_phases[f] = new complex<realnum>[tot];
connection_phases[f] = new complex<T>[tot];
}
typedef realnum *realnum_ptr;
realnum **conn = new realnum_ptr[tot];
typedef T *T_ptr;
T **conn = new T_ptr[tot];
if (!conn) abort("Out of memory!\n");
delete[] connections[f][ip][io];
connections[f][ip][io] = conn;
Expand Down
48 changes: 26 additions & 22 deletions src/cw_fields.cpp
Expand Up @@ -22,12 +22,13 @@ using namespace std;

namespace meep {

static void fields_to_array(const fields &f, complex<realnum> *x) {
template <class T>
static void fields_to_array(const fields &f, complex<T> *x) {
size_t ix = 0;
for (int i = 0; i < f.num_chunks; i++)
if (f.chunks[i]->is_mine()) FOR_COMPONENTS(c) {
if (is_D(c) || is_B(c)) {
realnum *fr, *fi;
T *fr, *fi;
#define COPY_FROM_FIELD(fld) \
if ((fr = f.chunks[i]->fld[0]) && (fi = f.chunks[i]->fld[1])) \
LOOP_OVER_VOL_OWNED(f.chunks[i]->gv, c, idx) \
Expand All @@ -43,12 +44,12 @@ static void fields_to_array(const fields &f, complex<realnum> *x) {
}
}

static void array_to_fields(const complex<realnum> *x, fields &f) {
static void array_to_fields(const complex<T> *x, fields &f) {
size_t ix = 0;
for (int i = 0; i < f.num_chunks; i++)
if (f.chunks[i]->is_mine()) FOR_COMPONENTS(c) {
if (is_D(c) || is_B(c)) {
realnum *fr, *fi;
T *fr, *fi;
#define COPY_TO_FIELD(fld) \
if ((fr = f.chunks[i]->fld[0]) && (fi = f.chunks[i]->fld[1])) \
LOOP_OVER_VOL_OWNED(f.chunks[i]->gv, c, idx) { \
Expand Down Expand Up @@ -81,32 +82,34 @@ typedef struct {
complex<double> iomega;
} fieldop_data;

static void fieldop(const realnum *xr, realnum *yr, void *data_) {
const complex<realnum> *x = reinterpret_cast<const complex<realnum> *>(xr);
complex<realnum> *y = reinterpret_cast<complex<realnum> *>(yr);
template <class T>
static void fieldop(const T *xr, T *yr, void *data_) {
const complex<T> *x = reinterpret_cast<const complex<T> *>(xr);
complex<T> *y = reinterpret_cast<complex<T> *>(yr);
fieldop_data *data = (fieldop_data *)data_;
array_to_fields(x, *data->f);
data->f->step();
fields_to_array(*data->f, y);
size_t n = data->n;
realnum dt_inv = 1.0 / data->f->dt;
complex<realnum> iomega = complex<realnum>(real(data->iomega), imag(data->iomega));
T dt_inv = 1.0 / data->f->dt;
complex<T> iomega = complex<T>(real(data->iomega), imag(data->iomega));
for (size_t i = 0; i < n; ++i)
y[i] = (y[i] - x[i]) * dt_inv + iomega * x[i];
}

// Rayleigh-quotient estimate <x,Ax>/<x,x> for eigenfrequency given approximate eigenvector x
// (length n), overwriting x with Ax and b with x/|x|.
static complex<double> estimate_eigfreq(complex<realnum> *b, complex<realnum> *x, size_t n,
template <class T>
static complex<double> estimate_eigfreq(complex<T> *b, complex<T> *x, size_t n,
fieldop_data *data) {
memcpy(b, x, n * sizeof(complex<realnum>));
fieldop(reinterpret_cast<realnum *>(b), reinterpret_cast<realnum *>(x), (void *)data);
memcpy(b, x, n * sizeof(complex<T>));
fieldop(reinterpret_cast<T *>(b), reinterpret_cast<T *>(x), (void *)data);
complex<double> bdotx(0, 0);
double bnorm2 = 0;
for (size_t i = 0; i < n; ++i) {
complex<realnum> bi = b[i];
complex<T> bi = b[i];
bnorm2 += real(bi) * real(bi) + imag(bi) * imag(bi);
complex<realnum> bx = conj(bi) * x[i];
complex<T> bx = conj(bi) * x[i];
bdotx += complex<double>(real(bx), imag(bx));
}
bnorm2 = sum_to_all(bnorm2);
Expand Down Expand Up @@ -137,6 +140,7 @@ static complex<double> estimate_eigfreq(complex<realnum> *b, complex<realnum> *x
shift-and-invert power iteration to find the closest eigenfrequency and
eigenvector to frequency: the solver is iterated up to eigiters times,
or until the estimated eigenfreq stops changing by <= eigtol (relative). */
template <class T>
bool fields::solve_cw(double tol, int maxiters, complex<double> frequency, int L,
complex<double> *eigfreq, double eigtol, int eigiters) {
if (is_real) abort("solve_cw is incompatible with use_real_fields()");
Expand Down Expand Up @@ -169,9 +173,9 @@ bool fields::solve_cw(double tol, int maxiters, complex<double> frequency, int L

iters = maxiters;
size_t nwork = (size_t)bicgstabL(L, N, 0, 0, 0, 0, tol, &iters, 0, true);
realnum *work = new realnum[nwork + 2 * N];
complex<realnum> *x = reinterpret_cast<complex<realnum> *>(work + nwork);
complex<realnum> *b = reinterpret_cast<complex<realnum> *>(work + nwork + N);
T *work = new T[nwork + 2 * N];
complex<T> *x = reinterpret_cast<complex<T> *>(work + nwork);
complex<T> *b = reinterpret_cast<complex<T> *>(work + nwork + N);

fields_to_array(*this, x); // initial guess = initial fields

Expand Down Expand Up @@ -206,8 +210,8 @@ bool fields::solve_cw(double tol, int maxiters, complex<double> frequency, int L
data.iomega = ((1.0 - exp(complex<double>(0., -1.) * (2 * pi * frequency) * dt)) * (1.0 / dt));
iters = maxiters;

int ierr = (int)bicgstabL(L, N, reinterpret_cast<realnum *>(x), fieldop, &data,
reinterpret_cast<realnum *>(b), tol, &iters, work, verbosity == 0);
int ierr = (int)bicgstabL(L, N, reinterpret_cast<T *>(x), fieldop, &data,
reinterpret_cast<T *>(b), tol, &iters, work, verbosity == 0);

if (verbosity > 0) {
master_printf("Finished solve_cw after %d CG iters (~ %d timesteps).\n", iters, iters * 2 * L);
Expand All @@ -222,8 +226,8 @@ bool fields::solve_cw(double tol, int maxiters, complex<double> frequency, int L
}
for (int eigiter = 0; eigiter < eigiters; ++eigiter) {
iters = maxiters;
int ierr = (int)bicgstabL(L, N, reinterpret_cast<realnum *>(x), fieldop, &data,
reinterpret_cast<realnum *>(b), tol, &iters, work, verbosity == 0);
int ierr = (int)bicgstabL(L, N, reinterpret_cast<T *>(x), fieldop, &data,
reinterpret_cast<T *>(b), tol, &iters, work, verbosity == 0);
complex<double> newfreq = estimate_eigfreq(b, x, data.n, &data);
complex<double> dfreq = newfreq - *eigfreq;
if (verbosity > 0) {
Expand All @@ -234,7 +238,7 @@ bool fields::solve_cw(double tol, int maxiters, complex<double> frequency, int L
*eigfreq = newfreq;
if (abs(dfreq) <= eigtol * abs(newfreq)) break; // converged
}
memcpy(x, b, N * sizeof(realnum));
memcpy(x, b, N * sizeof(T));
}

array_to_fields(x, *this);
Expand Down
9 changes: 5 additions & 4 deletions src/dft_ldos.cpp
Expand Up @@ -90,6 +90,7 @@ complex<double> *dft_ldos::J() const {
return out;
}

template <class T>
void dft_ldos::update(fields &f) {
complex<double> EJ = 0.0; // integral E * J*
complex<double> HJ = 0.0; // integral H * J* for magnetic currents
Expand All @@ -104,8 +105,8 @@ void dft_ldos::update(fields &f) {
if (f.chunks[ic]->is_mine()) {
for (src_vol *sv = f.chunks[ic]->sources[D_stuff]; sv; sv = sv->next) {
component c = direction_component(Ex, component_direction(sv->c));
realnum *fr = f.chunks[ic]->f[c][0];
realnum *fi = f.chunks[ic]->f[c][1];
T *fr = f.chunks[ic]->f[c][0];
T *fi = f.chunks[ic]->f[c][1];
if (fr && fi) // complex E
for (size_t j = 0; j < sv->npts; j++) {
const ptrdiff_t idx = sv->index[j];
Expand All @@ -124,8 +125,8 @@ void dft_ldos::update(fields &f) {
}
for (src_vol *sv = f.chunks[ic]->sources[B_stuff]; sv; sv = sv->next) {
component c = direction_component(Hx, component_direction(sv->c));
realnum *fr = f.chunks[ic]->f[c][0];
realnum *fi = f.chunks[ic]->f[c][1];
T *fr = f.chunks[ic]->f[c][0];
T *fi = f.chunks[ic]->f[c][1];
if (fr && fi) // complex H
for (size_t j = 0; j < sv->npts; j++) {
const ptrdiff_t idx = sv->index[j];
Expand Down
11 changes: 6 additions & 5 deletions src/energy_and_flux.cpp
Expand Up @@ -94,6 +94,7 @@ double fields::magnetic_energy_in_box(const volume &where) {
return sum;
}

template <class T>
void fields_chunk::backup_component(component c) {
DOCMP {
if (c < NUM_FIELD_COMPONENTS && f[c][cmp] &&
Expand All @@ -102,8 +103,8 @@ void fields_chunk::backup_component(component c) {

#define BACKUP(f) \
if (f[c][cmp]) { \
if (!f##_backup[c][cmp]) f##_backup[c][cmp] = new realnum[gv.ntot()]; \
memcpy(f##_backup[c][cmp], f[c][cmp], gv.ntot() * sizeof(realnum)); \
if (!f##_backup[c][cmp]) f##_backup[c][cmp] = new T[gv.ntot()]; \
memcpy(f##_backup[c][cmp], f[c][cmp], gv.ntot() * sizeof(T)); \
}

BACKUP(f);
Expand All @@ -120,7 +121,7 @@ void fields_chunk::restore_component(component c) {
DOCMP {
#define RESTORE(f) \
if (f##_backup[c][cmp] && f[c][cmp]) \
memcpy(f[c][cmp], f##_backup[c][cmp], gv.ntot() * sizeof(realnum));
memcpy(f[c][cmp], f##_backup[c][cmp], gv.ntot() * sizeof(T));

RESTORE(f);
RESTORE(f_u);
Expand All @@ -133,8 +134,8 @@ void fields_chunk::restore_component(component c) {

void fields_chunk::average_with_backup(component c) {
DOCMP {
realnum *fc = f[c][cmp];
realnum *backup = f_backup[c][cmp];
T *fc = f[c][cmp];
T *backup = f_backup[c][cmp];
if (fc && backup)
for (size_t i = 0; i < gv.ntot(); i++)
fc[i] = 0.5 * (fc[i] + backup[i]);
Expand Down