From 585b8ed30e6f51d2f721f0d00b289f4fc9f20eb7 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 14 Jul 2020 11:38:31 +0100 Subject: [PATCH 01/56] changing cpu to use same example as gpu in Project folder. Changing gpu saveData to not write halo regions --- Project/CPU/Src/main.cc | 105 ++++++++++++++++++++---------------- Project/GPU/Makefile | 2 +- Project/GPU/Src/saveData.cu | 101 ++++++++++++++++++++++++++-------- Project/compare.py | 34 ++++++++++++ 4 files changed, 172 insertions(+), 70 deletions(-) create mode 100644 Project/compare.py diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index 231d0af1..941c5d1e 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -1,88 +1,101 @@ // Serial main -#include "parallelBoundaryConds.h" -#include "fluxVectorSplitting.h" -#include "parallelSaveData.h" +#include "simData.h" #include "simulation.h" #include "initFunc.h" -#include "simData.h" -#include "RKPlus.h" -#include "hybrid.h" +#include "srmhd.h" +#include "srrmhd.h" +#include "boundaryConds.h" +#include "rkSplit.h" +#include "SSP2.h" +#include "serialSaveData.h" +#include "fluxVectorSplitting.h" #include "weno.h" +#include +#include #include +#include #include +#include + using namespace std; int main(int argc, char *argv[]) { + const double MU(1000); // Set up domain - int Ng(7); - int nx(800); - int ny(0); + int Ng(4); + int nx(256); + int ny(512); int nz(0); - double xmin(0.0); - double xmax(1.0); + double xmin(-0.5); + double xmax(0.5); double ymin(-1.0); double ymax(1.0); - double zmin(0.0); - double zmax(1.0); - double endTime(0.4); - double gamma(2.0); - double cfl(0.5); - double cp(1); - double mu1(-1); - double mu2(1); - int frameSkip(1); - int reportItersPeriod(1); - - double sigma(40); - bool functionalSigma(true); - double gam(6); - - double nxRanks(4); - double nyRanks(1); - double nzRanks(1); - - ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); + double zmin(-1.5); + double zmax(1.5); + double endTime(3.0); + double cfl(0.1); + double gamma(4.0/3.0); + double sigma(300); + double cp(1.0); + double mu1(-MU); + double mu2(MU); + int frameSkip(180); + bool output(true); + int safety(180); + + + char * ptr(0); + //! 
Overwrite any variables that have been passed in as main() arguments + for (int i(0); i < argc; i++) { + if (strcmp(argv[i], "sigma") == 0) { + sigma = (double)strtol(argv[i+1], &ptr, 10); + } + } + + SerialEnv env(&argc, &argv, 1, 1, 1); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, - cfl, Ng, gamma, sigma, cp, mu1, mu2, frameSkip, reportItersPeriod, functionalSigma, gam); + cfl, Ng, gamma, sigma, cp, mu1, mu2, frameSkip); + // Choose particulars of simulation - Hybrid model(&data); + SRRMHD model(&data); - Weno7 weno(&data); + Weno3 weno(&data); FVS fluxMethod(&data, &weno, &model); - model.setupREGIME(&fluxMethod); - - ParallelOutflow bcs(&data, &env); + Flow bcs(&data); Simulation sim(&data, &env); - BrioWuSingleFluid init(&data); + KHInstabilitySingleFluid init(&data, 1); - RK4 timeInt(&data, &model, &bcs, &fluxMethod); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); - ParallelSaveData save(&data, &env, 0); + SerialSaveData save(&data, &env, 0); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); - // Time execution of programme - clock_t startTime(clock()); + //double startTime(omp_get_wtime()); // Run until end time and save results - sim.evolve(); - // sim.updateTime(); + // sim.evolve(output, safety); + sim.updateTime(); + sim.updateTime(); + sim.updateTime(); + sim.updateTime(); + sim.updateTime(); - double timeTaken(double(clock() - startTime)/(double)CLOCKS_PER_SEC); + //double timeTaken(omp_get_wtime()- startTime); save.saveAll(); - if (env.rank==0) printf("\nRuntime: %.5fs\nCompleted %d iterations.\n", timeTaken, data.iters); + //printf("\nRuntime: %.5fs\nCompleted %d iterations.\n", timeTaken, data.iters); return 0; diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index a84ac2e0..666dc027 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -23,7 +23,7 @@ RTFIND_SRC_DIR = ./CminpackLibrary/Src CXXFLAGS = -Xcompiler -fopenmp -Xcompiler -Wall # NVIDIA compiler flags -NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 +NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 # Sources SRCS = main.cu \ diff --git a/Project/GPU/Src/saveData.cu b/Project/GPU/Src/saveData.cu index 95798573..c87bbccb 100644 --- a/Project/GPU/Src/saveData.cu +++ b/Project/GPU/Src/saveData.cu @@ -35,7 +35,7 @@ void SaveData::saveCons() { FILE * f; - char fname[60]; + char fname[120]; strcpy(fname, dir); strcat(fname, "/Conserved/cons"); strcat(fname, app); @@ -43,7 +43,6 @@ void SaveData::saveCons() f = fopen(fname, "w"); // Ensure file is open - printf("Writing into %s\n", fname); if (f == NULL) { printf("Error: could not open 'cons.dat' for writing.\n"); exit(1); @@ -56,17 +55,35 @@ void SaveData::saveCons() } fprintf(f, "%s\n", d->consLabels[d->Ncons-1].c_str()); - - for (int var(0); var < d->Ncons; var++) { - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - fprintf(f, "%.16f ", d->cons[ID(var, i, j, k)]); + if (d->dims==3){ + for (int var(0); var < d->Ncons; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + for (int j(0); j < d->Ny-(2*d->Ng); j++) { + for (int k(0); k < d->Nz-(2*d->Ng); k++) { + fprintf(f, "%.16f ", d->cons[ID(var, i + d->Ng, j + d->Ng, k + d->Ng)]); + } + fprintf(f, "\n"); } + } + } + } else if (d->dims==2){ + for (int var(0); var < d->Ncons; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + for (int j(0); j < d->Ny-(2*d->Ng); j++) { + fprintf(f, 
"%.16f ", d->cons[ID(var, i + d->Ng, j + d->Ng, 0)]); + fprintf(f, "\n"); + } + } + } + } else { + for (int var(0); var < d->Ncons; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + fprintf(f, "%.16f ", d->cons[ID(var, i + d->Ng, 0, 0)]); fprintf(f, "\n"); } } } + fclose(f); } @@ -75,7 +92,7 @@ void SaveData::saveCons() void SaveData::savePrims() { FILE * f; - char fname[60]; + char fname[120]; strcpy(fname, dir); strcat(fname, "/Primitive/prims"); strcat(fname, app); @@ -91,12 +108,31 @@ void SaveData::savePrims() fprintf(f, "prims = "); for (int i(0); i < d->Nprims-1; i++) fprintf(f, "%s, ", d->primsLabels[i].c_str()); fprintf(f, "%s\n", d->primsLabels[d->Nprims-1].c_str()); - for (int var(0); var < d->Nprims; var++) { - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - fprintf(f, "%.16f ", d->prims[ID(var, i, j, k)]); + + if (d->dims==3){ + for (int var(0); var < d->Nprims; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + for (int j(0); j < d->Ny-(2*d->Ng); j++) { + for (int k(0); k < d->Nz-(2*d->Ng); k++) { + fprintf(f, "%.16f ", d->prims[ID(var, i + d->Ng, j + d->Ng, k + d->Ng)]); + } + fprintf(f, "\n"); + } + } + } + } else if (d->dims==2){ + for (int var(0); var < d->Nprims; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + for (int j(0); j < d->Ny-(2*d->Ng); j++) { + fprintf(f, "%.16f ", d->prims[ID(var, i + d->Ng, j + d->Ng, 0)]); + fprintf(f, "\n"); } + } + } + } else { + for (int var(0); var < d->Nprims; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + fprintf(f, "%.16f ", d->prims[ID(var, i + d->Ng, 0, 0)]); fprintf(f, "\n"); } } @@ -109,7 +145,7 @@ void SaveData::savePrims() void SaveData::saveAux() { FILE * f; - char fname[60]; + char fname[120]; strcpy(fname, dir); strcat(fname, "/Auxiliary/aux"); strcat(fname, app); @@ -125,12 +161,31 @@ void SaveData::saveAux() fprintf(f, "aux = "); for (int i(0); i < d->Naux-1; i++) fprintf(f, "%s, ", d->auxLabels[i].c_str()); fprintf(f, "%s\n", d->auxLabels[d->Naux-1].c_str()); - for (int var(0); var < d->Naux; var++) { - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - fprintf(f, "%.16f ", d->aux[ID(var, i, j, k)]); + + if (d->dims==3){ + for (int var(0); var < d->Naux; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + for (int j(0); j < d->Ny-(2*d->Ng); j++) { + for (int k(0); k < d->Nz-(2*d->Ng); k++) { + fprintf(f, "%.16f ", d->aux[ID(var, i + d->Ng, j + d->Ng, k + d->Ng)]); + } + fprintf(f, "\n"); } + } + } + } else if (d->dims==2){ + for (int var(0); var < d->Naux; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + for (int j(0); j < d->Ny-(2*d->Ng); j++) { + fprintf(f, "%.16f ", d->aux[ID(var, i + d->Ng, j + d->Ng, 0)]); + fprintf(f, "\n"); + } + } + } + } else { + for (int var(0); var < d->Naux; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + fprintf(f, "%.16f ", d->aux[ID(var, i + d->Ng, 0, 0)]); fprintf(f, "\n"); } } @@ -144,7 +199,7 @@ void SaveData::saveAux() void SaveData::saveDomain() { FILE * f; - char fname[60]; + char fname[120]; strcpy(fname, dir); strcat(fname, "/Domain/domain"); strcat(fname, app); @@ -176,7 +231,7 @@ void SaveData::saveDomain() void SaveData::saveConsts() { FILE * f; - char fname[60]; + char fname[120]; strcpy(fname, dir); strcat(fname, "/Constants/constants"); strcat(fname, app); @@ -204,7 +259,7 @@ void SaveData::saveVar(string variable, int num) int cpa(0); // cons=1,prims=2,aux=3 int Nvar(0); // Variable number FILE * f; - char fname[60]; + 
char fname[120]; // Determine which variable the user wants saved for (int var(0); var < d->Ncons; var++) { diff --git a/Project/compare.py b/Project/compare.py new file mode 100644 index 00000000..99fa52ad --- /dev/null +++ b/Project/compare.py @@ -0,0 +1,34 @@ +TOL=10e-15 + +time_format_folder="Final" +vars_folders=["Conserved", "Auxiliary", "Primitive"] +vars_files=["cons", "aux", "prims"] +extension=".dat" + +for index in range(len(vars_folders)): + serial_filename = "/".join(["CPU", "Data", time_format_folder, vars_folders[index], vars_files[index]]) + parallel_filename = "/".join(["GPU", "Data", time_format_folder, vars_folders[index], vars_files[index]]) + serial_filename = serial_filename+extension + parallel_filename = parallel_filename+extension + print("Processing: " + serial_filename + ", " + parallel_filename) + + try: + with open(serial_filename, 'r') as serial_dat_file: + with open(parallel_filename, 'r') as parallel_dat_file: + skip_header = 1 + line_number = 0 + for serial_line, parallel_line in zip(serial_dat_file, parallel_dat_file): + if skip_header: + skip_header = 0 + continue + serial_val = float(serial_line) + parallel_val = float(parallel_line) + line_number = line_number + 1 + if (abs(serial_val-parallel_val) > TOL): + print("\n\n!! Error in {} (val={}, line={}), {}, (val={})\n\n".format(serial_filename, serial_val, line_number, parallel_filename, parallel_val)) + break + + except IOError: + print("Could not read file:", filename) + + From 175cdadb222437943f759dbf32e4c08a7f762815 Mon Sep 17 00:00:00 2001 From: AlexJamesWright Date: Wed, 5 Aug 2020 14:10:28 +0100 Subject: [PATCH 02/56] only for now --- Project/CPU/Src/interactivePlotCPU.py | 757 ++++++++++++++++++++++++++ Tests/GPU/repeat.sh | 8 + Tests/play.py | 24 + 3 files changed, 789 insertions(+) create mode 100644 Project/CPU/Src/interactivePlotCPU.py create mode 100755 Tests/GPU/repeat.sh create mode 100644 Tests/play.py diff --git a/Project/CPU/Src/interactivePlotCPU.py b/Project/CPU/Src/interactivePlotCPU.py new file mode 100644 index 00000000..d8a288ec --- /dev/null +++ b/Project/CPU/Src/interactivePlotCPU.py @@ -0,0 +1,757 @@ +""" + Script gathers the state vectors stored in the Data directory and offers + functionality to plot various elements. +""" + + +import numpy as np +from matplotlib import pyplot as plt +from scipy.special import erf +from matplotlib import cm +import warnings +from contextlib import suppress + +warnings.filterwarnings('ignore', "No labelled objects found. ") + +# Change this to the relative path to the data you want to plot +# File names must start with e.g. `primitive`, anything between this +# and `.dat` should be stored in appendix +# By default, this script will gather data for the final condition of the +# simulation at t=t_end. To gather different data, add arguments to the +# constructor to include the path to the directory and any appendages. +FinalDirectory = '../Data/Final/' +appendix = '' + +class InteractivePlot(object): + + def __init__(self, DatDirectory=None, append=None, states=True): + if DatDirectory is None: + self.DatDir = FinalDirectory + else: + self.DatDir = DatDirectory + if append is None: + self.appendix = appendix + else: + self.appendix = append + self.gatherData(states) + print("Ready!") + + def gatherData(self, states): + """ + Collects and stores all the data required for plotting the final state of + the system. + + Parameters + ---------- + states : bool + Load all of the state arrays. 
If false, only the constants are + loaded to save time for animation. + + Notes + ----- + Stores the following public variables: + + cons : array of float + (Ncons, nx, ny, nz) Array containing the conserved vector + consLabels : array of string + (Ncons,) The labels of the conserved elements + prims : array of float + (Nprims, nx, ny, nz) Array containing the primitive vector + primLabels : array of string + (Nprims,) The labels of the primitive elements + aux : array of float + (Naux, nx, ny, nz) Array containing the auxiliary vector + auxLabels : array of string + (Naux,) The labels of the auxiliary elements + c : dictionary + Dictionary containing all constant data saved in simData. Access + elements by typing as an argument the constant you want as a string. + E.g. to get zmax, enter --> c['zmax'] + All links are the same as the constant name in the SimData class. + + """ + + # Dictionary to hold constants + self.c = {} + c = self.c + # Get constants first + print("Fetching constants...") + with open(self.DatDir + 'Constants/constants' + self.appendix + '.dat', 'r') as f: + for i, line in enumerate(f): + if not i==0: + line=line.split() + c['nx'] = int(line[0]) + c['ny'] = int(line[1]) + if c['ny'] == 0: + c['ny'] = 1 + c['nz'] = int(line[2]) + if c['nz'] == 0: + c['nz'] = 1 + c['Nx'] = int(line[3]) + c['Ny'] = int(line[4]) + c['Nz'] = int(line[5]) + c['xmin'] = float(line[6]) + c['xmax'] = float(line[7]) + c['ymin'] = float(line[8]) + c['ymax'] = float(line[9]) + c['zmin'] = float(line[10]) + c['zmax'] = float(line[11]) + c['endTime'] = float(line[12]) + c['cfl'] = float(line[13]) + c['Ng'] = int(line[14]) + c['gamma'] = float(line[15]) + c['sigma'] = float(line[16]) + c['Ncons'] = int(line[17]) + c['Nprims'] = int(line[18]) + c['Naux'] = int(line[19]) + c['cp'] = float(line[20]) + c['dt'] = float(line[21]) + c['t'] = float(line[22]) + c['dx'] = float(line[23]) + c['dy'] = float(line[24]) + c['dz'] = float(line[25]) + + print("{} conserved vectors".format(c['Ncons'])) + print("{} primitive vectors".format(c['Nprims'])) + print("{} auxiliary vectors".format(c['Naux'])) + + if states: + # Now gather conserved data + self.cons = np.zeros([c['Ncons'], c['nx'], c['ny'], c['nz']]) + print("Fetching conserved variables...") + with open(self.DatDir + 'Conserved/cons' + self.appendix + '.dat', 'r') as f: + for i, line in enumerate(f): + # Get cons var labels + if i==0: + consLabels = line.split()[2:] + # Get cons var data + else: + temp = line.split() + for k in range(c['nz']): + self.cons[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) + + + # Clean up labels (remove the commas) + self.cleanConsLabels = [] + for i in range(len(consLabels)-1): + self.cleanConsLabels.append(consLabels[i][:-1]) + self.cleanConsLabels.append(consLabels[-1]) + + with suppress(FileNotFoundError): + # Now get primitive variables if and store the data in array... 
+ self.prims = np.zeros([c['Nprims'], c['nx'], c['ny'], c['nz']]) + print("Fetching primitive variables...") + with open(self.DatDir + 'Primitive/prims' + self.appendix + '.dat', 'r') as f: + for i, line in enumerate(f): + # Get primitive var labels + if i==0: + primLabels = line.split()[2:] + # Get primitive var data + else: + temp = line.split() + for k in range(c['nz']): + self.prims[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) + + # Clean up labels (remove the commas) + self.cleanPrimLabels = [] + for i in range(len(primLabels)-1): + self.cleanPrimLabels.append(primLabels[i][:-1]) + self.cleanPrimLabels.append(primLabels[-1]) + + with suppress(FileNotFoundError): + # And finally the aux vars if available + self.aux = np.zeros([c['Naux'], c['nx'], c['ny'], c['nz']]) + print("Fetching auxiliary variables...") + with open(self.DatDir + 'Auxiliary/aux' + self.appendix +'.dat', 'r') as f: + for i, line in enumerate(f): + # Get cons var labels + if i==0: + auxLabels = line.split()[2:] + # Get cons var data + else: + temp = line.split() + for k in range(c['nz']): + self.aux[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) + + # Clean up labels (remove the commas) + self.cleanAuxLabels = [] + for i in range(len(auxLabels)-1): + self.cleanAuxLabels.append(auxLabels[i][:-1]) + self.cleanAuxLabels.append(auxLabels[-1]) + + with suppress(FileNotFoundError): + # Grab domain data + self.x = np.zeros(c['nx']) + self.y = np.zeros(c['ny']) + self.z = np.zeros(c['nz']) + coords = [self.x, self.y, self.z] + print("Fetching domain coordinates...") + with open(self.DatDir + 'Domain/domain' + self.appendix +'.dat', 'r') as f: + for coord, (i, line) in zip(coords, enumerate(f)): + temp = line.split() + print(len(temp)) + for k, val in enumerate(temp): + coord[k] = float(val) + + + + def _getVarFromLine(self, line, nx, ny): + """ + Given the line number that the iterator is on, and the size of the x-domain, + returns the index of the primitive variable this data belongs to. + + Parameters + ---------- + line: int + The line number the file pointer is pointing to. We want to know which + primitive variable this line's data corresponds to. + nx: int + The total number (incl ghost cells) of domain cells in the x-direction. + ny: int + The total number (incl ghost cells) of domain cells in the y-direction. + + Returns + ------- + var: + The primitive variable index of this line's data. + + Other + ----- + Function will throw a ValueError if trying to get the primitive index + of the first (zero'th) line. + """ + if line == 0: + raise ValueError('Line zero does not contain any data') + else: + return ((line-1)//ny)//nx + + + def _getXIndexFromLine(self, line, nx, ny): + """ + Given the line number that the iterator is on, and the size of the x-domain, + returns the x-index of this line's data. + + Parameters + ---------- + line: int + The line number the file pointer is pointing to. We want to know which + primitive variable this line's data corresponds to. + nx: int + The total number (incl ghost cells) of domain cells in the x-direction. + ny: int + The total number (incl ghost cells) of domain cells in the y-direction. + + Returns + ------- + index: + The x-index of the current line's data. 
+ """ + return ((line-1)//ny)%nx + + def _getYIndexFromLine(self, line, nx, ny): + """ + Given the line number that the iterator is on, and the size of the y-domain, + returns the y-index of this line's data. + + Parameters + ---------- + line: int + The line number the file pointer is pointing to. We want to know which + primitive variable this line's data corresponds to. + nx: int + The total number (incl ghost cells) of domain cells in the x-direction. + ny: int + The total number (incl ghost cells) of domain cells in the y-direction. + + Returns + ------- + index: + The y-index of the current line's data. + """ + return (line-1)%ny + + + + + ############################################################################### + # Plotting Functions # + ############################################################################### + + + + + def plotHeatMaps(self, data='prims', color=None, axis=2): + """ + Plots the 2D heatmap of the given data. The axes to be plotted can be + selected via the axis parameter---this corresponds to the axis you want + to ignore. + + Parameters + ---------- + data: string + Describes which variables the user wants to plot. Choose from + 'prims', 'cons', 'aux' or 'primitive', 'conserved' and 'auxiliary' + color: matplotlib color map + The colour theme to be plotting in. This can take string arguments + but best to stick to variants of cm.somecolourscheme + E.g. cm.magma + axis: int + The axis the user wants to ignore. + (0, 1, 2) = (x, y, z) + """ + if data=='prims' or data=='primitive': + data = self.prims + dataLabels = self.cleanPrimLabels + elif data=='cons' or data=='conserved': + data = self.cons + dataLabels = self.cleanConsLabels + elif data=='aux' or data=='auxiliary': + data = self.aux + data = self.cleanAuxLabels + else: + raise ValueError("Variable type not recognised, please try again") + c = self.c + + for i in range(data.shape[0]): + fig, ax = plt.subplots(1) + if (axis == 0): + plotVars = data[i, c['Nx']//2, :, :] + axisLabel1 = r'$y$' + axisLabel2 = r'$z$' + if (axis == 1): + plotVars = data[i, :, c['Ny']//2, :] + axisLabel1 = r'$x$' + axisLabel2 = r'$z$' + if (axis == 2): + plotVars = data[i, :, :, c['Nz']//2] + axisLabel1 = r'$x$' + axisLabel2 = r'$y$' + + if color==None: + color = cm.afmhot + surf = ax.imshow(plotVars.T, cmap=color, interpolation='bicubic', aspect='auto') + ax.set_title(r'Time Evolution for {}: $t = {}$'.format(dataLabels[i], c['t'])) + ax.set_xlim([0, self.c['nx']]) + ax.set_ylim([0, self.c['ny']]) + ax.set_xlabel(axisLabel1) + ax.set_ylabel(axisLabel2) + fig.colorbar(surf, shrink=0.5, aspect=5) + plt.show() + return ax + + def plotSlice(self, data='prims', axis=0): + """ + Plots the variation of data in the `axis` direction. + + Parameters + ---------- + data: string + Describes which variables the user wants to plot. Choose from + 'prims', 'cons', 'aux' or 'primitive', 'conserved' and 'auxiliary' + color: matplotlib color map + The colour theme to be plotting in. This can take string arguments + but best to stick to variants of cm.somecolourscheme + E.g. cm.magma + axis: int, optional + The axis the user wants to plot in. + (0, 1, 2) = (x, y, z) + Defaults to axis=0, x-direction. 
+ """ + if data=='prims' or data=='primitive': + data = self.prims + dataLabels = self.cleanPrimLabels + elif data=='cons' or data=='conserved': + data = self.cons + dataLabels = self.cleanConsLabels + elif data=='aux' or data=='auxiliary': + data = self.aux + dataLabels = self.cleanAuxLabels + else: + raise ValueError("Variable type not recognised, please try again") + c = self.c + + Nx, Ny, Nz = c['Nx'], c['Ny'], c['Nz'] + + for i in range(len(data)): + plt.figure() + if (axis == 0): + plotVars = data[i, :, Ny//2, Nz//2] + axisLabel = r'$x$' + step = c['dx'] + n = c['nx'] + left, right = c['xmin'], c['xmax'] + if (axis == 1): + plotVars = data[i, Nx//2, :, Nz//2] + axisLabel = r'$y$' + step = c['dy'] + n = c['ny'] + left, right = c['ymin'], c['ymax'] + if (axis == 2): + plotVars = data[i, Nx//2, Ny//2, :] + axisLabel = r'$z$' + step = c['dz'] + n = c['nz'] + left, right = c['zmin'], c['zmax'] + + ymin = np.min(plotVars) + ymax = np.max(plotVars) + rangeY = ymax - ymin + ylower = ymin - 0.025 * rangeY + yupper = ymax + 0.025 * rangeY + xs = np.linspace(left + step/2, right - step/2, n) + plt.plot(xs, plotVars, label='{}'.format(dataLabels[i])) + plt.title(r'Time Evolution for {}: $t = {}$'.format(dataLabels[i], c['t'])) + plt.xlabel(axisLabel) + plt.ylabel(r'$q_{}(x)$'.format(i+1)) + plt.xlim([c['xmin'], c['xmax']]) +# plt.ylim((ylower, yupper)) + plt.legend(loc='lower center', fontsize=10) + plt.show() + + + def plotTwoFluidSlice(self): + """ + Plots the variation of total data in the x-direction of the two fluids. + + """ + + c = self.c + Ny, Nz = c['Ny'], c['Nz'] + + rho = self.prims[0, :, Ny//2, Nz//2] + self.prims[5, :, Ny//2, Nz//2] + p = self.prims[4, :, Ny//2, Nz//2] + self.prims[9, :, Ny//2, Nz//2] + var = [rho, *self.aux[31:34, :, Ny//2, Nz//2], p, *self.prims[10:, :, Ny//2, Nz//2]] + varLab = [r'$\rho$', r'$u_x$', r'$u_y$', r'$u_z$', r'$p$', r'$B_x$', r'$B_y$', r'$B_z$', r'$E_x$', r'$E_y$', r'$E_z$'] + + xs = np.linspace(c['xmin'] + c['dx']/2, c['xmax'] - c['dx']/2, c['nx']) + + for i, v in enumerate(var): + plt.figure() + plt.plot(xs, v) + plt.title(varLab[i]) + ymin = np.min(v) + ymax = np.max(v) + rangeY = ymax - ymin + ylower = ymin - 0.025 * rangeY + yupper = ymax + 0.025 * rangeY + plt.title(r'Time Evolution for {}: $t = {}$'.format(varLab[i], c['t'])) + plt.xlabel(r'$x$') + plt.ylabel(r'$q_{}(x)$'.format(i+1)) + plt.ylim((ylower, yupper)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend(loc='lower center', fontsize=10) + plt.show() + + def plotTwoFluidCurrentSheetAgainstExact(self): + """ + The current sheet has an analytical solution for the y-direction magnetic + field. This is plotted against the given B-field. + """ + By = self.cons[11] + c = self.c + plt.figure() + xs = np.linspace(c['xmin'], c['xmax'], c['nx']) + exact = np.sign(xs)*erf(0.5 * np.sqrt(c['sigma'] * xs ** 2 / (c['t']+1))) + plt.plot(xs, By[:, 0, 0], label='Numerical') + plt.plot(xs, exact, label='Exact') + plt.xlim([c['xmin'], c['xmax']]) + plt.ylim([-1.2, 1.2]) + plt.xlabel(r'$x$') + plt.ylabel(r'$B_y$') + plt.title(r'Comparison of exact and numerical $B_y$ at $t={:.4f}$'.format(c['t']+1)) + plt.legend(loc='upper left') + plt.show() + #return np.linalg.norm(exact - By[:, 0, 0]) + + + def plotSingleFluidCurrentSheetAgainstExact(self, direction=0): + """ + The current sheet has an analytical solution for the y-direction magnetic + field. This is plotted against the given B-field. 
+ """ + c = self.c + plt.figure() + nx = self.c['Nx'] // 2 + ny = self.c['Ny'] // 2 + nz = self.c['Nz'] // 2 + + if direction == 0: + B = self.cons[6, :, ny, nz] + x = np.linspace(c['xmin'], c['xmax'], c['nx']) + elif direction == 1: + B = self.cons[7, nx, :, nz] + x = np.linspace(c['ymin'], c['ymax'], c['ny']) + else: + B = self.cons[5, nx, ny, :] + x = np.linspace(c['zmin'], c['zmax'], c['nz']) + + exact = np.sign(x)*erf(0.5 * np.sqrt(c['sigma'] * x ** 2 / (c['t']+1))) + initial = np.sign(x)*erf(0.5 * np.sqrt(c['sigma'] * x ** 2 )) + plt.plot(x, B, label='Numerical') + plt.plot(x, exact, 'k--', label='Exact') + plt.plot(x, initial, label='Initial') + plt.xlim([c['xmin'], c['xmax']]) + plt.ylim([-1.2, 1.2]) + plt.xlabel(r'$x$') + plt.ylabel(r'$B_y$') + plt.title(r'Comparison of exact and numerical $B_y$ at $t={:.4f}$'.format(c['t']+1)) + plt.legend(loc='upper left') + plt.show() + + def plotTwoFluidCPAlfvenWaveAgainstExact(self): + """ + The cirularly polarized alfven wave has an exact solution, see Amano 2016 + for details. This method plots all non-trivial prims against their exact + values for case 3. + """ + + rho1, vx1, vy1, vz1, p1, rho2, vx2, vy2, vz2, p2, Bx, By, Bz, Ex, Ey, Ez = self.prims[:] + c = self.c + xs = np.linspace(c['xmin'], c['xmax'], c['nx']) + t = c['t'] + + h = 1.04 + B0 = h + omegaBar1 = -np.sqrt(1.04) + omegaBar2 = -omegaBar1 + kx = 1.0/4.0 + + omega = 5.63803828148e-1 + Wp = 5.19940020571e-6 + 1 + We = 6.68453076522e-5 + 1 + xsi = 0.01 + + U1 = -xsi * omega * omegaBar1 / (kx * (omega + omegaBar1 * We)) + U2 = -xsi * omega * omegaBar2 / (kx * (omega + omegaBar2 * Wp)) + + phi = kx * xs - omega * t + + BySol = xsi * B0 * np.cos(phi) + BzSol = -xsi * B0 * np.sin(phi) + EySol = -(omega/kx)*xsi*B0*np.sin(phi) + EzSol = -(omega/kx)*xsi*B0*np.cos(phi) + vy1sol = U1 * np.cos(phi) + vz1sol = -U1 * np.sin(phi) + vy2sol = U2 * np.cos(phi) + vz2sol = -U2 * np.sin(phi) + + # Bx + BxSol = np.zeros_like(BySol) + BxSol[:] = B0 + plt.figure() + plt.plot(xs, Bx[:, 0, 0], label='Numerical') + plt.plot(xs, BxSol, '--', label='Exact') + plt.title(r'Exact comparison for $B_x$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # By + plt.figure() + plt.plot(xs, By[:, 0, 0], label='Numerical') + plt.plot(xs, BySol, '--', label='Exact') + plt.title(r'Exact comparison for $B_y$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # By + plt.figure() + plt.plot(xs, Bz[:, 0, 0], label='Numerical') + plt.plot(xs, BzSol, '--', label='Exact') + plt.title(r'Exact comparison for $B_z$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # Ex + plt.figure() + plt.plot(xs, Ex[:, 0, 0], label='Numerical') + plt.plot(xs, np.zeros_like(xs), '--', label='Exact') + plt.title(r'Exact comparison for $E_x$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + minn = min(np.min(Ex), 0) + maxx = max(np.max(Ex), 0) + sep = maxx - minn + plt.ylim([minn-0.1*sep, maxx+0.1*sep]) + plt.legend() + # Ey + plt.figure() + plt.plot(xs, Ey[:, 0, 0], label='Numerical') + plt.plot(xs, EySol, '--', label='Exact') + plt.title(r'Exact comparison for $E_y$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # Ez + plt.figure() + plt.plot(xs, Ez[:, 0, 0], label='Numerical') + plt.plot(xs, EzSol, '--', label='Exact') + plt.title(r'Exact comparison for $E_z$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # vx1 + plt.figure() + plt.plot(xs, vx1[:, 0, 0], label='Numerical') + plt.plot(xs, np.zeros_like(xs), '--', 
label='Exact') + plt.title(r'Exact comparison for $v_x1$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + minn = min(np.min(vx1), 0) + maxx = max(np.max(vx1), 0) + sep = maxx - minn + plt.ylim([minn-0.1*sep, maxx+0.1*sep]) + plt.legend() + # vy1 + plt.figure() + plt.plot(xs, vy1[:, 0, 0], label='Numerical') + plt.plot(xs, vy1sol, '--', label='Exact') + plt.title(r'Exact comparison for $v_y1$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # vz1 + plt.figure() + plt.plot(xs, vz1[:, 0, 0], label='Numerical') + plt.plot(xs, vz1sol, '--', label='Exact') + plt.title(r'Exact comparison for $v_z1$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # vx2 + plt.figure() + plt.plot(xs, vx2[:, 0, 0], label='Numerical') + plt.plot(xs, np.zeros_like(xs), '--', label='Exact') + plt.title(r'Exact comparison for $v_x2$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + minn = min(np.min(vx2), 0) + maxx = max(np.max(vx2), 0) + sep = maxx - minn + plt.ylim([minn-0.1*sep, maxx+0.1*sep]) + plt.legend() + # vy2 + plt.figure() + plt.plot(xs, vy2[:, 0, 0], label='Numerical') + plt.plot(xs, vy2sol, '--', label='Exact') + plt.title(r'Exact comparison for $v_y2$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # vz2 + plt.figure() + plt.plot(xs, vz2[:, 0, 0], label='Numerical') + plt.plot(xs, vz2sol, '--', label='Exact') + plt.title(r'Exact comparison for $v_z2$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + + + + def plot2DBrioWu(self, diag=0): + """ + Plots the main diagonal of the 2D Brio-Wu problem + + Parameters + ---------- + diag : int + The diagonal to plot the slice + """ + + nx = self.c['nx'] +# Ny = self.c['Ny'] + midZ = self.c['Nz'] // 2 + Ng = self.c['Ng'] + + if diag == 0: + LB = -Ng + RB = Ng + step = -1 + else: + LB = Ng + RB = -Ng + step = 1 + + + dens = self.prims[0, :, LB:RB:step, midZ].diagonal() + vx = self.prims[1, :, LB:RB:step, midZ].diagonal() + vy = self.prims[2, :, LB:RB:step, midZ].diagonal() + + + p = self.prims[4, :, LB:RB:step, midZ].diagonal() + B = self.prims[5, :, LB:RB:step, midZ].diagonal() / np.sqrt(2) + \ + self.prims[6, :, LB:RB:step, midZ].diagonal() / np.sqrt(2) + + # rho + plt.figure() + plt.plot(np.linspace(0, 1, nx), dens) + plt.ylabel(r'$\rho$') + plt.xlim([0, 1]) + plt.show() + # vx + plt.figure() + plt.plot(np.linspace(0, 1, nx), vx) + plt.ylabel(r'$vx$') + plt.xlim([0, 1]) + plt.show() + # vy + plt.figure() + plt.plot(np.linspace(0, 1, nx), vy) + plt.ylabel(r'$vy$') + plt.xlim([0, 1]) + plt.show() + # v rel + plt.figure() + plt.plot(np.linspace(0, 1, nx),(vx-vy)/(1-vx*vy)) + plt.ylabel(r'$v (rel)$') + plt.xlim([0, 1]) + plt.show() + # v non-rel + plt.figure() + plt.plot(np.linspace(0, 1, nx), vx/np.sqrt(2) - vy/np.sqrt(2)) + plt.ylabel(r'$v (non-rel)$') + plt.xlim([0, 1]) + plt.show() + # p + plt.figure() + plt.plot(np.linspace(0, 1, nx), p) + plt.ylabel(r'$p$') + plt.xlim([0, 1]) + plt.show() + # B + plt.figure() + plt.plot(np.linspace(0, 1, nx), B) + plt.ylabel(r'$B$') + plt.xlim([0, 1]) + plt.show() + + return B + + def plotAdvectionAgainstInitial(self): + xs = np.linspace(Plot.c['dx']/2, 1-Plot.c['dx']/2, Plot.c['nx']) + initialRho = np.ones_like(xs)*0.1 + initialRho += 0.4*np.exp(-(10 * (xs - 0.5))**2) + + fig, axs = plt.subplots(2) + fig.set_size_inches(8, 6) + axs[0].plot(xs, initialRho, 'k-', linewidth=5, alpha=0.3, label='initial') + axs[0].plot(xs, Plot.prims[0, :, 0, 0], 'b:', label='rho') + axs[0].set_xlim(xs[0], xs[-1]) + 
axs[0].set_xlabel(r'$x$') + axs[0].set_ylabel(r'$\rho$') + axs[0].legend() + + error = np.abs(initialRho-Plot.prims[0, :, 0, 0]) + errorNorm = np.sum(error)/len(error) + axs[1].semilogy(xs, error, label=rf'Mean = ${errorNorm:.1e}$') + axs[1].set_xlabel(r"$x$") + axs[1].set_ylabel('Error') + axs[1].set_xlim(xs[0], xs[-1]) + axs[1].legend() + plt.show() + + +# Function declarations over, access data and plot! + + +if __name__ == '__main__': + + Plot = InteractivePlot() + +# Plot.plotSlice() +# Plot.plotSingleFluidCurrentSheetAgainstExact() +# Plot.plotAdvectionAgainstInitial() +# Plot.plotHeatMaps() + + plt.figure() + plt.imshow(np.log(Plot.prims[4, :, :, 0].T), extent=[0, 8, 0, 4], origin='lower') + plt.show() + diff --git a/Tests/GPU/repeat.sh b/Tests/GPU/repeat.sh new file mode 100755 index 00000000..2716a559 --- /dev/null +++ b/Tests/GPU/repeat.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +cd ../CPU +make test_rk2 +./test_rk2 +cd ../GPU +make test_rk2 +./test_rk2 diff --git a/Tests/play.py b/Tests/play.py new file mode 100644 index 00000000..8f110abd --- /dev/null +++ b/Tests/play.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Aug 5 10:56:02 2020 + +@author: alex +""" + +import sys +sys.path.append('../Project/GPU/Src') +sys.path.append('../Project/CPU/Src') +from interactivePlotGPU import InteractivePlot as PlotGPU +from interactivePlotCPU import InteractivePlot as PlotCPU + + +parallel = PlotGPU("TestData/GPU/", "RK2") +#serial = PlotCPU("TestData/Serial/", "RK2") + +pp = parallel.prims +sp = serial.prims + + +#for sv, pv in zip(serial.prims, parallel.prims): +# print(f"{np.sum(np.abs(sv-pv) > 1e-15)}/{30**3} failures") \ No newline at end of file From eb8bb9e03cd69b560e486bcbb404ca54849a05f3 Mon Sep 17 00:00:00 2001 From: AlexJamesWright Date: Wed, 5 Aug 2020 15:13:05 +0100 Subject: [PATCH 03/56] GPU should now be match CPU --- Project/CPU/Src/interactivePlotCPU.py | 757 ---------------------- Project/GPU/Src/interactivePlot.py | 7 +- Tests/CPU/Src/test_fvs.cc | 8 +- Tests/CPU/Src/test_rk2.cc | 14 +- Tests/GPU/Src/compareParallelAndSerial.py | 25 +- Tests/GPU/repeat.sh | 8 - Tests/play.py | 24 - 7 files changed, 25 insertions(+), 818 deletions(-) delete mode 100644 Project/CPU/Src/interactivePlotCPU.py delete mode 100755 Tests/GPU/repeat.sh delete mode 100644 Tests/play.py diff --git a/Project/CPU/Src/interactivePlotCPU.py b/Project/CPU/Src/interactivePlotCPU.py deleted file mode 100644 index d8a288ec..00000000 --- a/Project/CPU/Src/interactivePlotCPU.py +++ /dev/null @@ -1,757 +0,0 @@ -""" - Script gathers the state vectors stored in the Data directory and offers - functionality to plot various elements. -""" - - -import numpy as np -from matplotlib import pyplot as plt -from scipy.special import erf -from matplotlib import cm -import warnings -from contextlib import suppress - -warnings.filterwarnings('ignore', "No labelled objects found. ") - -# Change this to the relative path to the data you want to plot -# File names must start with e.g. `primitive`, anything between this -# and `.dat` should be stored in appendix -# By default, this script will gather data for the final condition of the -# simulation at t=t_end. To gather different data, add arguments to the -# constructor to include the path to the directory and any appendages. 
-FinalDirectory = '../Data/Final/' -appendix = '' - -class InteractivePlot(object): - - def __init__(self, DatDirectory=None, append=None, states=True): - if DatDirectory is None: - self.DatDir = FinalDirectory - else: - self.DatDir = DatDirectory - if append is None: - self.appendix = appendix - else: - self.appendix = append - self.gatherData(states) - print("Ready!") - - def gatherData(self, states): - """ - Collects and stores all the data required for plotting the final state of - the system. - - Parameters - ---------- - states : bool - Load all of the state arrays. If false, only the constants are - loaded to save time for animation. - - Notes - ----- - Stores the following public variables: - - cons : array of float - (Ncons, nx, ny, nz) Array containing the conserved vector - consLabels : array of string - (Ncons,) The labels of the conserved elements - prims : array of float - (Nprims, nx, ny, nz) Array containing the primitive vector - primLabels : array of string - (Nprims,) The labels of the primitive elements - aux : array of float - (Naux, nx, ny, nz) Array containing the auxiliary vector - auxLabels : array of string - (Naux,) The labels of the auxiliary elements - c : dictionary - Dictionary containing all constant data saved in simData. Access - elements by typing as an argument the constant you want as a string. - E.g. to get zmax, enter --> c['zmax'] - All links are the same as the constant name in the SimData class. - - """ - - # Dictionary to hold constants - self.c = {} - c = self.c - # Get constants first - print("Fetching constants...") - with open(self.DatDir + 'Constants/constants' + self.appendix + '.dat', 'r') as f: - for i, line in enumerate(f): - if not i==0: - line=line.split() - c['nx'] = int(line[0]) - c['ny'] = int(line[1]) - if c['ny'] == 0: - c['ny'] = 1 - c['nz'] = int(line[2]) - if c['nz'] == 0: - c['nz'] = 1 - c['Nx'] = int(line[3]) - c['Ny'] = int(line[4]) - c['Nz'] = int(line[5]) - c['xmin'] = float(line[6]) - c['xmax'] = float(line[7]) - c['ymin'] = float(line[8]) - c['ymax'] = float(line[9]) - c['zmin'] = float(line[10]) - c['zmax'] = float(line[11]) - c['endTime'] = float(line[12]) - c['cfl'] = float(line[13]) - c['Ng'] = int(line[14]) - c['gamma'] = float(line[15]) - c['sigma'] = float(line[16]) - c['Ncons'] = int(line[17]) - c['Nprims'] = int(line[18]) - c['Naux'] = int(line[19]) - c['cp'] = float(line[20]) - c['dt'] = float(line[21]) - c['t'] = float(line[22]) - c['dx'] = float(line[23]) - c['dy'] = float(line[24]) - c['dz'] = float(line[25]) - - print("{} conserved vectors".format(c['Ncons'])) - print("{} primitive vectors".format(c['Nprims'])) - print("{} auxiliary vectors".format(c['Naux'])) - - if states: - # Now gather conserved data - self.cons = np.zeros([c['Ncons'], c['nx'], c['ny'], c['nz']]) - print("Fetching conserved variables...") - with open(self.DatDir + 'Conserved/cons' + self.appendix + '.dat', 'r') as f: - for i, line in enumerate(f): - # Get cons var labels - if i==0: - consLabels = line.split()[2:] - # Get cons var data - else: - temp = line.split() - for k in range(c['nz']): - self.cons[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) - - - # Clean up labels (remove the commas) - self.cleanConsLabels = [] - for i in range(len(consLabels)-1): - self.cleanConsLabels.append(consLabels[i][:-1]) - self.cleanConsLabels.append(consLabels[-1]) - - with suppress(FileNotFoundError): - # Now get primitive variables if and store 
the data in array... - self.prims = np.zeros([c['Nprims'], c['nx'], c['ny'], c['nz']]) - print("Fetching primitive variables...") - with open(self.DatDir + 'Primitive/prims' + self.appendix + '.dat', 'r') as f: - for i, line in enumerate(f): - # Get primitive var labels - if i==0: - primLabels = line.split()[2:] - # Get primitive var data - else: - temp = line.split() - for k in range(c['nz']): - self.prims[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) - - # Clean up labels (remove the commas) - self.cleanPrimLabels = [] - for i in range(len(primLabels)-1): - self.cleanPrimLabels.append(primLabels[i][:-1]) - self.cleanPrimLabels.append(primLabels[-1]) - - with suppress(FileNotFoundError): - # And finally the aux vars if available - self.aux = np.zeros([c['Naux'], c['nx'], c['ny'], c['nz']]) - print("Fetching auxiliary variables...") - with open(self.DatDir + 'Auxiliary/aux' + self.appendix +'.dat', 'r') as f: - for i, line in enumerate(f): - # Get cons var labels - if i==0: - auxLabels = line.split()[2:] - # Get cons var data - else: - temp = line.split() - for k in range(c['nz']): - self.aux[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) - - # Clean up labels (remove the commas) - self.cleanAuxLabels = [] - for i in range(len(auxLabels)-1): - self.cleanAuxLabels.append(auxLabels[i][:-1]) - self.cleanAuxLabels.append(auxLabels[-1]) - - with suppress(FileNotFoundError): - # Grab domain data - self.x = np.zeros(c['nx']) - self.y = np.zeros(c['ny']) - self.z = np.zeros(c['nz']) - coords = [self.x, self.y, self.z] - print("Fetching domain coordinates...") - with open(self.DatDir + 'Domain/domain' + self.appendix +'.dat', 'r') as f: - for coord, (i, line) in zip(coords, enumerate(f)): - temp = line.split() - print(len(temp)) - for k, val in enumerate(temp): - coord[k] = float(val) - - - - def _getVarFromLine(self, line, nx, ny): - """ - Given the line number that the iterator is on, and the size of the x-domain, - returns the index of the primitive variable this data belongs to. - - Parameters - ---------- - line: int - The line number the file pointer is pointing to. We want to know which - primitive variable this line's data corresponds to. - nx: int - The total number (incl ghost cells) of domain cells in the x-direction. - ny: int - The total number (incl ghost cells) of domain cells in the y-direction. - - Returns - ------- - var: - The primitive variable index of this line's data. - - Other - ----- - Function will throw a ValueError if trying to get the primitive index - of the first (zero'th) line. - """ - if line == 0: - raise ValueError('Line zero does not contain any data') - else: - return ((line-1)//ny)//nx - - - def _getXIndexFromLine(self, line, nx, ny): - """ - Given the line number that the iterator is on, and the size of the x-domain, - returns the x-index of this line's data. - - Parameters - ---------- - line: int - The line number the file pointer is pointing to. We want to know which - primitive variable this line's data corresponds to. - nx: int - The total number (incl ghost cells) of domain cells in the x-direction. - ny: int - The total number (incl ghost cells) of domain cells in the y-direction. - - Returns - ------- - index: - The x-index of the current line's data. 
- """ - return ((line-1)//ny)%nx - - def _getYIndexFromLine(self, line, nx, ny): - """ - Given the line number that the iterator is on, and the size of the y-domain, - returns the y-index of this line's data. - - Parameters - ---------- - line: int - The line number the file pointer is pointing to. We want to know which - primitive variable this line's data corresponds to. - nx: int - The total number (incl ghost cells) of domain cells in the x-direction. - ny: int - The total number (incl ghost cells) of domain cells in the y-direction. - - Returns - ------- - index: - The y-index of the current line's data. - """ - return (line-1)%ny - - - - - ############################################################################### - # Plotting Functions # - ############################################################################### - - - - - def plotHeatMaps(self, data='prims', color=None, axis=2): - """ - Plots the 2D heatmap of the given data. The axes to be plotted can be - selected via the axis parameter---this corresponds to the axis you want - to ignore. - - Parameters - ---------- - data: string - Describes which variables the user wants to plot. Choose from - 'prims', 'cons', 'aux' or 'primitive', 'conserved' and 'auxiliary' - color: matplotlib color map - The colour theme to be plotting in. This can take string arguments - but best to stick to variants of cm.somecolourscheme - E.g. cm.magma - axis: int - The axis the user wants to ignore. - (0, 1, 2) = (x, y, z) - """ - if data=='prims' or data=='primitive': - data = self.prims - dataLabels = self.cleanPrimLabels - elif data=='cons' or data=='conserved': - data = self.cons - dataLabels = self.cleanConsLabels - elif data=='aux' or data=='auxiliary': - data = self.aux - data = self.cleanAuxLabels - else: - raise ValueError("Variable type not recognised, please try again") - c = self.c - - for i in range(data.shape[0]): - fig, ax = plt.subplots(1) - if (axis == 0): - plotVars = data[i, c['Nx']//2, :, :] - axisLabel1 = r'$y$' - axisLabel2 = r'$z$' - if (axis == 1): - plotVars = data[i, :, c['Ny']//2, :] - axisLabel1 = r'$x$' - axisLabel2 = r'$z$' - if (axis == 2): - plotVars = data[i, :, :, c['Nz']//2] - axisLabel1 = r'$x$' - axisLabel2 = r'$y$' - - if color==None: - color = cm.afmhot - surf = ax.imshow(plotVars.T, cmap=color, interpolation='bicubic', aspect='auto') - ax.set_title(r'Time Evolution for {}: $t = {}$'.format(dataLabels[i], c['t'])) - ax.set_xlim([0, self.c['nx']]) - ax.set_ylim([0, self.c['ny']]) - ax.set_xlabel(axisLabel1) - ax.set_ylabel(axisLabel2) - fig.colorbar(surf, shrink=0.5, aspect=5) - plt.show() - return ax - - def plotSlice(self, data='prims', axis=0): - """ - Plots the variation of data in the `axis` direction. - - Parameters - ---------- - data: string - Describes which variables the user wants to plot. Choose from - 'prims', 'cons', 'aux' or 'primitive', 'conserved' and 'auxiliary' - color: matplotlib color map - The colour theme to be plotting in. This can take string arguments - but best to stick to variants of cm.somecolourscheme - E.g. cm.magma - axis: int, optional - The axis the user wants to plot in. - (0, 1, 2) = (x, y, z) - Defaults to axis=0, x-direction. 
- """ - if data=='prims' or data=='primitive': - data = self.prims - dataLabels = self.cleanPrimLabels - elif data=='cons' or data=='conserved': - data = self.cons - dataLabels = self.cleanConsLabels - elif data=='aux' or data=='auxiliary': - data = self.aux - dataLabels = self.cleanAuxLabels - else: - raise ValueError("Variable type not recognised, please try again") - c = self.c - - Nx, Ny, Nz = c['Nx'], c['Ny'], c['Nz'] - - for i in range(len(data)): - plt.figure() - if (axis == 0): - plotVars = data[i, :, Ny//2, Nz//2] - axisLabel = r'$x$' - step = c['dx'] - n = c['nx'] - left, right = c['xmin'], c['xmax'] - if (axis == 1): - plotVars = data[i, Nx//2, :, Nz//2] - axisLabel = r'$y$' - step = c['dy'] - n = c['ny'] - left, right = c['ymin'], c['ymax'] - if (axis == 2): - plotVars = data[i, Nx//2, Ny//2, :] - axisLabel = r'$z$' - step = c['dz'] - n = c['nz'] - left, right = c['zmin'], c['zmax'] - - ymin = np.min(plotVars) - ymax = np.max(plotVars) - rangeY = ymax - ymin - ylower = ymin - 0.025 * rangeY - yupper = ymax + 0.025 * rangeY - xs = np.linspace(left + step/2, right - step/2, n) - plt.plot(xs, plotVars, label='{}'.format(dataLabels[i])) - plt.title(r'Time Evolution for {}: $t = {}$'.format(dataLabels[i], c['t'])) - plt.xlabel(axisLabel) - plt.ylabel(r'$q_{}(x)$'.format(i+1)) - plt.xlim([c['xmin'], c['xmax']]) -# plt.ylim((ylower, yupper)) - plt.legend(loc='lower center', fontsize=10) - plt.show() - - - def plotTwoFluidSlice(self): - """ - Plots the variation of total data in the x-direction of the two fluids. - - """ - - c = self.c - Ny, Nz = c['Ny'], c['Nz'] - - rho = self.prims[0, :, Ny//2, Nz//2] + self.prims[5, :, Ny//2, Nz//2] - p = self.prims[4, :, Ny//2, Nz//2] + self.prims[9, :, Ny//2, Nz//2] - var = [rho, *self.aux[31:34, :, Ny//2, Nz//2], p, *self.prims[10:, :, Ny//2, Nz//2]] - varLab = [r'$\rho$', r'$u_x$', r'$u_y$', r'$u_z$', r'$p$', r'$B_x$', r'$B_y$', r'$B_z$', r'$E_x$', r'$E_y$', r'$E_z$'] - - xs = np.linspace(c['xmin'] + c['dx']/2, c['xmax'] - c['dx']/2, c['nx']) - - for i, v in enumerate(var): - plt.figure() - plt.plot(xs, v) - plt.title(varLab[i]) - ymin = np.min(v) - ymax = np.max(v) - rangeY = ymax - ymin - ylower = ymin - 0.025 * rangeY - yupper = ymax + 0.025 * rangeY - plt.title(r'Time Evolution for {}: $t = {}$'.format(varLab[i], c['t'])) - plt.xlabel(r'$x$') - plt.ylabel(r'$q_{}(x)$'.format(i+1)) - plt.ylim((ylower, yupper)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend(loc='lower center', fontsize=10) - plt.show() - - def plotTwoFluidCurrentSheetAgainstExact(self): - """ - The current sheet has an analytical solution for the y-direction magnetic - field. This is plotted against the given B-field. - """ - By = self.cons[11] - c = self.c - plt.figure() - xs = np.linspace(c['xmin'], c['xmax'], c['nx']) - exact = np.sign(xs)*erf(0.5 * np.sqrt(c['sigma'] * xs ** 2 / (c['t']+1))) - plt.plot(xs, By[:, 0, 0], label='Numerical') - plt.plot(xs, exact, label='Exact') - plt.xlim([c['xmin'], c['xmax']]) - plt.ylim([-1.2, 1.2]) - plt.xlabel(r'$x$') - plt.ylabel(r'$B_y$') - plt.title(r'Comparison of exact and numerical $B_y$ at $t={:.4f}$'.format(c['t']+1)) - plt.legend(loc='upper left') - plt.show() - #return np.linalg.norm(exact - By[:, 0, 0]) - - - def plotSingleFluidCurrentSheetAgainstExact(self, direction=0): - """ - The current sheet has an analytical solution for the y-direction magnetic - field. This is plotted against the given B-field. 
- """ - c = self.c - plt.figure() - nx = self.c['Nx'] // 2 - ny = self.c['Ny'] // 2 - nz = self.c['Nz'] // 2 - - if direction == 0: - B = self.cons[6, :, ny, nz] - x = np.linspace(c['xmin'], c['xmax'], c['nx']) - elif direction == 1: - B = self.cons[7, nx, :, nz] - x = np.linspace(c['ymin'], c['ymax'], c['ny']) - else: - B = self.cons[5, nx, ny, :] - x = np.linspace(c['zmin'], c['zmax'], c['nz']) - - exact = np.sign(x)*erf(0.5 * np.sqrt(c['sigma'] * x ** 2 / (c['t']+1))) - initial = np.sign(x)*erf(0.5 * np.sqrt(c['sigma'] * x ** 2 )) - plt.plot(x, B, label='Numerical') - plt.plot(x, exact, 'k--', label='Exact') - plt.plot(x, initial, label='Initial') - plt.xlim([c['xmin'], c['xmax']]) - plt.ylim([-1.2, 1.2]) - plt.xlabel(r'$x$') - plt.ylabel(r'$B_y$') - plt.title(r'Comparison of exact and numerical $B_y$ at $t={:.4f}$'.format(c['t']+1)) - plt.legend(loc='upper left') - plt.show() - - def plotTwoFluidCPAlfvenWaveAgainstExact(self): - """ - The cirularly polarized alfven wave has an exact solution, see Amano 2016 - for details. This method plots all non-trivial prims against their exact - values for case 3. - """ - - rho1, vx1, vy1, vz1, p1, rho2, vx2, vy2, vz2, p2, Bx, By, Bz, Ex, Ey, Ez = self.prims[:] - c = self.c - xs = np.linspace(c['xmin'], c['xmax'], c['nx']) - t = c['t'] - - h = 1.04 - B0 = h - omegaBar1 = -np.sqrt(1.04) - omegaBar2 = -omegaBar1 - kx = 1.0/4.0 - - omega = 5.63803828148e-1 - Wp = 5.19940020571e-6 + 1 - We = 6.68453076522e-5 + 1 - xsi = 0.01 - - U1 = -xsi * omega * omegaBar1 / (kx * (omega + omegaBar1 * We)) - U2 = -xsi * omega * omegaBar2 / (kx * (omega + omegaBar2 * Wp)) - - phi = kx * xs - omega * t - - BySol = xsi * B0 * np.cos(phi) - BzSol = -xsi * B0 * np.sin(phi) - EySol = -(omega/kx)*xsi*B0*np.sin(phi) - EzSol = -(omega/kx)*xsi*B0*np.cos(phi) - vy1sol = U1 * np.cos(phi) - vz1sol = -U1 * np.sin(phi) - vy2sol = U2 * np.cos(phi) - vz2sol = -U2 * np.sin(phi) - - # Bx - BxSol = np.zeros_like(BySol) - BxSol[:] = B0 - plt.figure() - plt.plot(xs, Bx[:, 0, 0], label='Numerical') - plt.plot(xs, BxSol, '--', label='Exact') - plt.title(r'Exact comparison for $B_x$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # By - plt.figure() - plt.plot(xs, By[:, 0, 0], label='Numerical') - plt.plot(xs, BySol, '--', label='Exact') - plt.title(r'Exact comparison for $B_y$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # By - plt.figure() - plt.plot(xs, Bz[:, 0, 0], label='Numerical') - plt.plot(xs, BzSol, '--', label='Exact') - plt.title(r'Exact comparison for $B_z$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # Ex - plt.figure() - plt.plot(xs, Ex[:, 0, 0], label='Numerical') - plt.plot(xs, np.zeros_like(xs), '--', label='Exact') - plt.title(r'Exact comparison for $E_x$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - minn = min(np.min(Ex), 0) - maxx = max(np.max(Ex), 0) - sep = maxx - minn - plt.ylim([minn-0.1*sep, maxx+0.1*sep]) - plt.legend() - # Ey - plt.figure() - plt.plot(xs, Ey[:, 0, 0], label='Numerical') - plt.plot(xs, EySol, '--', label='Exact') - plt.title(r'Exact comparison for $E_y$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # Ez - plt.figure() - plt.plot(xs, Ez[:, 0, 0], label='Numerical') - plt.plot(xs, EzSol, '--', label='Exact') - plt.title(r'Exact comparison for $E_z$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # vx1 - plt.figure() - plt.plot(xs, vx1[:, 0, 0], label='Numerical') - plt.plot(xs, np.zeros_like(xs), '--', 
label='Exact') - plt.title(r'Exact comparison for $v_x1$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - minn = min(np.min(vx1), 0) - maxx = max(np.max(vx1), 0) - sep = maxx - minn - plt.ylim([minn-0.1*sep, maxx+0.1*sep]) - plt.legend() - # vy1 - plt.figure() - plt.plot(xs, vy1[:, 0, 0], label='Numerical') - plt.plot(xs, vy1sol, '--', label='Exact') - plt.title(r'Exact comparison for $v_y1$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # vz1 - plt.figure() - plt.plot(xs, vz1[:, 0, 0], label='Numerical') - plt.plot(xs, vz1sol, '--', label='Exact') - plt.title(r'Exact comparison for $v_z1$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # vx2 - plt.figure() - plt.plot(xs, vx2[:, 0, 0], label='Numerical') - plt.plot(xs, np.zeros_like(xs), '--', label='Exact') - plt.title(r'Exact comparison for $v_x2$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - minn = min(np.min(vx2), 0) - maxx = max(np.max(vx2), 0) - sep = maxx - minn - plt.ylim([minn-0.1*sep, maxx+0.1*sep]) - plt.legend() - # vy2 - plt.figure() - plt.plot(xs, vy2[:, 0, 0], label='Numerical') - plt.plot(xs, vy2sol, '--', label='Exact') - plt.title(r'Exact comparison for $v_y2$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # vz2 - plt.figure() - plt.plot(xs, vz2[:, 0, 0], label='Numerical') - plt.plot(xs, vz2sol, '--', label='Exact') - plt.title(r'Exact comparison for $v_z2$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - - - - def plot2DBrioWu(self, diag=0): - """ - Plots the main diagonal of the 2D Brio-Wu problem - - Parameters - ---------- - diag : int - The diagonal to plot the slice - """ - - nx = self.c['nx'] -# Ny = self.c['Ny'] - midZ = self.c['Nz'] // 2 - Ng = self.c['Ng'] - - if diag == 0: - LB = -Ng - RB = Ng - step = -1 - else: - LB = Ng - RB = -Ng - step = 1 - - - dens = self.prims[0, :, LB:RB:step, midZ].diagonal() - vx = self.prims[1, :, LB:RB:step, midZ].diagonal() - vy = self.prims[2, :, LB:RB:step, midZ].diagonal() - - - p = self.prims[4, :, LB:RB:step, midZ].diagonal() - B = self.prims[5, :, LB:RB:step, midZ].diagonal() / np.sqrt(2) + \ - self.prims[6, :, LB:RB:step, midZ].diagonal() / np.sqrt(2) - - # rho - plt.figure() - plt.plot(np.linspace(0, 1, nx), dens) - plt.ylabel(r'$\rho$') - plt.xlim([0, 1]) - plt.show() - # vx - plt.figure() - plt.plot(np.linspace(0, 1, nx), vx) - plt.ylabel(r'$vx$') - plt.xlim([0, 1]) - plt.show() - # vy - plt.figure() - plt.plot(np.linspace(0, 1, nx), vy) - plt.ylabel(r'$vy$') - plt.xlim([0, 1]) - plt.show() - # v rel - plt.figure() - plt.plot(np.linspace(0, 1, nx),(vx-vy)/(1-vx*vy)) - plt.ylabel(r'$v (rel)$') - plt.xlim([0, 1]) - plt.show() - # v non-rel - plt.figure() - plt.plot(np.linspace(0, 1, nx), vx/np.sqrt(2) - vy/np.sqrt(2)) - plt.ylabel(r'$v (non-rel)$') - plt.xlim([0, 1]) - plt.show() - # p - plt.figure() - plt.plot(np.linspace(0, 1, nx), p) - plt.ylabel(r'$p$') - plt.xlim([0, 1]) - plt.show() - # B - plt.figure() - plt.plot(np.linspace(0, 1, nx), B) - plt.ylabel(r'$B$') - plt.xlim([0, 1]) - plt.show() - - return B - - def plotAdvectionAgainstInitial(self): - xs = np.linspace(Plot.c['dx']/2, 1-Plot.c['dx']/2, Plot.c['nx']) - initialRho = np.ones_like(xs)*0.1 - initialRho += 0.4*np.exp(-(10 * (xs - 0.5))**2) - - fig, axs = plt.subplots(2) - fig.set_size_inches(8, 6) - axs[0].plot(xs, initialRho, 'k-', linewidth=5, alpha=0.3, label='initial') - axs[0].plot(xs, Plot.prims[0, :, 0, 0], 'b:', label='rho') - axs[0].set_xlim(xs[0], xs[-1]) - 
axs[0].set_xlabel(r'$x$') - axs[0].set_ylabel(r'$\rho$') - axs[0].legend() - - error = np.abs(initialRho-Plot.prims[0, :, 0, 0]) - errorNorm = np.sum(error)/len(error) - axs[1].semilogy(xs, error, label=rf'Mean = ${errorNorm:.1e}$') - axs[1].set_xlabel(r"$x$") - axs[1].set_ylabel('Error') - axs[1].set_xlim(xs[0], xs[-1]) - axs[1].legend() - plt.show() - - -# Function declarations over, access data and plot! - - -if __name__ == '__main__': - - Plot = InteractivePlot() - -# Plot.plotSlice() -# Plot.plotSingleFluidCurrentSheetAgainstExact() -# Plot.plotAdvectionAgainstInitial() -# Plot.plotHeatMaps() - - plt.figure() - plt.imshow(np.log(Plot.prims[4, :, :, 0].T), extent=[0, 8, 0, 4], origin='lower') - plt.show() - diff --git a/Project/GPU/Src/interactivePlot.py b/Project/GPU/Src/interactivePlot.py index 6021e9ce..f34461dc 100644 --- a/Project/GPU/Src/interactivePlot.py +++ b/Project/GPU/Src/interactivePlot.py @@ -104,9 +104,10 @@ def gatherData(self): print("{} conserved vectors".format(c['Ncons'])) print("{} primitive vectors".format(c['Nprims'])) print("{} auxiliary vectors".format(c['Naux'])) + print(f"Domain extent is {c['nx']}, {c['ny']}, {c['nz']}") # Now gather conserved data - self.cons = np.zeros([c['Ncons'], c['Nx'], c['Ny'], c['Nz']]) + self.cons = np.zeros([c['Ncons'], c['nx'], c['ny'], c['nz']]) print("Fetching conserved variables...") with open(self.DatDir + 'Conserved/cons' + self.appendix + '.dat', 'r') as f: for i, line in enumerate(f): @@ -128,7 +129,7 @@ def gatherData(self): with suppress(FileNotFoundError): # Now get primitive variables if and store the data in array... - self.prims = np.zeros([c['Nprims'], c['Nx'], c['Ny'], c['Nz']]) + self.prims = np.zeros([c['Nprims'], c['nx'], c['ny'], c['nz']]) print("Fetching primitive variables...") with open(self.DatDir + 'Primitive/prims' + self.appendix + '.dat', 'r') as f: for i, line in enumerate(f): @@ -149,7 +150,7 @@ def gatherData(self): with suppress(FileNotFoundError): # And finally the aux vars if available - self.aux = np.zeros([c['Naux'], c['Nx'], c['Ny'], c['Nz']]) + self.aux = np.zeros([c['Naux'], c['nx'], c['ny'], c['nz']]) print("Fetching auxiliary variables...") with open(self.DatDir + 'Auxiliary/aux' + self.appendix +'.dat', 'r') as f: for i, line in enumerate(f): diff --git a/Tests/CPU/Src/test_fvs.cc b/Tests/CPU/Src/test_fvs.cc index 973c5b95..77d72eb2 100644 --- a/Tests/CPU/Src/test_fvs.cc +++ b/Tests/CPU/Src/test_fvs.cc @@ -19,7 +19,7 @@ TEST(FVS, SameFnetAsSerial) */ { SerialEnv env(0, NULL, 1, 1, 1); - Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); + Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); Weno3 weno(&d); FVS fluxMethod(&d, &weno, &model); @@ -56,7 +56,7 @@ TEST(FVS, SameFnetAsSerial) TEST(FVS, SameXReconstructionAsSerial) { SerialEnv env(0, NULL, 1, 1, 1); - Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); + Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); Weno3 weno(&d); FVS fluxMethod(&d, &weno, &model); @@ -95,7 +95,7 @@ TEST(FVS, SameXReconstructionAsSerial) TEST(FVS, SameYReconstructionAsSerial) { SerialEnv env(0, NULL, 1, 1, 1); - Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); + Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); Weno3 weno(&d); FVS fluxMethod(&d, &weno, &model); @@ -134,7 +134,7 @@ TEST(FVS, SameYReconstructionAsSerial) TEST(FVS, SameZReconstructionAsSerial) { SerialEnv env(0, NULL, 1, 1, 1); - Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); + Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); Weno3 weno(&d); 
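// Note (a sketch, inferred from the Data constructor calls in the Project main
// files in this series; not authoritative): the positional arguments appear to be
//   Data d(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env);
// so moving the third argument from 0 to 20 promotes these FVS comparison tests
// from a 2D 20x20 domain to a full 3D 20x20x20 one.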
FVS fluxMethod(&d, &weno, &model); diff --git a/Tests/CPU/Src/test_rk2.cc b/Tests/CPU/Src/test_rk2.cc index 840040a2..7cdcdb91 100644 --- a/Tests/CPU/Src/test_rk2.cc +++ b/Tests/CPU/Src/test_rk2.cc @@ -322,10 +322,8 @@ TEST(RK2OutputConsistentWithSerial, RK2SrmhdFlowBrioWuSF) save.saveAux(); save.saveConsts(); } -#endif -#if 0 -TEST(RK2OutputConsistentWithSerial, RK2SrrmhdOutflowOTVortexSingleFluidRK2) +TEST(RK2, RK2OutputConsistentWithSerial) { /* @@ -334,9 +332,10 @@ TEST(RK2OutputConsistentWithSerial, RK2SrrmhdOutflowOTVortexSingleFluidRK2) */ SerialEnv env(0, NULL, 1, 1, 1, 1); - Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + Data d(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.004, &env); + Weno3 weno(&d); SRRMHD model(&d); - FVS fluxMethod(&d, &model); + FVS fluxMethod(&d, &weno, &model); Outflow bcs(&d); Simulation sim(&d, &env); OTVortexSingleFluid init(&d); @@ -349,11 +348,12 @@ TEST(RK2OutputConsistentWithSerial, RK2SrrmhdOutflowOTVortexSingleFluidRK2) // Save data in test directory strcpy(save.dir, "../TestData/Serial"); - strcpy(save.app, "RK2SrrmhdOutflowOTVortexSingleFluid"); - + strcpy(save.app, "RK2"); save.saveCons(); save.savePrims(); save.saveAux(); save.saveConsts(); + } + #endif diff --git a/Tests/GPU/Src/compareParallelAndSerial.py b/Tests/GPU/Src/compareParallelAndSerial.py index bd05205f..286fccac 100644 --- a/Tests/GPU/Src/compareParallelAndSerial.py +++ b/Tests/GPU/Src/compareParallelAndSerial.py @@ -45,9 +45,9 @@ class CompareParallelAndSerial(object): Ncons = [] Nprims = [] Naux = [] - Nx = [] - Ny = [] - Nz = [] + nx = [] + ny = [] + nz = [] Ng = [] xbounds = [] ybounds = [] @@ -75,20 +75,15 @@ def getFiles(self): self.Ncons.append(self.Serials[i].c['Ncons']) self.Nprims.append(self.Serials[i].c['Nprims']) self.Naux.append(self.Serials[i].c['Naux']) - self.Nx.append(self.Serials[i].c['Nx']) - self.Ny.append(self.Serials[i].c['Ny']) - self.Nz.append(self.Serials[i].c['Nz']) + self.nx.append(self.Serials[i].c['nx']) + self.ny.append(self.Serials[i].c['ny']) + self.nz.append(self.Serials[i].c['nz']) self.Ng.append(self.Serials[i].c['Ng']) - self.xbounds.append((self.Ng[-1], self.Nx[-1] - self.Ng[-1])) - if (self.Ny[-1] > 1): - self.ybounds.append((self.Ng[-1], self.Ny[-1] - self.Ng[-1])) - else: - self.ybounds.append((0, 1)) - if (self.Nz[-1] > 1): - self.zbounds.append((self.Ng[-1], self.Nz[-1] - self.Ng[-1])) - else: - self.zbounds.append((0, 1)) + # Bounds within arrays which do not include ghost cells + self.xbounds.append((0, self.nx[-1])) + self.ybounds.append((0, self.ny[-1])) + self.zbounds.append((0, self.nz[-1])) diff --git a/Tests/GPU/repeat.sh b/Tests/GPU/repeat.sh deleted file mode 100755 index 2716a559..00000000 --- a/Tests/GPU/repeat.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh - -cd ../CPU -make test_rk2 -./test_rk2 -cd ../GPU -make test_rk2 -./test_rk2 diff --git a/Tests/play.py b/Tests/play.py deleted file mode 100644 index 8f110abd..00000000 --- a/Tests/play.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Wed Aug 5 10:56:02 2020 - -@author: alex -""" - -import sys -sys.path.append('../Project/GPU/Src') -sys.path.append('../Project/CPU/Src') -from interactivePlotGPU import InteractivePlot as PlotGPU -from interactivePlotCPU import InteractivePlot as PlotCPU - - -parallel = PlotGPU("TestData/GPU/", "RK2") -#serial = PlotCPU("TestData/Serial/", "RK2") - -pp = parallel.prims -sp = serial.prims - - -#for sv, pv in zip(serial.prims, parallel.prims): -# print(f"{np.sum(np.abs(sv-pv) > 1e-15)}/{30**3} failures") 
\ No newline at end of file From ba80bd07002d96f9dad20e497841ac137dda65ec Mon Sep 17 00:00:00 2001 From: AlexJamesWright Date: Wed, 5 Aug 2020 15:22:35 +0100 Subject: [PATCH 04/56] Ready --- Project/CPU/Src/main.cc | 25 +++++++++++-------------- Project/GPU/Src/main.cu | 7 +------ 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index d82aff43..c1caf758 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -1,5 +1,7 @@ -// Serial main -#include "simData.h" +// CPU main +#include "parallelBoundaryConds.h" +#include "fluxVectorSplitting.h" +#include "parallelSaveData.h" #include "simulation.h" #include "initFunc.h" #include "simData.h" @@ -7,20 +9,14 @@ #include "Euler.h" #include "weno.h" -#include -#include #include -#include #include -#include - using namespace std; int main(int argc, char *argv[]) { - const double MU(1000); // Set up domain int Ng(5); int nx(800); @@ -55,7 +51,7 @@ int main(int argc, char *argv[]) { // Choose particulars of simulation Euler model(&data); - Weno3 weno(&data); + Weno7 weno(&data); FVS fluxMethod(&data, &weno, &model); @@ -65,23 +61,24 @@ int main(int argc, char *argv[]) { FancyMETHODData init(&data); - SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + RK4 timeInt(&data, &model, &bcs, &fluxMethod); - SerialSaveData save(&data, &env, 0); + ParallelSaveData save(&data, &env, 0); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + // Time execution of programme - //double startTime(omp_get_wtime()); + clock_t startTime(clock()); // Run until end time and save results sim.evolve(output, safety); - //double timeTaken(omp_get_wtime()- startTime); + double timeTaken(double(clock() - startTime)/(double)CLOCKS_PER_SEC); save.saveAll(); - //printf("\nRuntime: %.5fs\nCompleted %d iterations.\n", timeTaken, data.iters); + if (env.rank==0) printf("\nRuntime: %.5fs\nCompleted %d iterations.\n", timeTaken, data.iters); return 0; diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 50efa978..a5642b3a 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -80,12 +80,7 @@ int main(int argc, char *argv[]) { double startTime(omp_get_wtime()); // Run until end time and save results - // sim.evolve(output, safety); - sim.updateTime(); - sim.updateTime(); - sim.updateTime(); - sim.updateTime(); - sim.updateTime(); + sim.evolve(output, safety); double timeTaken(omp_get_wtime()- startTime); From c2951d0a7480b1f79afeacea2fb080aa534a213e Mon Sep 17 00:00:00 2001 From: "A.M.Brown" Date: Mon, 24 Aug 2020 11:53:16 +0100 Subject: [PATCH 05/56] updated makefile to compile gtest with std=c++11 to work on more cuda versions --- Tests/GPU/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 603ae7d4..8f8bfd1a 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -31,6 +31,7 @@ INC_DIR = ./../../Project/GPU/Include # Set Google Test's header directory as a system directory, such that # the compiler doesn't generate warnings in Google Test headers. CPPFLAGS = -isystem $(GTEST_DIR)/include +STDFLAGS = -std=c++11 # Flags passed to the C++ compiler. 
# c++11 is required for the vector looping srmhd @@ -120,11 +121,11 @@ RTFIND_OBJS = $(RTFIND_SRC_DIR)/dogleg.o \ gtest-all.o : $(GTEST_SRCS_) - @$(CXX) $(CPPFLAGS) -I$(GTEST_DIR) $(CXXFLAGS) -c \ + @$(CXX) $(CPPFLAGS) $(STDFLAGS) -I$(GTEST_DIR) $(CXXFLAGS) -c \ $(GTEST_DIR)/src/gtest-all.cc gtest_main.o : $(GTEST_SRCS_) - @$(CXX) $(CPPFLAGS) -I$(GTEST_DIR) $(CXXFLAGS) -c \ + @$(CXX) $(CPPFLAGS) $(STDFLAGS) -I$(GTEST_DIR) $(CXXFLAGS) -c \ $(GTEST_DIR)/src/gtest_main.cc gtest.a : gtest-all.o From 0ab3caea72fa6e7c3e2eba572e394f902ef0e478 Mon Sep 17 00:00:00 2001 From: "A.M.Brown" Date: Mon, 24 Aug 2020 16:22:53 +0100 Subject: [PATCH 06/56] added FlowKHSingleFluid unit test. Changed project to use KHSingleFluidRandom example --- Project/CPU/Src/main.cc | 53 +++++++-------- Project/GPU/Src/main.cu | 48 ++++++-------- Tests/CPU/Src/test_imex.cc | 80 +++++++++++++++++++++++ Tests/GPU/Src/compareParallelAndSerial.py | 40 ++++++++++++ Tests/GPU/Src/test_imex.cu | 74 +++++++++++++++++++++ 5 files changed, 237 insertions(+), 58 deletions(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index c1caf758..b362e58e 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -5,7 +5,7 @@ #include "simulation.h" #include "initFunc.h" #include "simData.h" -#include "RKPlus.h" +#include "SSP2.h" #include "Euler.h" #include "weno.h" @@ -15,53 +15,48 @@ using namespace std; int main(int argc, char *argv[]) { - - + const double MU(1000); // Set up domain - int Ng(5); - int nx(800); - int ny(400); + int Ng(4); + int nx(64); + int ny(16); int nz(0); - double xmin(0.0); - double xmax(8.0); - double ymin(0.0); - double ymax(4.0); - double zmin(0.0); - double zmax(1.0); - double endTime(30.0); - double gamma(2.0); - double cfl(0.5); - double cp(1); - double mu1(-1); - double mu2(1); + double xmin(-0.5); + double xmax(0.5); + double ymin(-1.0); + double ymax(1.0); + double zmin(-1.5); + double zmax(1.5); + double endTime(0.5); + double cfl(0.1); + double gamma(4.0/3.0); + double sigma(0); bool output(true); - int frameSkip(50); - int safety(frameSkip); - int reportItersPeriod(1); - double sigma(50); - double nxRanks(4); + int safety(180); + + double nxRanks(1); double nyRanks(1); double nzRanks(1); ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, - cfl, Ng, gamma, sigma, cp, mu1, mu2, frameSkip, reportItersPeriod); + cfl, Ng, gamma, sigma); // Choose particulars of simulation - Euler model(&data); + SRRMHD model(&data); - Weno7 weno(&data); + Weno3 weno(&data); FVS fluxMethod(&data, &weno, &model); - ParallelOutflow bcs(&data, &env); + ParallelFlow bcs(&data, &env); Simulation sim(&data, &env); - FancyMETHODData init(&data); + KHInstabilitySingleFluid init(&data); - RK4 timeInt(&data, &model, &bcs, &fluxMethod); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); ParallelSaveData save(&data, &env, 0); diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index a5642b3a..005a4199 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -23,44 +23,34 @@ using namespace std; int main(int argc, char *argv[]) { - - const double MU(1000); // Set up domain int Ng(4); - int nx(256); - int ny(512); + int nx(64); + int ny(64); int nz(0); - double xmin(-0.5); - double xmax(0.5); - double ymin(-1.0); + double xmin(0.0); + double xmax(1.0); + double ymin(0.0); double ymax(1.0); - double zmin(-1.5); - double zmax(1.5); + double zmin(0.0); + double zmax(1.0); double endTime(3.0); - double cfl(0.1); + 
double cfl(0.6); double gamma(4.0/3.0); - double sigma(300); + double sigma(10); double cp(1.0); - double mu1(-MU); - double mu2(MU); - int frameSkip(180); + double mu1(-100); + double mu2(100); + int frameSkip(10); bool output(true); - int safety(180); - - - char * ptr(0); - //! Overwrite any variables that have been passed in as main() arguments - for (int i(0); i < argc; i++) { - if (strcmp(argv[i], "sigma") == 0) { - sigma = (double)strtol(argv[i+1], &ptr, 10); - } - } + if (argc != 2) throw std::invalid_argument("Expected ./main seed!\n"); + int seed(atoi(argv[1])); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, cfl, Ng, gamma, sigma, cp, mu1, mu2, frameSkip); // Choose particulars of simulation - SRRMHD model(&data); + SRMHD model(&data); FVS fluxMethod(&data, &model); @@ -68,21 +58,21 @@ int main(int argc, char *argv[]) { KHInstabilitySingleFluid init(&data, 1); - Flow bcs(&data); + Periodic bcs(&data); - SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + RKSplit timeInt(&data, &model, &bcs, &fluxMethod); SaveData save(&data); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); // Time execution of programme - double startTime(omp_get_wtime()); + //double startTime(omp_get_wtime()); // Run until end time and save results sim.evolve(output, safety); - double timeTaken(omp_get_wtime()- startTime); + //double timeTaken(omp_get_wtime()- startTime); save.saveAll(); printf("\nRuntime: %.5fs\nCompleted %d iterations.\n", timeTaken, data.iters); diff --git a/Tests/CPU/Src/test_imex.cc b/Tests/CPU/Src/test_imex.cc index 8c65d3fa..2ff2c566 100644 --- a/Tests/CPU/Src/test_imex.cc +++ b/Tests/CPU/Src/test_imex.cc @@ -51,6 +51,86 @@ TEST(SSP2, IMEX2BenchmarkForParallelCode) save.saveConsts(); } +TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared with the parallel output. + */ + + double sigma(0); + + SerialEnv env(0, NULL, 1, 1, 1); + Data data(64, 16, 0, -0.5, 0.5, -1, 1, 0, 1, 0.05, &env, + 0.5, 4, 4.0/3.0, sigma); + + // Choose particulars of simulation + SRRMHD model(&data); + Weno3 weno(&data); + FVS fluxMethod(&data, &weno, &model); + Flow bcs(&data); + Simulation sim(&data, &env); + KHInstabilitySingleFluid init(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SerialSaveData save(&data, &env); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/Serial"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#if 0 +TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared with the parallel output. 
+ */ + const double MU(1000); + double sigma(300); + double cp(1.0); + double mu1(-MU); + double mu2(MU); + int nx(256); + int ny(512); + double endTime(0.01); + + SerialEnv env(0, NULL, 1, 1, 1); + Data data(nx, ny, 0, -0.5, 0.5, -1, 1, 0, 1, endTime, &env, + 0.1, 4, 4.0/3.0, sigma, cp, mu1, mu2); + + // Choose particulars of simulation + SRRMHD model(&data); + Weno3 weno(&data); + FVS fluxMethod(&data, &weno, &model); + Flow bcs(&data); + Simulation sim(&data, &env); + KHInstabilitySingleFluid init(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SerialSaveData save(&data, &env); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/Serial"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#endif TEST(SSP3, IMEX3BenchmarkForParallelCode) { diff --git a/Tests/GPU/Src/compareParallelAndSerial.py b/Tests/GPU/Src/compareParallelAndSerial.py index 286fccac..808c5340 100644 --- a/Tests/GPU/Src/compareParallelAndSerial.py +++ b/Tests/GPU/Src/compareParallelAndSerial.py @@ -174,6 +174,46 @@ def test_AuxEquivalentForSSP2(): print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) +def test_ConsEquivalentForSSP2FlowKHSingleFluid(): + Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + for Nv in range(Compare.Ncons[Obj]): + for i in range(*Compare.xbounds[Obj]): + for j in range(*Compare.ybounds[Obj]): + for k in range(*Compare.zbounds[Obj]): + try: + assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) + except AssertionError: + print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) + assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) + +def test_PrimsEquivalentForSSP2FlowKHSingleFluid(): + Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + for Nv in range(Compare.Nprims[Obj]): + for i in range(*Compare.xbounds[Obj]): + for j in range(*Compare.ybounds[Obj]): + for k in range(*Compare.zbounds[Obj]): + try: + assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) + except AssertionError: + print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) + assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) + +def test_AuxEquivalentForSSP2FlowKHSingleFluid(): + Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + for Nv in range(Compare.Naux[Obj]): + for i in range(*Compare.xbounds[Obj]): + for j in range(*Compare.ybounds[Obj]): + for k in range(*Compare.zbounds[Obj]): + try: + assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) + except AssertionError: + print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) + assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) + + # RK2 def test_ConsEquivalentForRK2(): Obj = Compare.Appendicies.index('RK2') diff --git a/Tests/GPU/Src/test_imex.cu b/Tests/GPU/Src/test_imex.cu index aa695e0b..830da06e 100644 --- a/Tests/GPU/Src/test_imex.cu +++ b/Tests/GPU/Src/test_imex.cu @@ -45,6 +45,80 @@ TEST(SSP2, IMEX2ConsistentWithSerialVersion) save.saveConsts(); } +TEST(SSP2FlowKHSingleFluid, 
IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared to the serial output. + */ + double sigma(0); + + Data data(64, 16, 0, -0.5, 0.5, -1, 1, 0, 1, 0.05, + 0.5, 4, 4.0/3.0, sigma); + + // Choose particulars of simulation + SRRMHD model(&data); + FVS fluxMethod(&data, &model); + Simulation sim(&data); + KHInstabilitySingleFluid init(&data); + Flow bcs(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SaveData save(&data); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} + +#if 0 +TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared to the serial output. + */ + const double MU(1000); + double sigma(300); + double cp(1.0); + double mu1(-MU); + double mu2(MU); + int nx(256); + int ny(512); + double endTime(0.01); + + Data data(nx, ny, 0, -0.5, 0.5, -1, 1, 0, 1, endTime, + 0.1, 4, 4.0/3.0, sigma, cp, mu1, mu2); + + // Choose particulars of simulation + SRRMHD model(&data); + FVS fluxMethod(&data, &model); + Simulation sim(&data); + KHInstabilitySingleFluid init(&data); + Flow bcs(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SaveData save(&data); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#endif TEST(SSP3, IMEX3ConsistentWithSerialVersion) { From 1c5ff45cc32377b2c2d35e0d2f04e3a7fd5f66d1 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 25 Aug 2020 10:39:47 +0100 Subject: [PATCH 07/56] updated main functions in Project/CPU and Project/GPU to simulate the same systems --- Project/CPU/Src/main.cc | 61 ++++++++--------- Project/GPU/Makefile | 2 +- Project/GPU/Src/main.cu | 14 ++-- Tests/CPU/Src/test_imex.cc | 80 +++++++++++++++++++++++ Tests/GPU/Src/compareParallelAndSerial.py | 40 ++++++++++++ Tests/GPU/Src/test_imex.cu | 74 +++++++++++++++++++++ 6 files changed, 228 insertions(+), 43 deletions(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index c1caf758..4cd28f9e 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -1,11 +1,11 @@ // CPU main -#include "parallelBoundaryConds.h" +#include "boundaryConds.h" #include "fluxVectorSplitting.h" -#include "parallelSaveData.h" +#include "serialSaveData.h" #include "simulation.h" #include "initFunc.h" #include "simData.h" -#include "RKPlus.h" +#include "SSP2.h" #include "Euler.h" #include "weno.h" @@ -15,55 +15,50 @@ using namespace std; int main(int argc, char *argv[]) { - - + const double MU(1000); // Set up domain - int Ng(5); - int nx(800); - int ny(400); + int Ng(4); + int nx(64); + int ny(16); int nz(0); - double xmin(0.0); - double xmax(8.0); - double ymin(0.0); - double ymax(4.0); - double zmin(0.0); - double zmax(1.0); - double endTime(30.0); - double gamma(2.0); - double cfl(0.5); - double cp(1); - double mu1(-1); - double mu2(1); + double xmin(-0.5); + double xmax(0.5); + double 
ymin(-1.0); + double ymax(1.0); + double zmin(-1.5); + double zmax(1.5); + double endTime(0.05); + double cfl(0.1); + double gamma(4.0/3.0); + double sigma(0); bool output(true); - int frameSkip(50); - int safety(frameSkip); - int reportItersPeriod(1); - double sigma(50); - double nxRanks(4); + int safety(180); + + double nxRanks(1); double nyRanks(1); double nzRanks(1); - ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); + SerialEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, - cfl, Ng, gamma, sigma, cp, mu1, mu2, frameSkip, reportItersPeriod); + cfl, Ng, gamma, sigma); // Choose particulars of simulation - Euler model(&data); + SRRMHD model(&data); - Weno7 weno(&data); + Weno3 weno(&data); FVS fluxMethod(&data, &weno, &model); - ParallelOutflow bcs(&data, &env); + Flow bcs(&data); Simulation sim(&data, &env); - FancyMETHODData init(&data); + KHInstabilitySingleFluid init(&data); - RK4 timeInt(&data, &model, &bcs, &fluxMethod); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); - ParallelSaveData save(&data, &env, 0); + SerialSaveData save(&data, &env, 0); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 666dc027..a84ac2e0 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -23,7 +23,7 @@ RTFIND_SRC_DIR = ./CminpackLibrary/Src CXXFLAGS = -Xcompiler -fopenmp -Xcompiler -Wall # NVIDIA compiler flags -NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 +NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 # Sources SRCS = main.cu \ diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index a5642b3a..10a10340 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -27,8 +27,8 @@ int main(int argc, char *argv[]) { const double MU(1000); // Set up domain int Ng(4); - int nx(256); - int ny(512); + int nx(64); + int ny(16); int nz(0); double xmin(-0.5); double xmax(0.5); @@ -36,14 +36,10 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(3.0); + double endTime(0.05); double cfl(0.1); double gamma(4.0/3.0); - double sigma(300); - double cp(1.0); - double mu1(-MU); - double mu2(MU); - int frameSkip(180); + double sigma(0); bool output(true); int safety(180); @@ -57,7 +53,7 @@ int main(int argc, char *argv[]) { } Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, - cfl, Ng, gamma, sigma, cp, mu1, mu2, frameSkip); + cfl, Ng, gamma, sigma); // Choose particulars of simulation SRRMHD model(&data); diff --git a/Tests/CPU/Src/test_imex.cc b/Tests/CPU/Src/test_imex.cc index 8c65d3fa..2ff2c566 100644 --- a/Tests/CPU/Src/test_imex.cc +++ b/Tests/CPU/Src/test_imex.cc @@ -51,6 +51,86 @@ TEST(SSP2, IMEX2BenchmarkForParallelCode) save.saveConsts(); } +TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared with the parallel output. 
+ */ + + double sigma(0); + + SerialEnv env(0, NULL, 1, 1, 1); + Data data(64, 16, 0, -0.5, 0.5, -1, 1, 0, 1, 0.05, &env, + 0.5, 4, 4.0/3.0, sigma); + + // Choose particulars of simulation + SRRMHD model(&data); + Weno3 weno(&data); + FVS fluxMethod(&data, &weno, &model); + Flow bcs(&data); + Simulation sim(&data, &env); + KHInstabilitySingleFluid init(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SerialSaveData save(&data, &env); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/Serial"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#if 0 +TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared with the parallel output. + */ + const double MU(1000); + double sigma(300); + double cp(1.0); + double mu1(-MU); + double mu2(MU); + int nx(256); + int ny(512); + double endTime(0.01); + + SerialEnv env(0, NULL, 1, 1, 1); + Data data(nx, ny, 0, -0.5, 0.5, -1, 1, 0, 1, endTime, &env, + 0.1, 4, 4.0/3.0, sigma, cp, mu1, mu2); + + // Choose particulars of simulation + SRRMHD model(&data); + Weno3 weno(&data); + FVS fluxMethod(&data, &weno, &model); + Flow bcs(&data); + Simulation sim(&data, &env); + KHInstabilitySingleFluid init(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SerialSaveData save(&data, &env); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/Serial"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#endif TEST(SSP3, IMEX3BenchmarkForParallelCode) { diff --git a/Tests/GPU/Src/compareParallelAndSerial.py b/Tests/GPU/Src/compareParallelAndSerial.py index 286fccac..808c5340 100644 --- a/Tests/GPU/Src/compareParallelAndSerial.py +++ b/Tests/GPU/Src/compareParallelAndSerial.py @@ -174,6 +174,46 @@ def test_AuxEquivalentForSSP2(): print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) +def test_ConsEquivalentForSSP2FlowKHSingleFluid(): + Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + for Nv in range(Compare.Ncons[Obj]): + for i in range(*Compare.xbounds[Obj]): + for j in range(*Compare.ybounds[Obj]): + for k in range(*Compare.zbounds[Obj]): + try: + assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) + except AssertionError: + print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) + assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) + +def test_PrimsEquivalentForSSP2FlowKHSingleFluid(): + Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + for Nv in range(Compare.Nprims[Obj]): + for i in range(*Compare.xbounds[Obj]): + for j in range(*Compare.ybounds[Obj]): + for k in range(*Compare.zbounds[Obj]): + try: + assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) + except AssertionError: + print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) + assert(abs((Serial.prims[Nv, i, j, k] - 
Parallel.prims[Nv, i, j, k]) < TOL)) + +def test_AuxEquivalentForSSP2FlowKHSingleFluid(): + Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + for Nv in range(Compare.Naux[Obj]): + for i in range(*Compare.xbounds[Obj]): + for j in range(*Compare.ybounds[Obj]): + for k in range(*Compare.zbounds[Obj]): + try: + assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) + except AssertionError: + print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) + assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) + + # RK2 def test_ConsEquivalentForRK2(): Obj = Compare.Appendicies.index('RK2') diff --git a/Tests/GPU/Src/test_imex.cu b/Tests/GPU/Src/test_imex.cu index aa695e0b..830da06e 100644 --- a/Tests/GPU/Src/test_imex.cu +++ b/Tests/GPU/Src/test_imex.cu @@ -45,6 +45,80 @@ TEST(SSP2, IMEX2ConsistentWithSerialVersion) save.saveConsts(); } +TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared to the serial output. + */ + double sigma(0); + + Data data(64, 16, 0, -0.5, 0.5, -1, 1, 0, 1, 0.05, + 0.5, 4, 4.0/3.0, sigma); + + // Choose particulars of simulation + SRRMHD model(&data); + FVS fluxMethod(&data, &model); + Simulation sim(&data); + KHInstabilitySingleFluid init(&data); + Flow bcs(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SaveData save(&data); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} + +#if 0 +TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared to the serial output. 
+ */ + const double MU(1000); + double sigma(300); + double cp(1.0); + double mu1(-MU); + double mu2(MU); + int nx(256); + int ny(512); + double endTime(0.01); + + Data data(nx, ny, 0, -0.5, 0.5, -1, 1, 0, 1, endTime, + 0.1, 4, 4.0/3.0, sigma, cp, mu1, mu2); + + // Choose particulars of simulation + SRRMHD model(&data); + FVS fluxMethod(&data, &model); + Simulation sim(&data); + KHInstabilitySingleFluid init(&data); + Flow bcs(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SaveData save(&data); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#endif TEST(SSP3, IMEX3ConsistentWithSerialVersion) { From 6c89018936779847b0e2237e1fb97b7952f47294 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 25 Aug 2020 11:50:24 +0100 Subject: [PATCH 08/56] switching back to Project/main with implemented model etc, to better test update to new API --- Project/CPU/Src/main.cc | 12 +++++------ Project/GPU/Src/main.cu | 48 +++++++++++++++++++++++------------------ 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index b362e58e..4cd28f9e 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -1,7 +1,7 @@ // CPU main -#include "parallelBoundaryConds.h" +#include "boundaryConds.h" #include "fluxVectorSplitting.h" -#include "parallelSaveData.h" +#include "serialSaveData.h" #include "simulation.h" #include "initFunc.h" #include "simData.h" @@ -27,7 +27,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.5); + double endTime(0.05); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); @@ -38,7 +38,7 @@ int main(int argc, char *argv[]) { double nyRanks(1); double nzRanks(1); - ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); + SerialEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, cfl, Ng, gamma, sigma); @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) { FVS fluxMethod(&data, &weno, &model); - ParallelFlow bcs(&data, &env); + Flow bcs(&data); Simulation sim(&data, &env); @@ -58,7 +58,7 @@ int main(int argc, char *argv[]) { SSP2 timeInt(&data, &model, &bcs, &fluxMethod); - ParallelSaveData save(&data, &env, 0); + SerialSaveData save(&data, &env, 0); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 005a4199..10a10340 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -23,34 +23,40 @@ using namespace std; int main(int argc, char *argv[]) { + + const double MU(1000); // Set up domain int Ng(4); int nx(64); - int ny(64); + int ny(16); int nz(0); - double xmin(0.0); - double xmax(1.0); - double ymin(0.0); + double xmin(-0.5); + double xmax(0.5); + double ymin(-1.0); double ymax(1.0); - double zmin(0.0); - double zmax(1.0); - double endTime(3.0); - double cfl(0.6); + double zmin(-1.5); + double zmax(1.5); + double endTime(0.05); + double cfl(0.1); double gamma(4.0/3.0); - double sigma(10); - double cp(1.0); - double mu1(-100); - double mu2(100); - int frameSkip(10); + double sigma(0); bool output(true); - if (argc != 2) throw std::invalid_argument("Expected ./main seed!\n"); - int seed(atoi(argv[1])); 
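// Note on the "sigma" command-line override a few lines below: strtol parses an
// integer in base 10, so a fractional value such as "0.5" would silently become 0
// before the cast to double. A minimal alternative sketch, assuming <cstdlib> is
// available and fractional conductivities should be accepted:
//
//   for (int i(0); i < argc; i++) {
//     if (strcmp(argv[i], "sigma") == 0) {
//       sigma = strtod(argv[i+1], &ptr);   // strtod parses "0.5" as well as "300"
//     }
//   }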
+ int safety(180); + + + char * ptr(0); + //! Overwrite any variables that have been passed in as main() arguments + for (int i(0); i < argc; i++) { + if (strcmp(argv[i], "sigma") == 0) { + sigma = (double)strtol(argv[i+1], &ptr, 10); + } + } Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, - cfl, Ng, gamma, sigma, cp, mu1, mu2, frameSkip); + cfl, Ng, gamma, sigma); // Choose particulars of simulation - SRMHD model(&data); + SRRMHD model(&data); FVS fluxMethod(&data, &model); @@ -58,21 +64,21 @@ int main(int argc, char *argv[]) { KHInstabilitySingleFluid init(&data, 1); - Periodic bcs(&data); + Flow bcs(&data); - RKSplit timeInt(&data, &model, &bcs, &fluxMethod); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); SaveData save(&data); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); // Time execution of programme - //double startTime(omp_get_wtime()); + double startTime(omp_get_wtime()); // Run until end time and save results sim.evolve(output, safety); - //double timeTaken(omp_get_wtime()- startTime); + double timeTaken(omp_get_wtime()- startTime); save.saveAll(); printf("\nRuntime: %.5fs\nCompleted %d iterations.\n", timeTaken, data.iters); From aa846704a850e5f97e9a5347796107acd03c437e Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 25 Aug 2020 11:52:58 +0100 Subject: [PATCH 09/56] adding platformEnv files --- Project/GPU/Include/parallelEnv.h | 50 +++++++++++++ Project/GPU/Include/platformEnv.h | 65 ++++++++++++++++ Project/GPU/Include/serialEnv.h | 48 ++++++++++++ Project/GPU/Src/parallelEnv.cu | 118 ++++++++++++++++++++++++++++++ Project/GPU/Src/serialEnv.cu | 59 +++++++++++++++ 5 files changed, 340 insertions(+) create mode 100644 Project/GPU/Include/parallelEnv.h create mode 100644 Project/GPU/Include/platformEnv.h create mode 100644 Project/GPU/Include/serialEnv.h create mode 100644 Project/GPU/Src/parallelEnv.cu create mode 100644 Project/GPU/Src/serialEnv.cu diff --git a/Project/GPU/Include/parallelEnv.h b/Project/GPU/Include/parallelEnv.h new file mode 100644 index 00000000..7c67181e --- /dev/null +++ b/Project/GPU/Include/parallelEnv.h @@ -0,0 +1,50 @@ +#ifndef PARALLEL_ENV_H +#define PARALLEL_ENV_H + +#include +#include "platformEnv.h" + +//! ParallelEnv +/*! + @par + For keeping track of parameters related to the platform that the code is running on -- + currently serial on a single core or multi-core using MPI. For the MPI version, processes are mapped onto a + cartesian grid with the number of processes in each dimension specified by the user. + + For a 2D problem, specify nzRanks = 1 + For a 1D problem, specify nzRanks = 1, nyRanks = 1 + + The number of ranks in each dimension must be a factor of the number of cells in the dimension +*/ +class ParallelEnv : public PlatformEnv +{ + public: + MPI_Comm mpiCartesianComm; //!< Cartesian MPI communicator that maps processes to the simulation grid + + //! Constructor -- Initialize global MPI communicator + ParallelEnv(int *argcP, char **argvP[], int nxRanks, int nyRanks, int nzRanks, int testing=0); + + //! Destructor + virtual ~ParallelEnv(); + + //! Check for external boundary + /*! + @par + Returns true if a subdomain is on the external boundary of the simulation grid in a particular direction + @param[in] dimension {x=0, y=1, z=2} + @param[in] direction direction to look for the external boundary in a particular direction {low=0, high=1} + */ + int isNeighbourExternal(int dimension, int direction); + + //! 
Create cartesian grid of processes and calculate neighbours along that grid for each process + /*! + @par + Creates the cartesian grid of processes that are responsible for the corresponding subdomains in the simulation grid + @param[in] xPeriodic whether the x dimension has periodic boundary conditions + @param[in] yPeriodic whether the y dimension has periodic boundary conditions + @param[in] zPeriodic whether the z dimension has periodic boundary conditions + */ + void setParallelDecomposition(int xPeriodic, int yPeriodic, int zPeriodic); +}; + +#endif diff --git a/Project/GPU/Include/platformEnv.h b/Project/GPU/Include/platformEnv.h new file mode 100644 index 00000000..3f3c7e11 --- /dev/null +++ b/Project/GPU/Include/platformEnv.h @@ -0,0 +1,65 @@ +#ifndef PLATFORM_ENV_H +#define PLATFORM_ENV_H + +//! PlatformEnv +/*! + @par + For keeping track of parameters related to the platform that the code is running on -- + currently serial on a single core or multi-core using MPI. For the MPI version, processes are mapped onto a + cartesian grid with the number of processes in each dimension specified by the user. + + For a 2D problem, specify nzRanks = 1 + For a 1D problem, specify nzRanks = 1, nyRanks = 1 + + The number of ranks in each dimension must be a factor of the number of cells in the dimension +*/ +class PlatformEnv +{ + public: + int + nProc, //!< Number of MPI processes in total (1 for serial job) + rank, //!< Global id of this MPI process (0 for serial job) + //@{ + nxRanks, nyRanks, nzRanks, //!< Number of processes in each dimension of the cartesian grid of processes + //@} + //@{ + xRankId, yRankId, zRankId, //!< Id of this MPI process in each dimension of the cartesian grid of processes + //@} + //@{ + leftXNeighbourRank, rightXNeighbourRank, //!< Global ids of this process's left and right neighbours + //@} + //@{ + leftYNeighbourRank, rightYNeighbourRank, //!< Global ids of this process's front and back neighbours + //@} + //@{ + leftZNeighbourRank, rightZNeighbourRank, //!< Global ids of this process's bottom and top neighbour + //@} + testing; //!< boolean flag used to disable MPI init/finalise during unit testing + + //! Constructor -- Initialize global MPI communicator + PlatformEnv(int testing=0) : testing(testing) {} + + //! Destructor + virtual ~PlatformEnv() {} + + //! Check for external boundary + /*! + @par + Returns true if a subdomain is on the external boundary of the simulation grid in a particular direction + @param[in] dimension {x=0, y=1, z=2} + @param[in] direction direction to look for the external boundary in a particular direction {low=0, high=1} + */ + virtual int isNeighbourExternal(int dimension, int direction) = 0; + + //! Create cartesian grid of processes + /*! + @par + Creates the cartesian grid of processes that are responsible for the corresponding subdomains in the simulation grid + @param[in] xPeriodic whether the x dimension has periodic boundary conditions + @param[in] yPeriodic whether the y dimension has periodic boundary conditions + @param[in] zPeriodic whether the z dimension has periodic boundary conditions + */ + virtual void setParallelDecomposition(int xPeriodic, int yPeriodic, int zPeriodic) = 0; +}; + +#endif diff --git a/Project/GPU/Include/serialEnv.h b/Project/GPU/Include/serialEnv.h new file mode 100644 index 00000000..f1cae491 --- /dev/null +++ b/Project/GPU/Include/serialEnv.h @@ -0,0 +1,48 @@ +#ifndef SERIAL_ENV_H +#define SERIAL_ENV_H + +#include "platformEnv.h" + +//! SerialEnv +/*! 
+ @par + For keeping track of parameters related to the platform that the code is running on -- + currently serial on a single core or multi-core using MPI. For the MPI version, processes are mapped onto a + cartesian grid with the number of processes in each dimension specified by the user. + + For a 2D problem, specify nzRanks = 1 + For a 1D problem, specify nzRanks = 1, nyRanks = 1 + + The number of ranks in each dimension must be a factor of the number of cells in the dimension +*/ +class SerialEnv : public PlatformEnv +{ + public: + + //! Constructor -- Initialize global MPI communicator + SerialEnv(int *argcP, char **argvP[], int nxRanks, int nyRanks, int nzRanks, int testing=0); + + //! Destructor + virtual ~SerialEnv(); + + //! Check for external boundary + /*! + @par + Returns true if a subdomain is on the external boundary of the simulation grid in a particular direction + @param[in] dimension {x=0, y=1, z=2} + @param[in] direction direction to look for the external boundary in a particular direction {low=0, high=1} + */ + int isNeighbourExternal(int dimension, int direction); + + //! Create cartesian grid of processes and calculate neighbours along that grid for each process + /*! + @par + Creates the cartesian grid of processes that are responsible for the corresponding subdomains in the simulation grid + @param[in] xPeriodic whether the x dimension has periodic boundary conditions + @param[in] yPeriodic whether the y dimension has periodic boundary conditions + @param[in] zPeriodic whether the z dimension has periodic boundary conditions + */ + void setParallelDecomposition(int xPeriodic, int yPeriodic, int zPeriodic); +}; + +#endif diff --git a/Project/GPU/Src/parallelEnv.cu b/Project/GPU/Src/parallelEnv.cu new file mode 100644 index 00000000..0c8141a8 --- /dev/null +++ b/Project/GPU/Src/parallelEnv.cu @@ -0,0 +1,118 @@ +#include "parallelEnv.h" +#include +#include "simData.h" +#include "parallelBoundaryConds.h" +#include +#include +#include + +#include + +// TODO -- rename setParallelDecomposition and split it out into more functions + +ParallelEnv::ParallelEnv(int *argcP, char **argvP[], int nxRanks, int nyRanks, int nzRanks, int testing) : PlatformEnv(testing) +{ + int initialized; + MPI_Initialized(&initialized); + if (!initialized && !testing) MPI_Init(argcP, argvP); + + MPI_Comm_size(MPI_COMM_WORLD, &nProc); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + if (rank==0){ + printf("Running in multi-process mode with %d processes\n", nProc); + } + + this->nxRanks = nxRanks; + this->nyRanks = nyRanks; + this->nzRanks = nzRanks; +} + +ParallelEnv::~ParallelEnv() +{ + // TODO -- Free cartesian communicator + + int finalized; + MPI_Finalized(&finalized); + if (!finalized && !testing) MPI_Finalize(); +} + +int ParallelEnv::isNeighbourExternal(int dimension, int direction) +{ + int isExternal = 0; + int dimRank = 0; + int maxRank = 0; + + if (dimension==0) { + dimRank = xRankId; + maxRank = nxRanks; + } else if (dimension==1) { + dimRank = yRankId; + maxRank = nyRanks; + } else { + dimRank = zRankId; + maxRank = nzRanks; + } + + if (direction==0){ + isExternal = (dimRank==0); + } else { + isExternal = (dimRank==maxRank-1); + } + + return isExternal; +} + +void ParallelEnv::setParallelDecomposition(int xPeriodic, int yPeriodic, int zPeriodic) +{ + // number of dimensions in process grid + int ndims=1; + // number of ranks in each dimension of the grid + int dims[3]; + // bool: whether grid is periodic in each dimension + int periods[3]; + // bool: whether reordering of processes is 
allowed + int reorder=0; + + // TODO -- Could choose best nxRanks, nyRanks, nzRanks proportionate to nx, ny, nz, with errors if nRanks is prime + + // TODO -- Could use properties on bcs to set whether grid is periodic + + // TODO -- We are setting up a 3D topology even when nyRanks, nzRanks == 1, as we may want to find + // neighbours in y even when there is only one process if ny>1 and boundary conditions are periodic. + // Does this introduce too much overhead? Could also send through nx, ny, nz from data. + + dims[0] = nxRanks; + periods[0] = xPeriodic; + dims[1] = nyRanks; + periods[1] = yPeriodic; + dims[2] = nzRanks; + periods[2] = zPeriodic; + ndims = 3; + + // Create MPI communicator in a cartesian grid that matches the domain + MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, &mpiCartesianComm); + + int coords[3]; + + // Get (x,y,z) coords of rank in grid and set on object + // This is a 3D topology regardless of how many processes we use in each dimension + MPI_Cart_coords(mpiCartesianComm, rank, ndims, coords); + xRankId = coords[0]; + yRankId = coords[1]; + zRankId = coords[2]; + + // Get neighbour rank + int direction = 0; + int displacement = 1; + MPI_Cart_shift(mpiCartesianComm, direction, displacement, + &(leftXNeighbourRank), &(rightXNeighbourRank)); + direction = 1; + MPI_Cart_shift(mpiCartesianComm, direction, displacement, + &(leftYNeighbourRank), &(rightYNeighbourRank)); + direction = 2; + MPI_Cart_shift(mpiCartesianComm, direction, displacement, + &(leftZNeighbourRank), &(rightZNeighbourRank)); +} + + diff --git a/Project/GPU/Src/serialEnv.cu b/Project/GPU/Src/serialEnv.cu new file mode 100644 index 00000000..7daf1799 --- /dev/null +++ b/Project/GPU/Src/serialEnv.cu @@ -0,0 +1,59 @@ +#include "serialEnv.h" +#include +#include "simData.h" +#include "boundaryConds.h" +#include +#include +#include + +// TODO -- rename setParallelDecomposition and split it out into more functions + +SerialEnv::SerialEnv(int *argcP, char **argvP[], int nxRanks, int nyRanks, int nzRanks, int testing) : PlatformEnv(testing) +{ + this->nxRanks = 1; + this->nyRanks = 1; + this->nzRanks = 1; + this->xRankId = 0; + this->yRankId = 0; + this->zRankId = 0; + this->rank = 0; + this->nProc = 1; +} + +SerialEnv::~SerialEnv() +{ + +} + +int SerialEnv::isNeighbourExternal(int dimension, int direction) +{ + int isExternal = 0; + int dimRank = 0; + int maxRank = 0; + + if (dimension==0) { + dimRank = xRankId; + maxRank = nxRanks; + } else if (dimension==1) { + dimRank = yRankId; + maxRank = nyRanks; + } else { + dimRank = zRankId; + maxRank = nzRanks; + } + + if (direction==0){ + isExternal = (dimRank==0); + } else { + isExternal = (dimRank==maxRank-1); + } + + return isExternal; +} + +void SerialEnv::setParallelDecomposition(int xPeriodic, int yPeriodic, int zPeriodic) +{ + +} + + From f984cb417ce5bd0ed5cac487327c589dee71b19d Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 25 Aug 2020 15:06:57 +0100 Subject: [PATCH 10/56] adding platformEnv to Data --- Project/GPU/Include/simData.h | 5 ++++- Project/GPU/Makefile | 4 ++++ Project/GPU/Src/main.cu | 9 +++++++-- Project/GPU/Src/simData.cu | 15 ++++++++++----- 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/Project/GPU/Include/simData.h b/Project/GPU/Include/simData.h index dfe0e8e9..7e0056d2 100644 --- a/Project/GPU/Include/simData.h +++ b/Project/GPU/Include/simData.h @@ -3,6 +3,7 @@ #include #include +#include "platformEnv.h" /*! 
Currently (and possibly permanently) a very hacky way of keeping singleCell cons2prims function @@ -76,6 +77,7 @@ class Data sigma; //!< Resistivity int memSet, //!< Indicator that memory has been allocated for state vectors + bcsSet, //!< Indicator that boundary conditions have been created (before this information about the domain decomposition used in MPI version will not be correct). //@{ Ncons, Nprims, Naux; //!< Number of specified variables //@} @@ -169,7 +171,8 @@ class Data double xmin, double xmax, double ymin, double ymax, double zmin, double zmax, - double endTime, double cfl=0.5, int Ng=4, + double endTime, PlatformEnv *env, + double cfl=0.5, int Ng=4, double gamma=5.0/3.0, double sigma=1e3, double cp=0.1, double mu1=-1.0e4, double mu2=1.0e4, diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 666dc027..28576504 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -43,6 +43,7 @@ SRCS = main.cu \ IMEX3Args.cu \ boundaryConds.cu \ saveData.cu \ + serialEnv.cu \ fluxVectorSplitting.cu \ srrmhd.cu \ C2PArgs.cu @@ -153,6 +154,9 @@ boundaryConds.o : $(MODULE_DIR)/boundaryConds.cu $(INC_DIR)/boundaryConds.h saveData.o : $(MODULE_DIR)/saveData.cu $(INC_DIR)/saveData.h $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) +serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h + $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + fluxVectorSplitting.o : $(MODULE_DIR)/fluxVectorSplitting.cu $(INC_DIR)/fluxVectorSplitting.h $(INC_DIR)/weno.h $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 10a10340..996e8dc6 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -9,6 +9,7 @@ #include "SSP2.h" #include "saveData.h" #include "fluxVectorSplitting.h" +#include "serialEnv.h" #include #include @@ -42,7 +43,9 @@ int main(int argc, char *argv[]) { double sigma(0); bool output(true); int safety(180); - + int nxRanks(4); + int nyRanks(1); + int nzRanks(1); char * ptr(0); //! 
Overwrite any variables that have been passed in as main() arguments @@ -52,7 +55,9 @@ int main(int argc, char *argv[]) { } } - Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, + SerialEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); + + Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, cfl, Ng, gamma, sigma); // Choose particulars of simulation diff --git a/Project/GPU/Src/simData.cu b/Project/GPU/Src/simData.cu index 581224fe..84896524 100644 --- a/Project/GPU/Src/simData.cu +++ b/Project/GPU/Src/simData.cu @@ -1,4 +1,5 @@ #include "simData.h" +#include "platformEnv.h" #include "cudaErrorCheck.h" #include #include @@ -7,7 +8,8 @@ Data::Data(int nx, int ny, int nz, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax, - double endTime, double cfl, int Ng, + double endTime, PlatformEnv *env, + double cfl, int Ng, double gamma, double sigma, double cp, double mu1, double mu2, @@ -19,16 +21,19 @@ Data::Data(int nx, int ny, int nz, zmin(zmin), zmax(zmax), endTime(endTime), cfl(cfl), Ng(Ng), gamma(gamma), sigma(sigma), - memSet(0), + memSet(0), bcsSet(0), Ncons(0), Nprims(0), Naux(0), cp(cp), mu1(mu1), mu2(mu2), frameSkip(frameSkip) { + // TODO -- handle nx not dividing perfectly into nxRanks + + // Set Nx to be nx per MPI process + ghost cells + this->Nx = nx/env->nxRanks + 2 * Ng; + this->Ny = ny/env->nyRanks + 2 * Ng; + this->Nz = nz/env->nzRanks + 2 * Ng; - this->Nx = nx + 2 * Ng; - this->Ny = ny + 2 * Ng; - this->Nz = nz + 2 * Ng; dims = 3; // Catch 2D case From 86cd8be4fcf69df9640b5dee67f60ae39ddef1fb Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 25 Aug 2020 16:49:22 +0100 Subject: [PATCH 11/56] added env to Simulation --- Project/GPU/Include/simulation.h | 8 ++++++-- Project/GPU/Src/main.cu | 3 +-- Project/GPU/Src/simulation.cu | 29 ++++++++++++++++++++--------- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/Project/GPU/Include/simulation.h b/Project/GPU/Include/simulation.h index 9dd9f4bc..83802731 100644 --- a/Project/GPU/Include/simulation.h +++ b/Project/GPU/Include/simulation.h @@ -8,6 +8,7 @@ #include "boundaryConds.h" #include "flux.h" #include "saveData.h" +#include "platformEnv.h" //! The Simulation interface for the programme @@ -36,6 +37,8 @@ class Simulation SaveData * save; //!< Pointer to SaveData object + PlatformEnv *env; //!< Pointer to PlatformEnv object + public: Data * data; //!< Pointer to Data class containing global simulation data @@ -50,11 +53,12 @@ class Simulation and once this has been completed, the initial function class may be implemented. @param[in] *data pointer to Data class containing global simulation data + @param[in] *env pointer to the PlatformEnv object */ - Simulation(Data * data); + Simulation(Data * data, PlatformEnv *env); //! 
Destructor frees alloc'd memory - ~Simulation(); + virtual ~Simulation(); diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 996e8dc6..ddf37c4b 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -25,7 +25,6 @@ using namespace std; int main(int argc, char *argv[]) { - const double MU(1000); // Set up domain int Ng(4); int nx(64); @@ -65,7 +64,7 @@ int main(int argc, char *argv[]) { FVS fluxMethod(&data, &model); - Simulation sim(&data); + Simulation sim(&data, &env); KHInstabilitySingleFluid init(&data, 1); diff --git a/Project/GPU/Src/simulation.cu b/Project/GPU/Src/simulation.cu index ff868076..d692a0a0 100644 --- a/Project/GPU/Src/simulation.cu +++ b/Project/GPU/Src/simulation.cu @@ -1,4 +1,5 @@ #include "simulation.h" +#include "platformEnv.h" #include "cudaErrorCheck.h" #include #include @@ -7,7 +8,7 @@ #define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) -Simulation::Simulation(Data * data) : data(data) +Simulation::Simulation(Data * data, PlatformEnv *env) : data(data), env(env) { // Simplify syntax Data * d; @@ -16,7 +17,10 @@ Simulation::Simulation(Data * data) : data(data) // Allocate memory for state arrays int Ntot(d->Nx * d->Ny * d->Nz); - if (d->Ncons == 0) throw std::runtime_error("Must set model before constructing simulation"); + if (env->rank==0){ + if (d->Ncons == 0) throw std::runtime_error("Must set model before constructing simulation"); + if (d->bcsSet != 1) throw std::runtime_error("Must construct boundary condition class before implementing simulation. Need to set domain decomposition parameters including periodicity."); + } gpuErrchk( cudaHostAlloc((void **)&d->cons, sizeof(double) * Ntot * d->Ncons, @@ -62,17 +66,20 @@ Simulation::Simulation(Data * data) : data(data) d->dt = (dtX < dtY && dtX < dtZ) ? dtX : ((dtY < dtZ) ? 
dtY : dtZ); d->memSet = 1; + int iOffset = (d->Nx - 2*d->Ng)*env->xRankId; + int jOffset = (d->Ny - 2*d->Ng)*env->yRankId; + int kOffset = (d->Nz - 2*d->Ng)*env->zRankId; + // Set axes for (int i(0); i < d->Nx; i++) { - d->x[i] = d->xmin + (i + 0.5 - d->Ng) * d->dx; + d->x[i] = d->xmin + (i + iOffset + 0.5 - d->Ng) * d->dx; } for (int j(0); j < d->Ny; j++) { - d->y[j] = d->ymin + (j + 0.5 - d->Ng) * d->dy; + d->y[j] = d->ymin + (j + jOffset + 0.5 - d->Ng) * d->dy; } for (int k(0); k < d->Nz; k++) { - d->z[k] = d->zmin + (k + 0.5 - d->Ng) * d->dz; + d->z[k] = d->zmin + (k + kOffset + 0.5 - d->Ng) * d->dz; } - } Simulation::~Simulation() @@ -116,7 +123,9 @@ void Simulation::updateTime() // Syntax Data * d(this->data); - printf("t = %f\n", d->t); + if (env->rank == 0){ + printf("t = %f\n", d->t); + } // Calculate the size of the next timestep double dtX(d->cfl * d->dx / (d->alphaX * sqrt(d->dims))); @@ -183,7 +192,7 @@ void Simulation::evolve(bool output, int safety) if (safety>0 && d->iters%safety==0) { this->save->saveAll(); - printf("Data saved...\n"); + if (env->rank==0) printf("Data saved...\n"); } } @@ -205,6 +214,8 @@ void Simulation::evolve(bool output, int safety) this->save->saveVar("Ez", 11); } - printf("\n"); + if (env->rank == 0){ + printf("\n"); + } } From 6b265a73b0525cb9df8b8c2b8ad234badef3bb9a Mon Sep 17 00:00:00 2001 From: AlexJamesWright Date: Tue, 25 Aug 2020 19:12:47 +0100 Subject: [PATCH 12/56] Update the initFunc for KHI (also some stuff related to interactive plot that wasnt related to #26) --- Project/CPU/Src/interactivePlot.py | 44 ++-- Project/CPU/Src/main.cc | 4 +- Project/GPU/Src/initFunc.cu | 8 + Project/GPU/Src/interactivePlot.py | 401 ++++++++++++++++++----------- Project/GPU/Src/main.cu | 2 +- Project/compare.py | 6 +- 6 files changed, 281 insertions(+), 184 deletions(-) diff --git a/Project/CPU/Src/interactivePlot.py b/Project/CPU/Src/interactivePlot.py index d8a288ec..cd6deb73 100644 --- a/Project/CPU/Src/interactivePlot.py +++ b/Project/CPU/Src/interactivePlot.py @@ -179,19 +179,18 @@ def gatherData(self, states): self.cleanAuxLabels.append(auxLabels[i][:-1]) self.cleanAuxLabels.append(auxLabels[-1]) - with suppress(FileNotFoundError): - # Grab domain data - self.x = np.zeros(c['nx']) - self.y = np.zeros(c['ny']) - self.z = np.zeros(c['nz']) - coords = [self.x, self.y, self.z] - print("Fetching domain coordinates...") - with open(self.DatDir + 'Domain/domain' + self.appendix +'.dat', 'r') as f: - for coord, (i, line) in zip(coords, enumerate(f)): - temp = line.split() - print(len(temp)) - for k, val in enumerate(temp): - coord[k] = float(val) +# with suppress(FileNotFoundError): +# # Grab domain data +# self.x = np.zeros(c['nx']) +# self.y = np.zeros(c['ny']) +# self.z = np.zeros(c['nz']) +# coords = [self.x, self.y, self.z] +# print("Fetching domain coordinates...") +# with open(self.DatDir + 'Domain/domain' + self.appendix +'.dat', 'r') as f: +# for coord, (i, line) in zip(coords, enumerate(f)): +# temp = line.split() +# for k, val in enumerate(temp): +# coord[k] = float(val) @@ -259,7 +258,7 @@ def _getYIndexFromLine(self, line, nx, ny): The line number the file pointer is pointing to. We want to know which primitive variable this line's data corresponds to. nx: int - The total number (incl ghost cells) of domain cells in the x-direction. + The total number (incl ghost cells)n of domain cells in the x-direction. ny: int The total number (incl ghost cells) of domain cells in the y-direction. 
@@ -329,10 +328,11 @@ def plotHeatMaps(self, data='prims', color=None, axis=2): if color==None: color = cm.afmhot - surf = ax.imshow(plotVars.T, cmap=color, interpolation='bicubic', aspect='auto') + ext = [self.c['xmin'], self.c['xmax'], self.c['ymin'], self.c['ymax']] + surf = ax.imshow(plotVars.T, cmap=color, interpolation='bicubic', aspect='auto', origin='lower', extent=ext) ax.set_title(r'Time Evolution for {}: $t = {}$'.format(dataLabels[i], c['t'])) - ax.set_xlim([0, self.c['nx']]) - ax.set_ylim([0, self.c['ny']]) + ax.set_xlim([self.c['xmin'], self.c['xmax']]) + ax.set_ylim([self.c['ymin'], self.c['ymax']]) ax.set_xlabel(axisLabel1) ax.set_ylabel(axisLabel2) fig.colorbar(surf, shrink=0.5, aspect=5) @@ -746,12 +746,4 @@ def plotAdvectionAgainstInitial(self): Plot = InteractivePlot() -# Plot.plotSlice() -# Plot.plotSingleFluidCurrentSheetAgainstExact() -# Plot.plotAdvectionAgainstInitial() -# Plot.plotHeatMaps() - - plt.figure() - plt.imshow(np.log(Plot.prims[4, :, :, 0].T), extent=[0, 8, 0, 4], origin='lower') - plt.show() - +# Plot.plotHeatMaps() \ No newline at end of file diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index 4cd28f9e..8f76b365 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -27,7 +27,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.05); + double endTime(0.0005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); @@ -54,7 +54,7 @@ int main(int argc, char *argv[]) { Simulation sim(&data, &env); - KHInstabilitySingleFluid init(&data); + KHInstabilitySingleFluid init(&data, 1); SSP2 timeInt(&data, &model, &bcs, &fluxMethod); diff --git a/Project/GPU/Src/initFunc.cu b/Project/GPU/Src/initFunc.cu index 2941769d..c9a99601 100644 --- a/Project/GPU/Src/initFunc.cu +++ b/Project/GPU/Src/initFunc.cu @@ -441,6 +441,14 @@ KHInstabilitySingleFluid::KHInstabilitySingleFluid(Data * data, int mag) : Initi d->prims[ID(1, i, j, k)] = - vShear * tanh((d->y[j] + 0.5)/a); d->prims[ID(2, i, j, k)] = - A0 * vShear * sin(2*PI*d->x[i]) * (exp(-pow((d->y[j] + 0.5), 2)/(sig*sig))); } + + // If we have electric fields, set to the ideal values + if (d->Ncons > 9) + { + d->prims[ID(8, i, j, k)] = -(d->prims[ID(2, i, j, k)] * d->prims[ID(7, i, j, k)] - d->prims[ID(3, i, j, k)] * d->prims[ID(6, i, j, k)]); + d->prims[ID(9, i, j, k)] = -(d->prims[ID(3, i, j, k)] * d->prims[ID(5, i, j, k)] - d->prims[ID(1, i, j, k)] * d->prims[ID(7, i, j, k)]); + d->prims[ID(10, i, j, k)] = -(d->prims[ID(1, i, j, k)] * d->prims[ID(6, i, j, k)] - d->prims[ID(2, i, j, k)] * d->prims[ID(5, i, j, k)]); + } } } } diff --git a/Project/GPU/Src/interactivePlot.py b/Project/GPU/Src/interactivePlot.py index f34461dc..9015edbf 100644 --- a/Project/GPU/Src/interactivePlot.py +++ b/Project/GPU/Src/interactivePlot.py @@ -5,7 +5,7 @@ import numpy as np -import pylab as plt +from matplotlib import pyplot as plt from scipy.special import erf from matplotlib import cm import warnings @@ -24,7 +24,7 @@ class InteractivePlot(object): - def __init__(self, DatDirectory=None, append=None): + def __init__(self, DatDirectory=None, append=None, states=True): if DatDirectory is None: self.DatDir = FinalDirectory else: @@ -33,28 +33,34 @@ def __init__(self, DatDirectory=None, append=None): self.appendix = appendix else: self.appendix = append - self.gatherData() + self.gatherData(states) print("Ready!") - def gatherData(self): + def gatherData(self, states): """ Collects and stores all the data required for plotting the final state of 
the system. + + Parameters + ---------- + states : bool + Load all of the state arrays. If false, only the constants are + loaded to save time for animation. Notes ----- Stores the following public variables: cons : array of float - (Ncons, Nx, Ny, Nz) Array containing the conserved vector + (Ncons, nx, ny, nz) Array containing the conserved vector consLabels : array of string (Ncons,) The labels of the conserved elements prims : array of float - (Nprims, Nx, Ny, Nz) Array containing the primitive vector + (Nprims, nx, ny, nz) Array containing the primitive vector primLabels : array of string (Nprims,) The labels of the primitive elements aux : array of float - (Naux, Nx, Ny, Nz) Array containing the auxiliary vector + (Naux, nx, ny, nz) Array containing the auxiliary vector auxLabels : array of string (Naux,) The labels of the auxiliary elements c : dictionary @@ -76,7 +82,11 @@ def gatherData(self): line=line.split() c['nx'] = int(line[0]) c['ny'] = int(line[1]) + if c['ny'] == 0: + c['ny'] = 1 c['nz'] = int(line[2]) + if c['nz'] == 0: + c['nz'] = 1 c['Nx'] = int(line[3]) c['Ny'] = int(line[4]) c['Nz'] = int(line[5]) @@ -104,92 +114,87 @@ def gatherData(self): print("{} conserved vectors".format(c['Ncons'])) print("{} primitive vectors".format(c['Nprims'])) print("{} auxiliary vectors".format(c['Naux'])) - print(f"Domain extent is {c['nx']}, {c['ny']}, {c['nz']}") - - # Now gather conserved data - self.cons = np.zeros([c['Ncons'], c['nx'], c['ny'], c['nz']]) - print("Fetching conserved variables...") - with open(self.DatDir + 'Conserved/cons' + self.appendix + '.dat', 'r') as f: - for i, line in enumerate(f): - # Get cons var labels - if i==0: - consLabels = line.split()[2:] - # Get cons var data - else: - temp = line.split() - for k in range(c['Nz']-2*c['Ng']): - self.cons[self._getVarFromLine(i, c['Nx'], c['Ny'], c['Ng'])][self._getXIndexFromLine(i, c['Nx'], c['Ny'], c['Ng'])][self._getYIndexFromLine(i, c['Nx'], c['Ny'], c['Ng'])][k] = float(temp[k]) - - # Clean up labels (remove the commas) - self.cleanConsLabels = [] - for i in range(len(consLabels)-1): - self.cleanConsLabels.append(consLabels[i][:-1]) - self.cleanConsLabels.append(consLabels[-1]) - - - with suppress(FileNotFoundError): - # Now get primitive variables if and store the data in array... 
- self.prims = np.zeros([c['Nprims'], c['nx'], c['ny'], c['nz']]) - print("Fetching primitive variables...") - with open(self.DatDir + 'Primitive/prims' + self.appendix + '.dat', 'r') as f: - for i, line in enumerate(f): - # Get primitive var labels - if i==0: - primLabels = line.split()[2:] - # Get primitive var data - else: - temp = line.split() - for k in range(c['Nz']-2*c['Ng']): - self.prims[self._getVarFromLine(i, c['Nx'], c['Ny'], c['Ng'])][self._getXIndexFromLine(i, c['Nx'], c['Ny'], c['Ng'])][self._getYIndexFromLine(i, c['Nx'], c['Ny'], c['Ng'])][k] = float(temp[k]) - # Clean up labels (remove the commas) - self.cleanPrimLabels = [] - for i in range(len(primLabels)-1): - self.cleanPrimLabels.append(primLabels[i][:-1]) - self.cleanPrimLabels.append(primLabels[-1]) - - with suppress(FileNotFoundError): - # And finally the aux vars if available - self.aux = np.zeros([c['Naux'], c['nx'], c['ny'], c['nz']]) - print("Fetching auxiliary variables...") - with open(self.DatDir + 'Auxiliary/aux' + self.appendix +'.dat', 'r') as f: + if states: + # Now gather conserved data + self.cons = np.zeros([c['Ncons'], c['nx'], c['ny'], c['nz']]) + print("Fetching conserved variables...") + with open(self.DatDir + 'Conserved/cons' + self.appendix + '.dat', 'r') as f: for i, line in enumerate(f): # Get cons var labels if i==0: - auxLabels = line.split()[2:] - # Get cons var data + consLabels = line.split()[2:] + # Get cons var data else: temp = line.split() - for k in range(c['Nz']-2*c['Ng']): - self.aux[self._getVarFromLine(i, c['Nx'], c['Ny'], c['Ng'])][self._getXIndexFromLine(i, c['Nx'], c['Ny'], c['Ng'])][self._getYIndexFromLine(i, c['Nx'], c['Ny'], c['Ng'])][k] = float(temp[k]) - + for k in range(c['nz']): + self.cons[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) + + # Clean up labels (remove the commas) - self.cleanAuxLabels = [] - for i in range(len(auxLabels)-1): - self.cleanAuxLabels.append(auxLabels[i][:-1]) - self.cleanAuxLabels.append(auxLabels[-1]) - - with suppress(FileNotFoundError): - # Grab domain data - self.x = np.zeros(c['Nx']) - self.y = np.zeros(c['Ny']) - self.z = np.zeros(c['Nz']) - coords = [self.x, self.y, self.z] - print("Fetching domain coordinates...") - with open(self.DatDir + 'Domain/domain' + self.appendix +'.dat', 'r') as f: - for coord, (i, line) in zip(coords, enumerate(f)): - temp = line.split() - for k, val in enumerate(temp): - coord[k] = float(val) - - # Clean up labels (remove the commas) - self.cleanAuxLabels = [] - for i in range(len(auxLabels)-1): - self.cleanAuxLabels.append(auxLabels[i][:-1]) - self.cleanAuxLabels.append(auxLabels[-1]) - - - def _getVarFromLine(self, line, Nx, Ny, Ng): + self.cleanConsLabels = [] + for i in range(len(consLabels)-1): + self.cleanConsLabels.append(consLabels[i][:-1]) + self.cleanConsLabels.append(consLabels[-1]) + + with suppress(FileNotFoundError): + # Now get primitive variables if and store the data in array... 
+ self.prims = np.zeros([c['Nprims'], c['nx'], c['ny'], c['nz']]) + print("Fetching primitive variables...") + with open(self.DatDir + 'Primitive/prims' + self.appendix + '.dat', 'r') as f: + for i, line in enumerate(f): + # Get primitive var labels + if i==0: + primLabels = line.split()[2:] + # Get primitive var data + else: + temp = line.split() + for k in range(c['nz']): + self.prims[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) + + # Clean up labels (remove the commas) + self.cleanPrimLabels = [] + for i in range(len(primLabels)-1): + self.cleanPrimLabels.append(primLabels[i][:-1]) + self.cleanPrimLabels.append(primLabels[-1]) + + with suppress(FileNotFoundError): + # And finally the aux vars if available + self.aux = np.zeros([c['Naux'], c['nx'], c['ny'], c['nz']]) + print("Fetching auxiliary variables...") + with open(self.DatDir + 'Auxiliary/aux' + self.appendix +'.dat', 'r') as f: + for i, line in enumerate(f): + # Get cons var labels + if i==0: + auxLabels = line.split()[2:] + # Get cons var data + else: + temp = line.split() + for k in range(c['nz']): + self.aux[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) + + # Clean up labels (remove the commas) + self.cleanAuxLabels = [] + for i in range(len(auxLabels)-1): + self.cleanAuxLabels.append(auxLabels[i][:-1]) + self.cleanAuxLabels.append(auxLabels[-1]) + +# with suppress(FileNotFoundError): +# # Grab domain data +# self.x = np.zeros(c['nx']) +# self.y = np.zeros(c['ny']) +# self.z = np.zeros(c['nz']) +# coords = [self.x, self.y, self.z] +# print("Fetching domain coordinates...") +# with open(self.DatDir + 'Domain/domain' + self.appendix +'.dat', 'r') as f: +# for coord, (i, line) in zip(coords, enumerate(f)): +# temp = line.split() +# for k, val in enumerate(temp): +# coord[k] = float(val) + + + + def _getVarFromLine(self, line, nx, ny): """ Given the line number that the iterator is on, and the size of the x-domain, returns the index of the primitive variable this data belongs to. @@ -199,9 +204,9 @@ def _getVarFromLine(self, line, Nx, Ny, Ng): line: int The line number the file pointer is pointing to. We want to know which primitive variable this line's data corresponds to. - Nx: int + nx: int The total number (incl ghost cells) of domain cells in the x-direction. - Ny: int + ny: int The total number (incl ghost cells) of domain cells in the y-direction. Returns @@ -217,14 +222,10 @@ def _getVarFromLine(self, line, Nx, Ny, Ng): if line == 0: raise ValueError('Line zero does not contain any data') else: - # Remove ghost cells from count as these are no longer included in output - Nx = Nx - 2*Ng - if Ny > 1: - Ny = Ny - 2*Ng - return ((line-1)//Ny)//Nx + return ((line-1)//ny)//nx - def _getXIndexFromLine(self, line, Nx, Ny, Ng): + def _getXIndexFromLine(self, line, nx, ny): """ Given the line number that the iterator is on, and the size of the x-domain, returns the x-index of this line's data. @@ -234,9 +235,9 @@ def _getXIndexFromLine(self, line, Nx, Ny, Ng): line: int The line number the file pointer is pointing to. We want to know which primitive variable this line's data corresponds to. - Nx: int + nx: int The total number (incl ghost cells) of domain cells in the x-direction. - Ny: int + ny: int The total number (incl ghost cells) of domain cells in the y-direction. 
Returns @@ -244,14 +245,9 @@ def _getXIndexFromLine(self, line, Nx, Ny, Ng): index: The x-index of the current line's data. """ - # Remove ghost cells from count as these are no longer included in output - Nx = Nx - 2*Ng - if Ny > 1: - Ny = Ny - 2*Ng + return ((line-1)//ny)%nx - return ((line-1)//Ny)%Nx - - def _getYIndexFromLine(self, line, Nx, Ny, Ng): + def _getYIndexFromLine(self, line, nx, ny): """ Given the line number that the iterator is on, and the size of the y-domain, returns the y-index of this line's data. @@ -261,9 +257,9 @@ def _getYIndexFromLine(self, line, Nx, Ny, Ng): line: int The line number the file pointer is pointing to. We want to know which primitive variable this line's data corresponds to. - Nx: int - The total number (incl ghost cells) of domain cells in the x-direction. - Ny: int + nx: int + The total number (incl ghost cells)n of domain cells in the x-direction. + ny: int The total number (incl ghost cells) of domain cells in the y-direction. Returns @@ -271,12 +267,7 @@ def _getYIndexFromLine(self, line, Nx, Ny, Ng): index: The y-index of the current line's data. """ - # Remove ghost cells from count as these are no longer included in output - Nx = Nx - 2*Ng - if Ny > 1: - Ny = Ny - 2*Ng - - return (line-1)%Ny + return (line-1)%ny @@ -315,36 +306,38 @@ def plotHeatMaps(self, data='prims', color=None, axis=2): dataLabels = self.cleanConsLabels elif data=='aux' or data=='auxiliary': data = self.aux - dataLabels = self.cleanAuxLabels + data = self.cleanAuxLabels else: raise ValueError("Variable type not recognised, please try again") c = self.c for i in range(data.shape[0]): - fig = plt.figure() + fig, ax = plt.subplots(1) if (axis == 0): - plotVars = data[i, c['Nx']//2, c['Ng']:-c['Ng'], c['Ng']:-c['Ng']] + plotVars = data[i, c['Nx']//2, :, :] axisLabel1 = r'$y$' axisLabel2 = r'$z$' if (axis == 1): - plotVars = data[i, c['Ng']:-c['Ng'], c['Ny']//2, c['Ng']:-c['Ng']] + plotVars = data[i, :, c['Ny']//2, :] axisLabel1 = r'$x$' axisLabel2 = r'$z$' if (axis == 2): - plotVars = data[i, c['Ng']:-c['Ng'], c['Ng']:-c['Ng'], c['Nz']//2] + plotVars = data[i, :, :, c['Nz']//2] axisLabel1 = r'$x$' axisLabel2 = r'$y$' if color==None: color = cm.afmhot - surf = plt.imshow(plotVars.T, cmap=color, interpolation='bicubic', aspect='auto', - extent=[c['xmin'], c['xmax'], c['ymin'], c['ymax']]) - plt.title(r'Time Evolution for {}: $t = {}$'.format(dataLabels[i], c['t'])) - plt.xlabel(axisLabel2) - plt.ylabel(axisLabel1) + ext = [self.c['xmin'], self.c['xmax'], self.c['ymin'], self.c['ymax']] + surf = ax.imshow(plotVars.T, cmap=color, interpolation='bicubic', aspect='auto', origin='lower', extent=ext) + ax.set_title(r'Time Evolution for {}: $t = {}$'.format(dataLabels[i], c['t'])) + ax.set_xlim([self.c['xmin'], self.c['xmax']]) + ax.set_ylim([self.c['ymin'], self.c['ymax']]) + ax.set_xlabel(axisLabel1) + ax.set_ylabel(axisLabel2) fig.colorbar(surf, shrink=0.5, aspect=5) plt.show() - + return ax def plotSlice(self, data='prims', axis=0): """ @@ -377,24 +370,24 @@ def plotSlice(self, data='prims', axis=0): raise ValueError("Variable type not recognised, please try again") c = self.c - Nx, Ny, Nz, Ng= c['Nx'], c['Ny'], c['Nz'], c['Ng'] + Nx, Ny, Nz = c['Nx'], c['Ny'], c['Nz'] for i in range(len(data)): plt.figure() if (axis == 0): - plotVars = data[i, Ng:-Ng, Ny//2, Nz//2] + plotVars = data[i, :, Ny//2, Nz//2] axisLabel = r'$x$' step = c['dx'] n = c['nx'] left, right = c['xmin'], c['xmax'] if (axis == 1): - plotVars = data[i, Nx//2, Ng:-Ng, Nz//2] + plotVars = data[i, Nx//2, :, Nz//2] 
axisLabel = r'$y$' step = c['dy'] n = c['ny'] left, right = c['ymin'], c['ymax'] if (axis == 2): - plotVars = data[i, Nx//2, Ny//2, Ng:-Ng] + plotVars = data[i, Nx//2, Ny//2, :] axisLabel = r'$z$' step = c['dz'] n = c['nz'] @@ -411,7 +404,7 @@ def plotSlice(self, data='prims', axis=0): plt.xlabel(axisLabel) plt.ylabel(r'$q_{}(x)$'.format(i+1)) plt.xlim([c['xmin'], c['xmax']]) - plt.ylim((ylower, yupper)) +# plt.ylim((ylower, yupper)) plt.legend(loc='lower center', fontsize=10) plt.show() @@ -423,11 +416,11 @@ def plotTwoFluidSlice(self): """ c = self.c - Ny, Nz, Ng = c['Ny'], c['Nz'], c['Ng'] + Ny, Nz = c['Ny'], c['Nz'] - rho = self.prims[0, Ng:-Ng, Ny//2, Nz//2] + self.prims[5, Ng:-Ng, Ny//2, Nz//2] - p = self.prims[4, Ng:-Ng, Ny//2, Nz//2] + self.prims[9, Ng:-Ng, Ny//2, Nz//2] - var = [rho, *self.aux[31:34, Ng:-Ng, Ny//2, Nz//2], p, *self.prims[10:, Ng:-Ng, Ny//2, Nz//2]] + rho = self.prims[0, :, Ny//2, Nz//2] + self.prims[5, :, Ny//2, Nz//2] + p = self.prims[4, :, Ny//2, Nz//2] + self.prims[9, :, Ny//2, Nz//2] + var = [rho, *self.aux[31:34, :, Ny//2, Nz//2], p, *self.prims[10:, :, Ny//2, Nz//2]] varLab = [r'$\rho$', r'$u_x$', r'$u_y$', r'$u_z$', r'$p$', r'$B_x$', r'$B_y$', r'$B_z$', r'$E_x$', r'$E_y$', r'$E_z$'] xs = np.linspace(c['xmin'] + c['dx']/2, c['xmax'] - c['dx']/2, c['nx']) @@ -459,7 +452,7 @@ def plotTwoFluidCurrentSheetAgainstExact(self): plt.figure() xs = np.linspace(c['xmin'], c['xmax'], c['nx']) exact = np.sign(xs)*erf(0.5 * np.sqrt(c['sigma'] * xs ** 2 / (c['t']+1))) - plt.plot(xs, By[c['Ng']:-c['Ng'], 0, 0], label='Numerical') + plt.plot(xs, By[:, 0, 0], label='Numerical') plt.plot(xs, exact, label='Exact') plt.xlim([c['xmin'], c['xmax']]) plt.ylim([-1.2, 1.2]) @@ -468,7 +461,7 @@ def plotTwoFluidCurrentSheetAgainstExact(self): plt.title(r'Comparison of exact and numerical $B_y$ at $t={:.4f}$'.format(c['t']+1)) plt.legend(loc='upper left') plt.show() - #return np.linalg.norm(exact - By[c['Ng']:-c['Ng'], 0, 0]) + #return np.linalg.norm(exact - By[:, 0, 0]) def plotSingleFluidCurrentSheetAgainstExact(self, direction=0): @@ -483,13 +476,13 @@ def plotSingleFluidCurrentSheetAgainstExact(self, direction=0): nz = self.c['Nz'] // 2 if direction == 0: - B = self.cons[6, c['Ng']:-c['Ng'], ny, nz] + B = self.cons[6, :, ny, nz] x = np.linspace(c['xmin'], c['xmax'], c['nx']) elif direction == 1: - B = self.cons[7, nx, c['Ng']:-c['Ng'], nz] + B = self.cons[7, nx, :, nz] x = np.linspace(c['ymin'], c['ymax'], c['ny']) else: - B = self.cons[5, nx, ny, c['Ng']:-c['Ng']] + B = self.cons[5, nx, ny, :] x = np.linspace(c['zmin'], c['zmax'], c['nz']) exact = np.sign(x)*erf(0.5 * np.sqrt(c['sigma'] * x ** 2 / (c['t']+1))) @@ -516,7 +509,6 @@ def plotTwoFluidCPAlfvenWaveAgainstExact(self): c = self.c xs = np.linspace(c['xmin'], c['xmax'], c['nx']) t = c['t'] - Ng = c['Ng'] h = 1.04 B0 = h @@ -547,28 +539,28 @@ def plotTwoFluidCPAlfvenWaveAgainstExact(self): BxSol = np.zeros_like(BySol) BxSol[:] = B0 plt.figure() - plt.plot(xs, Bx[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, Bx[:, 0, 0], label='Numerical') plt.plot(xs, BxSol, '--', label='Exact') plt.title(r'Exact comparison for $B_x$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # By plt.figure() - plt.plot(xs, By[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, By[:, 0, 0], label='Numerical') plt.plot(xs, BySol, '--', label='Exact') plt.title(r'Exact comparison for $B_y$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # By plt.figure() - plt.plot(xs, Bz[Ng:-Ng, 0, 0], label='Numerical') + 
plt.plot(xs, Bz[:, 0, 0], label='Numerical') plt.plot(xs, BzSol, '--', label='Exact') plt.title(r'Exact comparison for $B_z$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # Ex plt.figure() - plt.plot(xs, Ex[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, Ex[:, 0, 0], label='Numerical') plt.plot(xs, np.zeros_like(xs), '--', label='Exact') plt.title(r'Exact comparison for $E_x$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) @@ -579,21 +571,21 @@ def plotTwoFluidCPAlfvenWaveAgainstExact(self): plt.legend() # Ey plt.figure() - plt.plot(xs, Ey[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, Ey[:, 0, 0], label='Numerical') plt.plot(xs, EySol, '--', label='Exact') plt.title(r'Exact comparison for $E_y$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # Ez plt.figure() - plt.plot(xs, Ez[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, Ez[:, 0, 0], label='Numerical') plt.plot(xs, EzSol, '--', label='Exact') plt.title(r'Exact comparison for $E_z$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # vx1 plt.figure() - plt.plot(xs, vx1[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, vx1[:, 0, 0], label='Numerical') plt.plot(xs, np.zeros_like(xs), '--', label='Exact') plt.title(r'Exact comparison for $v_x1$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) @@ -604,21 +596,21 @@ def plotTwoFluidCPAlfvenWaveAgainstExact(self): plt.legend() # vy1 plt.figure() - plt.plot(xs, vy1[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, vy1[:, 0, 0], label='Numerical') plt.plot(xs, vy1sol, '--', label='Exact') plt.title(r'Exact comparison for $v_y1$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # vz1 plt.figure() - plt.plot(xs, vz1[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, vz1[:, 0, 0], label='Numerical') plt.plot(xs, vz1sol, '--', label='Exact') plt.title(r'Exact comparison for $v_z1$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # vx2 plt.figure() - plt.plot(xs, vx2[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, vx2[:, 0, 0], label='Numerical') plt.plot(xs, np.zeros_like(xs), '--', label='Exact') plt.title(r'Exact comparison for $v_x2$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) @@ -629,23 +621,130 @@ def plotTwoFluidCPAlfvenWaveAgainstExact(self): plt.legend() # vy2 plt.figure() - plt.plot(xs, vy2[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, vy2[:, 0, 0], label='Numerical') plt.plot(xs, vy2sol, '--', label='Exact') plt.title(r'Exact comparison for $v_y2$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # vz2 plt.figure() - plt.plot(xs, vz2[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, vz2[:, 0, 0], label='Numerical') plt.plot(xs, vz2sol, '--', label='Exact') plt.title(r'Exact comparison for $v_z2$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() + + + def plot2DBrioWu(self, diag=0): + """ + Plots the main diagonal of the 2D Brio-Wu problem + + Parameters + ---------- + diag : int + The diagonal to plot the slice + """ + + nx = self.c['nx'] +# Ny = self.c['Ny'] + midZ = self.c['Nz'] // 2 + Ng = self.c['Ng'] + + if diag == 0: + LB = -Ng + RB = Ng + step = -1 + else: + LB = Ng + RB = -Ng + step = 1 + + + dens = self.prims[0, :, LB:RB:step, midZ].diagonal() + vx = self.prims[1, :, LB:RB:step, midZ].diagonal() + vy = self.prims[2, :, LB:RB:step, midZ].diagonal() + + + p = self.prims[4, :, LB:RB:step, midZ].diagonal() + B = self.prims[5, :, LB:RB:step, midZ].diagonal() / np.sqrt(2) + \ + self.prims[6, :, LB:RB:step, midZ].diagonal() / 
np.sqrt(2) + + # rho + plt.figure() + plt.plot(np.linspace(0, 1, nx), dens) + plt.ylabel(r'$\rho$') + plt.xlim([0, 1]) + plt.show() + # vx + plt.figure() + plt.plot(np.linspace(0, 1, nx), vx) + plt.ylabel(r'$vx$') + plt.xlim([0, 1]) + plt.show() + # vy + plt.figure() + plt.plot(np.linspace(0, 1, nx), vy) + plt.ylabel(r'$vy$') + plt.xlim([0, 1]) + plt.show() + # v rel + plt.figure() + plt.plot(np.linspace(0, 1, nx),(vx-vy)/(1-vx*vy)) + plt.ylabel(r'$v (rel)$') + plt.xlim([0, 1]) + plt.show() + # v non-rel + plt.figure() + plt.plot(np.linspace(0, 1, nx), vx/np.sqrt(2) - vy/np.sqrt(2)) + plt.ylabel(r'$v (non-rel)$') + plt.xlim([0, 1]) + plt.show() + # p + plt.figure() + plt.plot(np.linspace(0, 1, nx), p) + plt.ylabel(r'$p$') + plt.xlim([0, 1]) + plt.show() + # B + plt.figure() + plt.plot(np.linspace(0, 1, nx), B) + plt.ylabel(r'$B$') + plt.xlim([0, 1]) + plt.show() + + return B + + def plotAdvectionAgainstInitial(self): + xs = np.linspace(Plot.c['dx']/2, 1-Plot.c['dx']/2, Plot.c['nx']) + initialRho = np.ones_like(xs)*0.1 + initialRho += 0.4*np.exp(-(10 * (xs - 0.5))**2) + + fig, axs = plt.subplots(2) + fig.set_size_inches(8, 6) + axs[0].plot(xs, initialRho, 'k-', linewidth=5, alpha=0.3, label='initial') + axs[0].plot(xs, Plot.prims[0, :, 0, 0], 'b:', label='rho') + axs[0].set_xlim(xs[0], xs[-1]) + axs[0].set_xlabel(r'$x$') + axs[0].set_ylabel(r'$\rho$') + axs[0].legend() + + error = np.abs(initialRho-Plot.prims[0, :, 0, 0]) + errorNorm = np.sum(error)/len(error) + axs[1].semilogy(xs, error, label=rf'Mean = ${errorNorm:.1e}$') + axs[1].set_xlabel(r"$x$") + axs[1].set_ylabel('Error') + axs[1].set_xlim(xs[0], xs[-1]) + axs[1].legend() + plt.show() + + # Function declarations over, access data and plot! + if __name__ == '__main__': Plot = InteractivePlot() - Plot.plotHeatMaps() +# Plot.plotHeatMaps() + diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 10a10340..8d4536aa 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -36,7 +36,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.05); + double endTime(0.0005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); diff --git a/Project/compare.py b/Project/compare.py index 99fa52ad..47f74f90 100644 --- a/Project/compare.py +++ b/Project/compare.py @@ -1,4 +1,4 @@ -TOL=10e-15 +TOL=1e-15 time_format_folder="Final" vars_folders=["Conserved", "Auxiliary", "Primitive"] @@ -25,10 +25,8 @@ parallel_val = float(parallel_line) line_number = line_number + 1 if (abs(serial_val-parallel_val) > TOL): - print("\n\n!! 
Error in {} (val={}, line={}), {}, (val={})\n\n".format(serial_filename, serial_val, line_number, parallel_filename, parallel_val)) + print("\tError in {} (val={}, line={}), {}, (val={})\n".format(serial_filename, serial_val, line_number, parallel_filename, parallel_val)) break except IOError: print("Could not read file:", filename) - - From 0568b6fa0ac546809be661932c27a6d832c70312 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Wed, 26 Aug 2020 13:21:45 +0100 Subject: [PATCH 13/56] added new API for saving data --- Project/GPU/Include/parallelSaveData.h | 129 ++++ Project/GPU/Include/saveData.h | 28 +- Project/GPU/Include/serialSaveData.h | 82 +++ Project/GPU/Include/simData.h | 1 + Project/GPU/Makefile | 4 +- Project/GPU/Src/main.cu | 4 +- Project/GPU/Src/parallelSaveData.cu | 550 ++++++++++++++++++ .../Src/{saveData.cu => serialSaveData.cu} | 17 +- 8 files changed, 787 insertions(+), 28 deletions(-) create mode 100644 Project/GPU/Include/parallelSaveData.h create mode 100644 Project/GPU/Include/serialSaveData.h create mode 100644 Project/GPU/Src/parallelSaveData.cu rename Project/GPU/Src/{saveData.cu => serialSaveData.cu} (96%) diff --git a/Project/GPU/Include/parallelSaveData.h b/Project/GPU/Include/parallelSaveData.h new file mode 100644 index 00000000..3fb33dfd --- /dev/null +++ b/Project/GPU/Include/parallelSaveData.h @@ -0,0 +1,129 @@ +#ifndef PARALLELSAVEDATA_H +#define PARALLELSAVEDATA_H + +#include +#include +#include +#include +#include +#include "simData.h" +#include "saveData.h" +#include "parallelEnv.h" + +using namespace std; + +//! Class used to save simulation data using multiple processes +/*! + @par + Write outputs through the simple system of collecting all simulation data onto process 0 + and writing out from process 0. This is easy to code but has the downside of limiting + the problem size to one that will fit onto one node. + + Class is initialized with the data that is to be saved. Saves the simulation + data in the Data directory, located within the Project folder. All data is + saved automatically, including all constant data (xmin, ymax, endTime etc) and + and the values of all prims, aux and cons variables. +*/ +class ParallelSaveData : public SaveData +{ + public: + ParallelEnv * env; //!< Pointer to PlatformEnv class containing platform specific info such as MPI details + + private: + + /*! + For each particular state vector (cons, prims, aux) packs a buffer containing all cells in a subdomain + (not including ghost values) to be sent to process 0 + @param[out] *buffer pointer to the buffer to pack + @param[in] *stateVector pointer to cons, prims or aux array + @param[in] nVars number of variables in the cons, prims or aux array + */ + void packStateVectorBuffer(double *buffer, double *stateVector, int nVars); + + /*! + For each subdomain, send a buffer containing the non-ghost cells in that subdomain to a buffer on process 0. + @param[in, out] *buffer pointer to the buffer to send or receive + @param[in] numCellsSent number of cells in the buffer + @param[in] rank global id of the process sending its buffer to process 0 + */ + void sendStateVectorBufferToMaster(double *buffer, int numCellsSent, int rank); + + /*! 
+ For each particular state vector (cons, prims, aux) unpacks a buffer containing all cells + (not including ghost values) received from a particular subdomain into a vector containing + the full simulation domain + @param[in] *buffer pointer to the buffer to unpack + @param[in, out] *stateVector pointer to cons, prims or aux array of size equal to the full simulation domain + @param[in] rank global id of the process that sent its buffer to process 0 + */ + void unpackStateVectorBuffer(double *buffer, double *stateVector, int nVars, int rank); + + /*! + Process 0 already holds the values for its own subdomain, so does not need to send them anywhere. + Instead, it needs to copy its subdomain values (cons, prims, aux) to the vector containing + the full simulation domain + @param[in, out] *fullStateVector pointer to cons, prims or aux array of size equal to the full simulation domain + @param[in] *stateVector pointer to cons, prims or aux array for process 0's subdomain + @param[in] nVars number of variables in the cons, prims or aux array + */ + void copyMasterStateVectorToFullStateVector(double *fullStateVector, double *stateVector, int nVars); + + // TODO -- docstring + void writeStateVectorToFile(FILE *f, double *fullStateVector, int nVars); + + public: + + //! Saves the conserved vector state + void saveCons(); + + //! Saves the primitive vector state + void savePrims(); + + //! Saves the auxiliary vector state + void saveAux(); + + //! Saves the domain coordinates + void saveDomain(); + + //! Saves the constant data + void saveConsts(); + + + //! Constructor + /*! + @par + The constructor take a pointer to the data class which the user wants + to save. All this data is automatically saved in the Data directory, located + in the Project folder. + + @param *data pointer to the Data class + @param test integar flagging if we are in the 'Examples' directory or not, + Only used for running the given examples, can ignore otherwise. + */ + ParallelSaveData(Data * data, ParallelEnv * env, int test=0) : SaveData(data, test), env(env) { } + + virtual ~ParallelSaveData() { } //!< Destructor + + //! Saves all cons, prims, aux and constant data + /*! + @par + This calls the other member functions to save their respective + simulation data. + + @param[in] timeSeries flags whether the saved data is final or transient + */ + void saveAll(bool timeSeries=false); + + //! Saves user specified variable + /*! + @par + Function saves the data for the variable specified by the string `var` + + @param[in] variable Defines the variable the user wants to save. Should match a variable label + @param[in] num number of user-specified variables to save in total (required for consistent numbering of files) + */ + void saveVar(string variable, int num=1); + +}; + +#endif diff --git a/Project/GPU/Include/saveData.h b/Project/GPU/Include/saveData.h index c737e1ab..95734cc2 100644 --- a/Project/GPU/Include/saveData.h +++ b/Project/GPU/Include/saveData.h @@ -7,13 +7,15 @@ #include #include #include "simData.h" +#include "platformEnv.h" using namespace std; //! Class used to save simulation data /*! @par - Class is initialized with the data that is to be saved. Saves the simulation + Abstract base class to allow for different output schemes in a parallel environment. + Class is initialized with the data that is to be saved. Saves the simulation data in the Data directory, located within the Project folder. 
All data is saved automatically, including all constant data (xmin, ymax, endTime etc) and and the values of all prims, aux and cons variables. @@ -24,34 +26,29 @@ class SaveData public: Data * d; //!< Pointer to Data class containing global simulation data - private: - int Nouts, //!< Number of output files Ncount, //!< Which user defined variable is this? test; //!< Flags if we are running one of the given examples - public: - //! Saves the conserved vector state - void saveCons(); + virtual void saveCons() = 0; //! Saves the primitive vector state - void savePrims(); + virtual void savePrims() = 0; //! Saves the auxiliary vector state - void saveAux(); + virtual void saveAux() = 0; //! Saves the domain coordinates - void saveDomain(); + virtual void saveDomain() = 0; //! Saves the constant data - void saveConsts(); + virtual void saveConsts() = 0; char dir[50], //!< String path to the directory in which to write files - app[10]; //!< String appendix to add to end of file names - + app[50]; //!< String appendix to add to end of file names //! Constructor /*! @@ -73,6 +70,7 @@ class SaveData } } + virtual ~SaveData() { } //!< Destructor //! Saves all cons, prims, aux and constant data /*! @@ -82,17 +80,17 @@ class SaveData @param[in] timeSeries flags whether the saved data is final or transient */ - void saveAll(bool timeSeries=false); + virtual void saveAll(bool timeSeries=false) = 0; //! Saves user specified variable /*! @par Function saves the data for the variable specified by the string `var` - @param[in] var Defines the variable the user wants to save. Should match a variable label + @param[in] variable Defines the variable the user wants to save. Should match a variable label @param[in] num number of user-specified variables to save in total (required for consistent numbering of files) */ - void saveVar(string variable, int num=1); + virtual void saveVar(string variable, int num=1) = 0; }; diff --git a/Project/GPU/Include/serialSaveData.h b/Project/GPU/Include/serialSaveData.h new file mode 100644 index 00000000..7900ccba --- /dev/null +++ b/Project/GPU/Include/serialSaveData.h @@ -0,0 +1,82 @@ +#ifndef SERIALSAVEDATA_H +#define SERIALSAVEDATA_H + +#include +#include +#include +#include +#include +#include "simData.h" +#include "saveData.h" +#include "serialEnv.h" + +using namespace std; + +//! Class used to save simulation data using a single process +/*! + @par + Class is initialized with the data that is to be saved. Saves the simulation + data in the Data directory, located within the Project folder. All data is + saved automatically, including all constant data (xmin, ymax, endTime etc) and + and the values of all prims, aux and cons variables. +*/ +class SerialSaveData : public SaveData +{ + + public: + + SerialEnv * env; //!< Pointer to PlatformEnv class containing platform specific info such as MPI details + + //! Saves the conserved vector state + void saveCons(); + + //! Saves the primitive vector state + void savePrims(); + + //! Saves the auxiliary vector state + void saveAux(); + + //! Saves the domain coordinates + void saveDomain(); + + //! Saves the constant data + void saveConsts(); + + //! Constructor + /*! + @par + The constructor take a pointer to the data class which the user wants + to save. All this data is automatically saved in the Data directory, located + in the Project folder. 
+ + @param *data pointer to the Data class + @param test integar flagging if we are in the 'Examples' directory or not, + Only used for running the given examples, can ignore otherwise. + */ + SerialSaveData(Data * data, SerialEnv * env, int test=0) : SaveData(data, test), env(env) { } + + virtual ~SerialSaveData() { } //!< Destructor + + //! Saves all cons, prims, aux and constant data + /*! + @par + This calls the other member functions to save their respective + simulation data. + + @param[in] timeSeries flags whether the saved data is final or transient + */ + void saveAll(bool timeSeries=false); + + //! Saves user specified variable + /*! + @par + Function saves the data for the variable specified by the string `var` + + @param[in] variable Defines the variable the user wants to save. Should match a variable label + @param[in] num number of user-specified variables to save in total (required for consistent numbering of files) + */ + void saveVar(string variable, int num=1); + +}; + +#endif diff --git a/Project/GPU/Include/simData.h b/Project/GPU/Include/simData.h index 7e0056d2..1111ab3b 100644 --- a/Project/GPU/Include/simData.h +++ b/Project/GPU/Include/simData.h @@ -5,6 +5,7 @@ #include #include "platformEnv.h" + /*! Currently (and possibly permanently) a very hacky way of keeping singleCell cons2prims function general for the benefit of the IMEX integrator. diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 28576504..9028e8b3 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -42,7 +42,7 @@ SRCS = main.cu \ SSP3.cu \ IMEX3Args.cu \ boundaryConds.cu \ - saveData.cu \ + serialSaveData.cu \ serialEnv.cu \ fluxVectorSplitting.cu \ srrmhd.cu \ @@ -151,7 +151,7 @@ IMEX3Args.o : $(MODULE_DIR)/IMEX3Args.cu $(INC_DIR)/IMEX3Args.h $(INC_DIR)/IMEX2 boundaryConds.o : $(MODULE_DIR)/boundaryConds.cu $(INC_DIR)/boundaryConds.h $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp -saveData.o : $(MODULE_DIR)/saveData.cu $(INC_DIR)/saveData.h +serialSaveData.o : $(MODULE_DIR)/serialSaveData.cu $(INC_DIR)/serialSaveData.h $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index ddf37c4b..4be013f4 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -7,7 +7,7 @@ #include "boundaryConds.h" #include "rkSplit.h" #include "SSP2.h" -#include "saveData.h" +#include "serialSaveData.h" #include "fluxVectorSplitting.h" #include "serialEnv.h" @@ -72,7 +72,7 @@ int main(int argc, char *argv[]) { SSP2 timeInt(&data, &model, &bcs, &fluxMethod); - SaveData save(&data); + SerialSaveData save(&data, &env); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); diff --git a/Project/GPU/Src/parallelSaveData.cu b/Project/GPU/Src/parallelSaveData.cu new file mode 100644 index 00000000..0c592d5c --- /dev/null +++ b/Project/GPU/Src/parallelSaveData.cu @@ -0,0 +1,550 @@ +#include "parallelSaveData.h" +#include +#include +#include +#include + +using namespace std; + +// Id in a state vector that does not include ghost cells +// TODO -- Should probably just define a variable on Data that is (Nz-2*Ng or 1 if nz=0) to avoid having a copy for each dimension +#define ID_PHYS_3D(variable, idx, jdx, kdx) ((variable)*(d->Nx-(d->Ng*2))*(d->Ny-(d->Ng*2))*(d->Nz-(d->Ng*2)) + (idx)*(d->Ny-(d->Ng*2))*(d->Nz-(d->Ng*2)) + (jdx)*(d->Nz-(d->Ng*2)) + (kdx)) +#define ID_PHYS_2D(variable, idx, jdx) 
((variable)*(d->Nx-(d->Ng*2))*(d->Ny-(d->Ng*2)) + (idx)*(d->Ny-(d->Ng*2)) + (jdx)) +#define ID_PHYS_1D(variable, idx) ((variable)*(d->Nx-(d->Ng*2)) + (idx)) + +#define ID_FULL_3D(variable, idx, jdx, kdx) ((variable)*(d->nx)*(d->ny)*(d->nz) + (idx)*(d->ny)*(d->nz) + (jdx)*(d->nz) + (kdx)) +#define ID_FULL_2D(variable, idx, jdx) ((variable)*(d->nx)*(d->ny) + (idx)*(d->ny) + (jdx)) +#define ID_FULL_1D(variable, idx) ((variable)*(d->nx) + (idx)) +#define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) + +void ParallelSaveData::saveAll(bool timeSeries) +{ + // Clean directory variable + dir[0] = '\0'; + // Determine the directory to write files to + if (test) + strcpy(dir, "../../"); + if (!timeSeries && strcmp(dir, "Data/Final")!=0) { + strcat(dir, "Data/Final"); + app[0]=0; + } + else { + strcat(dir, "Data/TimeSeries"); + sprintf(app, "%d", Nouts++); + } + + // Cons + this->saveCons(); + + // Prims + this->savePrims(); + + // Aux + this->saveAux(); + + // TODO -- could gather this to proc0 like for the other state vectors but not sure if it is required + //this->saveDomain(); + + // TODO -- Nx, Ny are per process -- may need to print out a global version as well (nx, ny don't include ghost cells) + this->saveConsts(); + +} + +void ParallelSaveData::packStateVectorBuffer(double *buffer, double *stateVector, int nVars){ + // Prepare send buffer, which doesn't include ghost cells, by copying from local state vectors + if (d->dims==3){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + for (int j(0); j < d->Ny-(d->Ng*2); j++) { + for (int k(0); k < d->Nz-(d->Ng*2); k++) { + buffer[ID_PHYS_3D(var, i, j, k)] = stateVector[ID(var, i + d->Ng, j + d->Ng, k + d->Ng)]; + } + } + } + } + } else if (d->dims==2){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + for (int j(0); j < d->Ny-(d->Ng*2); j++) { + buffer[ID_PHYS_2D(var, i, j)] = stateVector[ID(var, i + d->Ng, j + d->Ng, 0)]; + } + } + } + } else { + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + buffer[ID_PHYS_1D(var, i)] = stateVector[ID(var, i + d->Ng, 0, 0)]; + } + } + } +} + +void ParallelSaveData::copyMasterStateVectorToFullStateVector(double *fullStateVector, double *stateVector, int nVars){ + // This requires proc0 to have xRankId=yRankId=zRankId=0 + if (d->dims==3){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + for (int j(0); j < d->Ny-(d->Ng*2); j++) { + for (int k(0); k < d->Nz-(d->Ng*2); k++) { + fullStateVector[ID_FULL_3D(var, i, j, k)] = stateVector[ID(var, i + d->Ng, j + d->Ng, k + d->Ng)]; + } + } + } + } + } else if (d->dims==2){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + for (int j(0); j < d->Ny-(d->Ng*2); j++) { + //printf("nx: %d, ny: %d\n", d->nx, d->ny); + //printf("var: %d i: %d j: %d, id: %d, id_full: %d\n", var, i, j, ID(var, i+d->Ng, j+d->Ng, 0), + //ID_FULL_2D(var, i, j)); + fullStateVector[ID_FULL_2D(var, i, j)] = stateVector[ID(var, i + d->Ng, j + d->Ng, 0)]; + } + } + } + } else { + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + fullStateVector[ID_FULL_1D(var, i)] = stateVector[ID(var, i + d->Ng, 0, 0)]; + } + } + } +} + +void ParallelSaveData::sendStateVectorBufferToMaster(double *buffer, int numCellsSent, int rank){ + // MPI message vars + int tag = 101; + MPI_Status status; + if (env->rank == rank){ + 
MPI_Send(buffer, numCellsSent, MPI_DOUBLE, 0, tag, env->mpiCartesianComm); + } else if (env->rank == 0){ + MPI_Recv(buffer, numCellsSent, MPI_DOUBLE, rank, tag, env->mpiCartesianComm, &status); + } +} + +void ParallelSaveData::unpackStateVectorBuffer(double *buffer, double *stateVector, int nVars, int rank){ + // Unpack send buffer, which don't include ghost cells, into the global state vector + + // Get (x,y,z) coords of rank that sent data to proc0 + int rankCoords[3]; + int ndims = 3; // rank grid is always 3D + MPI_Cart_coords(env->mpiCartesianComm, rank, ndims, rankCoords); + + int iOffset, jOffset, kOffset; + iOffset = rankCoords[0] * (d->Nx - (d->Ng*2)); + if (d->dims > 1) { + jOffset = rankCoords[1] * (d->Ny - (d->Ng*2)); + } else jOffset = 0; + + if (d->dims > 2) { + kOffset = rankCoords[2] * (d->Nz - (d->Ng*2)); + } else kOffset = 0; + + if (d->dims==3){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + for (int j(0); j < d->Ny-(d->Ng*2); j++) { + for (int k(0); k < d->Nz-(d->Ng*2); k++) { + stateVector[ID_FULL_3D(var, i + iOffset, j + jOffset, k + kOffset)] = buffer[ID_PHYS_3D(var, i, j, k)]; + } + } + } + } + } else if (d->dims==2){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + for (int j(0); j < d->Ny-(d->Ng*2); j++) { + stateVector[ID_FULL_2D(var, i + iOffset, j + jOffset)] = buffer[ID_PHYS_2D(var, i, j)]; + } + } + } + } else { + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + stateVector[ID_FULL_1D(var, i + iOffset)] = buffer[ID_PHYS_1D(var, i)]; + } + } + } +} + +void ParallelSaveData::writeStateVectorToFile(FILE *f, double *fullStateVector, int nVars){ + if (d->dims==3){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->nx; i++) { + for (int j(0); j < d->ny; j++) { + for (int k(0); k < d->nz; k++) { + fprintf(f, "%.16f ", fullStateVector[ID_FULL_3D(var, i, j, k)]); + } + fprintf(f, "\n"); + } + } + } + } else if (d->dims==2){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->nx; i++) { + for (int j(0); j < d->ny; j++) { + fprintf(f, "%.16f ", fullStateVector[ID_FULL_2D(var, i, j)]); + fprintf(f, "\n"); + } + } + } + } else { + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->nx; i++) { + fprintf(f, "%.16f ", fullStateVector[ID_FULL_1D(var, i)]); + fprintf(f, "\n"); + } + } + } +} + +void ParallelSaveData::saveCons() +{ + FILE * f; + + char fname[120]; + strcpy(fname, dir); + strcat(fname, "/Conserved/cons"); + strcat(fname, app); + strcat(fname, ".dat\0"); + + // Allocate buffers for gathering distributed state vectors onto master process + // We do this here rather than in saveAll to allow saveCons to be called independently + // We don't want to do this in the ParallelSaveData constructor as we don't want to use up this large + // amount of memory until it's needed. + int numCellsInBuffer = d->Ncons * (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsInBuffer *= (d->Ny - (2*d->Ng)); + if (d->dims > 2) numCellsInBuffer *= (d->Nz - (2*d->Ng)); + double *buffer = (double*) malloc(numCellsInBuffer * sizeof(double)); + int numCellsInFullStateVector = numCellsInBuffer * env->nProc; + double *fullStateVector = (double*) malloc(numCellsInFullStateVector * sizeof(double)); + + // For all procs other than proc0, copy local statevector to a buffer that does not include ghost cells + // for sending to proc0. 
Proc0 can copy directly from its local statevector to the fullstatevector + if (env->rank != 0) packStateVectorBuffer(buffer, d->cons, d->Ncons); + else copyMasterStateVectorToFullStateVector(fullStateVector, d->cons, d->Ncons); + + for (int r(1); r < env->nProc; r++){ + int numCellsSent = d->Ncons * (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsSent *= (d->Ny-(2*d->Ng)); + if (d->dims > 2) numCellsSent *= (d->Nz-(2*d->Ng)); + sendStateVectorBufferToMaster(buffer, numCellsSent, r); + if (env->rank == 0) unpackStateVectorBuffer(buffer, fullStateVector, d->Ncons, r); + } + + if (env->rank == 0){ + f = fopen(fname, "w"); + // Ensure file is open + if (f == NULL) { + printf("Error: could not open 'cons.dat' for writing.\n"); + exit(1); + } + + // File is open, write data + fprintf(f, "cons = "); + for (int i(0); i < d->Ncons-1; i++) { + fprintf(f, "%s, ", d->consLabels[i].c_str()); + } + fprintf(f, "%s\n", d->consLabels[d->Ncons-1].c_str()); + + writeStateVectorToFile(f, fullStateVector, d->Ncons); + + fclose(f); + } + + free(buffer); + free(fullStateVector); +} + +void ParallelSaveData::savePrims() +{ + FILE * f; + char fname[120]; + strcpy(fname, dir); + strcat(fname, "/Primitive/prims"); + strcat(fname, app); + strcat(fname, ".dat\0"); f = fopen(fname, "w"); + + // Allocate buffers for gathering distributed state vectors onto master process + // We do this here rather than in saveAll to allow savePrims to be called independently + // We don't want to do this in the ParallelSaveData constructor as we don't want to use up this large + // amount of memory until it's needed. + int numCellsInBuffer = d->Nprims * (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsInBuffer *= (d->Ny - (2*d->Ng)); + if (d->dims > 2) numCellsInBuffer *= (d->Nz - (2*d->Ng)); + double *buffer = (double*) malloc(numCellsInBuffer * sizeof(double)); + int numCellsInFullStateVector = numCellsInBuffer * env->nProc; + double *fullStateVector = (double*) malloc(numCellsInFullStateVector * sizeof(double)); + + if (env->rank != 0) packStateVectorBuffer(buffer, d->prims, d->Nprims); + else copyMasterStateVectorToFullStateVector(fullStateVector, d->prims, d->Nprims); + for (int r(1); r < env->nProc; r++){ + int numCellsSent = d->Nprims * (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsSent *= (d->Ny-(2*d->Ng)); + if (d->dims > 2) numCellsSent *= (d->Nz-(2*d->Ng)); + sendStateVectorBufferToMaster(buffer, numCellsSent, r); + if (env->rank == 0) unpackStateVectorBuffer(buffer, fullStateVector, d->Nprims, r); + } + + if (env->rank == 0){ + // Ensure file is open + if (f == NULL) { + printf("Error: could not open 'prims.dat' for writing.\n"); + exit(1); + } + + // File is open, write data + fprintf(f, "prims = "); + for (int i(0); i < d->Nprims-1; i++) fprintf(f, "%s, ", d->primsLabels[i].c_str()); + fprintf(f, "%s\n", d->primsLabels[d->Nprims-1].c_str()); + + writeStateVectorToFile(f, fullStateVector, d->Nprims); + fclose(f); + } + + free(buffer); + free(fullStateVector); +} + +void ParallelSaveData::saveAux() +{ + FILE * f; + char fname[120]; + strcpy(fname, dir); + strcat(fname, "/Auxiliary/aux"); + strcat(fname, app); + strcat(fname, ".dat\0"); f = fopen(fname, "w"); + + // Allocate buffers for gathering distributed state vectors onto master process + // We do this here rather than in saveAll to allow saveAux to be called independently + // We don't want to do this in the ParallelSaveData constructor as we don't want to use up this large + // amount of memory until it's needed. 
+ int numCellsInBuffer = d->Naux * (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsInBuffer *= (d->Ny - (2*d->Ng)); + if (d->dims > 2) numCellsInBuffer *= (d->Nz - (2*d->Ng)); + double *buffer = (double*) malloc(numCellsInBuffer * sizeof(double)); + int numCellsInFullStateVector = numCellsInBuffer * env->nProc; + double *fullStateVector = (double*) malloc(numCellsInFullStateVector * sizeof(double)); + + if (env->rank != 0) packStateVectorBuffer(buffer, d->aux, d->Naux); + else copyMasterStateVectorToFullStateVector(fullStateVector, d->aux, d->Naux); + for (int r(1); r < env->nProc; r++){ + int numCellsSent = d->Naux * (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsSent *= (d->Ny-(2*d->Ng)); + if (d->dims > 2) numCellsSent *= (d->Nz-(2*d->Ng)); + sendStateVectorBufferToMaster(buffer, numCellsSent, r); + if (env->rank == 0) unpackStateVectorBuffer(buffer, fullStateVector, d->Naux, r); + } + + if (env->rank == 0){ + // Ensure file is open + if (f == NULL) { + printf("Error: could not open 'aux.dat' for writing.\n"); + exit(1); + } + + // File is open, write data + fprintf(f, "aux = "); + for (int i(0); i < d->Naux-1; i++) fprintf(f, "%s, ", d->auxLabels[i].c_str()); + fprintf(f, "%s\n", d->auxLabels[d->Naux-1].c_str()); + + writeStateVectorToFile(f, fullStateVector, d->Naux); + fclose(f); + } + + free(buffer); + free(fullStateVector); + +} + + +void ParallelSaveData::saveDomain() +{ + FILE * f; + char fname[120]; + strcpy(fname, dir); + strcat(fname, "/Domain/domain"); + strcat(fname, app); + strcat(fname, ".dat\0"); f = fopen(fname, "w"); + + // Ensure file is open + if (f == NULL) { + printf("Error: could not open 'domain.dat' for writing.\n"); + exit(1); + } + + // File is open, write data + for (int i(0); i < d->Nx; i++) + fprintf(f, "%.16f ", d->x[i]); + fprintf(f, "\n"); + for (int j(0); j < d->Ny; j++) + fprintf(f, "%.16f ", d->y[j]); + fprintf(f, "\n"); + for (int k(0); k < d->Nz; k++) + fprintf(f, "%.16f ", d->z[k]); + fprintf(f, "\n"); + + + fclose(f); + +} + + +void ParallelSaveData::saveConsts() +{ + FILE * f; + char fname[120]; + strcpy(fname, dir); + strcat(fname, "/Constants/constants"); + strcat(fname, app); + strcat(fname, ".dat\0"); f = fopen(fname, "w"); + + if (env->rank == 0){ + // Ensure file is open + if (f == NULL) { + printf("Error: could not open 'constants.dat' for writing.\n"); + exit(1); + } + + fprintf(f, "constants = nx, ny, nz, Nx, Ny, Nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, cfl, Ng, gamma, sigma, "); + fprintf(f, "Ncons, Nprims, Naux, cp, dt, t, dx, dy, dz\n"); + fprintf(f, "%d %d %d %d %d %d %.16f %.16f %.16f %.16f %.16f %.16f %.16f %.16f %d %.16f %.16f %d %d %d %.16f %.16f %.16f %.16f %.16f %.16f\n", + d->nx, d->ny, d->nz, d->Nx, d->Ny, d->Nz, d->xmin, d->xmax, d->ymin, d->ymax, d->zmin, d->zmax, d->endTime, d->cfl, d->Ng, + d->gamma, d->sigma, d->Ncons, d->Nprims, d->Naux, d->cp, d->dt, d->t, d->dx, d->dy, d->dz); + + fclose(f); + } +} + + +void ParallelSaveData::saveVar(string variable, int num) +{ + int cpa(0); // cons=1,prims=2,aux=3 + int Nvar(0); // Variable number + FILE * f; + char fname[120]; + double * sendVec; // Pointer to the array to send to master and save + + // Determine which variable the user wants saved + for (int var(0); var < d->Ncons; var++) { + if (strcmp(d->consLabels[var].c_str(), variable.c_str()) == 0) { + cpa=1; Nvar=var; + break; + } + } + + if (!cpa) { + for (int var(0); var < d->Nprims; var++) { + if (strcmp(d->primsLabels[var].c_str(), variable.c_str()) == 0) { + cpa=2; Nvar=var; + break; + } + } + } + + if (!cpa) { + 
for (int var(0); var < d->Naux; var++) { + if (strcmp(d->auxLabels[var].c_str(), variable.c_str()) == 0) { + cpa=3; Nvar=var; + break; + } + } + } + + if (!cpa) { + printf("Error: Could not find user specified variable '%s'\n", variable.c_str()); + exit(1); + } + + if (cpa==1) sendVec = &d->cons[ID(Nvar, 0, 0, 0)]; + else if (cpa==2) sendVec = &d->prims[ID(Nvar, 0, 0, 0)]; + else sendVec = &d->aux[ID(Nvar, 0, 0, 0)]; + + // Allocate buffers for gathering distributed state vectors onto master process + // We do this here rather than in saveAll to allow savePrims to be called independently + // We don't want to do this in the ParallelSaveData constructor as we don't want to use up this large + // amount of memory until it's needed. + int numCellsInBuffer = (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsInBuffer *= (d->Ny - (2*d->Ng)); + if (d->dims > 2) numCellsInBuffer *= (d->Nz - (2*d->Ng)); + double *buffer = (double*) malloc(numCellsInBuffer * sizeof(double)); + int numCellsInFullStateVector = numCellsInBuffer * env->nProc; + double *fullStateVector = (double*) malloc(numCellsInFullStateVector * sizeof(double)); + + if (env->rank != 0) packStateVectorBuffer(buffer, sendVec, 1); + else copyMasterStateVectorToFullStateVector(fullStateVector, sendVec, 1); + for (int r(1); r < env->nProc; r++){ + int numCellsSent = 1 * (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsSent *= (d->Ny-(2*d->Ng)); + if (d->dims > 2) numCellsSent *= (d->Nz-(2*d->Ng)); + sendStateVectorBufferToMaster(buffer, numCellsSent, r); + if (env->rank == 0) unpackStateVectorBuffer(buffer, fullStateVector, 1, r); + } + + + + + + + if (env->rank == 0){ + + // Directory + if (this->test) + strcpy(fname, "../../Data/TimeSeries/UserDef/"); + else + strcpy(fname, "Data/TimeSeries/UserDef/"); + sprintf(app, "%d", Nouts); + + // Location of output file + strcat(fname, variable.c_str()); + strcat(fname, app); + strcat(fname, ".dat\0"); + f = fopen(fname, "w"); + + // Ensure file is open + if (f == NULL) { + printf("Error: could not open user-defined file for writing.\n"); + exit(1); + } + + // File is open, write data + fprintf(f, "var = %s, t = %18.16f\n", variable.c_str(), d->t); + + writeStateVectorToFile(f, fullStateVector, 1); + + + fclose(f); + + + // For first output add the variables we are saving + if (Nouts==0) { + if (Ncount==0) { + ofstream info; + if (this->test) + strcpy(fname, "../../Data/TimeSeries/UserDef/"); + else + strcpy(fname, "Data/TimeSeries/UserDef/"); + strcat(fname, "info"); + info.open(fname); + info << variable << endl; + info.close(); + } + else { + ofstream info; + info.open("Data/TimeSeries/UserDef/info", ios::app); + info << variable << endl; + info.close(); + } + } + Ncount++; + // Increment if this is the last variable to save in this timestep + if (Ncount == num) { + Ncount = 0; + Nouts++; + } + } + + free(buffer); + free(fullStateVector); + +} diff --git a/Project/GPU/Src/saveData.cu b/Project/GPU/Src/serialSaveData.cu similarity index 96% rename from Project/GPU/Src/saveData.cu rename to Project/GPU/Src/serialSaveData.cu index c87bbccb..a0115466 100644 --- a/Project/GPU/Src/saveData.cu +++ b/Project/GPU/Src/serialSaveData.cu @@ -1,14 +1,13 @@ -#include "saveData.h" +#include "serialSaveData.h" #include #include #include using namespace std; -// Macro for getting array index #define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) -void SaveData::saveAll(bool timeSeries) +void SerialSaveData::saveAll(bool timeSeries) { // Clean directory 
variable dir[0] = '\0'; @@ -31,7 +30,7 @@ void SaveData::saveAll(bool timeSeries) this->saveConsts(); } -void SaveData::saveCons() +void SerialSaveData::saveCons() { FILE * f; @@ -89,7 +88,7 @@ void SaveData::saveCons() } -void SaveData::savePrims() +void SerialSaveData::savePrims() { FILE * f; char fname[120]; @@ -142,7 +141,7 @@ void SaveData::savePrims() } -void SaveData::saveAux() +void SerialSaveData::saveAux() { FILE * f; char fname[120]; @@ -196,7 +195,7 @@ void SaveData::saveAux() } -void SaveData::saveDomain() +void SerialSaveData::saveDomain() { FILE * f; char fname[120]; @@ -228,7 +227,7 @@ void SaveData::saveDomain() } -void SaveData::saveConsts() +void SerialSaveData::saveConsts() { FILE * f; char fname[120]; @@ -254,7 +253,7 @@ void SaveData::saveConsts() } -void SaveData::saveVar(string variable, int num) +void SerialSaveData::saveVar(string variable, int num) { int cpa(0); // cons=1,prims=2,aux=3 int Nvar(0); // Variable number From e879fb8896edd8400ab25f3916b22eab39338025 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 1 Sep 2020 09:40:26 +0100 Subject: [PATCH 14/56] split out compiling and final linking into nvcc/g++, to prepare for adding MPI to build system --- Project/GPU/Makefile | 75 +++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 9028e8b3..abde3e2c 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -2,7 +2,12 @@ # and links for main.cc # Compiler -CC = nvcc + +# We will compile most object files with (NVCC), other than those object files that use MPI. If using MPI, set CC_CPU=mpicc. In that case, the object files which use MPI will be compiled with mpicc. For linking, CC_CPU will be used (this should be set to mpicc if using MPI) +CC_CPU = g++ +CC_GPU = nvcc + +USE_MPI=0 # Module directory MODULE_DIR = ./Src @@ -20,10 +25,10 @@ RTFIND_INC_DIR = ./CminpackLibrary/Include RTFIND_SRC_DIR = ./CminpackLibrary/Src # C++ compiler flags -CXXFLAGS = -Xcompiler -fopenmp -Xcompiler -Wall +CXXFLAGS = -fopenmp -Wall # NVIDIA compiler flags -NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 +NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_61 -Xcompiler -fopenmp -Xcompiler -Wall # Sources SRCS = main.cu \ @@ -42,16 +47,27 @@ SRCS = main.cu \ SSP3.cu \ IMEX3Args.cu \ boundaryConds.cu \ - serialSaveData.cu \ - serialEnv.cu \ fluxVectorSplitting.cu \ srrmhd.cu \ C2PArgs.cu +PARALLEL_SRCS = parallelSaveData.cu \ + parallelEnv.cu \ + parallelBoundaryConds.cu + +SERIAL_SRCS = serialSaveData.cu \ + serialEnv.cu # Headers HDRS = ${SRCS:.cu=.h} cudaErrorCheck.h + +ifeq ($(USE_MPI), 1) + SRCS += ${PARALLEL_SRCS} +else + SRCS += ${SERIAL_SRCS} +endif + # Objects OBJS = ${SRCS:.cu=.o} @@ -98,72 +114,75 @@ clean : ################# simData.o : $(MODULE_DIR)/simData.cu $(INC_DIR)/simData.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) initFunc.o : $(MODULE_DIR)/initFunc.cu $(INC_DIR)/initFunc.h $(INC_DIR)/simData.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) C2PArgs.o : $(MODULE_DIR)/C2PArgs.cu $(INC_DIR)/C2PArgs.h $(INC_DIR)/simData.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) twoFluidEMHD.o : $(MODULE_DIR)/twoFluidEMHD.cu $(INC_DIR)/twoFluidEMHD.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) srmhd.o : $(MODULE_DIR)/srmhd.cu $(INC_DIR)/srmhd.h - $(CC) $< 
-c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) -Xcompiler -fopenmp + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) -Xcompiler -fopenmp srrmhd.o : $(MODULE_DIR)/srrmhd.cu $(INC_DIR)/srrmhd.h $(INC_DIR)/C2PArgs.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) simulation.o : $(MODULE_DIR)/simulation.cu $(INC_DIR)/simulation.h $(INC_DIR)/model.h $(INC_DIR)/simData.h $(INC_DIR)/saveData.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) main.o : $(MODULE_DIR)/main.cu $(INC_DIR)/simulation.h $(INC_DIR)/model.h $(INC_DIR)/simData.h $(INC_DIR)/initFunc.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) $(CXXFLAGS) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) weno.o : $(MODULE_DIR)/weno.cu $(INC_DIR)/weno.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) RK2.o : $(MODULE_DIR)/RK2.cu $(INC_DIR)/RK2.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp rkSplit.o : $(MODULE_DIR)/rkSplit.cu $(INC_DIR)/rkSplit.h $(INC_DIR)/RK2.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp backwardsRK.o : $(MODULE_DIR)/backwardsRK.cu $(INC_DIR)/backwardsRK.h $(INC_DIR)/backRKArgs.h $(INC_DIR)/rkSplit.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) backRKArgs.o : $(MODULE_DIR)/backRKArgs.cu $(INC_DIR)/backRKArgs.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) SSP2.o : $(MODULE_DIR)/SSP2.cu $(INC_DIR)/SSP2.h $(INC_DIR)/IMEX2Args.h $(INC_DIR)/timeInt.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) -Xcompiler -fopenmp + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) -Xcompiler -fopenmp IMEX2Args.o : $(MODULE_DIR)/IMEX2Args.cu $(INC_DIR)/IMEX2Args.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) SSP3.o : $(MODULE_DIR)/SSP3.cu $(INC_DIR)/SSP3.h $(INC_DIR)/IMEX3Args.h $(INC_DIR)/timeInt.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) IMEX3Args.o : $(MODULE_DIR)/IMEX3Args.cu $(INC_DIR)/IMEX3Args.h $(INC_DIR)/IMEX2Args.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) boundaryConds.o : $(MODULE_DIR)/boundaryConds.cu $(INC_DIR)/boundaryConds.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp serialSaveData.o : $(MODULE_DIR)/serialSaveData.cu $(INC_DIR)/serialSaveData.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) fluxVectorSplitting.o : $(MODULE_DIR)/fluxVectorSplitting.cu $(INC_DIR)/fluxVectorSplitting.h $(INC_DIR)/weno.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp +# link device code using CC_GPU (g++/mpi++ can't do this) +gpu_link.o : $(RTFIND_OBJS) $(OBJS) + $(CC_GPU) $^ -dlink -o $@ $(NVFLAGS) -lcudadevrt # Executable -main : $(RTFIND_OBJS) $(OBJS) - @$(CC) $^ -o $@ $(CXXFLAGS) $(NVFLAGS) +main : gpu_link.o 
$(RTFIND_OBJS) $(OBJS) + $(CC_CPU) $^ -o $@ $(CXXFLAGS) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 buildRootfinder: @cd $(RTFIND_DIR) && $(MAKE) objects From 30ff5774f3ab42adcbb7901c52ba43951589e34c Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 1 Sep 2020 11:01:31 +0100 Subject: [PATCH 15/56] adding parallel BCS files --- Project/GPU/Include/parallelBoundaryConds.h | 314 +++++++++ Project/GPU/Src/parallelBoundaryConds.cu | 725 ++++++++++++++++++++ 2 files changed, 1039 insertions(+) create mode 100644 Project/GPU/Include/parallelBoundaryConds.h create mode 100644 Project/GPU/Src/parallelBoundaryConds.cu diff --git a/Project/GPU/Include/parallelBoundaryConds.h b/Project/GPU/Include/parallelBoundaryConds.h new file mode 100644 index 00000000..85978654 --- /dev/null +++ b/Project/GPU/Include/parallelBoundaryConds.h @@ -0,0 +1,314 @@ +#ifndef PARALLEL_BOUNDARYCONDS_H +#define PARALLEL_BOUNDARYCONDS_H + +#include "simData.h" +#include "boundaryConds.h" +#include "parallelEnv.h" + +//! Boundary Conditions for a data structure that has been distributed across ranks +/*! + @par + Base class for implementations of different boundary conditions across a distributed data structure. Contains common functions + used by more than one Boundary Condition type. + The fields to which the boundary conditions are applied are those passed into + the function apply, not those in the SimData class. +*/ +class ParallelBcs : public Bcs +{ + + public: + + ParallelEnv * env; //!< Pointer to ParallelEnv class containing platform specific info such as MPI details + + int xPeriodic, yPeriodic, zPeriodic; + + //! Constructor + /*! + Calls constructor of base class to store the pointer to the Data class and ParallelEnv class. + + @param[in] *data pointer to Data class + @param[in] *env pointer to ParallelEnv class + @sa Bcs::Bcs + */ + ParallelBcs(Data *data, ParallelEnv *env, int xPeriodic=1, int yPeriodic=1, int zPeriodic=1) : Bcs(data), env(env) + { + env->setParallelDecomposition(xPeriodic, yPeriodic, zPeriodic); + } + + virtual ~ParallelBcs() { } //!< Destructor + + /*! + Exchanges buffers packed with ghost cells with neighbouring subdomains using MPI. + + @param[in] *sendToLeftBuf pointer to the buffer contaning ghost cells at the left (front, bottom) face, + to be sent to the left (front, bottom) neighbour process + @param[in] *sendToRightBuf pointer to the buffer contaning ghost cells at the right (back, top) face, + to be sent to the right (back, top) neighbour process + @param[out] *recvFromLeftBuf buffer for receiving ghost cells from the left (front, bottom) process + @param[out] *recvFromRightBuf buffer for receiving ghost cells from the right (back, top) process + @param[in] leftNeighbour id of the left (front, bottom) process in the global MPI communicator + @param[in] rightNeighbour id of the right (back, top) process in the global MPI communicator + @param[in] numCellsSent number of cells in the ghost region + */ + void swapGhostBuffers(double *sendToLeftBuf, double *sendToRightBuf, double *recvFromLeftBuf, + double *recvFromRightBuf, int leftNeighbour, int rightNeighbour, int numCellsSent); + + /*! + For a particular state vector (cons, prims, aux) copies cells along the left and right faces + of the physical (non-ghost) cells in a subdomain and packs them into buffers for MPI communication to + another process. 
+ + @param[out] *sendToLeftBuf pointer to the buffer to pack with cells at the left face, + to be sent to the left neighbour process + @param[out] *sendToRightBuf pointer to the buffer to pack with cells at the right face, + to be sent to the right neighbour process + @param[in] *stateVector pointer to cons, prims or aux array + @param[in] nVars number of variables in the cons, prims or aux array + */ + void packXBuffer(double *sendToLeftBuf, double *sendToRightBuf, double *stateVector, int nVars); + + /*! + For a particular state vector (cons, prims, aux) copies cells received from a neighbour process into the ghost + cell region at the left and right faces of a subdomain. + + @param[out] *sendToLeftBuf pointer to the buffer to pack with cells at the left face, + to be sent to the left neighbour process + @param[out] *sendToRightBuf pointer to the buffer to pack with cells at the right face, + to be sent to the right neighbour process + @param[in] *stateVector pointer to cons, prims or aux array + @param[in] nVars number of variables in the cons, prims or aux array + */ + void unpackXBuffer(double *recvFromLeftBuf, double *recfFromRightBuf, double *stateVector, int nVars); + + /*! + For a particular state vector (cons, prims, aux) copies cells along the front and back faces + of the physical (non-ghost) cells in a subdomain and packs them into buffers for MPI communication to + another process. + + @param[out] *sendToLeftBuf pointer to the buffer to pack with cells at the front face, + to be sent to the front neighbour process + @param[out] *sendToRightBuf pointer to the buffer to pack with cells at the back face, + to be sent to the back neighbour process + @param[in] *stateVector pointer to cons, prims or aux array + @param[in] nVars number of variables in the cons, prims or aux array + */ + void packYBuffer(double *sendToLeftBuf, double *sendToRightBuf, double *stateVector, int nVars); + + /*! + For a particular state vector (cons, prims, aux) copies cells received from a neighbour process into the ghost + cell region at the front and back faces of a subdomain. + + @param[out] *sendToLeftBuf pointer to the buffer to pack with cells at the front face, + to be sent to the front neighbour process + @param[out] *sendToRightBuf pointer to the buffer to pack with cells at the back face, + to be sent to the back neighbour process + @param[in] *stateVector pointer to cons, prims or aux array + @param[in] nVars number of variables in the cons, prims or aux array + */ + void unpackYBuffer(double *recvFromLeftBuf, double *recfFromRightBuf, double *stateVector, int nVars); + + /*! + For a particular state vector (cons, prims, aux) copies cells received from a neighbour process into the ghost + cell region at the bottom and top faces of a subdomain. + + @param[out] *sendToLeftBuf pointer to the buffer to pack with cells at the bottom face, + to be sent to the bottom neighbour process + @param[out] *sendToRightBuf pointer to the buffer to pack with cells at the top face, + to be sent to the top neighbour process + @param[in] *stateVector pointer to cons, prims or aux array + @param[in] nVars number of variables in the cons, prims or aux array + */ + void packZBuffer(double *sendToLeftBuf, double *sendToRightBuf, double *stateVector, int nVars); + +/*! + For a particular state vector (cons, prims, aux) copies cells received from a neighbour process into the ghost + cell region at the bottom and top faces of a subdomain. 
+ + @param[out] *sendToLeftBuf pointer to the buffer to pack with cells at the bottom face, + to be sent to the bottom neighbour process + @param[out] *sendToRightBuf pointer to the buffer to pack with cells at the top face, + to be sent to the top neighbour process + @param[in] *stateVector pointer to cons, prims or aux array + @param[in] nVars number of variables in the cons, prims or aux array + */ + void unpackZBuffer(double *recvFromLeftBuf, double *recfFromRightBuf, double *stateVector, int nVars); + +}; + +//! Outflow boundary conditions for a data structure that has been distributed across ranks +/*! + Imposes flows that exit the domain freely at all boundaries, analogous to a + domain that extends to infinity in each direction. + All ghost cells are identical to their nearest physical cell.
+  For left-right reconstruction:
+  Before...
+  ______________________________
+  |0|1|2|3|4||5|6|..... |12||13||14|15|16|17|
+  |0|1|2|3|4||5|6|..... |12||13||14|15|16|17|
+
+
+  After....
+  ______________________________
+  |4|4|4|4||4|5|6|..... |12||13||13|13|13|13|
+  |4|4|4|4||4|5|6|..... |12||13||13|13|13|13|
+
+
+ ..and similar in other directions. +*/ +class ParallelOutflow : public ParallelBcs +{ + public: + //! Constructor + /*! + Calls constructor of base class to store the pointer to the Data class. + + @param[in] *data pointer to Data class + @sa ParallelBcs::ParallelBcs + */ + ParallelOutflow(Data * data, ParallelEnv *env) : ParallelBcs(data, env, xPeriodic=0, yPeriodic=0, zPeriodic=0) { } + + virtual ~ParallelOutflow() { } //!< Destructor + + //! Application function + /*! + Applies the Outflow boundary conditions to the ghost cells. + + @param[in, out] *cons pointer to the conservative (sized) vector + @param[in, out] *prims optional pointer to the primitive vector + @param[in, out] *aux optional pointer to the primitive vector + @sa Bcs::apply + */ + void apply(double * cons, double * prims = NULL, double * aux = NULL); + + /*! + Applies the Outflow boundary conditions to the ghost cells of subdomains that have an external face along + the x dimension. + + @param[in, out] *stateVector pointer to one of cons, prims, aux + @param[in] nVars number of variables in the cons, prims or aux array + */ + void setXBoundary(double *stateVector, int nVars); + + /*! + Applies the Outflow boundary conditions to the ghost cells of subdomains that have an external face along + the y dimension. + + @param[in, out] *stateVector pointer to one of cons, prims, aux + @param[in] nVars number of variables in the cons, prims or aux array + */ + void setYBoundary(double *stateVector, int nVars); + + /*! + Applies the Outflow boundary conditions to the ghost cells of subdomains that have an external face along + the z dimension. + + @param[in, out] *stateVector pointer to one of cons, prims, aux + @param[in] nVars number of variables in the cons, prims or aux array + */ + void setZBoundary(double *stateVector, int nVars); +}; + + +//! Periodic boundary conditions for a data structure that has been distributed across ranks +/*! + Flows that exit across one domain boundary re-enter at the opposing + end. I.e. the N ghost cells at one edge of the domain are set to the values + of the N physical cells before the ghost cells at the opposing edge. + + For left-right reconstruction:
+  (Note that the lower and upper halves of each row will lie on different ranks)
+  Before...
+  ____________________________
+  |0|1|2|3||4|5|6|..... |13||14|15|16|17|
+  |0|1|2|3||4|5|6|..... |13||14|15|16|17|
+
+  After....
+  ____________________________
+  |10|11|12|13||4|5|6|..... |13||4|5|6|7|
+  |10|11|12|13||4|5|6|..... |13||4|5|6|7|
+
+ ..and similar in other directions. + +*/ +class ParallelPeriodic : public ParallelBcs +{ + + public: + + //! Constructor + /*! + Calls constructor of base class to store the pointer to the Data class and ParallelEnv class. + + @param[in] *data pointer to Data class + @param[in] *env pointer to ParallelEnv class + @sa ParallelBcs::ParallelBcs + */ + ParallelPeriodic(Data * data, ParallelEnv * env) : ParallelBcs(data, env, xPeriodic=1, yPeriodic=1, zPeriodic=1) { } + + virtual ~ParallelPeriodic() { } //!< Destructor + + //! Application function + /*! + Applies the Periodic boundary conditions to the ghost cells. + + @param[in, out] *cons pointer to the conservative (sized) vector + @param[in, out] *prims optional pointer to the primitive vector + @param[in, out] *aux optional pointer to the primitive vector + @sa Bcs::apply + */ + void apply(double * cons, double * prims = NULL, double * aux = NULL); + +}; + +//! Flow boundary conditions +/*! + Boundary conditions used for the Kelvin Helmholtz instability. The + x-direction is periodic and y- and z-directions are outflow. +*/ + +class ParallelFlow : public ParallelBcs +{ + public: + //! Constructor + /*! + Calls constructor of base class to store the pointer to the Data class. + + @param[in] *data pointer to Data class + @sa ParallelBcs::ParallelBcs + */ + ParallelFlow(Data * data, ParallelEnv *env) : ParallelBcs(data, env, xPeriodic=1, yPeriodic=0, zPeriodic=0) { } + + virtual ~ParallelFlow() { } //!< Destructor + + //! Application function + /*! + Applies the Outflow boundary conditions to the ghost cells. + + @param[in, out] *cons pointer to the conservative (sized) vector + @param[in, out] *prims optional pointer to the primitive vector + @param[in, out] *aux optional pointer to the primitive vector + @sa Bcs::apply + */ + void apply(double * cons, double * prims = NULL, double * aux = NULL); + + /*! + Applies the Outflow boundary conditions to the ghost cells of subdomains that have an external face along + the y dimension. + + @param[in, out] *stateVector pointer to one of cons, prims, aux + @param[in] nVars number of variables in the cons, prims or aux array + */ + void setYBoundary(double *stateVector, int nVars); + + /*! + Applies the Outflow boundary conditions to the ghost cells of subdomains that have an external face along + the z dimension. + + @param[in, out] *stateVector pointer to one of cons, prims, aux + @param[in] nVars number of variables in the cons, prims or aux array + */ + void setZBoundary(double *stateVector, int nVars); +}; + + +#endif diff --git a/Project/GPU/Src/parallelBoundaryConds.cu b/Project/GPU/Src/parallelBoundaryConds.cu new file mode 100644 index 00000000..219d589b --- /dev/null +++ b/Project/GPU/Src/parallelBoundaryConds.cu @@ -0,0 +1,725 @@ +#include "parallelBoundaryConds.h" +#include "mpi.h" +#include "platformEnv.h" +#include + +// TODO -- Using three arrays here means we can keep the same (i,j,k) order for each neighbour direction. Decide if this is worth it. 
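The helpers declared in parallelBoundaryConds.h above implement a standard halo exchange: pack the outermost physical slices into contiguous buffers, swap them with the neighbouring ranks via MPI_Sendrecv, then unpack the received buffers into the ghost slices. A minimal stand-alone sketch of that pattern, for one variable on a 1-D subdomain with Ng ghost cells per side, is given below; the names exchangeX and nxLocal are invented for the illustration and do not appear in this patch.

#include <mpi.h>
#include <vector>

// Minimal sketch of the pack / swap / unpack halo exchange for one variable on a
// 1-D subdomain. u holds Ng ghost cells, nxLocal-2*Ng physical cells, Ng ghost cells.
// All names here are illustrative only.
void exchangeX(std::vector<double> &u, int Ng, MPI_Comm cart)
{
  int left, right;
  MPI_Cart_shift(cart, 0, 1, &left, &right);      // neighbour ranks in the x direction

  const int nxLocal = static_cast<int>(u.size());
  std::vector<double> sendL(Ng), sendR(Ng), recvL(Ng), recvR(Ng);

  // Pack: first and last Ng physical cells
  for (int i(0); i < Ng; i++) {
    sendL[i] = u[Ng + i];
    sendR[i] = u[nxLocal - 2*Ng + i];
  }

  // Swap: send left / receive from right, then send right / receive from left
  MPI_Status status;
  MPI_Sendrecv(sendL.data(), Ng, MPI_DOUBLE, left,  100,
               recvR.data(), Ng, MPI_DOUBLE, right, 100, cart, &status);
  MPI_Sendrecv(sendR.data(), Ng, MPI_DOUBLE, right, 100,
               recvL.data(), Ng, MPI_DOUBLE, left,  100, cart, &status);

  // Unpack: fill the ghost cells at each end
  for (int i(0); i < Ng; i++) {
    u[i] = recvL[i];
    u[nxLocal - Ng + i] = recvR[i];
  }
}

The functions that follow have the same structure, with the extra loops over variables and over the y and z planes, and the outflow variants additionally overwrite the ghost cells on any face that has no neighbouring rank.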
+#define ID_XBUFF(variable, gdx, jdx, kdx) ((variable)*(d->Ng)*(d->Ny)*(d->Nz) + (gdx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) +#define ID_YBUFF(variable, idx, gdx, kdx) ((variable)*(d->Nx)*(d->Ng)*(d->Nz) + (idx)*(d->Ng)*(d->Nz) + (gdx)*(d->Nz) + (kdx)) +#define ID_ZBUFF(variable, idx, jdx, gdx) ((variable)*(d->Nx)*(d->Ny)*(d->Ng) + (idx)*(d->Ny)*(d->Ng) + (jdx)*(d->Ng) + (gdx)) + +void ParallelBcs::swapGhostBuffers(double *sendToLeftBuf, double *sendToRightBuf, double *recvFromLeftBuf, + double *recvFromRightBuf, int leftNeighbour, int rightNeighbour, int numCellsSent){ + + // MPI message vars + int tag = 100; + MPI_Status status; + + // Send to left and receive from right neighbour process + MPI_Sendrecv(sendToLeftBuf, numCellsSent, MPI_DOUBLE, + leftNeighbour, tag, + recvFromRightBuf, numCellsSent, MPI_DOUBLE, + rightNeighbour, tag, + env->mpiCartesianComm, &status); + // Send to right and receive from left neighbour process + MPI_Sendrecv(sendToRightBuf, numCellsSent, MPI_DOUBLE, + rightNeighbour, tag, + recvFromLeftBuf, numCellsSent, MPI_DOUBLE, + leftNeighbour, tag, + env->mpiCartesianComm, &status); +} + +void ParallelBcs::packXBuffer(double *sendToLeftBuf, double *sendToRightBuf, double *stateVector, int nVars){ + Data * d(this->data); + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Ng; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + // Prepare buffer to send left + sendToLeftBuf[ID_XBUFF(var, i, j, k)] = stateVector[ID(var, d->Ng + i, j, k)]; + // Prepare buffer to send right + sendToRightBuf[ID_XBUFF(var, i, j, k)] = stateVector[ID(var, d->Nx-(2*d->Ng) + i, j, k)]; + } + } + } + } +} + +void ParallelBcs::unpackXBuffer(double *recvFromLeftBuf, double *recvFromRightBuf, double *stateVector, int nVars){ + Data * d(this->data); + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Ng; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + // Unpack buffer from right neighbour + stateVector[ID(var, d->Nx - d->Ng + i, j, k)] = recvFromRightBuf[ID_XBUFF(var, i, j, k)]; + // Unpack buffer from left neighbour + stateVector[ID(var, i, j, k)] = recvFromLeftBuf[ID_XBUFF(var, i, j, k)]; + } + } + } + } +} + +void ParallelBcs::packYBuffer(double *sendToLeftBuf, double *sendToRightBuf, double *stateVector, int nVars){ + Data * d(this->data); + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ng; j++) { + for (int k(0); k < d->Nz; k++) { + // Prepare buffer to send left + sendToLeftBuf[ID_YBUFF(var, i, j, k)] = stateVector[ID(var, i, d->Ng + j, k)]; + // Prepare buffer to send right + sendToRightBuf[ID_YBUFF(var, i, j, k)] = stateVector[ID(var, i, d->Ny-(2*d->Ng) + j, k)]; + } + } + } + } +} + +void ParallelBcs::unpackYBuffer(double *recvFromLeftBuf, double *recvFromRightBuf, double *stateVector, int nVars){ + Data * d(this->data); + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ng; j++) { + for (int k(0); k < d->Nz; k++) { + // Unpack buffer from right neighbour + stateVector[ID(var, i, d->Ny - d->Ng + j, k)] = recvFromRightBuf[ID_YBUFF(var, i, j, k)]; + // Unpack buffer from left neighbour + stateVector[ID(var, i, j, k)] = recvFromLeftBuf[ID_YBUFF(var, i, j, k)]; + } + } + } + } +} + +void ParallelBcs::packZBuffer(double *sendToLeftBuf, double *sendToRightBuf, double *stateVector, int nVars){ + Data * d(this->data); + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < 
d->Ny; j++) { + for (int k(0); k < d->Ng; k++) { + // Prepare buffer to send left + sendToLeftBuf[ID_ZBUFF(var, i, j, k)] = stateVector[ID(var, i, j, d->Ng + k)]; + // Prepare buffer to send right + sendToRightBuf[ID_ZBUFF(var, i, j, k)] = stateVector[ID(var, i, j, d->Nz-(2*d->Ng) + k)]; + } + } + } + } +} + +void ParallelBcs::unpackZBuffer(double *recvFromLeftBuf, double *recvFromRightBuf, double *stateVector, int nVars){ + Data * d(this->data); + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ng; j++) { + for (int k(0); k < d->Nz; k++) { + // Unpack buffer from right neighbour + stateVector[ID(var, i, j, d->Nz - d->Ng + k)] = recvFromRightBuf[ID_ZBUFF(var, i, j, k)]; + // Unpack buffer from left neighbour + stateVector[ID(var, i, j, k)] = recvFromLeftBuf[ID_ZBUFF(var, i, j, k)]; + } + } + } + } +} + +void ParallelPeriodic::apply(double * cons, double * prims, double * aux) +{ + // Syntax + Data * d(this->data); + + // Allocate one ghost region buffer array the size of the largest ghost region + int maxSendBufSize = std::max(std::max(d->Ncons, d->Nprims), d->Naux) * d->Ng; + if (d->Ny > 1) { + maxSendBufSize *= std::max(d->Nx, d->Ny); + } + if (d->Nz > 1) { + maxSendBufSize *= std::max(std::min(d->Nx, d->Ny), (d->Nz)); + } + + // TODO -- Could do left and right halo exchange separately and allocate half as many buffers but this would + // add twice as many loops + + // Allocate temporary buffers for ghost region exchange + // TODO -- should allocate this once at beginning of run + double *sendToLeftBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *sendToRightBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *recvFromRightBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *recvFromLeftBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + + int numCellsSent; + + // x dimension + + // Cons + numCellsSent = d->Ncons * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + } + + if (d->Ny > 1) { + // y dimension + + // Cons + numCellsSent = d->Ncons * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + 
swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + } + } + + + if (d->Nz > 1) { + // y dimension + + // Cons + numCellsSent = d->Ncons * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + } + } + + // Todo -- allocate and free in constructor/destructor + free(sendToLeftBuf); + free(sendToRightBuf); + free(recvFromRightBuf); + free(recvFromLeftBuf); + +} + +void ParallelOutflow::setXBoundary(double *stateVector, int nVars){ + // Syntax + Data * d(this->data); + + // Left boundary + // TODO -- could technically only check this once per [cons, aux, prims] but time to check should be negligible + if (env->isNeighbourExternal(0, 0)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Ng; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + stateVector[ID(var, i, j, k)] = stateVector[ID(var, d->Ng, j, k)]; + } + } + } + } + } + + // Right boundary + if (env->isNeighbourExternal(0, 1)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Ng; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + stateVector[ID(var, d->Nx - d->Ng + i, j, k)] = stateVector[ID(var, d->Nx - d->Ng - 1, j, k)]; + } + } + } + } + } +} + + +void ParallelOutflow::setYBoundary(double *stateVector, int nVars){ + // Syntax + Data * d(this->data); + + // Front boundary + if (env->isNeighbourExternal(1, 0)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ng; j++) { + for (int k(0); k < d->Nz; k++) { + // Front + stateVector[ID(var, i, j, k)] = stateVector[ID(var, i, d->Ng, k)]; + } + } + } + } + } + + // Back boundary + if (env->isNeighbourExternal(1, 1)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ng; j++) { + for (int k(0); k < d->Nz; k++) { + // Back + stateVector[ID(var, i, d->Ny - d->Ng + j, k)] = stateVector[ID(var, i, d->Ny - d->Ng - 1, k)]; + } + } + } + } + } +} + +void ParallelOutflow::setZBoundary(double *stateVector, int nVars){ + // 
Syntax + Data * d(this->data); + + // Bottom boundary + if (env->isNeighbourExternal(2, 0)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Ng; k++) { + // Bottom + stateVector[ID(var, i, j, k)] = stateVector[ID(var, i, j, d->Ng)]; + } + } + } + } + } + + // Top boundary + if (env->isNeighbourExternal(2, 1)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Ng; k++) { + // Top + stateVector[ID(var, i, j, d->Nz - d->Ng + k)] = stateVector[ID(var, i, j, d->Nz - d->Ng - 1)]; + } + } + } + } + } +} + + + +void ParallelOutflow::apply(double * cons, double * prims, double * aux) +{ + // Syntax + Data * d(this->data); + + // Allocate one ghost region buffer array the size of the largest ghost region + int maxSendBufSize = std::max(std::max(d->Ncons, d->Nprims), d->Naux) * d->Ng; + if (d->Ny > 1) { + maxSendBufSize *= std::max(d->Nx, d->Ny); + } + if (d->Nz > 1) { + maxSendBufSize *= std::max(std::min(d->Nx, d->Ny), (d->Nz)); + } + + // TODO -- Could do left and right halo exchange separately and allocate half as many buffers but this would + // add twice as many loops + + // Allocate temporary buffers for ghost region exchange + // TODO -- should allocate this once at beginning of run + double *sendToLeftBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *sendToRightBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *recvFromRightBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *recvFromLeftBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + + int numCellsSent; + + // x dimension + + // Cons + numCellsSent = d->Ncons * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + setXBoundary(cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + setXBoundary(prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + setXBoundary(aux, d->Naux); + } + + if (d->Ny > 1) { + // y dimension + + // Cons + numCellsSent = d->Ncons * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + setYBoundary(cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, 
env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + setYBoundary(prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + setYBoundary(aux, d->Naux); + } + } + + + if (d->Nz > 1) { + // y dimension + + // Cons + numCellsSent = d->Ncons * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + setZBoundary(cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + setZBoundary(prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + setZBoundary(aux, d->Naux); + } + } + + free(sendToLeftBuf); + free(sendToRightBuf); + free(recvFromRightBuf); + free(recvFromLeftBuf); + +} + + +// TODO -- these are shared by ParallelOutflow, so could be added to the ParallelBcs base class. 
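The TODO above points out that the face-copy helpers defined just below for ParallelFlow duplicate those in ParallelOutflow. A minimal sketch of the refactor it suggests, using stand-in names (GridStub, FaceCopyBcs, OutflowLike) that are not part of this patch: the shared outflow-style fill lives once in a base class and both boundary-condition types inherit it.

#include <vector>

// Sketch only: stand-in types illustrating how the duplicated helpers could be
// hoisted into the common base class, as the TODO suggests.
struct GridStub { int N; int Ng; };           // plays the role of Data

class FaceCopyBcs                             // plays the role of ParallelBcs
{
  public:
    explicit FaceCopyBcs(GridStub g) : g(g) { }

  protected:
    GridStub g;

    // Shared outflow-style fill: copy the nearest physical cell into each ghost cell.
    void setBoundary(std::vector<double> &u)
    {
      for (int i(0); i < g.Ng; i++) {
        u[i] = u[g.Ng];                             // left/front/bottom ghosts
        u[g.N - g.Ng + i] = u[g.N - g.Ng - 1];      // right/back/top ghosts
      }
    }
};

class OutflowLike : public FaceCopyBcs        // ParallelOutflow and ParallelFlow would both reuse it
{
  public:
    using FaceCopyBcs::FaceCopyBcs;
    void apply(std::vector<double> &cons) { setBoundary(cons); }
};

In the real classes the loops run over all variables and the external-face checks (env->isNeighbourExternal) stay as they are; only the duplicated bodies would move.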
+void ParallelFlow::setYBoundary(double *stateVector, int nVars){ + // Syntax + Data * d(this->data); + + // Front boundary + if (env->isNeighbourExternal(1, 0)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ng; j++) { + for (int k(0); k < d->Nz; k++) { + // Front + stateVector[ID(var, i, j, k)] = stateVector[ID(var, i, d->Ng, k)]; + } + } + } + } + } + + // Back boundary + if (env->isNeighbourExternal(1, 1)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ng; j++) { + for (int k(0); k < d->Nz; k++) { + // Back + stateVector[ID(var, i, d->Ny - d->Ng + j, k)] = stateVector[ID(var, i, d->Ny - d->Ng - 1, k)]; + } + } + } + } + } +} + +void ParallelFlow::setZBoundary(double *stateVector, int nVars){ + // Syntax + Data * d(this->data); + + // Bottom boundary + if (env->isNeighbourExternal(2, 0)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Ng; k++) { + // Bottom + stateVector[ID(var, i, j, k)] = stateVector[ID(var, i, j, d->Ng)]; + } + } + } + } + } + + // Top boundary + if (env->isNeighbourExternal(2, 1)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Ng; k++) { + // Top + stateVector[ID(var, i, j, d->Nz - d->Ng + k)] = stateVector[ID(var, i, j, d->Nz - d->Ng - 1)]; + } + } + } + } + } +} + + + +void ParallelFlow::apply(double * cons, double * prims, double * aux) +{ + // Syntax + Data * d(this->data); + + // Allocate one ghost region buffer array the size of the largest ghost region + int maxSendBufSize = std::max(std::max(d->Ncons, d->Nprims), d->Naux) * d->Ng; + if (d->Ny > 1) { + maxSendBufSize *= std::max(d->Nx, d->Ny); + } + if (d->Nz > 1) { + maxSendBufSize *= std::max(std::min(d->Nx, d->Ny), (d->Nz)); + } + + // TODO -- Could do left and right halo exchange separately and allocate half as many buffers but this would + // add twice as many loops + + // Allocate temporary buffers for ghost region exchange + // TODO -- should allocate this once at beginning of run + double *sendToLeftBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *sendToRightBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *recvFromRightBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *recvFromLeftBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + + int numCellsSent; + + // x dimension + + // Cons + numCellsSent = d->Ncons * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + 
unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + } + + if (d->Ny > 1) { + // y dimension + + // Cons + numCellsSent = d->Ncons * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + setYBoundary(cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + setYBoundary(prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + setYBoundary(aux, d->Naux); + } + } + + + if (d->Nz > 1) { + // y dimension + + // Cons + numCellsSent = d->Ncons * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + setZBoundary(cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + setZBoundary(prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + setZBoundary(aux, d->Naux); + } + } + + free(sendToLeftBuf); + free(sendToRightBuf); + free(recvFromRightBuf); + free(recvFromLeftBuf); + +} + From de65862a9e8ed8a88100fbae47c57785053096f5 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 1 Sep 2020 13:05:57 +0100 Subject: [PATCH 16/56] attempt at building with mpicc, not yet working --- Project/GPU/Makefile | 36 ++++++++++++++----- Project/GPU/Src/main.cu | 13 +++---- ...ndaryConds.cu => parallelBoundaryConds.cc} | 0 .../Src/{parallelEnv.cu => parallelEnv.cc} | 0 ...arallelSaveData.cu => parallelSaveData.cc} | 0 5 files changed, 35 insertions(+), 14 deletions(-) rename Project/GPU/Src/{parallelBoundaryConds.cu => parallelBoundaryConds.cc} (100%) rename Project/GPU/Src/{parallelEnv.cu => parallelEnv.cc} (100%) rename Project/GPU/Src/{parallelSaveData.cu => parallelSaveData.cc} (100%) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index abde3e2c..d714390b 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -4,10 +4,10 @@ # Compiler # We will compile most object files with (NVCC), 
other than those object files that use MPI. If using MPI, set CC_CPU=mpicc. In that case, the object files which use MPI will be compiled with mpicc. For linking, CC_CPU will be used (this should be set to mpicc if using MPI) -CC_CPU = g++ +CC_CPU = mpic++ CC_GPU = nvcc -USE_MPI=0 +USE_MPI=1 # Module directory MODULE_DIR = ./Src @@ -25,7 +25,7 @@ RTFIND_INC_DIR = ./CminpackLibrary/Include RTFIND_SRC_DIR = ./CminpackLibrary/Src # C++ compiler flags -CXXFLAGS = -fopenmp -Wall +CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 # NVIDIA compiler flags NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_61 -Xcompiler -fopenmp -Xcompiler -Wall @@ -61,16 +61,18 @@ SERIAL_SRCS = serialSaveData.cu \ # Headers HDRS = ${SRCS:.cu=.h} cudaErrorCheck.h - ifeq ($(USE_MPI), 1) - SRCS += ${PARALLEL_SRCS} + ENV_SRCS = ${PARALLEL_SRCS} else - SRCS += ${SERIAL_SRCS} + ENV_SRCS = ${SERIAL_SRCS} endif # Objects OBJS = ${SRCS:.cu=.o} +# Serial or Parallel CPU files. These cannot contain device code +ENV_OBJS = ${ENV_SRCS:.cu=.o} + # Rootfinder objects RTFIND_OBJS = $(RTFIND_SRC_DIR)/dogleg.o \ $(RTFIND_SRC_DIR)/dpmpar.o \ @@ -106,7 +108,7 @@ run : $(RTFIND) $(EXEC) @./$(EXEC) clean : - rm -f $(EXEC) $(OBJS) *.gch + rm -f $(EXEC) $(OBJS) $(ENV_OBJS) *.gch ################# @@ -170,18 +172,36 @@ boundaryConds.o : $(MODULE_DIR)/boundaryConds.cu $(INC_DIR)/boundaryConds.h serialSaveData.o : $(MODULE_DIR)/serialSaveData.cu $(INC_DIR)/serialSaveData.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) + serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) fluxVectorSplitting.o : $(MODULE_DIR)/fluxVectorSplitting.cu $(INC_DIR)/fluxVectorSplitting.h $(INC_DIR)/weno.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp +#parallel sources -- these need to be compiled with the MPI library linked, which can be accomplished by compiling with mpic++ + +parallelSaveData.o : $(MODULE_DIR)/parallelSaveData.cc $(INC_DIR)/parallelSaveData.h + $(CC_CPU) $< $(CXXFLAGS) -I$(INC_DIR) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 + +parallelBoundaryConds.o : $(MODULE_DIR)/parallelBoundaryConds.cc $(INC_DIR)/parallelBoundaryConds.h + $(CC_CPU) $< $(CXXFLAGS) -I$(INC_DIR) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 + +parallelEnv.o : $(MODULE_DIR)/parallelEnv.cc $(INC_DIR)/parallelEnv.h + $(CC_CPU) $< $(CXXFLAGS) -I$(INC_DIR) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 + +#end parallel sources + + +cpu_link.o : $(ENV_OBJS) + $(CC_CPU) $^ -o $@ $(CXXFLAGS) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 + # link device code using CC_GPU (g++/mpi++ can't do this) gpu_link.o : $(RTFIND_OBJS) $(OBJS) $(CC_GPU) $^ -dlink -o $@ $(NVFLAGS) -lcudadevrt # Executable -main : gpu_link.o $(RTFIND_OBJS) $(OBJS) +main : gpu_link.o $(RTFIND_OBJS) $(OBJS) cpu_link.o $(CC_CPU) $^ -o $@ $(CXXFLAGS) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 buildRootfinder: diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 4be013f4..1898ecf4 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -5,9 +5,10 @@ #include "srmhd.h" #include "srrmhd.h" #include "boundaryConds.h" +#include "parallelBoundaryConds.h" #include "rkSplit.h" #include "SSP2.h" -#include "serialSaveData.h" +#include "parallelSaveData.h" #include "fluxVectorSplitting.h" #include "serialEnv.h" @@ -42,7 +43,7 @@ int main(int argc, char *argv[]) { double sigma(0); bool output(true); int safety(180); - int nxRanks(4); + int nxRanks(1); int nyRanks(1); int 
nzRanks(1); @@ -54,7 +55,7 @@ int main(int argc, char *argv[]) { } } - SerialEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); + ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, cfl, Ng, gamma, sigma); @@ -64,15 +65,15 @@ int main(int argc, char *argv[]) { FVS fluxMethod(&data, &model); + ParallelFlow bcs(&data, &env); + Simulation sim(&data, &env); KHInstabilitySingleFluid init(&data, 1); - Flow bcs(&data); - SSP2 timeInt(&data, &model, &bcs, &fluxMethod); - SerialSaveData save(&data, &env); + ParallelSaveData save(&data, &env); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); diff --git a/Project/GPU/Src/parallelBoundaryConds.cu b/Project/GPU/Src/parallelBoundaryConds.cc similarity index 100% rename from Project/GPU/Src/parallelBoundaryConds.cu rename to Project/GPU/Src/parallelBoundaryConds.cc diff --git a/Project/GPU/Src/parallelEnv.cu b/Project/GPU/Src/parallelEnv.cc similarity index 100% rename from Project/GPU/Src/parallelEnv.cu rename to Project/GPU/Src/parallelEnv.cc diff --git a/Project/GPU/Src/parallelSaveData.cu b/Project/GPU/Src/parallelSaveData.cc similarity index 100% rename from Project/GPU/Src/parallelSaveData.cu rename to Project/GPU/Src/parallelSaveData.cc From a62c51ea73a6d3efd0ddf9233a18ffaec483f76f Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 1 Sep 2020 13:29:02 +0100 Subject: [PATCH 17/56] changed build strategy -- build everything with nvcc and link MPI libraries. This is simpler than doing some compilation with mpicc and some with nvcc but does require the user knowing the location of their MPI build. --- Project/GPU/Makefile | 29 ++++++++----------- ...ndaryConds.cc => parallelBoundaryConds.cu} | 1 + .../Src/{parallelEnv.cc => parallelEnv.cu} | 0 ...arallelSaveData.cc => parallelSaveData.cu} | 0 4 files changed, 13 insertions(+), 17 deletions(-) rename Project/GPU/Src/{parallelBoundaryConds.cc => parallelBoundaryConds.cu} (99%) rename Project/GPU/Src/{parallelEnv.cc => parallelEnv.cu} (100%) rename Project/GPU/Src/{parallelSaveData.cc => parallelSaveData.cu} (100%) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index d714390b..6cfdfbbb 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -3,12 +3,12 @@ # Compiler -# We will compile most object files with (NVCC), other than those object files that use MPI. If using MPI, set CC_CPU=mpicc. In that case, the object files which use MPI will be compiled with mpicc. 
For linking, CC_CPU will be used (this should be set to mpicc if using MPI) -CC_CPU = mpic++ CC_GPU = nvcc USE_MPI=1 +MPI_LIBRARY = /local/software/openmpi/3.0.0/gcc-cuda8.0 + # Module directory MODULE_DIR = ./Src @@ -24,6 +24,8 @@ RTFIND_INC_DIR = ./CminpackLibrary/Include # Cminpack source directory RTFIND_SRC_DIR = ./CminpackLibrary/Src +MPI_FLAGS = -lmpi -L${MPI_LIBRARY_PATH}/lib -I${MPI_LIBRARY_PATH}/include + # C++ compiler flags CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 @@ -181,28 +183,21 @@ fluxVectorSplitting.o : $(MODULE_DIR)/fluxVectorSplitting.cu $(INC_DIR)/fluxVect #parallel sources -- these need to be compiled with the MPI library linked, which can be accomplished by compiling with mpic++ -parallelSaveData.o : $(MODULE_DIR)/parallelSaveData.cc $(INC_DIR)/parallelSaveData.h - $(CC_CPU) $< $(CXXFLAGS) -I$(INC_DIR) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 +parallelSaveData.o : $(MODULE_DIR)/parallelSaveData.cu $(INC_DIR)/parallelSaveData.h + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) -parallelBoundaryConds.o : $(MODULE_DIR)/parallelBoundaryConds.cc $(INC_DIR)/parallelBoundaryConds.h - $(CC_CPU) $< $(CXXFLAGS) -I$(INC_DIR) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 +parallelBoundaryConds.o : $(MODULE_DIR)/parallelBoundaryConds.cu $(INC_DIR)/parallelBoundaryConds.h + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) -parallelEnv.o : $(MODULE_DIR)/parallelEnv.cc $(INC_DIR)/parallelEnv.h - $(CC_CPU) $< $(CXXFLAGS) -I$(INC_DIR) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 +parallelEnv.o : $(MODULE_DIR)/parallelEnv.cu $(INC_DIR)/parallelEnv.h + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) #end parallel sources -cpu_link.o : $(ENV_OBJS) - $(CC_CPU) $^ -o $@ $(CXXFLAGS) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 - -# link device code using CC_GPU (g++/mpi++ can't do this) -gpu_link.o : $(RTFIND_OBJS) $(OBJS) - $(CC_GPU) $^ -dlink -o $@ $(NVFLAGS) -lcudadevrt - # Executable -main : gpu_link.o $(RTFIND_OBJS) $(OBJS) cpu_link.o - $(CC_CPU) $^ -o $@ $(CXXFLAGS) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 +main : $(RTFIND_OBJS) $(OBJS) $(ENV_OBJS) + $(CC_GPU) $^ -o $@ $(NVFLAGS) $(MPI_FLAGS) buildRootfinder: @cd $(RTFIND_DIR) && $(MAKE) objects diff --git a/Project/GPU/Src/parallelBoundaryConds.cc b/Project/GPU/Src/parallelBoundaryConds.cu similarity index 99% rename from Project/GPU/Src/parallelBoundaryConds.cc rename to Project/GPU/Src/parallelBoundaryConds.cu index 219d589b..45908767 100644 --- a/Project/GPU/Src/parallelBoundaryConds.cc +++ b/Project/GPU/Src/parallelBoundaryConds.cu @@ -7,6 +7,7 @@ #define ID_XBUFF(variable, gdx, jdx, kdx) ((variable)*(d->Ng)*(d->Ny)*(d->Nz) + (gdx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) #define ID_YBUFF(variable, idx, gdx, kdx) ((variable)*(d->Nx)*(d->Ng)*(d->Nz) + (idx)*(d->Ng)*(d->Nz) + (gdx)*(d->Nz) + (kdx)) #define ID_ZBUFF(variable, idx, jdx, gdx) ((variable)*(d->Nx)*(d->Ny)*(d->Ng) + (idx)*(d->Ny)*(d->Ng) + (jdx)*(d->Ng) + (gdx)) +#define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) void ParallelBcs::swapGhostBuffers(double *sendToLeftBuf, double *sendToRightBuf, double *recvFromLeftBuf, double *recvFromRightBuf, int leftNeighbour, int rightNeighbour, int numCellsSent){ diff --git a/Project/GPU/Src/parallelEnv.cc b/Project/GPU/Src/parallelEnv.cu similarity index 100% rename from Project/GPU/Src/parallelEnv.cc rename to Project/GPU/Src/parallelEnv.cu diff --git a/Project/GPU/Src/parallelSaveData.cc 
b/Project/GPU/Src/parallelSaveData.cu similarity index 100% rename from Project/GPU/Src/parallelSaveData.cc rename to Project/GPU/Src/parallelSaveData.cu From e9c5b2455c79bc493ddacb49d192688d05426ec8 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 1 Sep 2020 16:48:15 +0100 Subject: [PATCH 18/56] fix to bcs for parallel version --- Project/GPU/Include/boundaryConds.h | 82 +++++++++++++++++++++++++++-- Project/GPU/Makefile | 9 ++-- 2 files changed, 84 insertions(+), 7 deletions(-) diff --git a/Project/GPU/Include/boundaryConds.h b/Project/GPU/Include/boundaryConds.h index 8607d072..6db94089 100644 --- a/Project/GPU/Include/boundaryConds.h +++ b/Project/GPU/Include/boundaryConds.h @@ -21,8 +21,27 @@ class Bcs Constructor simply stores the pointer to the Data class. @param[in] *data pointer to the Data class + @param[in] *env pointer to the PlatformEnv class */ - Bcs(Data * data) : data(data) { } + Bcs(Data * data, PlatformEnv * env) : data(data) + { + data->bcsSet = 1; + } + + //TODO -- We may not want to allow creation of Bcs object without env in future + //! Constructor store data about simulation (needed for domain) + /*! + Constructor simply stores the pointer to the Data class. + + @param[in] *data pointer to the Data class + */ + + Bcs(Data * data) : data(data) + { + data->bcsSet = 1; + } + + virtual ~Bcs() { } //!< Destructor public: @@ -75,6 +94,8 @@ class Outflow : public Bcs */ Outflow(Data * data) : Bcs(data) { } + virtual ~Outflow() { } //!< Destructor + //! Application function /*! Applies the Outflow boundary conditions to the ghost cells. @@ -88,6 +109,40 @@ class Outflow : public Bcs }; +//! Out flow boundary conditions for the rotated 2D Brio-Wu +/*! + Using the conventional outflow BCs for the diagonal BW problem results in + shocks entering from along the main diagonal. This class deals with these + shocks. + Using this.apply behaves as if the BW problem has been rotated, as required. +*/ +class OutflowRotatedBW : public Bcs +{ +public: + //! Constructor + /*! + Calls constructor of base class to store the pointer to the Data class. + + @param[in] *data pointer to Data class + @sa Bcs::Bcs + */ + OutflowRotatedBW(Data * data) : Bcs(data) { } + + virtual ~OutflowRotatedBW() { } //!< Destructor + + //! Application function + /*! + Applies the Outflow boundary conditions to the ghost cells. + + @param[in, out] *cons pointer to the conservative (sized) vector + @param[in, out] *prims optional pointer to the primitive vector + @param[in, out] *aux optional pointer to the primitive vector + @sa Bcs::apply + */ + void apply(double * cons, double * prims = NULL, double * aux = NULL); +}; + + //! Periodic boundary conditions /*! Flows that exit across one domain boundary re-enter at the opposing @@ -122,6 +177,8 @@ class Periodic : public Bcs */ Periodic(Data * data) : Bcs(data) { } + virtual ~Periodic() { } //!< Destructor + //! Application function /*! Applies the Periodic boundary conditions to the ghost cells. @@ -135,16 +192,35 @@ class Periodic : public Bcs }; +//! Flow boundary conditions /*! - Boundary conditions for the Kelvin Helmholtz instability - x-direction is periodic and others are outflow + Boundary conditions used for the Kelvin Helmholtz instability. The + x-direction is periodic and y- and z-directions are outflow. */ class Flow : public Bcs { public: + //! Constructor + /*! + Calls constructor of base class to store the pointer to the Data class. 
+ + @param[in] *data pointer to Data class + @sa Bcs::Bcs + */ Flow(Data * data) : Bcs(data) { } + virtual ~Flow() { } //!< Destructor + + //! Application function + /*! + Applies the Flow boundary conditions to the ghost cells. + + @param[in, out] *cons pointer to the conservative (sized) vector + @param[in, out] *prims optional pointer to the primitive vector + @param[in, out] *aux optional pointer to the primitive vector + @sa Bcs::apply + */ void apply(double * cons, double * prims = NULL, double * aux = NULL); }; diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 6cfdfbbb..a88806e2 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -7,7 +7,7 @@ CC_GPU = nvcc USE_MPI=1 -MPI_LIBRARY = /local/software/openmpi/3.0.0/gcc-cuda8.0 +MPI_LIBRARY_PATH = /local/software/openmpi/3.0.0/gcc-cuda8.0 # Module directory MODULE_DIR = ./Src @@ -24,13 +24,14 @@ RTFIND_INC_DIR = ./CminpackLibrary/Include # Cminpack source directory RTFIND_SRC_DIR = ./CminpackLibrary/Src -MPI_FLAGS = -lmpi -L${MPI_LIBRARY_PATH}/lib -I${MPI_LIBRARY_PATH}/include +#MPI_FLAGS = -lmpi -L${MPI_LIBRARY_PATH}/lib -I${MPI_LIBRARY_PATH}/include +MPI_FLAGS = -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu -lmpichcxx -lmpich # C++ compiler flags CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 # NVIDIA compiler flags -NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_61 -Xcompiler -fopenmp -Xcompiler -Wall +NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 -Xcompiler -fopenmp -Xcompiler -Wall # Sources SRCS = main.cu \ @@ -139,7 +140,7 @@ simulation.o : $(MODULE_DIR)/simulation.cu $(INC_DIR)/simulation.h $(INC_DIR)/mo $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) main.o : $(MODULE_DIR)/main.cu $(INC_DIR)/simulation.h $(INC_DIR)/model.h $(INC_DIR)/simData.h $(INC_DIR)/initFunc.h - $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) $(MPI_FLAGS) weno.o : $(MODULE_DIR)/weno.cu $(INC_DIR)/weno.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) From ee4992a110a6d4141a8e861895e2d00884a364dc Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 1 Sep 2020 16:59:44 +0100 Subject: [PATCH 19/56] shortening time. 
Comparison to CPU version passes at this reduced end time --- Project/CPU/Src/main.cc | 2 +- Project/GPU/Src/main.cu | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index 3a76a067..8f76b365 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -27,7 +27,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.05); + double endTime(0.0005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 1898ecf4..acf10c94 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -37,14 +37,14 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.05); + double endTime(0.0005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); bool output(true); int safety(180); - int nxRanks(1); - int nyRanks(1); + int nxRanks(2); + int nyRanks(2); int nzRanks(1); char * ptr(0); From 4671878d100120d3d5a9c524ab9b08395caa4248 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Mon, 21 Sep 2020 11:01:22 +0100 Subject: [PATCH 20/56] tweaking srmhd.cu so that it matches the CPU version (both versions run on CPU currently) --- Project/CPU/Src/main.cc | 7 ++++--- Project/GPU/Src/main.cu | 10 +++++----- Project/GPU/Src/simulation.cu | 13 ++++++++----- Project/GPU/Src/srmhd.cu | 30 ++++++------------------------ 4 files changed, 23 insertions(+), 37 deletions(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index 8f76b365..ea21d219 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -6,6 +6,7 @@ #include "initFunc.h" #include "simData.h" #include "SSP2.h" +#include "RK2.h" #include "Euler.h" #include "weno.h" @@ -19,7 +20,7 @@ int main(int argc, char *argv[]) { // Set up domain int Ng(4); int nx(64); - int ny(16); + int ny(8); int nz(0); double xmin(-0.5); double xmax(0.5); @@ -44,7 +45,7 @@ int main(int argc, char *argv[]) { cfl, Ng, gamma, sigma); // Choose particulars of simulation - SRRMHD model(&data); + SRMHD model(&data); Weno3 weno(&data); @@ -56,7 +57,7 @@ int main(int argc, char *argv[]) { KHInstabilitySingleFluid init(&data, 1); - SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + RK2 timeInt(&data, &model, &bcs, &fluxMethod); SerialSaveData save(&data, &env, 0); diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index acf10c94..6ef7cd8b 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -29,7 +29,7 @@ int main(int argc, char *argv[]) { // Set up domain int Ng(4); int nx(64); - int ny(16); + int ny(8); int nz(0); double xmin(-0.5); double xmax(0.5); @@ -43,8 +43,8 @@ int main(int argc, char *argv[]) { double sigma(0); bool output(true); int safety(180); - int nxRanks(2); - int nyRanks(2); + int nxRanks(1); + int nyRanks(1); int nzRanks(1); char * ptr(0); @@ -61,7 +61,7 @@ int main(int argc, char *argv[]) { cfl, Ng, gamma, sigma); // Choose particulars of simulation - SRRMHD model(&data); + SRMHD model(&data); FVS fluxMethod(&data, &model); @@ -71,7 +71,7 @@ int main(int argc, char *argv[]) { KHInstabilitySingleFluid init(&data, 1); - SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + RK2 timeInt(&data, &model, &bcs, &fluxMethod); ParallelSaveData save(&data, &env); diff --git a/Project/GPU/Src/simulation.cu b/Project/GPU/Src/simulation.cu index d692a0a0..46d36208 100644 --- a/Project/GPU/Src/simulation.cu +++ b/Project/GPU/Src/simulation.cu @@ -156,7 +156,7 @@ void 
Simulation::evolve(bool output, int safety) // Save initial data if (output && save) { - +/* this->save->saveVar("rho", 11); this->save->saveVar("vx", 11); this->save->saveVar("vy", 11); @@ -167,8 +167,9 @@ void Simulation::evolve(bool output, int safety) this->save->saveVar("Bz", 11); this->save->saveVar("Ex", 11); this->save->saveVar("Ey", 11); - this->save->saveVar("Ez", 11); } - + this->save->saveVar("Ez", 11); +*/ + } while (d->t < d->endTime) { this->updateTime(); @@ -176,7 +177,7 @@ void Simulation::evolve(bool output, int safety) // Save data for animation if (output && save && d->iters%d->frameSkip==0) { // Save initial data - +/* this->save->saveVar("rho", 11); this->save->saveVar("vx", 11); this->save->saveVar("vy", 11); @@ -188,6 +189,7 @@ void Simulation::evolve(bool output, int safety) this->save->saveVar("Ex", 11); this->save->saveVar("Ey", 11); this->save->saveVar("Ez", 11); + */ } if (safety>0 && d->iters%safety==0) { @@ -200,7 +202,7 @@ void Simulation::evolve(bool output, int safety) // Save final state if (output && save) { // Save initial data - +/* this->save->saveVar("rho", 11); this->save->saveVar("vx", 11); this->save->saveVar("vy", 11); @@ -212,6 +214,7 @@ void Simulation::evolve(bool output, int safety) this->save->saveVar("Ex", 11); this->save->saveVar("Ey", 11); this->save->saveVar("Ez", 11); + */ } if (env->rank == 0){ diff --git a/Project/GPU/Src/srmhd.cu b/Project/GPU/Src/srmhd.cu index a37e41e6..c0b39205 100644 --- a/Project/GPU/Src/srmhd.cu +++ b/Project/GPU/Src/srmhd.cu @@ -14,7 +14,6 @@ #include #include #include -#include // Macro for getting array index #define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) @@ -44,6 +43,7 @@ SRMHD::SRMHD(Data * data) : Model(data) // Solutions for C2P all cells cudaHostAlloc((void **)&solution, sizeof(double)*2*data->Nx*data->Ny*data->Nz, cudaHostAllocPortable); + //solution = (double *) malloc(sizeof(double)*2*data->Nx*data->Ny*data->Nz); smartGuesses = 0; @@ -91,11 +91,8 @@ void SRMHD::fluxVector(double *cons, double *prims, double *aux, double *f, cons // Generate flux vector // Fx: flux in x-direction if (dir == 0) { - #pragma omp parallel for for (int i=0; i < d->Nx; i++) { - #pragma omp parallel for for (int j=0; j < d->Ny; j++) { - #pragma omp parallel for for (int k=0; k < d->Nz; k++) { // D f[ID(0, i, j, k)] = cons[ID(0, i, j, k)] * prims[ID(1, i, j, k)]; @@ -137,11 +134,8 @@ void SRMHD::fluxVector(double *cons, double *prims, double *aux, double *f, cons // Fy: flux in y-direction else if (dir==1) { - #pragma omp parallel for for (int i=0; i < d->Nx; i++) { - #pragma omp parallel for for (int j=0; j < d->Ny; j++) { - #pragma omp parallel for for (int k=0; k < d->Nz; k++) { // D f[ID(0, i, j, k)] = cons[ID(0, i, j, k)] * prims[ID(2, i, j, k)]; @@ -183,11 +177,8 @@ void SRMHD::fluxVector(double *cons, double *prims, double *aux, double *f, cons // Fz: flux in z-direction else { - #pragma omp parallel for for (int i=0; i < d->Nx; i++) { - #pragma omp parallel for for (int j=0; j < d->Ny; j++) { - #pragma omp parallel for for (int k=0; k < d->Nz; k++) { // D f[ID(0, i, j, k)] = cons[ID(0, i, j, k)] * prims[ID(3, i, j, k)]; @@ -238,7 +229,6 @@ void SRMHD::fluxVector(double *cons, double *prims, double *aux, double *f, cons void SRMHD::sourceTermSingleCell(double *cons, double *prims, double *aux, double *source, int i, int j, int k) { - #pragma omp parallel for for (int var=0; var < this->data->Ncons; var++) { if (var == 8) { // phi @@ -258,13 
+248,9 @@ void SRMHD::sourceTermSingleCell(double *cons, double *prims, double *aux, doubl void SRMHD::sourceTerm(double *cons, double *prims, double *aux, double *source) { - #pragma omp parallel for for (int i=0; i < this->data->Nx; i++) { - #pragma omp parallel for for (int j=0; j < this->data->Ny; j++) { - #pragma omp parallel for for (int k=0; k < this->data->Nz; k++) { - #pragma omp parallel for for (int var=0; var < this->data->Ncons; var++) { if (var == 8) { // phi @@ -335,7 +321,7 @@ void SRMHD::getPrimitiveVarsSingleCell(double *cons, double *prims, double *aux, double sol[2]; // Guess and solution vector double res[2]; // Residual/fvec vector int info; // Rootfinder flag - const double tol = 1.49011612e-8; // Tolerance of rootfinder + const double tol = 1.4e-8; // Tolerance of rootfinder const int lwa = 19; // Length of work array = n * (3*n + 13) / 2 double wa[lwa]; // Work array @@ -422,9 +408,9 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) // Syntax Data * d(this->data); // Solutions - double * solution; - cudaHostAlloc((void **)&solution, sizeof(double)*2*d->Nx*d->Ny*d->Nz, - cudaHostAllocPortable); + //double * solution; + //cudaHostAlloc((void **)&solution, sizeof(double)*2*d->Nx*d->Ny*d->Nz, + //cudaHostAllocPortable); // Hybrd1 set-up Args args; // Additional arguments structure @@ -432,7 +418,7 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) double sol[2]; // Guess and solution vector double res[2]; // Residual/fvec vector int info; // Rootfinder flag - const double tol = 1.49011612e-8; // Tolerance of rootfinder + const double tol = 1.49011612e-7; // Tolerance of rootfinder const int lwa = 19; // Length of work array = n * (3*n + 13) / 2 double wa[lwa]; // Work array std::vector fails; // Vector of failed structs. Stores location of failed cons2prims cells. @@ -542,11 +528,8 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) } - #pragma omp parallel for for (int i=0; i < d->Nx; i++) { - #pragma omp parallel for for (int j=0; j < d->Ny; j++) { - #pragma omp parallel for for (int k=0; k < d->Nz; k++) { // W aux[ID(1, i, j, k)] = 1 / sqrt(1 - solution[ID(0, i, j, k)]); @@ -598,7 +581,6 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) } // End j-loop } // End i-loop - cudaFreeHost(solution); } From 849d28569904e5ff862fc9ab8908530dc5a78f3d Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 22 Sep 2020 10:54:33 +0100 Subject: [PATCH 21/56] debugging tweaks. 
Reverse later --- Project/CPU/Makefile | 2 +- Project/CPU/Src/RK2.cc | 27 ++++++++++++++++++--------- Project/CPU/Src/srmhd.cc | 24 ++++++++++++++++-------- Project/GPU/Makefile | 4 ++-- Project/GPU/Src/main.cu | 2 +- 5 files changed, 38 insertions(+), 21 deletions(-) diff --git a/Project/CPU/Makefile b/Project/CPU/Makefile index 84d91915..e1df13f5 100644 --- a/Project/CPU/Makefile +++ b/Project/CPU/Makefile @@ -25,7 +25,7 @@ RTFIND_INC_DIR = ./CminpackLibrary/Include RTFIND_SRC_DIR = ./CminpackLibrary/Src # C++ compiler flags -CXXFLAGS = -Wall -std=c++11 -g -DUSE_MPI=$(USE_MPI) -O3 $(OMP_FLAGS) -Wno-unknown-pragmas +CXXFLAGS = -Wall -std=c++11 -g -DUSE_MPI=$(USE_MPI) -O0 $(OMP_FLAGS) -Wno-unknown-pragmas # Sources SRCS = main.cc \ diff --git a/Project/CPU/Src/RK2.cc b/Project/CPU/Src/RK2.cc index 88633e98..4dd16a99 100644 --- a/Project/CPU/Src/RK2.cc +++ b/Project/CPU/Src/RK2.cc @@ -38,9 +38,12 @@ void RK2::predictorStep(double * cons, double * prims, double * aux, double dt) // Cons2prims conversion for p1 estimate stage requires old values to start // the rootfind - for (int i(d->is); i < d->ie; i++) { - for (int j(d->js); j < d->je; j++) { - for (int k(d->ks); k < d->ke; k++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + //for (int i(d->is); i < d->ie; i++) { + //for (int j(d->js); j < d->je; j++) { + //for (int k(d->ks); k < d->ke; k++) { for (int var(0); var < d->Naux; var++) { p1aux[ID(var, i, j, k)] = aux[ID(var, i, j, k)]; } @@ -56,9 +59,12 @@ void RK2::predictorStep(double * cons, double * prims, double * aux, double dt) // First stage approximation for (int var(0); var < d->Ncons; var++) { - for (int i(d->is); i < d->ie; i++) { - for (int j(d->js); j < d->je; j++) { - for (int k(d->ks); k < d->ke; k++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + //for (int i(d->is); i < d->ie; i++) { + //for (int j(d->js); j < d->je; j++) { + //for (int k(d->ks); k < d->ke; k++) { p1cons[ID(var, i, j, k)] = cons[ID(var, i, j, k)] - dt * args1[ID(var, i, j, k)]; } } @@ -79,9 +85,12 @@ void RK2::correctorStep(double * cons, double * prims, double * aux, double dt) // Construct solution for (int var(0); var < d->Ncons; var++) { - for (int i(d->is); i < d->ie; i++) { - for (int j(d->js); j < d->je; j++) { - for (int k(d->ks); k < d->ke; k++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + //for (int i(d->is); i < d->ie; i++) { + //for (int j(d->js); j < d->je; j++) { + //for (int k(d->ks); k < d->ke; k++) { cons[ID(var, i, j, k)] = 0.5 * (cons[ID(var, i, j, k)] + p1cons[ID(var, i, j, k)] - dt * args2[ID(var, i, j, k)]); } diff --git a/Project/CPU/Src/srmhd.cc b/Project/CPU/Src/srmhd.cc index b169da4b..ba2bfea4 100644 --- a/Project/CPU/Src/srmhd.cc +++ b/Project/CPU/Src/srmhd.cc @@ -404,9 +404,14 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) std::vector fails; // Vector of failed structs. Stores location of failed cons2prims cells. 
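// (A sketch of the index convention assumed by the debugging changes above and
//  below: d->Nx, d->Ny and d->Nz count every cell including the Ng ghost cells on
//  each side, while d->is..d->ie, d->js..d->je and d->ks..d->ke span only the
//  interior, so
//    for (int i(d->is); i < d->ie; i++) { ... }   // interior cells only
//    for (int i(0);     i < d->Nx; i++) { ... }   // every cell, ghosts included
//  Looping over 0..Nx therefore also runs the cons2prims sweep in the ghost
//  regions, presumably to make the CPU results directly comparable with the GPU path.)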
// Loop through domain solving and setting the prim and aux vars - for (int i(d->is); i < d->ie; i++) { - for (int j(d->js); j < d->je; j++) { - for (int k(d->ks); k < d->ke; k++) { + //for (int i(d->is); i < d->ie; i++) { + //for (int j(d->js); j < d->je; j++) { + //for (int k(d->ks); k < d->ke; k++) { + + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + // Update possible values // Bx, By, Bz prims[ID(5, i, j, k)] = cons[ID(5, i, j, k)]; @@ -461,7 +466,7 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) - +/* // ################################## Smart guessing ########################### // // Are there any failures? @@ -506,11 +511,14 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) // } } } +*/ - - for (int i(d->is); i < d->ie; i++) { - for (int j(d->js); j < d->je; j++) { - for (int k(d->ks); k < d->ke; k++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + //for (int i(d->is); i < d->ie; i++) { + //for (int j(d->js); j < d->je; j++) { + //for (int k(d->ks); k < d->ke; k++) { // W aux[ID(1, i, j, k)] = 1 / sqrt(1 - solution[ID(0, i, j, k)]); // rho diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index a88806e2..43f055c9 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -28,10 +28,10 @@ RTFIND_SRC_DIR = ./CminpackLibrary/Src MPI_FLAGS = -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu -lmpichcxx -lmpich # C++ compiler flags -CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 +CXXFLAGS = -fopenmp -Wall -std=c++11 -O0 -lineinfo -g # NVIDIA compiler flags -NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 -Xcompiler -fopenmp -Xcompiler -Wall +NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O0 -arch=sm_52 -Xcompiler -fopenmp -Xcompiler -Wall -lineinfo -g -G # Sources SRCS = main.cu \ diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 6ef7cd8b..103875b7 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -28,7 +28,7 @@ int main(int argc, char *argv[]) { // Set up domain int Ng(4); - int nx(64); + int nx(16); int ny(8); int nz(0); double xmin(-0.5); From e247098415f89dc97afeb96593aa1cf7d1ef65fa Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 22 Sep 2020 10:54:52 +0100 Subject: [PATCH 22/56] debugging tweaks. Reverse later --- Project/CPU/Src/main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index ea21d219..dbf80ab2 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -19,7 +19,7 @@ int main(int argc, char *argv[]) { const double MU(1000); // Set up domain int Ng(4); - int nx(64); + int nx(16); int ny(8); int nz(0); double xmin(-0.5); From 68128450090a462025a40f3ad92ef9f6e67b5644 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 22 Sep 2020 10:55:44 +0100 Subject: [PATCH 23/56] moving SRMHD::getPrimitiveVars to GPU. 
In progress, needs debugging --- Project/GPU/Include/srmhd.h | 4 + Project/GPU/Src/simData.cu | 5 +- Project/GPU/Src/srmhd.cu | 189 +++++++++++++++++++++++++++++++++++- 3 files changed, 193 insertions(+), 5 deletions(-) diff --git a/Project/GPU/Include/srmhd.h b/Project/GPU/Include/srmhd.h index 4bf1fbb6..48f0ac7a 100644 --- a/Project/GPU/Include/srmhd.h +++ b/Project/GPU/Include/srmhd.h @@ -3,6 +3,7 @@ #include "model.h" #include "deviceArguments.h" +#include "C2PArgs.h" /* @@ -100,6 +101,9 @@ class SRMHD : public Model double * solution; //!< Pointer to array to hold solution of C2P for every cell. Size is 2*Nx*Ny*Nz + // Work array + C2PArgs * c2pArgs; + SRMHD(); //!< Default constructor diff --git a/Project/GPU/Src/simData.cu b/Project/GPU/Src/simData.cu index 84896524..0e829e03 100644 --- a/Project/GPU/Src/simData.cu +++ b/Project/GPU/Src/simData.cu @@ -73,7 +73,10 @@ Data::Data(int nx, int ny, int nz, cudaGetDeviceProperties(&prop, 0); cudaDeviceSetLimit(cudaLimitStackSize, 2048); // Needed for SRMHS and SSP2, hybrd called recursively meaning nvcc does not know the stack size at compile time. Manually set. // Determine the number of GPU streams - Nstreams = Ncells / (tpb * bpg) + 1; + + //Nstreams = Ncells / (tpb * bpg) + 1; + //! TODO -- for debugging. Remove + Nstreams = 1; if (false) { diff --git a/Project/GPU/Src/srmhd.cu b/Project/GPU/Src/srmhd.cu index c0b39205..57e5dc00 100644 --- a/Project/GPU/Src/srmhd.cu +++ b/Project/GPU/Src/srmhd.cu @@ -14,16 +14,21 @@ #include #include #include +#include "cudaErrorCheck.h" // Macro for getting array index #define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) +#define IDCons(var, idx, jdx, kdx) ( (var) + (idx)*(d->Ncons)*(d->Nz)*(d->Ny) + (jdx)*(d->Ncons)*(d->Nz) + (kdx)*(d->Ncons) ) +#define IDPrims(var, idx, jdx, kdx) ( (var) + (idx)*(d->Nprims)*(d->Nz)*(d->Ny) + (jdx)*(d->Nprims)*(d->Nz) + (kdx)*(d->Nprims) ) +#define IDAux(var, idx, jdx, kdx) ( (var) + (idx)*(d->Naux)*(d->Nz)*(d->Ny) + (jdx)*(d->Naux)*(d->Nz) + (kdx)*(d->Naux) ) __device__ int SRMHDresidualParallel(void *p, int n, const double *x, double *fvec, int iflag); int SRMHDresidual(void *p, int n, const double *x, double *fvec, int iflag); - +__global__ +static void getPrimitiveVarsParallel(double *cons, double *prims, double *aux, double *guess, int stream, double gamma, double sigma, int Ncons, int Nprims, int Naux, int origWidth, int streamWidth); SRMHD::SRMHD() : Model() { @@ -65,11 +70,14 @@ SRMHD::SRMHD(Data * data) : Model(data) this->data->auxLabels.push_back("bsq"); this->data->auxLabels.push_back("vsq"); this->data->auxLabels.push_back("BS"); this->data->auxLabels.push_back("Bsq"); this->data->auxLabels.push_back("Ssq"); + + c2pArgs = new C2PArgs(this->data); } SRMHD::~SRMHD() { cudaFreeHost(solution); + delete c2pArgs; } @@ -403,7 +411,8 @@ void SRMHD::getPrimitiveVarsSingleCell(double *cons, double *prims, double *aux, old values for the prims and aux vectors. Output is the current values of cons, prims and aux. */ -void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) +/* +void SRMHD::getPrimitiveVarsCPU(double *cons, double *prims, double *aux) { // Syntax Data * d(this->data); @@ -583,7 +592,7 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) } - +*/ @@ -700,7 +709,7 @@ void SRMHD::primsToAll(double *cons, double *prims, double *aux) //! 
Need a structure to pass to C2P hybrd rootfind to hold the current cons values typedef struct { - double guess[9]; + double guess[8]; double gamma; } getPrimVarsArgs; @@ -731,6 +740,178 @@ int SRMHDresidualParallel(void *p, int n, const double *x, double *fvec, int ifl return 0; } + +void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) +{ + // Syntax + Data * d(this->data); + + // First need to copy data to the device + // A single cell requires all cons variables and aux10 to start the guessing + // Rearrange data into host arrays ready for copying + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + for (int var(0); var < d->Ncons; var++) { + c2pArgs->cons_h[IDCons(var, i, j, k)] = cons[ID(var, i, j, k)]; + } + c2pArgs->guess_h[ID(0, i, j, k)] = aux[ID(10, i, j, k)]; + } + } + } + + // Data is in correct order, now stream data to the device + for (int i(0); i < c2pArgs->Nstreams; i++) { + // Which cell is at the left bound? + int lcell(i * c2pArgs->streamWidth); + // Which cell is at the right bound? + int rcell(lcell + c2pArgs->streamWidth); + if (rcell > d->Ncells) rcell = d->Ncells; + // Memory size to copy in + int width(rcell - lcell); + int inMemsize(width * sizeof(double)); + + // Send stream's data + gpuErrchk( cudaMemcpyAsync(c2pArgs->cons_d[i], c2pArgs->cons_h + lcell*d->Ncons, inMemsize*d->Ncons, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); + gpuErrchk( cudaMemcpyAsync(c2pArgs->guess_d[i], c2pArgs->guess_h + lcell, inMemsize, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); + + + // Call kernel and operate on data + getPrimitiveVarsParallel <<< c2pArgs->bpg, c2pArgs->tpb, + c2pArgs->tpb * c2pArgs->cellMem, c2pArgs->stream[i] >>> (c2pArgs->cons_d[i], + c2pArgs->prims_d[i], c2pArgs->aux_d[i], c2pArgs->guess_d[i], i, d->gamma, d->sigma, d->Ncons, + d->Nprims, d->Naux, c2pArgs->streamWidth, width); + + + // Copy all data back + gpuErrchk( cudaMemcpyAsync(c2pArgs->prims_h + lcell*d->Nprims, c2pArgs->prims_d[i], inMemsize*d->Nprims, cudaMemcpyDeviceToHost, c2pArgs->stream[i]) ); + gpuErrchk( cudaMemcpyAsync(c2pArgs->aux_h + lcell*d->Naux, c2pArgs->aux_d[i], inMemsize*d->Naux, cudaMemcpyDeviceToHost, c2pArgs->stream[i]) ); + } + gpuErrchk( cudaDeviceSynchronize() ); + + // Rearrange data back into arrays + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + + for (int var(0); var < d->Nprims; var++) { + prims[ID(var, i, j, k)] = c2pArgs->prims_h[IDPrims(var, i, j, k)]; + } + for (int var(0); var < d->Naux; var++) { + aux[ID(var, i, j, k)] = c2pArgs->aux_h[IDAux(var, i, j, k)]; + } + } + } + } +} + +// /*! +// This is the device version of the getPrimitiveVars that takes a streams data +// and computes the rest of the prims and aux vars. This is called when +// SRRMHD::getPrimitiveVars is required, i.e. all cells need to be found. 
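// (Orientation sketch, taken from C2PArgs and the host code above: the host splits
//  the d->Ncells cells into chunks of streamWidth = tpb * bpg cells; chunk i covers
//  cells [i*streamWidth, min((i+1)*streamWidth, Ncells)) and gets its own CUDA
//  stream for the copy in, this kernel launch, and the copy back. Inside the
//  kernel, lID is the cell's index within its chunk, and lID + stream*origWidth
//  would recover the global cell index.)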
+// */ +__global__ +static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, double *streamAux, double *guess, int stream, double gamma, double sigma, int Ncons, int Nprims, int Naux, int origWidth, int streamWidth) +{ + // First need thread indicies + const int tID(threadIdx.x); //!< thread index (in block) + const int lID(tID + blockIdx.x * blockDim.x); //!< local index (in stream) + // const int gID(lID + stream * origWidth); //!< global index (in domain) + // Allocate shared memory + extern __shared__ double sharedArray []; + double * cons = &sharedArray[tID * (Ncons + Nprims + Naux)]; + double * prims = &cons[Ncons]; + double * aux = &prims[Nprims]; + + // Hybrd1 set-up + double sol[2]; // Guess and solution vector + double res[2]; // Residual/fvec vector + int info; // Rootfinder flag + double wa[19]; // Work array + + if (lID < streamWidth) { + + + // Load conserved vector into shared memory, and the initial guess + for (int i(0); i < Ncons; i++) cons[i] = streamCons[lID * Ncons + i]; + + + + + // Update known values + // Bx, By, Bz + prims[5] = cons[5]; + prims[6] = cons[6]; + prims[7] = cons[8]; + + // BS + aux[10] = cons[5] * cons[1] + cons[6] * cons[2] + cons[7] * cons[3]; + // Bsq + aux[11] = cons[5] * cons[5] + cons[6] * cons[6] + cons[7] * cons[7]; + // Ssq + aux[12] = cons[1] * cons[1] + cons[2] * cons[2] + cons[3] * cons[3]; + + + + // Set args for rootfind + getPrimVarsArgs GPVAArgs = {cons[0], cons[1], cons[2], cons[3], cons[4], cons[6], cons[7], cons[8], gamma}; + + // Guesses of solution + sol[0] = prims[1] * prims[1] + prims[2] * prims[2] + prims[3] * prims[3]; + sol[1] = prims[0] * aux[0] / (1 - sol[0]); + + + // Solve residual = 0 + if ((info = __cminpack_func__(hybrd1) (SRMHDresidualParallel, &GPVAArgs, 2, sol, res, 1.49011612e-7, wa, 19))!=1) + { + printf("C2P single cell failed at lID %d, hybrd returns info=%d\n", lID, info); + } + if (lID == 0){ + printf("IN LANE %f\n", prims[5]); + printf("GPU GAMMA %f\n", gamma); + printf("sol %f %f res %f %f\n", sol[0], sol[1], res[0], res[1]); + } + // W + aux[1] = 1 / sqrt(1 - sol[0]); + // rho + prims[0] = cons[0] / aux[1]; + // h + aux[0] = sol[1] / (prims[0] * aux[1] * aux[1]); + // p + prims[4] = (aux[0] - 1) * prims[0] * + (gamma - 1) / gamma; + // e + aux[2] = prims[4] / (prims[0] * (gamma - 1)); + // vx, vy, vz + prims[1] = (cons[5] * aux[10] + cons[1] * sol[1]) / (sol[1] * (aux[11] + sol[1])); + prims[2] = (cons[6] * aux[10] + cons[2] * sol[1]) / (sol[1] * (aux[11] + sol[1])); + prims[3] = (cons[7] * aux[10] + cons[3] * sol[1]) / (sol[1] * (aux[11] + sol[1])); + // vsq + aux[9] = prims[1] * prims[1] + prims[2] * prims[2] + prims[3] * prims[3]; + // c + aux[3] = sqrt(aux[2] * gamma * (gamma - 1) / aux[0]); + // b0 + aux[4] = aux[1] * (cons[5] * prims[1] + cons[6] * prims[2] + cons[7] * prims[3]); + // bx, by, bz + aux[5] = cons[5] / aux[1] + aux[4] * prims[1]; + aux[6] = cons[6] / aux[1] + aux[4] * prims[2]; + aux[7] = cons[7] / aux[1] + aux[4] * prims[3]; + // bsq + aux[8] = (prims[5] * prims[5] + prims[6] * prims[6] + prims[7] * prims[7] + + aux[4] * aux[4]) / (aux[1] * aux[1]); + + + + } + + // Copy data back from shared memory into device arrays + for (int i(0); i < Nprims; i++) streamPrims[lID * Nprims + i] = prims[i]; + for (int i(0); i < Naux; i++) streamAux[lID * Naux + i] = aux[i]; + +} + + + __device__ void SRMHD_D::getPrimitiveVarsSingleCell(double *cons, double *prims, double *aux) { From dbdaf552639f9706a409fcc947bf27ceebc8af70 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 22 
Sep 2020 14:15:32 +0100 Subject: [PATCH 24/56] gpu version no longer failing to converge in getPrims but still gives wrong answer. Currently copying all prims and aux to gpu -- need to copy only those values that are required for the guess in future --- Project/CPU/Src/main.cc | 2 +- Project/CPU/Src/srmhd.cc | 8 ++++ Project/GPU/Makefile | 4 +- Project/GPU/Src/main.cu | 2 +- Project/GPU/Src/srmhd.cu | 98 ++++++++++++++++++++++++++++++---------- 5 files changed, 85 insertions(+), 29 deletions(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index dbf80ab2..c412eaa2 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -28,7 +28,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.0005); + double endTime(0.00005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); diff --git a/Project/CPU/Src/srmhd.cc b/Project/CPU/Src/srmhd.cc index ba2bfea4..48e1060a 100644 --- a/Project/CPU/Src/srmhd.cc +++ b/Project/CPU/Src/srmhd.cc @@ -446,9 +446,17 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) sol[1] = prims[ID(0, i, j, k)] * aux[ID(0, i, j, k)] / (1 - sol[0]); + // Solve residual = 0 info = __cminpack_func__(hybrd1) (&SRMHDresidual, &args, n, sol, res, tol, wa, lwa); + if (i==4 && j==4 && k==0){ + printf("CPU, IN LANE (%d,%d,%d)\n", i, j, k); + printf("prims: %f %f %f\n", prims[ID(3, i, j, k)], prims[ID(4, i, j, k)], prims[ID(5, i, j, k)]); + printf("cons: %f %f %f\n", cons[ID(3, i, j, k)], cons[ID(4, i, j, k)], cons[ID(5, i, j, k)]); + printf("args: %f %f %f\n", aux[ID(10, i, j, k)], aux[ID(11, i, j, k)], aux[ID(12, i, j, k)]); + printf("sol %f %f res %f %f\n", sol[0], sol[1], res[0], res[1]); + } // If root find fails, add failed cell to the list if (info!=1) { Failed fail = {i, j, k}; diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 43f055c9..1801d2d2 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -28,10 +28,10 @@ RTFIND_SRC_DIR = ./CminpackLibrary/Src MPI_FLAGS = -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu -lmpichcxx -lmpich # C++ compiler flags -CXXFLAGS = -fopenmp -Wall -std=c++11 -O0 -lineinfo -g +CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 -lineinfo # NVIDIA compiler flags -NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O0 -arch=sm_52 -Xcompiler -fopenmp -Xcompiler -Wall -lineinfo -g -G +NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 -Xcompiler -fopenmp -Xcompiler -Wall -lineinfo # Sources SRCS = main.cu \ diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 103875b7..1c3e72ca 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -37,7 +37,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.0005); + double endTime(0.00005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); diff --git a/Project/GPU/Src/srmhd.cu b/Project/GPU/Src/srmhd.cu index 57e5dc00..3c8f0446 100644 --- a/Project/GPU/Src/srmhd.cu +++ b/Project/GPU/Src/srmhd.cu @@ -400,7 +400,7 @@ void SRMHD::getPrimitiveVarsSingleCell(double *cons, double *prims, double *aux, } - +#if 0 //! Solve for the primitive and auxiliary variables /*! Method outlined in Anton 2010, `Relativistic Magnetohydrodynamcis: @@ -411,15 +411,14 @@ void SRMHD::getPrimitiveVarsSingleCell(double *cons, double *prims, double *aux, old values for the prims and aux vectors. Output is the current values of cons, prims and aux. 
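  For reference, the system handed to hybrd1 (implemented in SRMHDresidualParallel
  further down this file) uses the two unknowns
    x[0] = vsq = v_i v^i,   x[1] = Z = rho * h * W^2,
  with W = 1 / sqrt(1 - vsq), rho = D / W and
  p = (h - 1) * rho * (gamma - 1) / gamma, and drives the two residuals
    f[0] = (Z + Bsq)^2 * vsq - (2*Z + Bsq) * BS^2 / Z^2 - Ssq
    f[1] = Z + Bsq - p - Bsq / (2*W^2) - BS^2 / (2*Z^2) - D - tau
  to zero, where BS = B_i S^i, Bsq = B_i B^i and Ssq = S_i S^i (aux[10], aux[11]
  and aux[12] respectively).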
*/ -/* -void SRMHD::getPrimitiveVarsCPU(double *cons, double *prims, double *aux) +void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) { // Syntax Data * d(this->data); // Solutions - //double * solution; - //cudaHostAlloc((void **)&solution, sizeof(double)*2*d->Nx*d->Ny*d->Nz, - //cudaHostAllocPortable); + double * solution; + cudaHostAlloc((void **)&solution, sizeof(double)*2*d->Nx*d->Ny*d->Nz, + cudaHostAllocPortable); // Hybrd1 set-up Args args; // Additional arguments structure @@ -590,10 +589,11 @@ void SRMHD::getPrimitiveVarsCPU(double *cons, double *prims, double *aux) } // End j-loop } // End i-loop + cudaFreeHost(solution); -} -*/ +} +#endif @@ -702,16 +702,25 @@ void SRMHD::primsToAll(double *cons, double *prims, double *aux) } // End i-loop } -#define Bsq (args->guess[5] * args->guess[5] + args->guess[6] * args->guess[6] + args->guess[7] + args->guess[7]) -#define Ssq (args->guess[1] * args->guess[1] + args->guess[2] * args->guess[2] + args->guess[3] + args->guess[3]) -#define BS (args->guess[5] * args->guess[1] + args->guess[6] * args->guess[2] + args->guess[7] + args->guess[3]) - //! Need a structure to pass to C2P hybrd rootfind to hold the current cons values +/* typedef struct { double guess[8]; double gamma; } getPrimVarsArgs; +*/ + +typedef struct +{ + double + D, //!< Relativistic energy for a single cell + g, //!< Adiabatic index, gamma + Bsq, //!< Squared magnitude of magnetic field for a single cell + Ssq, //!< Square magnitude of momentum for a single cell + BS, //!< Scalar product of magnetic field and momentum vector for a single cell + tau; //!< Kinetic energy for a single cell +} getPrimVarsArgs; __device__ int SRMHDresidualParallel(void *p, int n, const double *x, double *fvec, int iflag) @@ -724,23 +733,25 @@ int SRMHDresidualParallel(void *p, int n, const double *x, double *fvec, int ifl fvec[0] = fvec[1] = 1e6; return 0; } - - + double Bsq(args->Bsq); + double Ssq(args->Ssq); + double BS(args->BS); double W(1 / sqrt(1 - x[0])); - double rho(args->guess[0] / W); + double rho(args->D / W); double h(x[1] / (rho * W * W)); - double pr((h - 1) * rho * (args->gamma - 1) / args->gamma); + double pr((h - 1) * rho * (args->g - 1) / args->g); if (pr < 0 || rho < 0 || h < 0 || W < 1) { fvec[0] = fvec[1] = 1e6; return 0; } // Values should be OK fvec[0] = (x[1] + Bsq) * (x[1] + Bsq) * x[0] - (2 * x[1] + Bsq) * BS * BS / (x[1] * x[1]) - Ssq; - fvec[1] = x[1] + Bsq - pr - Bsq / (2 * W * W) - BS * BS / (2 * x[1] * x[1]) - args->guess[0] - args->guess[4]; + fvec[1] = x[1] + Bsq - pr - Bsq / (2 * W * W) - BS * BS / (2 * x[1] * x[1]) - args->D - args->tau; return 0; } +#if 1 void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) { // Syntax @@ -755,7 +766,26 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) for (int var(0); var < d->Ncons; var++) { c2pArgs->cons_h[IDCons(var, i, j, k)] = cons[ID(var, i, j, k)]; } - c2pArgs->guess_h[ID(0, i, j, k)] = aux[ID(10, i, j, k)]; + } + } + } + + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + for (int var(0); var < d->Nprims; var++) { + c2pArgs->prims_h[IDPrims(var, i, j, k)] = prims[ID(var, i, j, k)]; + } + } + } + } + + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + for (int var(0); var < d->Naux; var++) { + c2pArgs->aux_h[IDAux(var, i, j, k)] = aux[ID(var, i, j, k)]; + } } } } @@ -773,6 +803,8 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double 
*aux) // Send stream's data gpuErrchk( cudaMemcpyAsync(c2pArgs->cons_d[i], c2pArgs->cons_h + lcell*d->Ncons, inMemsize*d->Ncons, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); + gpuErrchk( cudaMemcpyAsync(c2pArgs->prims_d[i], c2pArgs->prims_h + lcell*d->Nprims, inMemsize*d->Nprims, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); + gpuErrchk( cudaMemcpyAsync(c2pArgs->aux_d[i], c2pArgs->aux_h + lcell*d->Naux, inMemsize*d->Naux, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); gpuErrchk( cudaMemcpyAsync(c2pArgs->guess_d[i], c2pArgs->guess_h + lcell, inMemsize, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); @@ -804,6 +836,7 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) } } } +#endif // /*! // This is the device version of the getPrimitiveVars that takes a streams data @@ -834,6 +867,8 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do // Load conserved vector into shared memory, and the initial guess for (int i(0); i < Ncons; i++) cons[i] = streamCons[lID * Ncons + i]; + for (int i(0); i < Nprims; i++) prims[i] = streamPrims[lID * Nprims + i]; + for (int i(0); i < Naux; i++) aux[i] = streamAux[lID * Naux + i]; @@ -851,10 +886,14 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do // Ssq aux[12] = cons[1] * cons[1] + cons[2] * cons[2] + cons[3] * cons[3]; - - // Set args for rootfind - getPrimVarsArgs GPVAArgs = {cons[0], cons[1], cons[2], cons[3], cons[4], cons[6], cons[7], cons[8], gamma}; + getPrimVarsArgs GPVAArgs; + GPVAArgs.D = cons[0]; + GPVAArgs.g = gamma; + GPVAArgs.BS = aux[10]; + GPVAArgs.Bsq = aux[11]; + GPVAArgs.Ssq = aux[12]; + GPVAArgs.tau = cons[4]; // Guesses of solution sol[0] = prims[1] * prims[1] + prims[2] * prims[2] + prims[3] * prims[3]; @@ -862,12 +901,15 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do // Solve residual = 0 - if ((info = __cminpack_func__(hybrd1) (SRMHDresidualParallel, &GPVAArgs, 2, sol, res, 1.49011612e-7, wa, 19))!=1) + if ((info = __cminpack_func__(hybrd1) (SRMHDresidualParallel, &GPVAArgs, 2, sol, res, 1.49011612e-7, wa, 19))!=1 && lID==68) { printf("C2P single cell failed at lID %d, hybrd returns info=%d\n", lID, info); } - if (lID == 0){ - printf("IN LANE %f\n", prims[5]); + if (lID == 68){ + printf("IN LANE %d\n", lID); + printf("prims: %f %f %f\n", prims[3], prims[4], prims[5]); + printf("cons: %f %f %f\n", cons[3], cons[4], cons[5]); + printf("args: %f %f %f\n", aux[10], aux[11], aux[12]); printf("GPU GAMMA %f\n", gamma); printf("sol %f %f res %f %f\n", sol[0], sol[1], res[0], res[1]); } @@ -937,7 +979,13 @@ void SRMHD_D::getPrimitiveVarsSingleCell(double *cons, double *prims, double *au // Set args for rootfind - getPrimVarsArgs GPVAArgs = {cons[0], cons[1], cons[2], cons[3], cons[4], cons[6], cons[7], cons[8], args->gamma}; + getPrimVarsArgs GPVAArgs; + GPVAArgs.D = cons[0]; + GPVAArgs.g = args->gamma; + GPVAArgs.BS = aux[10]; + GPVAArgs.Bsq = aux[11]; + GPVAArgs.Ssq = aux[12]; + GPVAArgs.tau = cons[4]; // Guesses of solution sol[0] = prims[1] * prims[1] + prims[2] * prims[2] + prims[3] * prims[3]; From ac37273a97a21e68b78db33ed155706d1293a245 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Wed, 23 Sep 2020 17:02:11 +0100 Subject: [PATCH 25/56] reversing changes made for debugging --- Project/CPU/Src/RK2.cc | 27 +++++++++------------------ Project/CPU/Src/main.cc | 6 +++--- Project/CPU/Src/srmhd.cc | 30 ++++++------------------------ Project/GPU/Src/main.cu | 6 +++--- Project/GPU/Src/srmhd.cu | 9 +-------- 5 files 
changed, 22 insertions(+), 56 deletions(-) diff --git a/Project/CPU/Src/RK2.cc b/Project/CPU/Src/RK2.cc index 4dd16a99..88633e98 100644 --- a/Project/CPU/Src/RK2.cc +++ b/Project/CPU/Src/RK2.cc @@ -38,12 +38,9 @@ void RK2::predictorStep(double * cons, double * prims, double * aux, double dt) // Cons2prims conversion for p1 estimate stage requires old values to start // the rootfind - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - //for (int i(d->is); i < d->ie; i++) { - //for (int j(d->js); j < d->je; j++) { - //for (int k(d->ks); k < d->ke; k++) { + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { for (int var(0); var < d->Naux; var++) { p1aux[ID(var, i, j, k)] = aux[ID(var, i, j, k)]; } @@ -59,12 +56,9 @@ void RK2::predictorStep(double * cons, double * prims, double * aux, double dt) // First stage approximation for (int var(0); var < d->Ncons; var++) { - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - //for (int i(d->is); i < d->ie; i++) { - //for (int j(d->js); j < d->je; j++) { - //for (int k(d->ks); k < d->ke; k++) { + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { p1cons[ID(var, i, j, k)] = cons[ID(var, i, j, k)] - dt * args1[ID(var, i, j, k)]; } } @@ -85,12 +79,9 @@ void RK2::correctorStep(double * cons, double * prims, double * aux, double dt) // Construct solution for (int var(0); var < d->Ncons; var++) { - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - //for (int i(d->is); i < d->ie; i++) { - //for (int j(d->js); j < d->je; j++) { - //for (int k(d->ks); k < d->ke; k++) { + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { cons[ID(var, i, j, k)] = 0.5 * (cons[ID(var, i, j, k)] + p1cons[ID(var, i, j, k)] - dt * args2[ID(var, i, j, k)]); } diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index c412eaa2..73c07e32 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -19,8 +19,8 @@ int main(int argc, char *argv[]) { const double MU(1000); // Set up domain int Ng(4); - int nx(16); - int ny(8); + int nx(64); + int ny(16); int nz(0); double xmin(-0.5); double xmax(0.5); @@ -28,7 +28,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.00005); + double endTime(0.0005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); diff --git a/Project/CPU/Src/srmhd.cc b/Project/CPU/Src/srmhd.cc index 48e1060a..53f9f1aa 100644 --- a/Project/CPU/Src/srmhd.cc +++ b/Project/CPU/Src/srmhd.cc @@ -404,14 +404,9 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) std::vector fails; // Vector of failed structs. Stores location of failed cons2prims cells. 
// Loop through domain solving and setting the prim and aux vars - //for (int i(d->is); i < d->ie; i++) { - //for (int j(d->js); j < d->je; j++) { - //for (int k(d->ks); k < d->ke; k++) { - - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { // Update possible values // Bx, By, Bz prims[ID(5, i, j, k)] = cons[ID(5, i, j, k)]; @@ -450,13 +445,6 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) // Solve residual = 0 info = __cminpack_func__(hybrd1) (&SRMHDresidual, &args, n, sol, res, tol, wa, lwa); - if (i==4 && j==4 && k==0){ - printf("CPU, IN LANE (%d,%d,%d)\n", i, j, k); - printf("prims: %f %f %f\n", prims[ID(3, i, j, k)], prims[ID(4, i, j, k)], prims[ID(5, i, j, k)]); - printf("cons: %f %f %f\n", cons[ID(3, i, j, k)], cons[ID(4, i, j, k)], cons[ID(5, i, j, k)]); - printf("args: %f %f %f\n", aux[ID(10, i, j, k)], aux[ID(11, i, j, k)], aux[ID(12, i, j, k)]); - printf("sol %f %f res %f %f\n", sol[0], sol[1], res[0], res[1]); - } // If root find fails, add failed cell to the list if (info!=1) { Failed fail = {i, j, k}; @@ -474,8 +462,6 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) -/* - // ################################## Smart guessing ########################### // // Are there any failures? if (fails.size() > 0) { @@ -519,14 +505,10 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) // } } } -*/ - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - //for (int i(d->is); i < d->ie; i++) { - //for (int j(d->js); j < d->je; j++) { - //for (int k(d->ks); k < d->ke; k++) { + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { // W aux[ID(1, i, j, k)] = 1 / sqrt(1 - solution[ID(0, i, j, k)]); // rho diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 1c3e72ca..e41e1f84 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -28,8 +28,8 @@ int main(int argc, char *argv[]) { // Set up domain int Ng(4); - int nx(16); - int ny(8); + int nx(64); + int ny(16); int nz(0); double xmin(-0.5); double xmax(0.5); @@ -37,7 +37,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.00005); + double endTime(0.0005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); diff --git a/Project/GPU/Src/srmhd.cu b/Project/GPU/Src/srmhd.cu index 3c8f0446..17cd2683 100644 --- a/Project/GPU/Src/srmhd.cu +++ b/Project/GPU/Src/srmhd.cu @@ -401,6 +401,7 @@ void SRMHD::getPrimitiveVarsSingleCell(double *cons, double *prims, double *aux, } #if 0 +// CPU VERSION //! Solve for the primitive and auxiliary variables /*! 
Method outlined in Anton 2010, `Relativistic Magnetohydrodynamcis: @@ -905,14 +906,6 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do { printf("C2P single cell failed at lID %d, hybrd returns info=%d\n", lID, info); } - if (lID == 68){ - printf("IN LANE %d\n", lID); - printf("prims: %f %f %f\n", prims[3], prims[4], prims[5]); - printf("cons: %f %f %f\n", cons[3], cons[4], cons[5]); - printf("args: %f %f %f\n", aux[10], aux[11], aux[12]); - printf("GPU GAMMA %f\n", gamma); - printf("sol %f %f res %f %f\n", sol[0], sol[1], res[0], res[1]); - } // W aux[1] = 1 / sqrt(1 - sol[0]); // rho From 3ee82f2a642c6889caa364d840e98ea58fad9dac Mon Sep 17 00:00:00 2001 From: aniabrown Date: Wed, 23 Sep 2020 17:03:21 +0100 Subject: [PATCH 26/56] fixing typo that was causing bug in getPrimitiveVars --- Project/GPU/Src/main.cu | 4 ++-- Project/GPU/Src/srmhd.cu | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index e41e1f84..bb109cd3 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -43,8 +43,8 @@ int main(int argc, char *argv[]) { double sigma(0); bool output(true); int safety(180); - int nxRanks(1); - int nyRanks(1); + int nxRanks(2); + int nyRanks(2); int nzRanks(1); char * ptr(0); diff --git a/Project/GPU/Src/srmhd.cu b/Project/GPU/Src/srmhd.cu index 17cd2683..97c15d36 100644 --- a/Project/GPU/Src/srmhd.cu +++ b/Project/GPU/Src/srmhd.cu @@ -878,7 +878,7 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do // Bx, By, Bz prims[5] = cons[5]; prims[6] = cons[6]; - prims[7] = cons[8]; + prims[7] = cons[7]; // BS aux[10] = cons[5] * cons[1] + cons[6] * cons[2] + cons[7] * cons[3]; From adf419bf1c9fdd3595758daed8bbc7229bd6823e Mon Sep 17 00:00:00 2001 From: aniabrown Date: Mon, 28 Sep 2020 09:51:26 +0100 Subject: [PATCH 27/56] cleaning up makefile --- Project/GPU/Makefile | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 1801d2d2..a5100e7b 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -1,13 +1,18 @@ # Make file for the main function. 
Builds all modules # and links for main.cc -# Compiler - -CC_GPU = nvcc +# -------------- PARAMETERS FOR USERS TO EDIT -------------------- +# if USE_MPI=1, need to use parallel versions of objects, such as ParallelEnv, ParallelSaveData etc USE_MPI=1 -MPI_LIBRARY_PATH = /local/software/openmpi/3.0.0/gcc-cuda8.0 +# find location of MPI libraries to link on your local system using 'mpicc -show' +MPI_FLAGS = -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu -lmpichcxx -lmpich + +# -------------- END PARAMETERS FOR USERS TO EDIT -------------------- + +# Compiler +CC_GPU = nvcc # Module directory MODULE_DIR = ./Src @@ -25,7 +30,6 @@ RTFIND_INC_DIR = ./CminpackLibrary/Include RTFIND_SRC_DIR = ./CminpackLibrary/Src #MPI_FLAGS = -lmpi -L${MPI_LIBRARY_PATH}/lib -I${MPI_LIBRARY_PATH}/include -MPI_FLAGS = -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu -lmpichcxx -lmpich # C++ compiler flags CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 -lineinfo From 192749eac158d934ffccc0a1a597442c0b06dbb0 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Wed, 30 Sep 2020 12:01:30 +0100 Subject: [PATCH 28/56] updated getPrimitiveVars on SRMHD to only send prims and aux required for initial guess to GPU --- Project/GPU/Include/C2PArgs.h | 3 ++- Project/GPU/Src/C2PArgs.cu | 9 ++++++-- Project/GPU/Src/srmhd.cu | 41 +++++++++++++++-------------------- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/Project/GPU/Include/C2PArgs.h b/Project/GPU/Include/C2PArgs.h index 90bd6989..0a6c2aa9 100644 --- a/Project/GPU/Include/C2PArgs.h +++ b/Project/GPU/Include/C2PArgs.h @@ -21,7 +21,8 @@ class C2PArgs bpg, //!< Blocks per grid cellMem, //!< Memory required for one cell Nstreams, //!< Number of CUDA streams - streamWidth; //!< Number of cells in each stream + streamWidth, //!< Number of cells in each stream + nGuessSRMHD; //!< Number of elements required for the initial guess per cell for the SRMHD model double //@{ ** cons_d, diff --git a/Project/GPU/Src/C2PArgs.cu b/Project/GPU/Src/C2PArgs.cu index 8c4e0aed..9af701ea 100644 --- a/Project/GPU/Src/C2PArgs.cu +++ b/Project/GPU/Src/C2PArgs.cu @@ -11,6 +11,11 @@ C2PArgs::C2PArgs(Data * data) : data(data) // Determine the memory required for one cell cellMem = (d->Ncons + d->Nprims + d->Naux) * sizeof(double); + // Number of values sent to getPrimitiveValues for initial guess. We allocate enough room for SRMHD, which + // requires more values than SRRMHD. + //! 
TODO -- create separate object for SRRMHD, which only allocates the one value needed per cell + nGuessSRMHD = 5; + tpb = d->tpb; bpg = d->bpg; streamWidth = tpb * bpg; @@ -26,7 +31,7 @@ C2PArgs::C2PArgs(Data * data) : data(data) gpuErrchk( cudaHostAlloc((void **)&cons_h, d->Ncons * d->Ncells * sizeof(double), cudaHostAllocPortable) ); gpuErrchk( cudaHostAlloc((void **)&prims_h, d->Nprims * d->Ncells * sizeof(double), cudaHostAllocPortable) ); gpuErrchk( cudaHostAlloc((void **)&aux_h, d->Naux * d->Ncells * sizeof(double), cudaHostAllocPortable) ); - gpuErrchk( cudaHostAlloc((void **)&guess_h, d->Ncells * sizeof(double), cudaHostAllocPortable) ); + gpuErrchk( cudaHostAlloc((void **)&guess_h, d->Ncells * nGuessSRMHD * sizeof(double), cudaHostAllocPortable) ); @@ -34,7 +39,7 @@ C2PArgs::C2PArgs(Data * data) : data(data) gpuErrchk( cudaMalloc((void **)&cons_d[i], d->Ncons * streamWidth * sizeof(double)) ); gpuErrchk( cudaMalloc((void **)&prims_d[i], d->Nprims * streamWidth * sizeof(double)) ); gpuErrchk( cudaMalloc((void **)&aux_d[i], d->Naux * streamWidth * sizeof(double)) ); - gpuErrchk( cudaMalloc((void **)&guess_d[i], streamWidth * sizeof(double)) ); + gpuErrchk( cudaMalloc((void **)&guess_d[i], nGuessSRMHD * streamWidth * sizeof(double)) ); } // Create streams diff --git a/Project/GPU/Src/srmhd.cu b/Project/GPU/Src/srmhd.cu index 97c15d36..2e1aa649 100644 --- a/Project/GPU/Src/srmhd.cu +++ b/Project/GPU/Src/srmhd.cu @@ -21,6 +21,7 @@ #define IDCons(var, idx, jdx, kdx) ( (var) + (idx)*(d->Ncons)*(d->Nz)*(d->Ny) + (jdx)*(d->Ncons)*(d->Nz) + (kdx)*(d->Ncons) ) #define IDPrims(var, idx, jdx, kdx) ( (var) + (idx)*(d->Nprims)*(d->Nz)*(d->Ny) + (jdx)*(d->Nprims)*(d->Nz) + (kdx)*(d->Nprims) ) #define IDAux(var, idx, jdx, kdx) ( (var) + (idx)*(d->Naux)*(d->Nz)*(d->Ny) + (jdx)*(d->Naux)*(d->Nz) + (kdx)*(d->Naux) ) +#define IDGuess(guessId, Nguess, idx, jdx, kdx) ( (guessId) + (idx)*(Nguess)*(d->Nz)*(d->Ny) + (jdx)*(Nguess)*(d->Nz) + (kdx)*(Nguess) ) __device__ int SRMHDresidualParallel(void *p, int n, const double *x, double *fvec, int iflag); @@ -28,7 +29,7 @@ int SRMHDresidualParallel(void *p, int n, const double *x, double *fvec, int ifl int SRMHDresidual(void *p, int n, const double *x, double *fvec, int iflag); __global__ -static void getPrimitiveVarsParallel(double *cons, double *prims, double *aux, double *guess, int stream, double gamma, double sigma, int Ncons, int Nprims, int Naux, int origWidth, int streamWidth); +static void getPrimitiveVarsParallel(double *cons, double *prims, double *aux, double *guess, int stream, double gamma, double sigma, int Ncons, int Nprims, int Naux, int nGuess, int origWidth, int streamWidth); SRMHD::SRMHD() : Model() { @@ -771,22 +772,16 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) } } + // Copy 5 values required for the initial guess. 
They are stored in the following format: + // guess_h = {prims[0], prims[1], prims[2], prims[3], aux[0]} for (int i(0); i < d->Nx; i++) { for (int j(0); j < d->Ny; j++) { for (int k(0); k < d->Nz; k++) { - for (int var(0); var < d->Nprims; var++) { - c2pArgs->prims_h[IDPrims(var, i, j, k)] = prims[ID(var, i, j, k)]; - } - } - } - } - - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - for (int var(0); var < d->Naux; var++) { - c2pArgs->aux_h[IDAux(var, i, j, k)] = aux[ID(var, i, j, k)]; - } + c2pArgs->guess_h[IDGuess(0, c2pArgs->nGuessSRMHD, i, j, k)] = prims[ID(0, i, j, k)]; + c2pArgs->guess_h[IDGuess(1, c2pArgs->nGuessSRMHD, i, j, k)] = prims[ID(1, i, j, k)]; + c2pArgs->guess_h[IDGuess(2, c2pArgs->nGuessSRMHD, i, j, k)] = prims[ID(2, i, j, k)]; + c2pArgs->guess_h[IDGuess(3, c2pArgs->nGuessSRMHD, i, j, k)] = prims[ID(3, i, j, k)]; + c2pArgs->guess_h[IDGuess(4, c2pArgs->nGuessSRMHD, i, j, k)] = aux[ID(0, i, j, k)]; } } } @@ -804,16 +799,15 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) // Send stream's data gpuErrchk( cudaMemcpyAsync(c2pArgs->cons_d[i], c2pArgs->cons_h + lcell*d->Ncons, inMemsize*d->Ncons, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); - gpuErrchk( cudaMemcpyAsync(c2pArgs->prims_d[i], c2pArgs->prims_h + lcell*d->Nprims, inMemsize*d->Nprims, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); - gpuErrchk( cudaMemcpyAsync(c2pArgs->aux_d[i], c2pArgs->aux_h + lcell*d->Naux, inMemsize*d->Naux, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); - gpuErrchk( cudaMemcpyAsync(c2pArgs->guess_d[i], c2pArgs->guess_h + lcell, inMemsize, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); + gpuErrchk( cudaMemcpyAsync(c2pArgs->guess_d[i], c2pArgs->guess_h + lcell*c2pArgs->nGuessSRMHD, inMemsize*c2pArgs->nGuessSRMHD, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); // Call kernel and operate on data + //! TODO -- remove prims and aux -- all values that are needed are contained in guess getPrimitiveVarsParallel <<< c2pArgs->bpg, c2pArgs->tpb, c2pArgs->tpb * c2pArgs->cellMem, c2pArgs->stream[i] >>> (c2pArgs->cons_d[i], c2pArgs->prims_d[i], c2pArgs->aux_d[i], c2pArgs->guess_d[i], i, d->gamma, d->sigma, d->Ncons, - d->Nprims, d->Naux, c2pArgs->streamWidth, width); + d->Nprims, d->Naux, c2pArgs->nGuessSRMHD, c2pArgs->streamWidth, width); // Copy all data back @@ -845,7 +839,7 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) // SRRMHD::getPrimitiveVars is required, i.e. all cells need to be found. // */ __global__ -static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, double *streamAux, double *guess, int stream, double gamma, double sigma, int Ncons, int Nprims, int Naux, int origWidth, int streamWidth) +static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, double *streamAux, double *streamGuess, int stream, double gamma, double sigma, int Ncons, int Nprims, int Naux, int Nguess, int origWidth, int streamWidth) { // First need thread indicies const int tID(threadIdx.x); //!< thread index (in block) @@ -856,6 +850,8 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do double * cons = &sharedArray[tID * (Ncons + Nprims + Naux)]; double * prims = &cons[Ncons]; double * aux = &prims[Nprims]; + //! 
TODO -- could probably put guess in registers rather than shared memory + double * guess = &aux[Nguess]; // Hybrd1 set-up double sol[2]; // Guess and solution vector @@ -868,8 +864,7 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do // Load conserved vector into shared memory, and the initial guess for (int i(0); i < Ncons; i++) cons[i] = streamCons[lID * Ncons + i]; - for (int i(0); i < Nprims; i++) prims[i] = streamPrims[lID * Nprims + i]; - for (int i(0); i < Naux; i++) aux[i] = streamAux[lID * Naux + i]; + for (int i(0); i < Nguess; i++) guess[i] = streamGuess[lID * Nguess + i]; @@ -897,8 +892,8 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do GPVAArgs.tau = cons[4]; // Guesses of solution - sol[0] = prims[1] * prims[1] + prims[2] * prims[2] + prims[3] * prims[3]; - sol[1] = prims[0] * aux[0] / (1 - sol[0]); + sol[0] = guess[1] * guess[1] + guess[2] * guess[2] + guess[3] * guess[3]; + sol[1] = guess[0] * guess[4] / (1 - sol[0]); // Solve residual = 0 From d83af685bf8e933115bd6e174afcd50f4d7d894b Mon Sep 17 00:00:00 2001 From: aniabrown Date: Wed, 30 Sep 2020 13:34:17 +0100 Subject: [PATCH 29/56] adding back multiple streams to GPU version --- Project/GPU/Src/simData.cu | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Project/GPU/Src/simData.cu b/Project/GPU/Src/simData.cu index 0e829e03..40e79c13 100644 --- a/Project/GPU/Src/simData.cu +++ b/Project/GPU/Src/simData.cu @@ -74,9 +74,7 @@ Data::Data(int nx, int ny, int nz, cudaDeviceSetLimit(cudaLimitStackSize, 2048); // Needed for SRMHS and SSP2, hybrd called recursively meaning nvcc does not know the stack size at compile time. Manually set. // Determine the number of GPU streams - //Nstreams = Ncells / (tpb * bpg) + 1; - //! TODO -- for debugging. Remove - Nstreams = 1; + Nstreams = Ncells / (tpb * bpg) + 1; if (false) { From 01c58011c13c3d3b0080a371c5cac883359b96bd Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Wed, 28 Oct 2020 15:04:01 +0000 Subject: [PATCH 30/56] converted half of non-MPI GPU tests to use platformEnv API --- Tests/GPU/Makefile | 46 +++++++++++++-------- Tests/GPU/Src/test_boundaryConds.cu | 15 ++++--- Tests/GPU/Src/test_initFunc.cu | 34 +++++++++++----- Tests/GPU/Src/test_simulation.cu | 28 +++++++++---- Tests/GPU/Src/test_srmhd.cu | 46 ++++++++++++--------- Tests/GPU/Src/test_twoFluidEMHD.cu | 62 ++++++++++++++++++----------- 6 files changed, 149 insertions(+), 82 deletions(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 8f8bfd1a..9dbf306b 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -17,6 +17,10 @@ # Compiler CC = nvcc +MPICC = mpic++ + +#use `mpic++ -show` to find library and include flags +MPI_FLAGS = -I/local/software/mpich/3.2.1/gcc/include -L/local/software/mpich/3.2.1/gcc/lib -lmpi -lmpicxx # Points to the root of Google Test, relative to where this file is. # Remember to tweak this if you move this file. @@ -57,11 +61,14 @@ TESTS = test_simulation \ test_cminpack \ test_boundaryConds \ test_twoFluidEMHD \ - test_srrmhd \ - test_fvs \ - test_id \ - test_rk2 \ - test_imex +# test_srrmhd \ +# test_fvs \ +# test_id \ +# test_rk2 \ +# test_imex + + +PARALLEL_TESTS = test_parallel_srmhd \ # All Google Test headers. Usually you shouldn't change this # definition. 
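# A note on the MPI_FLAGS introduced above: `mpic++ -show` (or `mpicc -show`) prints
# the compile/link command the MPI wrapper would run. Its output is system specific;
# a hypothetical example for an MPICH install could look like
#   g++ -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu -lmpichcxx -lmpich
# and only the -I, -L and -l parts need copying into MPI_FLAGS, so that sources
# which include mpi.h can be compiled and linked without relying on the wrapper.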
@@ -155,7 +162,7 @@ test_rk2.o : $(TEST_DIR)/test_rk2.cu $(INC_DIR)/RK2.h \ $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_rk2.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_rk2 : test_rk2.o C2PArgs.o srrmhd.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o saveData.o $(RTFIND_OBJS) gtest_main.a +test_rk2 : test_rk2.o C2PArgs.o srrmhd.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ # Explicit RK split integrator @@ -174,12 +181,19 @@ test_fvs.o : $(TEST_DIR)/test_fvs.cu \ $(INC_DIR)/srmhd.h $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_fvs.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_fvs : srmhd.o twoFluidEMHD.o test_fvs.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o rkSplit.o initFunc.o simulation.o weno.o saveData.o $(RTFIND_OBJS) gtest_main.a +test_fvs : srmhd.o C2PArgs.o twoFluidEMHD.o test_fvs.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o rkSplit.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ # Save data (required for simulation.evolve) -saveData.o : $(MODULE_DIR)/saveData.cu $(INC_DIR)/saveData.h - @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/saveData.cu -I$(INC_DIR) +serialSaveData.o : $(MODULE_DIR)/serialSaveData.cu $(INC_DIR)/serialSaveData.h + @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/serialSaveData.cu -I$(INC_DIR) + +parallelSaveData.o : $(MODULE_DIR)/parallelSaveData.cu $(INC_DIR)/parallelSaveData.h + @$(MPICC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/parallelSaveData.cu -I$(INC_DIR) + +# Platform env +serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/serialEnv.cu -I$(INC_DIR) # Simulation simulation.o : $(MODULE_DIR)/simulation.cu $(INC_DIR)/simulation.h @@ -189,7 +203,7 @@ test_simulation.o : $(TEST_DIR)/test_simulation.cu \ $(INC_DIR)/simulation.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_simulation.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_simulation : simulation.o test_simulation.o simData.o fluxVectorSplitting.o srmhd.o boundaryConds.o initFunc.o RK2.o rkSplit.o saveData.o $(RTFIND_OBJS) weno.o gtest_main.a +test_simulation : simulation.o test_simulation.o simData.o fluxVectorSplitting.o srmhd.o C2PArgs.o boundaryConds.o initFunc.o RK2.o rkSplit.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) weno.o gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ @@ -201,7 +215,7 @@ test_srmhd.o : $(TEST_DIR)/test_srmhd.cu \ $(INC_DIR)/srmhd.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_srmhd.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_srmhd : srmhd.o test_srmhd.o simData.o fluxVectorSplitting.o initFunc.o simulation.o weno.o saveData.o $(RTFIND_OBJS) gtest_main.a +test_srmhd : srmhd.o C2PArgs.o test_srmhd.o simData.o boundaryConds.o fluxVectorSplitting.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ @@ -213,7 +227,7 @@ 
test_srrmhd.o : $(TEST_DIR)/test_srrmhd.cu \ $(INC_DIR)/srrmhd.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_srrmhd.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_srrmhd : srrmhd.o C2PArgs.o test_srrmhd.o simData.o fluxVectorSplitting.o initFunc.o simulation.o weno.o saveData.o $(RTFIND_OBJS) gtest_main.a +test_srrmhd : srrmhd.o C2PArgs.o test_srrmhd.o simData.o fluxVectorSplitting.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ # TwoFluidEMHD Model @@ -225,7 +239,7 @@ test_twoFluidEMHD.o : $(TEST_DIR)/test_twoFluidEMHD.cu \ $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_twoFluidEMHD.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_twoFluidEMHD : twoFluidEMHD.o test_twoFluidEMHD.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o rkSplit.o initFunc.o simulation.o weno.o saveData.o $(RTFIND_OBJS) gtest_main.a +test_twoFluidEMHD : twoFluidEMHD.o test_twoFluidEMHD.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o rkSplit.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ @@ -238,7 +252,7 @@ test_initFunc.o : $(TEST_DIR)/test_initFunc.cu $(INC_DIR)/initFunc.h \ $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_initFunc.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_initFunc : initFunc.o test_initFunc.o simData.o simulation.o twoFluidEMHD.o srmhd.o saveData.o $(RTFIND_OBJS) weno.o gtest_main.a +test_initFunc : initFunc.o test_initFunc.o boundaryConds.o simData.o simulation.o twoFluidEMHD.o srmhd.o C2PArgs.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) weno.o gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ @@ -258,7 +272,7 @@ test_boundaryConds.o : $(TEST_DIR)/test_boundaryConds.cu $(INC_DIR)/boundaryCond $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_boundaryConds.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_boundaryConds : test_boundaryConds.o boundaryConds.o simData.o srmhd.o simulation.o initFunc.o weno.o saveData.o $(RTFIND_OBJS) gtest_main.a +test_boundaryConds : test_boundaryConds.o boundaryConds.o simData.o srmhd.o C2PArgs.o simulation.o initFunc.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ # ID macro @@ -284,5 +298,5 @@ IMEX3Args.o : $(MODULE_DIR)/IMEX3Args.cu $(INC_DIR)/IMEX3Args.h test_imex.o : $(TEST_DIR)/test_imex.cu @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_imex.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_imex : IMEX2Args.o C2PArgs.o SSP2.o IMEX3Args.o SSP3.o simData.o srrmhd.o srmhd.o twoFluidEMHD.o initFunc.o boundaryConds.o saveData.o simulation.o test_imex.o $(RTFIND_OBJS) fluxVectorSplitting.o weno.o gtest_main.a +test_imex : IMEX2Args.o C2PArgs.o SSP2.o IMEX3Args.o SSP3.o simData.o srrmhd.o srmhd.o C2PArgs.o twoFluidEMHD.o initFunc.o boundaryConds.o serialSaveData.o serialEnv.o simulation.o test_imex.o $(RTFIND_OBJS) fluxVectorSplitting.o weno.o gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ diff --git a/Tests/GPU/Src/test_boundaryConds.cu b/Tests/GPU/Src/test_boundaryConds.cu index c4613d75..517f5200 100644 --- a/Tests/GPU/Src/test_boundaryConds.cu +++ 
b/Tests/GPU/Src/test_boundaryConds.cu @@ -4,14 +4,16 @@ #include "srmhd.h" #include "simulation.h" #include "initFunc.h" +#include "serialEnv.h" TEST(Periodic, periodicBoundaryConditions) { - Data d(10, 10, 10, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.4); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(10, 10, 10, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.4, &env); SRMHD model(&d); - Simulation sim(&d); - OTVortexSingleFluid init(&d); Periodic bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); // Set the values of the cons vars to something simple for (int var(0); var < d.Ncons; var++) { @@ -190,11 +192,12 @@ TEST(Periodic, periodicBoundaryConditions) TEST(Outflow, outflowBoundaryConditions) { - Data d(10, 10, 10, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.4); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(10, 10, 10, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.4, &env); SRMHD model(&d); - Simulation sim(&d); - OTVortexSingleFluid init(&d); Outflow bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); // Set the values of the cons vars to something simple for (int var(0); var < d.Ncons; var++) { diff --git a/Tests/GPU/Src/test_initFunc.cu b/Tests/GPU/Src/test_initFunc.cu index 8902a36a..798b9568 100644 --- a/Tests/GPU/Src/test_initFunc.cu +++ b/Tests/GPU/Src/test_initFunc.cu @@ -1,16 +1,20 @@ #include "gtest/gtest.h" #include "simData.h" #include "initFunc.h" +#include "boundaryConds.h" #include "simulation.h" #include "srmhd.h" #include "twoFluidEMHD.h" +#include "serialEnv.h" #include -TEST(InitialFunc, baseConstructor) +TEST(InitialFunc, BaseConstructor) { - Data data(100, 10, 10, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data data(100, 10, 10, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8, &env); SRMHD model(&data); - Simulation sim(&data); + Periodic bcs(&data); + Simulation sim(&data, &env); InitialFunc init(&data); EXPECT_EQ(data.prims[0], 0); @@ -40,9 +44,11 @@ TEST(InitialFunc, baseConstructor) TEST(InitialFunc, OTVortexSingleFluidFunc) { - Data data(100, 10, 0, 0, 1, 0, 1, -0.1, 0.1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data data(100, 10, 0, 0, 1, 0, 1, -0.1, 0.1, 0.8, &env); SRMHD model(&data); - Simulation sim(&data); + Periodic bcs(&data); + Simulation sim(&data, &env); OTVortexSingleFluid init(&data); EXPECT_NEAR(data.prims[data.id(0, 0, 0, 0)], 0.2210485321, 0.0000000001); @@ -62,24 +68,30 @@ TEST(InitialFunc, OTVortexSingleFluidFunc) TEST(InitialFunc, BrioWuTwoFluidFunc) { // Discontinuity in x direction - Data dx(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data dx(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8, &env); TwoFluidEMHD modelx(&dx); - Simulation simx(&dx); + Periodic bcsx(&dx); + Simulation simx(&dx, &env); BrioWuTwoFluid initx(&dx, 0); // Discontinuity in y direction - Data dy(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env2(0, NULL, 1, 1, 1); + Data dy(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8, &env2); TwoFluidEMHD modely(&dy); - Simulation simy(&dy); + Periodic bcsy(&dy); + Simulation simy(&dy, &env2); BrioWuTwoFluid inity(&dy, 1); // Discontinuity in z direction - Data dz(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env3(0, NULL, 1, 1, 1); + Data dz(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8, &env3); TwoFluidEMHD modelz(&dz); - Simulation simz(&dz); + Periodic bcsz(&dz); + Simulation simz(&dz, &env3); BrioWuTwoFluid initz(&dz, 2); for (int var(0); var < dx.Ncons; var++) { diff --git a/Tests/GPU/Src/test_simulation.cu b/Tests/GPU/Src/test_simulation.cu index 91e0fd1e..4426d146 100644 --- a/Tests/GPU/Src/test_simulation.cu +++ 
b/Tests/GPU/Src/test_simulation.cu @@ -1,5 +1,5 @@ #include "gtest/gtest.h" -#include "saveData.h" +#include "serialSaveData.h" #include "simData.h" #include "simulation.h" #include "initFunc.h" @@ -7,18 +7,28 @@ #include "boundaryConds.h" #include "rkSplit.h" #include "fluxVectorSplitting.h" +#include "serialEnv.h" #include #include -Data data(100, 10, 2, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8); TEST(Simulation, dataInitialisation) { + SerialEnv envNoModel(0, NULL, 1, 1, 1); + Data dataNoModel(100, 10, 2, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8, &envNoModel); + Periodic bcsNoModel(&dataNoModel); + EXPECT_THROW( Simulation sim(&dataNoModel, &envNoModel), std::runtime_error); - EXPECT_THROW( Simulation sim(&data), std::runtime_error); + SerialEnv envNoBcs(0, NULL, 1, 1, 1); + Data dataNoBcs(100, 10, 2, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8, &envNoBcs); + SRMHD modelNoBcs(&dataNoBcs); + EXPECT_THROW( Simulation sim(&dataNoBcs, &envNoBcs), std::runtime_error); + SerialEnv env(0, NULL, 1, 1, 1); + Data data(100, 10, 2, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8, &env); SRMHD model(&data); - Simulation sim(&data); + Periodic bcs(&data); + Simulation sim(&data, &env); // Check standard data EXPECT_EQ(sim.data->nx, 100); @@ -66,14 +76,15 @@ TEST(Simulation, dataInitialisation) //! Check that the fields dont change if the system if homogenous TEST(Simulation, equilibriumSimulation) { - Data data(30, 30, 10, 0, 1, 0, 1, 0, 1, 0.1); + SerialEnv env(0, NULL, 1, 1, 1); + Data data(30, 30, 10, 0, 1, 0, 1, 0, 1, 0.1, &env); SRMHD model(&data); FVS fluxMethod(&data, &model); - Simulation sim(&data); - OTVortexSingleFluid init(&data); Periodic bcs(&data); + Simulation sim(&data, &env); + OTVortexSingleFluid init(&data); RKSplit timeInt(&data, &model, &bcs, &fluxMethod); - SaveData save(&data); + SerialSaveData save(&data, &env); for (int i(0); i < data.Nx; i++) { for (int j(0); j < data.Ny; j++) { @@ -108,4 +119,5 @@ TEST(Simulation, equilibriumSimulation) } } } + } diff --git a/Tests/GPU/Src/test_srmhd.cu b/Tests/GPU/Src/test_srmhd.cu index 75c8eacc..2728b0d9 100644 --- a/Tests/GPU/Src/test_srmhd.cu +++ b/Tests/GPU/Src/test_srmhd.cu @@ -1,9 +1,11 @@ #include "gtest/gtest.h" #include "srmhd.h" +#include "boundaryConds.h" #include "simulation.h" #include "simData.h" #include "initFunc.h" #include "fluxVectorSplitting.h" +#include "serialEnv.h" #include #include #include @@ -12,7 +14,8 @@ TEST(SRMHD, Constructor) { - Data d(100, 10, 0, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(100, 10, 0, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8, &env); SRMHD model(&d); EXPECT_EQ(d.Ncons, 9); EXPECT_EQ(d.Nprims, 8); @@ -26,13 +29,14 @@ TEST(SRMHD, Constructor) TEST(SRMHD, FluxVectorSplittingStationary) { - + double tol(1.0e-15); // Set up - Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0, 0.5, 4, 5.0/3.0, 1000.0, 0.5); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0, &env, 0.5, 4, 5.0/3.0, 1000.0, 0.5); SRMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); - + Periodic bcs(&d); + Simulation sim(&d, &env); // Set state to stationary equilibrium state for (int i(0); i < d.Nx; i++) { for (int j(0); j < d.Ny; j++) { @@ -45,7 +49,6 @@ TEST(SRMHD, FluxVectorSplittingStationary) d.prims[d.id(5, i, j, k)] = 0.0; d.prims[d.id(6, i, j, k)] = 0.0; d.prims[d.id(7, i, j, k)] = 0.0; - d.prims[d.id(8, i, j, k)] = 0.0; } } } @@ -59,7 +62,7 @@ TEST(SRMHD, FluxVectorSplittingStationary) for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - 
EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, tol); } } } @@ -70,7 +73,7 @@ TEST(SRMHD, FluxVectorSplittingStationary) for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, tol); } } } @@ -81,7 +84,7 @@ TEST(SRMHD, FluxVectorSplittingStationary) for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, tol); } } } @@ -95,9 +98,11 @@ TEST(SRMHD, SourceTerm) { // Set up - Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0, 0.5, 4, 5.0/3.0, 1000.0, 0.5); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0, &env, 0.5, 4, 5.0/3.0, 1000.0, 0.5); SRMHD model(&d); - Simulation sim(&d); + Periodic bcs(&d); + Simulation sim(&d, &env); // Set cons to something for (int i(0); i < d.Nx; i++) { @@ -130,12 +135,16 @@ TEST(SRMHD, SourceTerm) TEST(SRMHD, Prims2Cons2Prims) { const double tol = 1.49011612e-8; // Tolerance of rootfinder - Data d(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0); - Data d2(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0); + SerialEnv env(0, NULL, 1, 1, 1); + SerialEnv env2(0, NULL, 1, 1, 1); + Data d(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0, &env); + Data d2(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0, &env2); SRMHD model(&d); SRMHD model2(&d2); - Simulation sim(&d); - Simulation sim2(&d2); + Periodic bcs(&d); + Periodic bcs2(&d2); + Simulation sim(&d, &env); + Simulation sim2(&d2, &env2); OTVortexSingleFluid init(&d); OTVortexSingleFluid init2(&d2); @@ -146,7 +155,6 @@ TEST(SRMHD, Prims2Cons2Prims) model2.getPrimitiveVars(d2.cons, d2.prims, d2.aux); - for (int var(0); var < d.Nprims; var++) { for (int i(0); i < d.Nx; i++) { for (int j(0); j < d.Ny; j++) { @@ -218,9 +226,11 @@ TEST(SRMHD, Prims2Cons2Prims) TEST(SRMHD, PrimsToAll) { // Set up - Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0, &env); SRMHD model(&d); - Simulation sim(&d); + Periodic bcs(&d); + Simulation sim(&d, &env); InitialFunc init(&d); // Set cons and aux vars to rubbish so we know they have changed, and diff --git a/Tests/GPU/Src/test_twoFluidEMHD.cu b/Tests/GPU/Src/test_twoFluidEMHD.cu index 1e162f5b..2ae22746 100644 --- a/Tests/GPU/Src/test_twoFluidEMHD.cu +++ b/Tests/GPU/Src/test_twoFluidEMHD.cu @@ -2,19 +2,21 @@ #include "twoFluidEMHD.h" #include "simulation.h" #include "simData.h" +#include "serialSaveData.h" #include "initFunc.h" #include "rkSplit.h" #include "fluxVectorSplitting.h" +#include "serialEnv.h" #include #include #include - -/* ######################### Test model constructor ########################*/ + /* ######################### Test model constructor ########################*/ TEST(TwoFluidEMHD, Constructor) { - Data d(100, 10, 0, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(100, 10, 0, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8, &env); TwoFluidEMHD model(&d); EXPECT_EQ(d.Ncons, 18); EXPECT_EQ(d.Nprims, 16); @@ -40,37 +42,40 @@ TEST(TwoFluidEMHD, Constructor) TEST(TwoFluidEMHD, FluxFunctionIsConsistentUponRotation) { // Discontinuity in x direction - Data dx(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data dx(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.8, &env); TwoFluidEMHD modelx(&dx); FVS fluxMethodx(&dx, &modelx); - Simulation simx(&dx); - 
BrioWuTwoFluid initx(&dx, 0, 0); Outflow bcsx(&dx); + Simulation simx(&dx, &env); + BrioWuTwoFluid initx(&dx, 0, 0); RKSplit timeIntx(&dx, &modelx, &bcsx, &fluxMethodx); - SaveData save(&dx); + SerialSaveData save(&dx, &env); simx.set(&initx, &modelx, &timeIntx, &bcsx, &fluxMethodx, &save); printf("Stepping x-discontinuity...\n"); simx.updateTime(); // Discontinuity in y direction - Data dy(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env2(0, NULL, 1, 1, 1); + Data dy(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.8, &env2); TwoFluidEMHD modely(&dy); FVS fluxMethody(&dy, &modely); - Simulation simy(&dy); - BrioWuTwoFluid inity(&dy, 1, 0); Outflow bcsy(&dy); + Simulation simy(&dy, &env2); + BrioWuTwoFluid inity(&dy, 1, 0); RKSplit timeInty(&dy, &modely, &bcsy, &fluxMethody); simy.set(&inity, &modely, &timeInty, &bcsy, &fluxMethody, &save); printf("Stepping y-discontinuity...\n"); simy.updateTime(); // Discontinuity in z direction - Data dz(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env3(0, NULL, 1, 1, 1); + Data dz(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.8, &env3); TwoFluidEMHD modelz(&dz); FVS fluxMethodz(&dz, &modelz); - Simulation simz(&dz); - BrioWuTwoFluid initz(&dz, 2, 0); Outflow bcsz(&dz); + Simulation simz(&dz, &env3); + BrioWuTwoFluid initz(&dz, 2, 0); RKSplit timeIntz(&dz, &modelz, &bcsz, &fluxMethodz); simz.set(&initz, &modelz, &timeIntz, &bcsz, &fluxMethodz, &save); printf("Stepping z-discontinuity...\n"); @@ -80,6 +85,7 @@ TEST(TwoFluidEMHD, FluxFunctionIsConsistentUponRotation) for (int i(dx.Ng); i < dx.Nx-dx.Ng; i++) { for (int j(dy.Ng); j < dy.Ny-dy.Ng; j++) { for (int k(dz.Ng); k < dz.Nz-dz.Ng; k++) { + // Swap x and y EXPECT_NEAR(dx.cons[dx.id(0, i, j, k)], dy.cons[dy.id(0, j, i, k)], 1e-15); EXPECT_NEAR(dx.cons[dx.id(1, i, j, k)], dy.cons[dy.id(2, j, i, k)], 1e-15); @@ -101,7 +107,6 @@ TEST(TwoFluidEMHD, FluxFunctionIsConsistentUponRotation) EXPECT_NEAR(dx.cons[dx.id(17, i, j, k)], dy.cons[dy.id(17, j, i, k)], 1e-15); - // Swap x and z EXPECT_NEAR(dx.cons[dx.id(0, i, j, k)], dz.cons[dz.id(0, k, j, i)], 1e-15); EXPECT_NEAR(dx.cons[dx.id(1, i, j, k)], dz.cons[dz.id(3, k, j, i)], 1e-15); @@ -122,6 +127,7 @@ TEST(TwoFluidEMHD, FluxFunctionIsConsistentUponRotation) EXPECT_NEAR(dx.cons[dx.id(16, i, j, k)], dz.cons[dz.id(16, k, j, i)], 1e-15); EXPECT_NEAR(dx.cons[dx.id(17, i, j, k)], dz.cons[dz.id(17, k, j, i)], 1e-15); + // Swap y and z EXPECT_NEAR(dy.cons[dy.id(0, i, j, k)], dz.cons[dz.id(0, i, k, j)], 1e-15); EXPECT_NEAR(dy.cons[dy.id(1, i, j, k)], dz.cons[dz.id(1, i, k, j)], 1e-15); @@ -150,14 +156,18 @@ TEST(TwoFluidEMHD, FluxFunctionIsConsistentUponRotation) TEST(TwoFluidEMHD, Prims2Cons2Prims) { const double tol = 1.49011612e-8; // Tolerance of rootfinder - Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8, &env); TwoFluidEMHD model(&d); - Simulation sim(&d); + Periodic bcs(&d); + Simulation sim(&d, &env); BrioWuTwoFluid init(&d, 0, 0); - Data d2(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env2(0, NULL, 1, 1, 1); + Data d2(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8, &env2); TwoFluidEMHD model2(&d2); - Simulation sim2(&d2); + Periodic bcs2(&d2); + Simulation sim2(&d2, &env2); BrioWuTwoFluid init2(&d2, 0, 0); model2.primsToAll(d2.cons, d2.prims, d2.aux); @@ -231,10 +241,12 @@ TEST(TwoFluidEMHD, FluxVectorSplittingStationary) { // Set up - Data d(6, 6, 6, 0, 1, 0, 1, 0, 1, 1.0, 0.5, 4, 5.0/3.0, 1000.0, 0.5); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(6, 6, 6, 0, 1, 0, 1, 0, 1, 1.0, &env, 0.5, 4, 5.0/3.0, 1000.0, 0.5); 
TwoFluidEMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); + Periodic bcs(&d); + Simulation sim(&d, &env); // Set state to stationary equilibrium state for (int i(0); i < d.Nx; i++) { @@ -252,6 +264,7 @@ TEST(TwoFluidEMHD, FluxVectorSplittingStationary) // System is stationary, there should be zero flux // x-direction + model.fluxVector(d.cons, d.prims, d.aux, d.f, 0); fluxMethod.fluxReconstruction(d.cons, d.prims, d.aux, d.f, d.fnet, 0); @@ -259,31 +272,34 @@ TEST(TwoFluidEMHD, FluxVectorSplittingStationary) for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, 1e-15); } } } } // y-direction + model.fluxVector(d.cons, d.prims, d.aux, d.f, 1); fluxMethod.fluxReconstruction(d.cons, d.prims, d.aux, d.f, d.fnet, 1); for (int i(d.Ng); i < d.Nx-d.Ng; i++) { for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, 1e-15); } } } } // z-direction + model.fluxVector(d.cons, d.prims, d.aux, d.f, 2); fluxMethod.fluxReconstruction(d.cons, d.prims, d.aux, d.f, d.fnet, 2); for (int i(d.Ng); i < d.Nx-d.Ng; i++) { for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, 1e-15); } } } } + } // End test From ccf97040b374c8a4c1a3026a3809d53f2d07b3d5 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 3 Nov 2020 12:06:04 +0000 Subject: [PATCH 31/56] finished converting serial GPU tests to use platformEnv object --- Tests/GPU/Makefile | 12 ++++----- Tests/GPU/Src/test_fvs.cu | 48 ++++++++++++++++++++---------------- Tests/GPU/Src/test_imex.cu | 29 ++++++++++++---------- Tests/GPU/Src/test_rk2.cu | 11 +++++---- Tests/GPU/Src/test_srrmhd.cu | 31 ++++++++++++++--------- 5 files changed, 75 insertions(+), 56 deletions(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 9dbf306b..14ad2d73 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -61,11 +61,11 @@ TESTS = test_simulation \ test_cminpack \ test_boundaryConds \ test_twoFluidEMHD \ -# test_srrmhd \ -# test_fvs \ -# test_id \ -# test_rk2 \ -# test_imex + test_srrmhd \ + test_fvs \ + test_id \ + test_rk2 \ + test_imex PARALLEL_TESTS = test_parallel_srmhd \ @@ -227,7 +227,7 @@ test_srrmhd.o : $(TEST_DIR)/test_srrmhd.cu \ $(INC_DIR)/srrmhd.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_srrmhd.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_srrmhd : srrmhd.o C2PArgs.o test_srrmhd.o simData.o fluxVectorSplitting.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a +test_srrmhd : srrmhd.o boundaryConds.o C2PArgs.o test_srrmhd.o simData.o fluxVectorSplitting.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ # TwoFluidEMHD Model diff --git a/Tests/GPU/Src/test_fvs.cu b/Tests/GPU/Src/test_fvs.cu index 7a219be0..5bf19139 100644 --- a/Tests/GPU/Src/test_fvs.cu +++ b/Tests/GPU/Src/test_fvs.cu @@ -3,9 +3,11 @@ #include "twoFluidEMHD.h" #include "simulation.h" #include "simData.h" +#include "serialSaveData.h" #include "initFunc.h" #include "rkSplit.h" #include 
"fluxVectorSplitting.h" +#include "serialEnv.h" #include #include #include @@ -16,14 +18,15 @@ TEST(FVS, SameFnetAsSerial) as the serial version. */ { - Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); - OTVortexSingleFluid init(&d); Outflow bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); RKSplit timeInt(&d, &model, &bcs, &fluxMethod); - SaveData save(&d); + SerialSaveData save(&d, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); fluxMethod.F(d.cons, d.prims, d.aux, d.f, d.fnet); @@ -51,28 +54,29 @@ TEST(FVS, SameFnetAsSerial) TEST(FVS, SameXReconstructionAsSerial) { - Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); - OTVortexSingleFluid init(&d); Outflow bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); RKSplit timeInt(&d, &model, &bcs, &fluxMethod); - SaveData save(&d); + SerialSaveData save(&d, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); - + model.fluxVector(d.cons, d.prims, d.aux, d.f, 0); fluxMethod.fluxReconstruction(d.cons, d.prims, d.aux, d.f, d.fnet, 0); for (int var(0); var < d.Ncons; var++) { - for (int i(0); i < d.Nx; i++) + for (int i(0); i < d.Nx-1; i++) { for (int j(0); j < d.Ny; j++) { for (int k(0); k < d.Nz; k++) { - d.cons[d.id(var, i, j, k)] = d.fnet[d.id(var, i, j, k)]; + d.cons[d.id(var, i, j, k)] = d.fnet[d.id(var, i+1, j, k)]/d.dx - d.fnet[d.id(var, i, j, k)]/d.dx; } } } @@ -88,14 +92,15 @@ TEST(FVS, SameXReconstructionAsSerial) TEST(FVS, SameYReconstructionAsSerial) { - Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); - OTVortexSingleFluid init(&d); Outflow bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); RKSplit timeInt(&d, &model, &bcs, &fluxMethod); - SaveData save(&d); + SerialSaveData save(&d, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); model.fluxVector(d.cons, d.prims, d.aux, d.f, 1); @@ -109,7 +114,7 @@ TEST(FVS, SameYReconstructionAsSerial) { for (int k(0); k < d.Nz; k++) { - d.cons[d.id(var, i, j, k)] = d.fnet[d.id(var, i, j, k)]; + d.cons[d.id(var, i, j, k)] = d.fnet[d.id(var, i, j+1, k)]/d.dy - d.fnet[d.id(var, i, j, k)]/d.dy; } } } @@ -125,14 +130,15 @@ TEST(FVS, SameYReconstructionAsSerial) TEST(FVS, SameZReconstructionAsSerial) { - Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); - OTVortexSingleFluid init(&d); Outflow bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); RKSplit timeInt(&d, &model, &bcs, &fluxMethod); - SaveData save(&d); + SerialSaveData save(&d, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); model.fluxVector(d.cons, d.prims, d.aux, d.f, 2); @@ -146,7 +152,7 @@ TEST(FVS, SameZReconstructionAsSerial) { for (int k(0); k < d.Nz; k++) { - d.cons[d.id(var, i, j, k)] = d.fnet[d.id(var, i, j, k)]; + d.cons[d.id(var, i, j, k)] = d.fnet[d.id(var, i, j, k+1)]/d.dz - d.fnet[d.id(var, i, j, k)]/d.dz; } } } diff --git a/Tests/GPU/Src/test_imex.cu b/Tests/GPU/Src/test_imex.cu index 830da06e..5bb776a0 100644 --- 
a/Tests/GPU/Src/test_imex.cu +++ b/Tests/GPU/Src/test_imex.cu @@ -8,7 +8,7 @@ #include "SSP3.h" #include "saveData.h" #include "fluxVectorSplitting.h" -#include "saveData.h" +#include "serialSaveData.h" #include #include @@ -21,17 +21,18 @@ TEST(SSP2, IMEX2ConsistentWithSerialVersion) */ double sigma(0); - Data data(64, 16, 0, 0, 1, 0, 1, 0, 1, 0.05, + SerialEnv env(0, NULL, 1, 1, 1); + Data data(64, 16, 0, 0, 1, 0, 1, 0, 1, 0.05, &env, 0.5, 4, 4.0/3.0, sigma); // Choose particulars of simulation SRRMHD model(&data); FVS fluxMethod(&data, &model); - Simulation sim(&data); - BrioWuSingleFluid init(&data); Outflow bcs(&data); + Simulation sim(&data, &env); + BrioWuSingleFluid init(&data); SSP2 timeInt(&data, &model, &bcs, &fluxMethod); - SaveData save(&data); + SerialSaveData save(&data, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); sim.evolve(); @@ -54,17 +55,18 @@ TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) */ double sigma(0); - Data data(64, 16, 0, -0.5, 0.5, -1, 1, 0, 1, 0.05, + SerialEnv env(0, NULL, 1, 1, 1); + Data data(64, 16, 0, -0.5, 0.5, -1, 1, 0, 1, 0.05, &env, 0.5, 4, 4.0/3.0, sigma); // Choose particulars of simulation SRRMHD model(&data); FVS fluxMethod(&data, &model); - Simulation sim(&data); - KHInstabilitySingleFluid init(&data); Flow bcs(&data); + Simulation sim(&data, &env); + KHInstabilitySingleFluid init(&data); SSP2 timeInt(&data, &model, &bcs, &fluxMethod); - SaveData save(&data); + SerialSaveData save(&data, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); sim.evolve(); @@ -129,17 +131,18 @@ TEST(SSP3, IMEX3ConsistentWithSerialVersion) */ double sigma(0); - Data data(64, 16, 0, 0, 1, 0, 1, 0, 1, 0.05, + SerialEnv env(0, NULL, 1, 1, 1); + Data data(64, 16, 0, 0, 1, 0, 1, 0, 1, 0.05, &env, 0.5, 4, 4.0/3.0, sigma); // Choose particulars of simulation SRRMHD model(&data); FVS fluxMethod(&data, &model); - Simulation sim(&data); - BrioWuSingleFluid init(&data); Outflow bcs(&data); + Simulation sim(&data, &env); + BrioWuSingleFluid init(&data); SSP3 timeInt(&data, &model, &bcs, &fluxMethod); - SaveData save(&data); + SerialSaveData save(&data, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); sim.evolve(); diff --git a/Tests/GPU/Src/test_rk2.cu b/Tests/GPU/Src/test_rk2.cu index 183ff724..f00407fc 100644 --- a/Tests/GPU/Src/test_rk2.cu +++ b/Tests/GPU/Src/test_rk2.cu @@ -1,6 +1,7 @@ #include "gtest/gtest.h" #include "srrmhd.h" #include "simulation.h" +#include "serialSaveData.h" #include "simData.h" #include "initFunc.h" #include "RK2.h" @@ -15,15 +16,15 @@ TEST(RK2, RK2OutputConsistentWithSerial) The following was used to gather data to compare the parallel version with. 
No tests are run in the serial version of this test */ - - Data d(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.004); + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.004, &env); SRRMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); - OTVortexSingleFluid init(&d); Outflow bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); RK2 timeInt(&d, &model, &bcs, &fluxMethod); - SaveData save(&d); + SerialSaveData save(&d, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); sim.evolve(); diff --git a/Tests/GPU/Src/test_srrmhd.cu b/Tests/GPU/Src/test_srrmhd.cu index c155f1c6..b220004d 100644 --- a/Tests/GPU/Src/test_srrmhd.cu +++ b/Tests/GPU/Src/test_srrmhd.cu @@ -2,8 +2,10 @@ #include "srrmhd.h" #include "simulation.h" #include "simData.h" +#include "boundaryConds.h" #include "initFunc.h" #include "fluxVectorSplitting.h" +#include "serialEnv.h" #include #include #include @@ -13,7 +15,8 @@ TEST(SRRMHD, Constructor) { - Data d(100, 10, 0, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(100, 10, 0, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8, &env); SRRMHD model(&d); EXPECT_EQ(d.Ncons, 14); EXPECT_EQ(d.Nprims, 11); @@ -27,12 +30,14 @@ TEST(SRRMHD, Constructor) TEST(SRRMHD, FluxVectorSplittingStationary) { - + double tol(1.0e-15); // Set up - Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0, 0.5, 4, 5.0/3.0, 1000.0, 0.5); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0, &env, 0.5, 4, 5.0/3.0, 1000.0, 0.5); SRRMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); + Periodic bcs(&d); + Simulation sim(&d, &env); // Set state to stationary equilibrium state for (int i(0); i < d.Nx; i++) { @@ -62,7 +67,7 @@ TEST(SRRMHD, FluxVectorSplittingStationary) for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, tol); } } } @@ -73,7 +78,7 @@ TEST(SRRMHD, FluxVectorSplittingStationary) for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, tol); } } } @@ -84,7 +89,7 @@ TEST(SRRMHD, FluxVectorSplittingStationary) for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, tol); } } } @@ -97,12 +102,16 @@ TEST(SRRMHD, FluxVectorSplittingStationary) TEST(SRRMHD, Prims2Cons2Prims) { const double tol = 1.49011612e-8; // Tolerance of rootfinder - Data d(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0); - Data d2(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0); + SerialEnv env(0, NULL, 1, 1, 1); + SerialEnv env2(0, NULL, 1, 1, 1); + Data d(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0, &env); + Data d2(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0, &env2); SRRMHD model(&d); SRRMHD model2(&d2); - Simulation sim(&d); - Simulation sim2(&d2); + Periodic bcs(&d); + Periodic bcs2(&d2); + Simulation sim(&d, &env); + Simulation sim2(&d2, &env2); OTVortexSingleFluid init(&d); OTVortexSingleFluid init2(&d2); From 7aecf2b510469baff7eda3dfb382141460f74a71 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 17 Nov 2020 12:20:28 +0000 Subject: [PATCH 32/56] adding tests to compare gpu with and without MPI --- Tests/GPU/Makefile | 37 ++++++++++++++++++++--- Tests/GPU/Src/compareParallelAndSerial.py | 6 ++-- 
makePaths.sh | 6 ++++ 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 14ad2d73..26eecb7c 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -17,7 +17,6 @@ # Compiler CC = nvcc -MPICC = mpic++ #use `mpic++ -show` to find library and include flags MPI_FLAGS = -I/local/software/mpich/3.2.1/gcc/include -L/local/software/mpich/3.2.1/gcc/lib -lmpi -lmpicxx @@ -68,7 +67,7 @@ TESTS = test_simulation \ test_imex -PARALLEL_TESTS = test_parallel_srmhd \ +PARALLEL_TESTS = test_parallel_rk2 \ # All Google Test headers. Usually you shouldn't change this # definition. @@ -80,9 +79,14 @@ RTFIND = buildRootfinder # House-keeping build targets. -test : $(RTFIND) $(TESTS) +test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) # Run all tests @$(foreach exe, $(TESTS), ./$(exe);) + # Run all parallel tests + $(foreach exe, $(PARALLEL_TESTS), mpirun -np 4 ./$(exe);) + # Run tests that compare outputs of TestData/GPU and TestData/MPIGPU + py.test -v Src/compareParallelAndSerial.py + all : $(RTFIND) $(TESTS) @@ -189,7 +193,7 @@ serialSaveData.o : $(MODULE_DIR)/serialSaveData.cu $(INC_DIR)/serialSaveData.h @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/serialSaveData.cu -I$(INC_DIR) parallelSaveData.o : $(MODULE_DIR)/parallelSaveData.cu $(INC_DIR)/parallelSaveData.h - @$(MPICC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/parallelSaveData.cu -I$(INC_DIR) + @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(MODULE_DIR)/parallelSaveData.cu -I$(INC_DIR) # Platform env serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h @@ -300,3 +304,28 @@ test_imex.o : $(TEST_DIR)/test_imex.cu test_imex : IMEX2Args.o C2PArgs.o SSP2.o IMEX3Args.o SSP3.o simData.o srrmhd.o srmhd.o C2PArgs.o twoFluidEMHD.o initFunc.o boundaryConds.o serialSaveData.o serialEnv.o simulation.o test_imex.o $(RTFIND_OBJS) fluxVectorSplitting.o weno.o gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ + +# main func used for testing parallel tests. 
Serial tests can use the default gtest_main +main.o : $(TEST_DIR)/main.cu $(INC_DIR)/parallelEnv.h + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(TEST_DIR)/main.cu -I$(INC_DIR) + +parallelEnv.o : $(MODULE_DIR)/parallelEnv.cu $(INC_DIR)/parallelEnv.h + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(MODULE_DIR)/parallelEnv.cu -I$(INC_DIR) + +parallelBoundaryConds.o : $(MODULE_DIR)/parallelBoundaryConds.cu $(INC_DIR)/parallelBoundaryConds.h + @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(MODULE_DIR)/parallelBoundaryConds.cu -I$(INC_DIR) + + +test_parallel_rk2.o : $(TEST_DIR)/test_parallel_rk2.cu $(INC_DIR)/RK2.h \ + $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(TEST_DIR)/test_parallel_rk2.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) + +test_parallel_rk2 : main.o C2PArgs.o test_parallel_rk2.o srmhd.o srrmhd.o fluxVectorSplitting.o parallelBoundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o parallelEnv.o parallelSaveData.o $(RTFIND_OBJS) gtest.a + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -lpthread $^ -o $@ + +test_parallel_rkSplit.o : $(TEST_DIR)/test_parallel_rkSplit.cu $(INC_DIR)/rkSplit.h \ + $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(TEST_DIR)/test_parallel_rkSplit.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) + +test_parallel_rkSplit : main.o test_parallel_rkSplit.o weno.o wenoUpwinds.o srmhd.o srrmhd.o fluxVectorSplitting.o parallelBoundaryConds.o simData.o rkSplit.o RK2.o initFunc.o simulation.o weno.o parallelEnv.o parallelSaveData.o $(RTFIND_OBJS) gtest.a + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -lpthread $^ -o $@ diff --git a/Tests/GPU/Src/compareParallelAndSerial.py b/Tests/GPU/Src/compareParallelAndSerial.py index 808c5340..1928d410 100644 --- a/Tests/GPU/Src/compareParallelAndSerial.py +++ b/Tests/GPU/Src/compareParallelAndSerial.py @@ -61,7 +61,7 @@ def getFiles(self): # For each file, determine the appendix and use interactivePlot to # gather the data - for i, serfile in enumerate(glob(fromSpyder+"../TestData/GPU/Conserved/*")): + for i, serfile in enumerate(glob(fromSpyder+"../TestData/MPIGPU/Conserved/*")): ext = serfile.find('.dat') app = serfile.find('Conserved/cons') + len('Conserved.cons') appendix = serfile[app:ext] @@ -69,8 +69,8 @@ def getFiles(self): print("Fetching {} data...".format(appendix)) with HidePrints(): - self.Serials.append(Plot(fromSpyder+"../TestData/Serial/", appendix)) - self.Parallels.append(Plot(fromSpyder+"../TestData/GPU/", appendix)) + self.Serials.append(Plot(fromSpyder+"../TestData/GPU/", appendix)) + self.Parallels.append(Plot(fromSpyder+"../TestData/MPIGPU/", appendix)) self.Ncons.append(self.Serials[i].c['Ncons']) self.Nprims.append(self.Serials[i].c['Nprims']) diff --git a/makePaths.sh b/makePaths.sh index 91508bc3..08d50ad7 100644 --- a/makePaths.sh +++ b/makePaths.sh @@ -48,6 +48,12 @@ mkdir Tests/TestData/GPU/Conserved mkdir Tests/TestData/GPU/Constants mkdir Tests/TestData/GPU/Primitive +mkdir Tests/TestData/MPIGPU +mkdir Tests/TestData/MPIGPU/Auxiliary +mkdir Tests/TestData/MPIGPU/Conserved +mkdir Tests/TestData/MPIGPU/Constants +mkdir Tests/TestData/MPIGPU/Primitive + mkdir Tests/TestData mkdir Tests/TestData/CPU mkdir Tests/TestData/CPU/Auxiliary From 709359ebdd454cea07da118aca87d54762ae4c32 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 17 Nov 2020 14:24:07 +0000 
Subject: [PATCH 33/56] added instructions for running gpu tests on iridis 5 --- Scrips/IridisEnv/requirements.txt | 5 ++++ Scrips/IridisEnv/tests_instructions.md | 33 ++++++++++++++++++++++++++ Scrips/IridisEnv/tests_job.sh | 22 +++++++++++++++++ 3 files changed, 60 insertions(+) create mode 100644 Scrips/IridisEnv/requirements.txt create mode 100644 Scrips/IridisEnv/tests_instructions.md create mode 100644 Scrips/IridisEnv/tests_job.sh diff --git a/Scrips/IridisEnv/requirements.txt b/Scrips/IridisEnv/requirements.txt new file mode 100644 index 00000000..af599da1 --- /dev/null +++ b/Scrips/IridisEnv/requirements.txt @@ -0,0 +1,5 @@ +numpy +matplotlib +scipy +pytest +h5py diff --git a/Scrips/IridisEnv/tests_instructions.md b/Scrips/IridisEnv/tests_instructions.md new file mode 100644 index 00000000..9c215425 --- /dev/null +++ b/Scrips/IridisEnv/tests_instructions.md @@ -0,0 +1,33 @@ +## Tests Instructions + +These are instructions to run GPU unit tests as a batch job on Iridis 5 + +## Setting up python env + +In the root METHOD folder, create a python venv using + +``` +module purge +module load gcc/6.4.0 +module load python/3.6.4 +module load hdf5/1.10.2/gcc/parallel +module load cuda/8.0 +python3 -m venv venv +source venv/bin/activate +``` + +Then install python modules using + +``` +python -m pip install -r Scripts/IridisEnv/requirements.txt +``` + +## Runing unit tests as a batch job + +From `Tests/GPU` run `sbatch ../../Scripts/IridisEnv/tests_job.sh` + +This will run all GPU tests + + + + diff --git a/Scrips/IridisEnv/tests_job.sh b/Scrips/IridisEnv/tests_job.sh new file mode 100644 index 00000000..165c1917 --- /dev/null +++ b/Scrips/IridisEnv/tests_job.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +#SBATCH --ntasks-per-node=2 # Tasks per node +#SBATCH --nodes=1 # Number of nodes requested +#SBATCH --partition=gtx1080 +#SBATCH --time=00:10:00 + +module purge +#module load gcc/6.4.0 +module load python/3.6.4 +module load hdf5/1.10.2/gcc/parallel +module load cuda/8.0 + +module list + +source ../../venv/bin/activate + +make clean +make test + + + From bed02a70f415501286dd7158458aa67a7ca0b028 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Mon, 23 Nov 2020 13:49:48 +0000 Subject: [PATCH 34/56] splitting tests into non-python and python to make setting up module env on Iridis easier --- Scrips/IridisEnv/tests_job.sh | 7 ++++++- Tests/GPU/Makefile | 9 ++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Scrips/IridisEnv/tests_job.sh b/Scrips/IridisEnv/tests_job.sh index 165c1917..2e701818 100644 --- a/Scrips/IridisEnv/tests_job.sh +++ b/Scrips/IridisEnv/tests_job.sh @@ -16,7 +16,12 @@ module list source ../../venv/bin/activate make clean -make test +make gpu_test + +# required for GLIBCXX_3.4.21 module to be available for python +module load gcc/6.4.0 + +make compare_mpi_test diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 26eecb7c..4427cf5e 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -79,14 +79,17 @@ RTFIND = buildRootfinder # House-keeping build targets. 
-test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) +compare_mpi_test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) gpu_test + # Run tests that compare outputs of TestData/GPU and TestData/MPIGPU + py.test -v Src/compareParallelAndSerial.py + +gpu_test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) # Run all tests @$(foreach exe, $(TESTS), ./$(exe);) # Run all parallel tests $(foreach exe, $(PARALLEL_TESTS), mpirun -np 4 ./$(exe);) - # Run tests that compare outputs of TestData/GPU and TestData/MPIGPU - py.test -v Src/compareParallelAndSerial.py +test : gpu_test compare_mpi_test all : $(RTFIND) $(TESTS) From 7a0cfb3f8ce2c8b81516704d3807bae5b380f336 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Mon, 30 Nov 2020 12:25:48 +0000 Subject: [PATCH 35/56] updating BrioWuSingleFluid for MPI; adding parallel tests --- Project/GPU/Src/initFunc.cu | 87 ++++++-- Tests/GPU/Makefile | 2 +- Tests/GPU/Src/compareParallelAndSerial.py | 257 +++++----------------- Tests/GPU/Src/test_rk2.cu | 164 +++++++++++++- 4 files changed, 282 insertions(+), 228 deletions(-) diff --git a/Project/GPU/Src/initFunc.cu b/Project/GPU/Src/initFunc.cu index c9a99601..733ccdd3 100644 --- a/Project/GPU/Src/initFunc.cu +++ b/Project/GPU/Src/initFunc.cu @@ -351,9 +351,6 @@ BrioWuSingleFluid::BrioWuSingleFluid(Data * data, int dir) : InitialFunc(data) if (d->nx%2 || d->ny%2 || d->nz%2) throw std::invalid_argument("Please ensure even number of cells in each direction for Brio Wu initial data.\n"); - int endX(d->Nx - 1); - int endY(d->Ny - 1); - int endZ(d->Nz - 1); int facX(1); int facY(1); int facZ(1); @@ -382,25 +379,79 @@ BrioWuSingleFluid::BrioWuSingleFluid(Data * data, int dir) : InitialFunc(data) lBx = 0.5; rBx = -0.5; } - - for (int i(0); i < d->Nx/facX; i++) { - for (int j(0); j < d->Ny/facY; j++) { - for (int k(0); k < d->Nz/facZ; k++) { + double xLower((d->xmax - d->xmin)/facX + d->xmin); + double yLower((d->ymax - d->ymin)/facY + d->ymin); + double zLower((d->zmax - d->zmin)/facZ + d->zmin); + double xUpper(d->xmax - (d->xmax - d->xmin)/facX); + double yUpper(d->ymax - (d->ymax - d->ymin)/facY); + double zUpper(d->zmax - (d->zmax - d->zmin)/facZ); + + if (d->dims==3){ + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + // Left side + if ((d->x[i] < xLower) && (d->y[j] < yLower) && (d->z[k] < zLower)){ + d->prims[ID(0, i, j, k)] = 1; + d->prims[ID(4, i, j, k)] = 1; + d->prims[ID(5, i, j, k)] = lBx; + d->prims[ID(6, i, j, k)] = lBy; + d->prims[ID(7, i, j, k)] = lBz; + } + + // Right side + if ((d->x[i] > xUpper) && (d->y[j] > yUpper) && (d->z[k] > zUpper)){ + d->prims[ID(0, i, j, k)] = 0.125; + d->prims[ID(4, i, j, k)] = 0.1; + d->prims[ID(5, i, j, k)] = rBx; + d->prims[ID(6, i, j, k)] = rBy; + d->prims[ID(7, i, j, k)] = rBz; + } + } + } + } + } else if (d->dims==2) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + // Left side + if ((d->x[i] < xLower) && (d->y[j] < yLower)){ + d->prims[ID(0, i, j, 0)] = 1; + d->prims[ID(4, i, j, 0)] = 1; + d->prims[ID(5, i, j, 0)] = lBx; + d->prims[ID(6, i, j, 0)] = lBy; + d->prims[ID(7, i, j, 0)] = lBz; + } + + // Right side + if ((d->x[i] > xUpper) && (d->y[j] > yUpper)){ + d->prims[ID(0, i, j, 0)] = 0.125; + d->prims[ID(4, i, j, 0)] = 0.1; + d->prims[ID(5, i, j, 0)] = rBx; + d->prims[ID(6, i, j, 0)] = rBy; + d->prims[ID(7, i, j, 0)] = rBz; + } + } + } + } else { + for (int i(0); i < d->Nx; i++) { // Left side - d->prims[ID(0, i, j, k)] = 1; - d->prims[ID(4, i, j, k)] = 1; - d->prims[ID(5, i, j, k)] = lBx; - d->prims[ID(6, i, j, 
k)] = lBy; - d->prims[ID(7, i, j, k)] = lBz; + if (d->x[i] < xLower){ + d->prims[ID(0, i, 0, 0)] = 1; + d->prims[ID(4, i, 0, 0)] = 1; + d->prims[ID(5, i, 0, 0)] = lBx; + d->prims[ID(6, i, 0, 0)] = lBy; + d->prims[ID(7, i, 0, 0)] = lBz; + } // Right side - d->prims[ID(0, endX - i, endY - j, endZ - k)] = 0.125; - d->prims[ID(4, endX - i, endY - j, endZ - k)] = 0.1; - d->prims[ID(5, endX - i, endY - j, endZ - k)] = rBx; - d->prims[ID(6, endX - i, endY - j, endZ - k)] = rBy; - d->prims[ID(7, endX - i, endY - j, endZ - k)] = rBz; + if (d->x[i] > xUpper){ + d->prims[ID(0, i, 0, 0)] = 0.125; + d->prims[ID(4, i, 0, 0)] = 0.1; + d->prims[ID(5, i, 0, 0)] = rBx; + d->prims[ID(6, i, 0, 0)] = rBy; + d->prims[ID(7, i, 0, 0)] = rBz; + } } - } } } diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 4427cf5e..303dd2a4 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -169,7 +169,7 @@ test_rk2.o : $(TEST_DIR)/test_rk2.cu $(INC_DIR)/RK2.h \ $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_rk2.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_rk2 : test_rk2.o C2PArgs.o srrmhd.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a +test_rk2 : test_rk2.o C2PArgs.o srmhd.o srrmhd.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ # Explicit RK split integrator diff --git a/Tests/GPU/Src/compareParallelAndSerial.py b/Tests/GPU/Src/compareParallelAndSerial.py index 1928d410..017cf8b8 100644 --- a/Tests/GPU/Src/compareParallelAndSerial.py +++ b/Tests/GPU/Src/compareParallelAndSerial.py @@ -85,224 +85,71 @@ def getFiles(self): self.ybounds.append((0, self.ny[-1])) self.zbounds.append((0, self.nz[-1])) - - # Instantiate the compare class so we have the data Compare = CompareParallelAndSerial() # Test functions -# IMEX3 -def test_ConsEquivalentForSSP3(): - Obj = Compare.Appendicies.index('SSP3') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - print(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]))) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - -def test_PrimsEquivalentForSSP3(): - Obj = Compare.Appendicies.index('SSP3') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Nprims[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) - -def test_AuxEquivalentForSSP3(): - Obj = Compare.Appendicies.index('SSP3') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Naux[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k 
in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - -# IMEX2 -def test_ConsEquivalentForSSP2(): - Obj = Compare.Appendicies.index('SSP2') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - -def test_PrimsEquivalentForSSP2(): - Obj = Compare.Appendicies.index('SSP2') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Nprims[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) - -def test_AuxEquivalentForSSP2(): - Obj = Compare.Appendicies.index('SSP2') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Naux[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - -def test_ConsEquivalentForSSP2FlowKHSingleFluid(): - Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - -def test_PrimsEquivalentForSSP2FlowKHSingleFluid(): - Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Nprims[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) - -def test_AuxEquivalentForSSP2FlowKHSingleFluid(): - Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Naux[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - 
assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - - -# RK2 -def test_ConsEquivalentForRK2(): - Obj = Compare.Appendicies.index('RK2') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): +def _compareStateVarArrays(serialArray, parallelArray, Obj, nVars): + for Nv in range(nVars): for i in range(*Compare.xbounds[Obj]): for j in range(*Compare.ybounds[Obj]): for k in range(*Compare.zbounds[Obj]): try: - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) + assert(abs((serialArray[Nv, i, j, k] - parallelArray[Nv, i, j, k]) < TOL)) except AssertionError: print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) + assert(abs((serialArray[Nv, i, j, k] - parallelArray[Nv, i, j, k]) < TOL)) + + +# RK2 + +## BrioWuSingleFluid -def test_PrimsEquivalentForRK2(): - Obj = Compare.Appendicies.index('RK2') +def test_ConsEquivalentForRK2SrmhdOutflowBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdOutflowBrioWuSF') Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Nprims[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) + _compareStateVarArrays(Serial.cons, Parallel.cons, Obj, Compare.Ncons[Obj]) -def test_AuxEquivalentForRK2(): - Obj = Compare.Appendicies.index('RK2') +def test_PrimsEquivalentForRK2SrmhdOutflowBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdOutflowBrioWuSF') Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Naux[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - -# FVS -def test_FnetEquivalentForFVS(): - Obj = Compare.Appendicies.index('FVSFnet') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - -def test_FxEquivalentForFVS(): - Obj = Compare.Appendicies.index('FVSFx') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, 
{}, {}, {})".format(Nv, i, j, k) + " with diff of {}".format(Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k])) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - -def test_FyEquivalentForFVS(): - Obj = Compare.Appendicies.index('FVSFy') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k) + " with diff of {}".format(Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k])) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - -def test_FzEquivalentForFVS(): - Obj = Compare.Appendicies.index('FVSFz') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k) + " with diff of {}".format(Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k])) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) + _compareStateVarArrays(Serial.prims, Parallel.prims, Obj, Compare.Nprims[Obj]) + +def test_AuxEquivalentForRK2SrmhdOutflowBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdOutflowBrioWuSF') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + _compareStateVarArrays(Serial.aux, Parallel.aux, Obj, Compare.Naux[Obj]) + +def test_ConsEquivalentForRK2SrmhdPeriodicBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdPeriodicBrioWuSF') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + _compareStateVarArrays(Serial.cons, Parallel.cons, Obj, Compare.Ncons[Obj]) + +def test_PrimsEquivalentForRK2SrmhdPeriodicBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdPeriodicBrioWuSF') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + _compareStateVarArrays(Serial.prims, Parallel.prims, Obj, Compare.Nprims[Obj]) + +def test_AuxEquivalentForRK2SrmhdPeriodicBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdPeriodicBrioWuSF') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + _compareStateVarArrays(Serial.aux, Parallel.aux, Obj, Compare.Naux[Obj]) + +def test_ConsEquivalentForRK2SrmhdFlowBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdFlowBrioWuSF') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + _compareStateVarArrays(Serial.cons, Parallel.cons, Obj, Compare.Ncons[Obj]) + +def test_PrimsEquivalentForRK2SrmhdFlowBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdFlowBrioWuSF') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + _compareStateVarArrays(Serial.prims, Parallel.prims, Obj, Compare.Nprims[Obj]) + +def test_AuxEquivalentForRK2SrmhdFlowBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdFlowBrioWuSF') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + _compareStateVarArrays(Serial.aux, Parallel.aux, Obj, Compare.Naux[Obj]) + + diff --git a/Tests/GPU/Src/test_rk2.cu b/Tests/GPU/Src/test_rk2.cu index f00407fc..1c9059f4 100644 --- a/Tests/GPU/Src/test_rk2.cu +++ b/Tests/GPU/Src/test_rk2.cu @@ 
-1,5 +1,6 @@ #include "gtest/gtest.h" #include "srrmhd.h" +#include "srmhd.h" #include "simulation.h" #include "serialSaveData.h" #include "simData.h" @@ -9,16 +10,67 @@ #include -TEST(RK2, RK2OutputConsistentWithSerial) +/* + Assumptions: + RKRandomInstabilitySingleFluid is tested in 2D only + BrioWuSingleFluid is tested in 1D only +*/ + + +// RKOTVSingleFluidPeriodic +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicOTVSF) { /* The following was used to gather data to compare the parallel version with. No tests are run in the serial version of this test */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + SerialEnv env(0, NULL, 1, 1, 1, 1); - Data d(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.004, &env); - SRRMHD model(&d); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Periodic bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + SerialSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "RK2SrmhdPeriodicOTVSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +// RKOTVSingleFluidOutflow +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowOTVSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); FVS fluxMethod(&d, &model); Outflow bcs(&d); Simulation sim(&d, &env); @@ -27,17 +79,121 @@ TEST(RK2, RK2OutputConsistentWithSerial) SerialSaveData save(&d, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "RK2SrmhdOutflowOTVSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} + + + + +// BrioWuSingleFluid + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Outflow bcs(&d); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + SerialSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "RK2SrmhdOutflowBrioWuSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. 
No tests are run in the serial version of this test + */ + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Periodic bcs(&d); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + SerialSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); // Save data in test directory strcpy(save.dir, "../TestData/GPU"); - strcpy(save.app, "RK2"); + strcpy(save.app, "RK2SrmhdPeriodicBrioWuSF"); + save.saveCons(); save.savePrims(); save.saveAux(); save.saveConsts(); +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdFlowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Flow bcs(&d); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + SerialSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "RK2SrmhdFlowBrioWuSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); } + + + From 5dab77294c9ed78880f882a894f728bd069028da Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Mon, 30 Nov 2020 12:26:53 +0000 Subject: [PATCH 36/56] forgetting to add files to git --- Tests/GPU/Src/main.cu | 12 ++ Tests/GPU/Src/test_parallel_rk2.cu | 238 +++++++++++++++++++++++++++++ 2 files changed, 250 insertions(+) create mode 100644 Tests/GPU/Src/main.cu create mode 100644 Tests/GPU/Src/test_parallel_rk2.cu diff --git a/Tests/GPU/Src/main.cu b/Tests/GPU/Src/main.cu new file mode 100644 index 00000000..c6ff0a86 --- /dev/null +++ b/Tests/GPU/Src/main.cu @@ -0,0 +1,12 @@ +#include "gtest/gtest.h" +#include "parallelEnv.h" + +int main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + + // Create env here to ensure MPI initialisation is handled. Will need to create this object again inside each test + // -- mpi init will only be called the first time + ParallelEnv env(0, NULL, 1, 1, 1); + return RUN_ALL_TESTS(); +} diff --git a/Tests/GPU/Src/test_parallel_rk2.cu b/Tests/GPU/Src/test_parallel_rk2.cu new file mode 100644 index 00000000..8e866a83 --- /dev/null +++ b/Tests/GPU/Src/test_parallel_rk2.cu @@ -0,0 +1,238 @@ +#include "gtest/gtest.h" +#include "srrmhd.h" +#include "srmhd.h" +#include "simulation.h" +#include "simData.h" +#include "parallelSaveData.h" +#include "parallelBoundaryConds.h" +#include "initFunc.h" +#include "RK2.h" +#include "fluxVectorSplitting.h" +#include "parallelEnv.h" +#include + +/* + Assumptions: + RKRandomInstabilitySingleFluid is tested in 2D only + BrioWuSingleFluid is tested in 1D only +*/ + + +#if 1 +// RKOTVSingleFluidPeriodic +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicOTVSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. 
No tests are run in the serial version of this test + */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelPeriodic bcs(&d, &env); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPU"); + strcpy(save.app, "RK2SrmhdPeriodicOTVSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +// RKOTVSingleFluidOutflow +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowOTVSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelOutflow bcs(&d, &env); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPU"); + strcpy(save.app, "RK2SrmhdOutflowOTVSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} + + + + +// BrioWuSingleFluid + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelOutflow bcs(&d, &env); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPU"); + strcpy(save.app, "RK2SrmhdOutflowBrioWuSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelPeriodic bcs(&d, &env); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPU"); + strcpy(save.app, "RK2SrmhdPeriodicBrioWuSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdFlowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. 
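These parallel variants all construct `ParallelEnv env(0, NULL, 2, 2, 1, 1)`, a 2x2 rank grid over the 40x40 test domain, and write to `TestData/MPIGPU` for the serial-versus-parallel comparisons. A small, hedged sketch of the per-rank sizes implied by that setup follows; it assumes the physical cells divide evenly across ranks and that `Nx` counts `2*Ng` ghost cells on top of the local physical cells, the convention used by the HDF5 writers later in this patch series. The function name is illustrative only.

```
def local_grid(nx_global, ny_global, ranks_x, ranks_y, ng):
    """Per-rank cell counts for an even 2-D domain decomposition (sketch)."""
    nx_local = nx_global // ranks_x        # physical cells owned by one rank
    ny_local = ny_global // ranks_y
    Nx_local = nx_local + 2 * ng           # including ghost cells, as in Data::Nx
    Ny_local = ny_local + 2 * ng
    return nx_local, ny_local, Nx_local, Ny_local

# The tests above: 40x40 cells on a 2x2 rank grid with Ng = 4 ghost cells
print(local_grid(40, 40, 2, 2, 4))         # -> (20, 20, 28, 28)
```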
No tests are run in the serial version of this test + */ + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelFlow bcs(&d, &env); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPU"); + strcpy(save.app, "RK2SrmhdFlowBrioWuSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#endif + + + +#if 0 + +// Tests which do not currently pass + +TEST(RK2OutputConsistentWithSerial, RK2SrrmhdOutflowOTVortexSingleFluid) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(30, 30, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelOutflow bcs(&d, &env); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPU"); + strcpy(save.app, "RK2SrrmhdOutflowOTVortexSingleFluid"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#endif From a3c115c8764e9760747ce4c6689bc55b56d669b4 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Wed, 6 Jan 2021 15:13:35 +0000 Subject: [PATCH 37/56] putting cpu and gpu iridis instructions in the same place --- Project/GPU/Include/serialEnv.h | 1 + Project/GPU/Src/main.cu | 12 ++++------- Scrips/IridisEnv/tests_instructions.md | 19 ++++++++++++++--- Scrips/IridisEnv/tests_job_cpu.sh | 21 +++++++++++++++++++ .../{tests_job.sh => tests_job_gpu.sh} | 0 5 files changed, 42 insertions(+), 11 deletions(-) create mode 100644 Scrips/IridisEnv/tests_job_cpu.sh rename Scrips/IridisEnv/{tests_job.sh => tests_job_gpu.sh} (100%) diff --git a/Project/GPU/Include/serialEnv.h b/Project/GPU/Include/serialEnv.h index f1cae491..7ad548c7 100644 --- a/Project/GPU/Include/serialEnv.h +++ b/Project/GPU/Include/serialEnv.h @@ -19,6 +19,7 @@ class SerialEnv : public PlatformEnv { public: + // TODO -- no reason for this constructor to take nxRanks etc //! Constructor -- Initialize global MPI communicator SerialEnv(int *argcP, char **argvP[], int nxRanks, int nyRanks, int nzRanks, int testing=0); diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index bb109cd3..4acfedb0 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -5,10 +5,9 @@ #include "srmhd.h" #include "srrmhd.h" #include "boundaryConds.h" -#include "parallelBoundaryConds.h" #include "rkSplit.h" #include "SSP2.h" -#include "parallelSaveData.h" +#include "serialSaveData.h" #include "fluxVectorSplitting.h" #include "serialEnv.h" @@ -43,9 +42,6 @@ int main(int argc, char *argv[]) { double sigma(0); bool output(true); int safety(180); - int nxRanks(2); - int nyRanks(2); - int nzRanks(1); char * ptr(0); //! 
Overwrite any variables that have been passed in as main() arguments @@ -55,7 +51,7 @@ int main(int argc, char *argv[]) { } } - ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); + SerialEnv env(&argc, &argv, 1, 1, 1); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, cfl, Ng, gamma, sigma); @@ -65,7 +61,7 @@ int main(int argc, char *argv[]) { FVS fluxMethod(&data, &model); - ParallelFlow bcs(&data, &env); + Flow bcs(&data, &env); Simulation sim(&data, &env); @@ -73,7 +69,7 @@ int main(int argc, char *argv[]) { RK2 timeInt(&data, &model, &bcs, &fluxMethod); - ParallelSaveData save(&data, &env); + SerialSaveData save(&data, &env); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); diff --git a/Scrips/IridisEnv/tests_instructions.md b/Scrips/IridisEnv/tests_instructions.md index 9c215425..3569ca88 100644 --- a/Scrips/IridisEnv/tests_instructions.md +++ b/Scrips/IridisEnv/tests_instructions.md @@ -1,6 +1,6 @@ ## Tests Instructions -These are instructions to run GPU unit tests as a batch job on Iridis 5 +These are instructions to run GPU or CPU unit tests as a batch job on Iridis 5 ## Setting up python env @@ -11,7 +11,13 @@ module purge module load gcc/6.4.0 module load python/3.6.4 module load hdf5/1.10.2/gcc/parallel -module load cuda/8.0 +``` + +Optionally also type `module load cuda/8.0` if using gpu, + +Finish creating and activating the python venv with: + +``` python3 -m venv venv source venv/bin/activate ``` @@ -24,10 +30,17 @@ python -m pip install -r Scripts/IridisEnv/requirements.txt ## Runing unit tests as a batch job -From `Tests/GPU` run `sbatch ../../Scripts/IridisEnv/tests_job.sh` +For GPU: + +From `Tests/GPU` run `sbatch ../../Scripts/IridisEnv/tests_job_gpu.sh` This will run all GPU tests +For CPU: + +From `Tests/CPU` run `sbatch ../../Scripts/IridisEnv/tests_job_cpu.sh` + + diff --git a/Scrips/IridisEnv/tests_job_cpu.sh b/Scrips/IridisEnv/tests_job_cpu.sh new file mode 100644 index 00000000..583b9043 --- /dev/null +++ b/Scrips/IridisEnv/tests_job_cpu.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +#SBATCH --ntasks-per-node=4 # Tasks per node +#SBATCH --nodes=1 # Number of nodes requested +#SBATCH --time=00:10:00 # walltime + +module purge +module load gcc/6.4.0 +module load python/3.6.4 +module load hdf5/1.10.2/gcc/parallel +#module load hdf5/1.10.2/gcc/serial + +module list + +source ../../venv/bin/activate + +export PYTHONPATH=$PYTHONPATH:../../Scripts:/home/amb1u19/METHOD_branches/METHOD_dev_hdf5/Scripts + +gcc --version +make clean +make test diff --git a/Scrips/IridisEnv/tests_job.sh b/Scrips/IridisEnv/tests_job_gpu.sh similarity index 100% rename from Scrips/IridisEnv/tests_job.sh rename to Scrips/IridisEnv/tests_job_gpu.sh From 9fd16ee7f0c7f46fccc4b08dfc1314a080ba29b7 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 8 Jan 2021 15:38:58 +0000 Subject: [PATCH 38/56] small fix to main file --- Project/GPU/Makefile | 2 +- Project/GPU/Src/main.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index a5100e7b..b3e0c821 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -4,7 +4,7 @@ # -------------- PARAMETERS FOR USERS TO EDIT -------------------- # if USE_MPI=1, need to use parallel versions of objects, such as ParallelEnv, ParallelSaveData etc -USE_MPI=1 +USE_MPI=0 # find location of MPI libraries to link on your local system using 'mpicc -show' MPI_FLAGS = -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu 
-lmpichcxx -lmpich diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 4acfedb0..7b2ffe50 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -61,7 +61,7 @@ int main(int argc, char *argv[]) { FVS fluxMethod(&data, &model); - Flow bcs(&data, &env); + Flow bcs(&data); Simulation sim(&data, &env); From e7b5062d5bc682724eb1784c80deaa4601b54a69 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 8 Jan 2021 16:15:02 +0000 Subject: [PATCH 39/56] adding initial files for hdf5 and checkpoint restart, copying from cpu version --- Project/GPU/Include/checkpointArgs.h | 68 ++++ Project/GPU/Include/parallelCheckpointArgs.h | 37 ++ Project/GPU/Include/parallelSaveDataHDF5.h | 120 +++++++ Project/GPU/Include/serialCheckpointArgs.h | 37 ++ Project/GPU/Include/serialSaveDataHDF5.h | 122 +++++++ Project/GPU/Src/checkpointArgs.cu | 67 ++++ Project/GPU/Src/parallelCheckpointArgs.cu | 70 ++++ Project/GPU/Src/parallelSaveDataHDF5.cu | 348 +++++++++++++++++++ Project/GPU/Src/serialCheckpointArgs.cu | 67 ++++ Project/GPU/Src/serialSaveDataHDF5.cu | 306 ++++++++++++++++ 10 files changed, 1242 insertions(+) create mode 100644 Project/GPU/Include/checkpointArgs.h create mode 100644 Project/GPU/Include/parallelCheckpointArgs.h create mode 100644 Project/GPU/Include/parallelSaveDataHDF5.h create mode 100644 Project/GPU/Include/serialCheckpointArgs.h create mode 100644 Project/GPU/Include/serialSaveDataHDF5.h create mode 100644 Project/GPU/Src/checkpointArgs.cu create mode 100644 Project/GPU/Src/parallelCheckpointArgs.cu create mode 100644 Project/GPU/Src/parallelSaveDataHDF5.cu create mode 100644 Project/GPU/Src/serialCheckpointArgs.cu create mode 100644 Project/GPU/Src/serialSaveDataHDF5.cu diff --git a/Project/GPU/Include/checkpointArgs.h b/Project/GPU/Include/checkpointArgs.h new file mode 100644 index 00000000..a1cb5478 --- /dev/null +++ b/Project/GPU/Include/checkpointArgs.h @@ -0,0 +1,68 @@ +#ifndef CHECKPOINTARGS_H +#define CHECKPOINTARGS_H + +#include +#include +#include "platformEnv.h" + + +//! Wrapper around Data object for populating Data from a checkpoint restart file +/*! + @par + Class contains all the data of the simulation relevant to any of the other + modules. Containing it in this way prevents issues of cyclic includes, also + results in Simulation as more of an interface than a class that needs to be + known to lower objects---good practice.
+ +*/ +class CheckpointArgs +{ + public: + +int + //@{ + nx, ny, nz; //!< Number of physical cells in specified direction + //@} + double + //@{ + xmin, xmax, + ymin, ymax, //!< Positional limits of domain in specified direction + zmin, zmax, + //@} + endTime, //!< End time of simulation + cfl; //!< Courant factor + int Ng; //!< Number of ghost cells + double + gamma, //!< Adiabatic index + sigma; //!< Resistivity + int + //@{ + Ncons, Nprims, Naux; //!< Number of specified variables + //@} + double + cp; //!< Constant divergence cleaning term + double + gam; //!< Exponent in the functional conductivity + double + t, //!< Current time + dt; //!< Width of current timestep + int + //@{ + Nx, Ny, Nz; //!< Total number of compute cells in domain in the specified direction + //@} + + + //! Constructor + /*! + @par + Allocates the memory required for the state arrays and sets the simulation + constants to the given values. Does not set initial state, thats done by + the initialFunc object. + @param name name of checkpoint file to use for restart, including path and extension + @param env environment object containing platform details eg MPI ranks + */ + CheckpointArgs() {}; + +}; + +#endif diff --git a/Project/GPU/Include/parallelCheckpointArgs.h b/Project/GPU/Include/parallelCheckpointArgs.h new file mode 100644 index 00000000..d2a2f09e --- /dev/null +++ b/Project/GPU/Include/parallelCheckpointArgs.h @@ -0,0 +1,37 @@ +#ifndef PARALLELCHECKPOINTARGS_H +#define PARALLELCHECKPOINTARGS_H + +#include +#include +#include "parallelEnv.h" + + +//! Wrapper around Data object for populating Data from a checkpoint restart file +/*! + @par + Class contains all the data of the simulation relevant to any of the other + modules. Containing it in this way prevents issues of cyclic includes, also + results in Simulation as more of an interface than a class that needs to be + known to lower objects---good practice.
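`CheckpointArgs` (and the serial/parallel subclasses that follow) is filled from attributes of a checkpoint HDF5 file: `cfl`, `gamma`, `sigma`, `cp` and `t` at the file root, and the grid quantities (`nx` through `endTime`) as attributes of a `Domain` group, exactly the names read by the constructors later in this patch. Below is a hedged h5py sketch, outside the build, for checking a restart file for those attributes before handing it to the restart machinery; the file name in the usage comment is only an example.

```
import h5py

ROOT_ATTRS = ["cfl", "gamma", "sigma", "cp", "t"]
DOMAIN_ATTRS = ["nx", "ny", "nz", "Nx", "Ny", "Nz", "Ng",
                "xmin", "ymin", "zmin", "xmax", "ymax", "zmax", "endTime"]

def missing_restart_attributes(path):
    """List any attributes a CheckpointArgs-style reader would fail to find."""
    with h5py.File(path, "r") as f:
        missing = [name for name in ROOT_ATTRS if name not in f.attrs]
        if "Domain" in f:
            domain = f["Domain"].attrs
            missing += ["Domain/" + name for name in DOMAIN_ATTRS
                        if name not in domain]
        else:
            missing.append("Domain group")
    return missing

# Example usage (the file name is hypothetical):
# print(missing_restart_attributes("data.checkpoint.3.000000.hdf5"))
```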
+ +*/ +class ParallelCheckpointArgs : public CheckpointArgs +{ + public: + + //! Constructor + /*! + @par + Allocates the memory required for the state arrays and sets the simulation + constants to the given values. Does not set initial state, thats done by + the initialFunc object. + @param name name of checkpoint file to use for restart, including path and extension + @param env environment object containing platform details eg MPI ranks + */ + ParallelCheckpointArgs( + const char* name, + ParallelEnv *env); + +}; + +#endif diff --git a/Project/GPU/Include/parallelSaveDataHDF5.h b/Project/GPU/Include/parallelSaveDataHDF5.h new file mode 100644 index 00000000..52c2f5b0 --- /dev/null +++ b/Project/GPU/Include/parallelSaveDataHDF5.h @@ -0,0 +1,120 @@ +#ifndef PARALLELSAVEDATAHDF5_H +#define PARALLELSAVEDATAHDF5_H + +#include +#include +#include +#include +#include +#include +#include "hdf5.h" +#include "simData.h" +#include "saveData.h" +#include "parallelEnv.h" + +using namespace std; + +//! Class used to save simulation data to HDF5 using a single process +/*! + @par + Class is initialized with the data that is to be saved. Saves the simulation + data in the Data directory, located within the Project folder. All data is + saved automatically, including all constant data (xmin, ymax, endTime etc) and + and the values of all prims, aux and cons variables. +*/ +class ParallelSaveDataHDF5 : public SaveData +{ + +public: + ParallelEnv * env; //!< Pointer to PlatformEnv class containing platform specific info such as MPI details + string filename; //!< Filename for the HDF5 file. Defaults to 'data.hdf5'. + hid_t file = 0; //!< HDF5 file to write to. + int file_iteration = 0; //!< The simulation iteration this file was opened for. + + //! The level of detail to output to file + enum OutputDetail { + OUTPUT_ALL, //!< All conserved, primitive, auxiliary and user-defined data + OUTPUT_REDUCED, //!< Skip auxiliary data + OUTPUT_MINIMAL //!< Only conserved and primitive data + } detail; + + //! Saves the conserved vector state + void saveCons() override; + + //! Saves the primitive vector state + void savePrims() override; + + //! Saves the auxiliary vector state + void saveAux() override; + + //! Saves the domain coordinates + void saveDomain() override; + + //! Saves the constant data + void saveConsts() override; + + //! Constructor + /*! + @param[in] *data pointer to the Data class + @param[in] *env pointer to the Parallel Environment containing information on bounds etc. + @param[in] filename String describing the file to create. Can ignore + */ + ParallelSaveDataHDF5( + Data * data, ParallelEnv * env, string filename="data", OutputDetail detail=OUTPUT_ALL + ) : SaveData(data, 0), env(env), filename(filename), detail(detail) { + // Remove any pre-existing checkpoint file + std::remove((filename+".checkpoint.hdf5").c_str()); + } + + virtual ~ParallelSaveDataHDF5() { } //!< Destructor + + //! Saves all cons, prims, aux and constant data + /*! + @par + This calls the other member functions to save their respective + simulation data. + + @param[in] timeSeries flags whether the saved data is final or transient + */ + void saveAll(bool timeSeries=false) override; + + //! Saves user specified variable + /*! + @par + Function saves the data for the variable specified by the string `var` + + @param[in] variable Defines the variable the user wants to save. 
Should match a variable label + @param[in] num number of user-specified variables to save in total (required for consistent numbering of files) + */ + void saveVar(string variable, int num=1) override; + + //! Opens a new HDF5 file + /*! + * @par + * Function opens a new HDF5 file with a specified filename, and closes any current one. + * + * @param[in] name Filename to create + */ + void openFile(const char *name); + + //! Tries to open a checkpoint file + /*! + * @par + * If there is not already a checkkpoint file open for this iteration, opens a new one + */ + void openCheckpointFile(); + + //! Writes a new dataset + /*! + * @par + * Saves a new dataset double to file + * + * @param group Root location to save to + * @param name Name of the new dataset + * @param var Which variable to save within the data array + * @param data Pointer to the data array (cons, prims, aux etc.) + */ + void writeDataSetDouble(const hid_t *group, const char *name, const int *var, const double *data); +}; + +#endif diff --git a/Project/GPU/Include/serialCheckpointArgs.h b/Project/GPU/Include/serialCheckpointArgs.h new file mode 100644 index 00000000..8b072fbd --- /dev/null +++ b/Project/GPU/Include/serialCheckpointArgs.h @@ -0,0 +1,37 @@ +#ifndef SERIALCHECKPOINTARGS_H +#define SERIALCHECKPOINTARGS_H + +#include +#include +#include "platformEnv.h" + + +//! Wrapper around Data object for populating Data from a checkpoint restart file +/*! + @par + Class contains all the data of the simulation relevant to any of the other + modules. Containing it in this way prevents issues of cyclic includes, also + results in Simulation as more of an interface than a class that needs to be + known to lower objects---good practice.
+ +*/ +class SerialCheckpointArgs : public CheckpointArgs +{ + public: + + //! Constructor + /*! + @par + Allocates the memory required for the state arrays and sets the simulation + constants to the given values. Does not set initial state, thats done by + the initialFunc object. + @param name name of checkpoint file to use for restart, including path and extension + @param env environment object containing platform details eg MPI ranks + */ + SerialCheckpointArgs( + const char* name, + PlatformEnv *env); + +}; + +#endif diff --git a/Project/GPU/Include/serialSaveDataHDF5.h b/Project/GPU/Include/serialSaveDataHDF5.h new file mode 100644 index 00000000..16ab4139 --- /dev/null +++ b/Project/GPU/Include/serialSaveDataHDF5.h @@ -0,0 +1,122 @@ +#ifndef SERIALSAVEDATAHDF5_H +#define SERIALSAVEDATAHDF5_H + +#include +#include +#include +#include +#include +#include +#include "hdf5.h" +#include "simData.h" +#include "saveData.h" +#include "serialEnv.h" + +using namespace std; + +#include "hdf5.h" +//! Class used to save simulation data to HDF5 using a single process +/*! + @par + Class is initialized with the data that is to be saved. Saves the simulation + data in the Data directory, located within the Project folder. All data is + saved automatically, including all constant data (xmin, ymax, endTime etc) and + and the values of all prims, aux and cons variables. +*/ +class SerialSaveDataHDF5 : public SaveData +{ + + public: + + SerialEnv * env; //!< Pointer to PlatformEnv class containing platform specific info such as MPI details + string filename; //!< Filename for the HDF5 file. Defaults to 'data.hdf5'. + hid_t file = 0; //!< HDF5 file to write to. + int file_iteration = 0; //!< The simulation iteration this file was opened for. + + //! The level of detail to output to file + enum OutputDetail { + OUTPUT_ALL, //!< All conserved, primitive, auxiliary and user-defined data + OUTPUT_REDUCED, //!< Skip auxiliary data + OUTPUT_MINIMAL //!< Only conserved and primitive data + } detail; + + //! Saves the conserved vector state + void saveCons() override; + + //! Saves the primitive vector state + void savePrims() override; + + //! Saves the auxiliary vector state + void saveAux() override; + + //! Saves the domain coordinates + void saveDomain() override; + + //! Saves the constant data + void saveConsts() override; + + //! Constructor + /*! + @param *data pointer to the Data class + @param *env pointer to the Serial Environment containing information on bounds etc. + @param filename String describing the file to create. Can ignore + */ + SerialSaveDataHDF5( + Data * data, SerialEnv * env, string filename="data", OutputDetail detail=OUTPUT_ALL + ) : SaveData(data, 0), env(env), filename(filename), detail(detail) { + // Remove any pre-existing checkpoint file + std::remove((filename+".checkpoint.hdf5").c_str()); + } + + virtual ~SerialSaveDataHDF5() { } //!< Destructor + + //! Saves all cons, prims, aux and constant data + /*! + @par + This calls the other member functions to save their respective + simulation data. + + @param[in] timeSeries flags whether the saved data is final or transient + */ + void saveAll(bool timeSeries=false) override; + + //! Saves user specified variable + /*! + @par + Function saves the data for the variable specified by the string `var` + + @param[in] variable Defines the variable the user wants to save. 
Should match a variable label + @param[in] num number of user-specified variables to save in total (required for consistent numbering of files) + */ + void saveVar(string variable, int num=1) override; + + //! Opens a new HDF5 file + /*! + * @par + * Function opens a new HDF5 file with a specified filename, and closes any current one. + * + * @param[in] name Filename to create + */ + void openFile(const char *name); + + //! Tries to open a checkpoint file + /*! + * @par + * If there is not already a checkkpoint file open for this iteration, opens a new one + */ + void openCheckpointFile(); + + //! Writes a new dataset + /*! + * @par + * Saves a new dataset double to file + * + * @param group Root location to save to + * @param name Name of the new dataset + * @param var Which variable to save within the data array + * @param data Pointer to the data array (cons, prims, aux etc.) + */ + void writeDataSetDouble(const hid_t *group, const char *name, const int *var, const double *data); +}; + +#endif diff --git a/Project/GPU/Src/checkpointArgs.cu b/Project/GPU/Src/checkpointArgs.cu new file mode 100644 index 00000000..c6d2cffb --- /dev/null +++ b/Project/GPU/Src/checkpointArgs.cu @@ -0,0 +1,67 @@ +#include "simData.h" +#include "checkpointArgs.h" +#include "platformEnv.h" +#include +#include +#include "hdf5.h" +#include "hdf5_hl.h" +#include + + +CheckpointArgs::CheckpointArgs(const char* name, PlatformEnv *env) +{ + herr_t error=0, tmpError=-1; + hid_t file = H5Fopen(name, H5F_ACC_RDONLY, H5P_DEFAULT); + + if (file<0) throw std::runtime_error("Could not open checkpoint restart file. Does it exist? CheckpointArgs requires path to file and extension"); + + // Read global file attributes + tmpError = H5LTget_attribute_double(file, ".", "cfl", &(cfl)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "gamma", &(gamma)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "sigma", &(sigma)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "cp", &(cp)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "t", &(t)); + if (tmpError < 0) error = tmpError; + if (error<0) throw std::runtime_error("Checkpoint restart file is missing some global attributes"); + + // Remaining required attributes are stored in the Domain group + hid_t group = H5Gopen(file, "Domain", H5P_DEFAULT); + tmpError = H5LTget_attribute_int(group, ".", "nx", &(nx)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "ny", &(ny)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "nz", &(nz)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Nx", &(Nx)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Ny", &(Ny)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Nz", &(Nz)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Ng", &(Ng)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "xmin", &(xmin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "ymin", &(ymin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "zmin", &(zmin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "xmax", &(xmax)); + if (tmpError < 0) 
error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "ymax", &(ymax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "zmax", &(zmax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "endTime", &(endTime)); + if (tmpError < 0) error = tmpError; + if (error<0) throw std::runtime_error("Checkpoint restart file is missing some domain attributes"); + + H5Gclose(group); + H5Fclose(file); +} + + diff --git a/Project/GPU/Src/parallelCheckpointArgs.cu b/Project/GPU/Src/parallelCheckpointArgs.cu new file mode 100644 index 00000000..20b6d485 --- /dev/null +++ b/Project/GPU/Src/parallelCheckpointArgs.cu @@ -0,0 +1,70 @@ +#include "simData.h" +#include "parallelCheckpointArgs.h" +#include "parallelEnv.h" +#include +#include +#include "hdf5.h" +#include "hdf5_hl.h" +#include + + +ParallelCheckpointArgs::ParallelCheckpointArgs(const char* name, ParallelEnv *env) : CheckpointArgs() +{ + herr_t error=0, tmpError=-1; + + hid_t file_access_property_list = H5Pcreate(H5P_FILE_ACCESS); + H5Pset_fapl_mpio(file_access_property_list, env->mpiCartesianComm, env->mpiInfo); + hid_t file = H5Fopen(name, H5F_ACC_RDONLY, file_access_property_list); + if (file<0) throw std::runtime_error("Could not open checkpoint restart file. Does it exist? CheckpointArgs requires path to file and extension"); + + // Read global file attributes + tmpError = H5LTget_attribute_double(file, ".", "cfl", &(cfl)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "gamma", &(gamma)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "sigma", &(sigma)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "cp", &(cp)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "t", &(t)); + if (tmpError < 0) error = tmpError; + if (error<0) throw std::runtime_error("Checkpoint restart file is missing some global attributes"); + + // Remaining required attributes are stored in the Domain group + hid_t group = H5Gopen(file, "Domain", H5P_DEFAULT); + tmpError = H5LTget_attribute_int(group, ".", "nx", &(nx)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "ny", &(ny)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "nz", &(nz)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Nx", &(Nx)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Ny", &(Ny)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Nz", &(Nz)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Ng", &(Ng)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "xmin", &(xmin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "ymin", &(ymin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "zmin", &(zmin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "xmax", &(xmax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "ymax", &(ymax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "zmax", &(zmax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "endTime", 
&(endTime)); + if (tmpError < 0) error = tmpError; + if (error<0) throw std::runtime_error("Checkpoint restart file is missing some domain attributes"); + + H5Gclose(group); + H5Fclose(file); + H5Pclose(file_access_property_list); +} + + diff --git a/Project/GPU/Src/parallelSaveDataHDF5.cu b/Project/GPU/Src/parallelSaveDataHDF5.cu new file mode 100644 index 00000000..712465d4 --- /dev/null +++ b/Project/GPU/Src/parallelSaveDataHDF5.cu @@ -0,0 +1,348 @@ +#include "parallelSaveDataHDF5.h" +#include +#include +#include +#include "hdf5.h" +#include "hdf5_hl.h" + +using namespace std; + + +/*! + * /brief Opens a HDF5 file + * + * This bundles up closing any existing open checkpoint file, removing the old file with the same name, + * then recording the iteration this file was opened on (for reusing checkpoint files later in the same + * cycle). + * + * TODO: If there is an existing file, if it has the same dimensions, we should overwrite it and not remove it. + * + * @param name Name of the file to open + */ +void ParallelSaveDataHDF5::openFile(const char *name) { + if(this->file) H5Fclose(this->file); + + std::remove(name); + + hid_t file_access_property_list = H5Pcreate(H5P_FILE_ACCESS); + H5Pset_fapl_mpio(file_access_property_list, env->mpiCartesianComm, env->mpiInfo); + + this->file = H5Fcreate( + name, H5F_ACC_TRUNC, H5P_DEFAULT, + file_access_property_list + ); + this->file_iteration = this->d->iters; + H5Pclose(file_access_property_list); +} + + +/*! + * /brief Opens a HDF5 checkpoint file + * + * Checkpoint files are used to either store all data for restarting a run, + * or to store individual outputs in user-defined mode, or both. + * Writing out individual variables happens before the final checkpoint write. + * So therefore, when we want to write out a final file, there may or may not be an existing + * checkpoint file for this cycle full of user-defined outputs. + */ +void ParallelSaveDataHDF5::openCheckpointFile() { + if(this->file) { + // If there's currently a checkpoint file, was it opened this cycle? + if (this->file_iteration != this->d->iters) { + // If not, close the open file, delete the file with the name we want to write to on disk, + // then open a new one + string filename_full = this->filename+".checkpoint."+to_string(this->d->t)+".hdf5"; + this->openFile(filename_full.c_str()); + hid_t user_def = H5Gcreate(this->file, "UserDef", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Gclose(user_def); + } else { + // Then the checkpoint file was opened this cycle, and we can write to it + } + + } else { + // If there's no existing checkpoint file, we need to create a new one. + string filename_full = this->filename+".checkpoint."+to_string(this->d->t)+".hdf5"; + this->openFile(filename_full.c_str()); + hid_t user_def = H5Gcreate(this->file, "UserDef", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Gclose(user_def); + } +} + + +/*! + * /brief Writes an HDF5 dataset to file + * + * Prepares the buffer for writing to file, and writes a dataset. + * + * @param group The group within the file (or the file itself for root datasets) + * @param name The name the dataset should have + * @param var Data is stored in 4-d arrays for each class of data (conserved/primitive/auxiliary), + * with the 1st dimension being the variable. This argument indicates which variable is being output. + * @param data The pointer to the data array. 
+ */ +void ParallelSaveDataHDF5::writeDataSetDouble(const hid_t *group, const char *name, const int *var, + const double *data) { + hsize_t lengths_local[d->dims]; + hsize_t lengths_total[d->dims]; + hsize_t offsets[d->dims]; + + // So now, we set the total data-space size, and the offset the local data-space has from it. + // The local data dimensions Nx/Ny/Nz include ghost cells, whilst the total one does not. + lengths_total[0] = d->nx; + lengths_local[0] = (d->Nx - 2 * d->Ng); + offsets[0] = (d->Nx - 2 * d->Ng) * env->xRankId; + unsigned long buffer_size = lengths_local[0]; // The length of the buffer + + if(d->dims > 1) { + lengths_total[1] = d->ny; + lengths_local[1] = (d->Ny - 2 * d->Ng); + offsets[1] = (d->Ny - 2 * d->Ng) * env->yRankId; + buffer_size *= lengths_local[1]; + } + if(d->dims > 2) { + lengths_total[2] = d->nz; + lengths_local[2] = (d->Nz - 2 * d->Ng); + offsets[2] = (d->Nz - 2 * d->Ng) * env->zRankId; + buffer_size = lengths_local[2]; + } + + // We also need to create a buffer to write to, that excludes the ghost cells. + // So we calculate the size it needs to be, excluding ghost cells. + double buffer[buffer_size]; + int buffer_position(0); + + // Consider the efficiency of this! std::copy would probably be better but maybe the compiler + // will vectorise this. I prefer the consistency of a single set of loops over having 1 per dimension. + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { + buffer[buffer_position++] = data[ID(*var, i, j, k)]; + } + } + } + + // Define the total dataspace for this dataset, and create the dataset + hid_t dataspace_total = H5Screate_simple(d->dims, lengths_total, nullptr); + hid_t dataset = H5Dcreate( + *group, name, H5T_NATIVE_DOUBLE, dataspace_total, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT + ); + + // Define the dataspace that describes the fraction of the total dataspace + // accessed by this process. + hid_t dataspace_local = H5Screate_simple(d->dims, lengths_local, nullptr); + + // Create an access property list that tells the write to use MPI + hid_t dataset_access_property_list = H5Pcreate(H5P_DATASET_XFER); + H5Pset_dxpl_mpio(dataset_access_property_list, H5FD_MPIO_COLLECTIVE); + + // Select the 'hyperslab', i.e. the subset of the total dataspace to write to + // This bit is per process + H5Sselect_hyperslab( + dataspace_total, H5S_SELECT_SET, offsets, nullptr, lengths_local, nullptr + ); + + // Write this processes' buffer contents to the hyperslab + H5Dwrite( + dataset, H5T_NATIVE_DOUBLE, + dataspace_local, dataspace_total, + dataset_access_property_list, buffer + ); + + // Close everything to avoid memory leaks + H5Pclose(dataset_access_property_list); + H5Sclose(dataspace_total); + H5Sclose(dataspace_local); + H5Dclose(dataset); +} + + +/*! + * /brief Saves all data to file + * + * Saves all the data to file. This is modified by the level of detail on this + * (this->detail), and whether or not it is a checkpoint file. + * + * @param timeSeries If this is a checkpoint or not + */ +void ParallelSaveDataHDF5::saveAll(bool timeSeries) +{ + if(timeSeries) { + // If we're doing a timeseries/checkpoint output, things may be complicated + // as saveVars may have written some of the variables to file already! 
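`writeDataSetDouble` above gives each rank one hyperslab of the global `nx` x `ny` x `nz` dataset: the local block spans `Nx - 2*Ng` interior cells (likewise in y and z), it is offset by that length times the rank's coordinate along each axis, and the ghost-free write buffer has to hold the product of the active local lengths. A hedged sketch of that arithmetic, with illustrative names and not part of the build, is below.

```
def slab(n_global, n_local_with_ghosts, ng, rank_coord):
    """Hyperslab length and offset along one axis for one rank (sketch)."""
    length = n_local_with_ghosts - 2 * ng    # interior cells on this rank
    offset = length * rank_coord             # cells owned by lower-ranked neighbours
    assert offset + length <= n_global
    return length, offset

def buffer_cells(local_lengths):
    """Cells in the ghost-free write buffer: the product over active dimensions."""
    size = 1
    for n in local_lengths:
        size *= n
    return size

# The 2x2 test grid above: 40 global cells, Nx = 28 per rank, Ng = 4
print(slab(40, 28, 4, 1))        # -> (20, 20)
print(buffer_cells([20, 20]))    # -> 400
```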
+ string filename_full = this->filename+".checkpoint."+to_string(d->t)+".hdf5"; + if(!env->rank) { + std::cout << "Saving checkpoint '" << filename_full << "' (iteration " + to_string(d->iters) + ")\n"; + } + this->openCheckpointFile(); + + } else { + string filename_full = this->filename+".hdf5"; + if(!env->rank) { + std::cout << "Saving final output '" << filename_full << "'\n"; + } + this->openFile(filename_full.c_str()); + } + + this->saveConsts(); + this->saveDomain(); + this->savePrims(); + if(this->detail != OUTPUT_MINIMAL) this->saveCons(); + if(this->detail == OUTPUT_ALL) this->saveAux(); + + // If this isn't a timeseries, then this is the final save and the file should be closed. + if(!timeSeries)H5Fclose(this->file); +} + + +/*! + * /brief Saves conserved variables + */ +void ParallelSaveDataHDF5::saveCons() +{ + hid_t group = H5Gcreate(this->file, "Conserved", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "Ncons", &d->Ncons, 1); + // For each one of the conserved variables, write it to disk + string varOrder; + for(int var(0); var < d->Ncons; var++) { + this->writeDataSetDouble(&group, d->consLabels[var].c_str(), &var, d->cons); + varOrder += d->consLabels[var] + ','; + } + H5LTset_attribute_string(group, ".", "varOrder", varOrder.c_str()); + H5Gclose(group); +} + + +/*! + * /brief Saves primitive variables + */ +void ParallelSaveDataHDF5::savePrims() +{ + hid_t group = H5Gcreate(this->file, "Primitive", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "Nprims", &d->Nprims, 1); + + string varOrder; + for(int var(0); var < d->Nprims; var++) { + this->writeDataSetDouble(&group, d->primsLabels[var].c_str(), &var, d->prims); + varOrder += d->primsLabels[var] + ','; + } + H5LTset_attribute_string(group, ".", "varOrder", varOrder.c_str()); + H5Gclose(group); +} + + +/*! + * /brief Save auxiliary variables + */ +void ParallelSaveDataHDF5::saveAux() +{ + hid_t group = H5Gcreate(this->file, "Auxiliary", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "Naux", &d->Naux, 1); + + string varOrder; + for(int var(0); var < d->Naux; var++) { + this->writeDataSetDouble(&group, d->auxLabels[var].c_str(), &var, d->aux); + varOrder += d->auxLabels[var] + ','; + } + H5LTset_attribute_string(group, ".", "varOrder", varOrder.c_str()); + H5Gclose(group); +} + + +/*! 
+ * /brief Save domain information + */ +void ParallelSaveDataHDF5::saveDomain() +{ + hid_t group = H5Gcreate(this->file, "Domain", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "nx", &d->nx, 1); + H5LTset_attribute_int(group, ".", "ny", &d->ny, 1); + H5LTset_attribute_int(group, ".", "nz", &d->nz, 1); + H5LTset_attribute_int(group, ".", "Nx", &d->Nx, 1); + H5LTset_attribute_int(group, ".", "Ny", &d->Ny, 1); + H5LTset_attribute_int(group, ".", "Nz", &d->Nz, 1); + H5LTset_attribute_int(group, ".", "Ng", &d->Ng, 1); + H5LTset_attribute_double(group, ".", "xmin", &d->xmin, 1); + H5LTset_attribute_double(group, ".", "ymin", &d->ymin, 1); + H5LTset_attribute_double(group, ".", "zmin", &d->zmin, 1); + H5LTset_attribute_double(group, ".", "xmax", &d->xmax, 1); + H5LTset_attribute_double(group, ".", "ymax", &d->ymax, 1); + H5LTset_attribute_double(group, ".", "zmax", &d->zmax, 1); + H5LTset_attribute_double(group, ".", "dx", &d->dx, 1); + H5LTset_attribute_double(group, ".", "dy", &d->dy, 1); + H5LTset_attribute_double(group, ".", "dz", &d->dz, 1); + H5LTset_attribute_double(group, ".", "endTime", &d->endTime, 1); + H5LTset_attribute_double(group, ".", "dt", &d->dt, 1); + + // Unlike serial, we do not write out the domain- gathering across threads is a pain and it's all defined in xmin, xmax & dx. + H5Gclose(group); +} + + +/*! + * /brief Save constants + */ +void ParallelSaveDataHDF5::saveConsts() +{ + H5LTset_attribute_double(this->file, ".", "cfl", &d->cfl, 1); + H5LTset_attribute_double(this->file, ".", "gamma", &d->gamma, 1); + H5LTset_attribute_double(this->file, ".", "sigma", &d->sigma, 1); + H5LTset_attribute_double(this->file, ".", "cp", &d->cp, 1); + H5LTset_attribute_double(this->file, ".", "t", &d->t, 1); +} + + +/*! + * /brief Save a single variable to a checkpoint file + * + * Saves variables for debug or animation purposes. + * Finds what data index and array the variable name corresponds to, + * then opens a checkpoint file and saves to it. 
+ * + * @param variable The name of the variable + * @param num The number of variables to save; not used in HDF5 version + */ +void ParallelSaveDataHDF5::saveVar(string variable, int num) +{ + int found_var(-1); // Variable number + double *data; // Pointer to the data array containing the variable + + // Determine which variable the user wants saved + for (int var(0); var < d->Ncons; var++) { + if (strcmp(d->consLabels[var].c_str(), variable.c_str()) == 0) { + found_var=var; + data = d->cons; + break; + } + } + + if (found_var < 0) { + for (int var(0); var < d->Nprims; var++) { + if (strcmp(d->primsLabels[var].c_str(), variable.c_str()) == 0) { + found_var=var; + data = d->prims; + break; + } + } + } + + if (found_var < 0) { + for (int var(0); var < d->Naux; var++) { + if (strcmp(d->auxLabels[var].c_str(), variable.c_str()) == 0) { + found_var=var; + data = d->aux; + break; + } + } + } + + if (found_var < 0) { + printf("Error: Could not find user specified variable '%s'\n", variable.c_str()); + exit(1); + } + + this->openCheckpointFile(); + hid_t user_def = H5Gopen1(this->file, "UserDef"); + writeDataSetDouble(&user_def, variable.c_str(), &found_var, data); + H5Gclose(user_def); +} diff --git a/Project/GPU/Src/serialCheckpointArgs.cu b/Project/GPU/Src/serialCheckpointArgs.cu new file mode 100644 index 00000000..c69ffd93 --- /dev/null +++ b/Project/GPU/Src/serialCheckpointArgs.cu @@ -0,0 +1,67 @@ +#include "simData.h" +#include "serialCheckpointArgs.h" +#include "platformEnv.h" +#include +#include +#include "hdf5.h" +#include "hdf5_hl.h" +#include + + +SerialCheckpointArgs::SerialCheckpointArgs(const char* name, PlatformEnv *env) : CheckpointArgs +{ + herr_t error=0, tmpError=-1; + hid_t file = H5Fopen(name, H5F_ACC_RDONLY, H5P_DEFAULT); + + if (file<0) throw std::runtime_error("Could not open checkpoint restart file. Does it exist? 
CheckpointArgs requires path to file and extension"); + + // Read global file attributes + tmpError = H5LTget_attribute_double(file, ".", "cfl", &(cfl)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "gamma", &(gamma)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "sigma", &(sigma)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "cp", &(cp)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "t", &(t)); + if (tmpError < 0) error = tmpError; + if (error<0) throw std::runtime_error("Checkpoint restart file is missing some global attributes"); + + // Remaining required attributes are stored in the Domain group + hid_t group = H5Gopen(file, "Domain", H5P_DEFAULT); + tmpError = H5LTget_attribute_int(group, ".", "nx", &(nx)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "ny", &(ny)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "nz", &(nz)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Nx", &(Nx)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Ny", &(Ny)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Nz", &(Nz)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Ng", &(Ng)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "xmin", &(xmin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "ymin", &(ymin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "zmin", &(zmin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "xmax", &(xmax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "ymax", &(ymax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "zmax", &(zmax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "endTime", &(endTime)); + if (tmpError < 0) error = tmpError; + if (error<0) throw std::runtime_error("Checkpoint restart file is missing some domain attributes"); + + H5Gclose(group); + H5Fclose(file); +} + + diff --git a/Project/GPU/Src/serialSaveDataHDF5.cu b/Project/GPU/Src/serialSaveDataHDF5.cu new file mode 100644 index 00000000..7b5463d4 --- /dev/null +++ b/Project/GPU/Src/serialSaveDataHDF5.cu @@ -0,0 +1,306 @@ +#include "serialSaveDataHDF5.h" +#include +#include +#include +#include "hdf5.h" +#include "hdf5_hl.h" + +using namespace std; + + +/*! + * /brief Opens a HDF5 file + * + * This bundles up closing any existing open checkpoint file, removing the old file with the same name, + * then recording the iteration this file was opened on (for reusing checkpoint files later in the same + * cycle). + * + * TODO: If there is an existing file, if it has the same dimensions, we should overwrite it and not remove it. + * + * @param name Name of the file to open + */ +void SerialSaveDataHDF5::openFile(const char *name) { + if(this->file) H5Fclose(this->file); + + std::remove(name); + this->file = H5Fcreate(name, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + this->file_iteration = this->d->iters; +} + + +/*! 
+ * /brief Opens a HDF5 checkpoint file + * + * Checkpoint files are used to either store all data for restarting a run, + * or to store individual outputs in user-defined mode, or both. + * Writing out individual variables happens before the final checkpoint write. + * So therefore, when we want to write out a final file, there may or may not be an existing + * checkpoint file for this cycle full of user-defined outputs. + */ +void SerialSaveDataHDF5::openCheckpointFile() { + if(this->file) { + // If there's currently a checkpoint file, was it opened this cycle? + if (this->file_iteration != this->d->iters) { + // If not, close the open file, delete the file with the name we want to write to on disk, + // then open a new one + string filename_full = this->filename+".checkpoint."+to_string(this->d->t)+".hdf5"; + this->openFile(filename_full.c_str()); + hid_t user_def = H5Gcreate(this->file, "UserDef", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Gclose(user_def); + } else { + // Then the checkpoint file was opened this cycle, and we can write to it + } + + } else { + // If there's no existing checkpoint file, we need to create a new one. + string filename_full = this->filename+".checkpoint."+to_string(this->d->t)+".hdf5"; + this->openFile(filename_full.c_str()); + hid_t user_def = H5Gcreate(this->file, "UserDef", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Gclose(user_def); + } +} + + +/*! + * /brief Writes an HDF5 dataset to file + * + * Prepares the buffer for writing to file, and writes a dataset. + * + * @param group The group within the file (or the file itself for root datasets) + * @param name The name the dataset should have + * @param var Data is stored in 4-d arrays for each class of data (conserved/primitive/auxiliary), + * with the 1st dimension being the variable. This argument indicates which variable is being output. + * @param data The pointer to the data array. + */ +void SerialSaveDataHDF5::writeDataSetDouble(const hid_t *group, const char *name, const int *var, + const double *data) { + + // So now, we set the data-space size. We also need to create a buffer to write to, that excludes the ghost cells. + // So we calculate the size it needs to be, excluding ghost cells. + hsize_t lengths[d->dims]; + + lengths[0] = d->ie - d->is; + unsigned long buffer_size = lengths[0]; // The length of the buffer + + if(d->dims > 1) { + lengths[1] = d->je - d->js; + buffer_size *= lengths[1]; + } + if(d->dims > 2) { + lengths[2] = d->ke - d->ks; + buffer_size = lengths[2]; + } + + // Now create the buffer to store the data in + double buffer[buffer_size]; + int buffer_position(0); + + // Consider the efficiency of this! std::copy would probably be better but maybe the compiler + // will vectorise this. I prefer the consistency of a single set of loops over having 1 per dimension. + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { + buffer[buffer_position++] = data[ID(*var, i, j, k)]; + } + } + } + H5LTmake_dataset_double(*group, name, d->dims, lengths, buffer); +} + + +/*! + * /brief Saves all data to file + * + * Saves all the data to file. This is modified by the level of detail on this + * (this->detail), and whether or not it is a checkpoint file. 
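`openCheckpointFile`, in both the serial and parallel writers, names its files `<filename>.checkpoint.<t>.hdf5`, where `<t>` comes from `std::to_string(d->t)` and therefore carries six decimal places, while the final output is plain `<filename>.hdf5`. A small sketch reproducing that naming, so post-processing scripts can locate the files, is below; it assumes the default `filename="data"` and is illustrative only.

```
def output_names(base="data", t=None):
    """Mirror the SaveDataHDF5 naming: final output file and per-time checkpoint file."""
    final = "{}.hdf5".format(base)
    # std::to_string(double) formats like printf("%f"), i.e. six decimal places
    checkpoint = None if t is None else "{}.checkpoint.{:.6f}.hdf5".format(base, t)
    return final, checkpoint

print(output_names("data", 3.0))   # -> ('data.hdf5', 'data.checkpoint.3.000000.hdf5')
```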
+ * + * @param timeSeries If this is a checkpoint or not + */ +void SerialSaveDataHDF5::saveAll(bool timeSeries) +{ + if(timeSeries) { + // If we're doing a timeseries/checkpoint output, things may be complicated + // as saveVars may have written some of the variables to file already! + string filename_full = this->filename+".checkpoint."+to_string(d->t)+".hdf5"; + std::cout << "Saving checkpoint '" << filename_full << "' (iteration "+to_string(d->iters)+")\n"; + this->openCheckpointFile(); + + } else { + string filename_full = this->filename+".hdf5"; + std::cout << "Saving final output '" << filename_full << "'\n"; + this->openFile(filename_full.c_str()); + } + + this->saveConsts(); + this->saveDomain(); + this->savePrims(); + if(this->detail != OUTPUT_MINIMAL) this->saveCons(); + if(this->detail == OUTPUT_ALL) this->saveAux(); + + // If this isn't a timeseries, then this is the final save and the file should be closed. + if(!timeSeries)H5Fclose(this->file); +} + + +/*! + * /brief Saves conserved variables + */ +void SerialSaveDataHDF5::saveCons() +{ + hid_t group = H5Gcreate(this->file, "Conserved", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "Ncons", &d->Ncons, 1); + + // For each one of the conserved variables, write it to disk + string varOrder; + for(int var(0); var < d->Ncons; var++) { + this->writeDataSetDouble(&group, d->consLabels[var].c_str(), &var, d->cons); + varOrder += d->consLabels[var] + ','; + } + H5LTset_attribute_string(group, ".", "varOrder", varOrder.c_str()); + H5Gclose(group); +} + + +/*! + * /brief Saves primitive variables + */ +void SerialSaveDataHDF5::savePrims() +{ + hid_t group = H5Gcreate(this->file, "Primitive", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "Nprims", &d->Nprims, 1); + + string varOrder; + for(int var(0); var < d->Nprims; var++) { + this->writeDataSetDouble(&group, d->primsLabels[var].c_str(), &var, d->prims); + varOrder += d->primsLabels[var] + ','; + } + H5LTset_attribute_string(group, ".", "varOrder", varOrder.c_str()); + H5Gclose(group); +} + + +/*! + * /brief Save auxiliary variables + */ +void SerialSaveDataHDF5::saveAux() +{ + hid_t group = H5Gcreate(this->file, "Auxiliary", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "Naux", &d->Naux, 1); + + string varOrder; + for(int var(0); var < d->Naux; var++) { + this->writeDataSetDouble(&group, d->auxLabels[var].c_str(), &var, d->aux); + varOrder += d->auxLabels[var] + ','; + } + H5LTset_attribute_string(group, ".", "varOrder", varOrder.c_str()); + H5Gclose(group); +} + + +/*! 
+ * /brief Save domain information + */ +void SerialSaveDataHDF5::saveDomain() +{ + hid_t group = H5Gcreate(this->file, "Domain", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "nx", &d->nx, 1); + H5LTset_attribute_int(group, ".", "ny", &d->ny, 1); + H5LTset_attribute_int(group, ".", "nz", &d->nz, 1); + H5LTset_attribute_int(group, ".", "Nx", &d->Nx, 1); + H5LTset_attribute_int(group, ".", "Ny", &d->Ny, 1); + H5LTset_attribute_int(group, ".", "Nz", &d->Nz, 1); + H5LTset_attribute_int(group, ".", "Ng", &d->Ng, 1); + H5LTset_attribute_double(group, ".", "xmin", &d->xmin, 1); + H5LTset_attribute_double(group, ".", "ymin", &d->ymin, 1); + H5LTset_attribute_double(group, ".", "zmin", &d->zmin, 1); + H5LTset_attribute_double(group, ".", "xmax", &d->xmax, 1); + H5LTset_attribute_double(group, ".", "ymax", &d->ymax, 1); + H5LTset_attribute_double(group, ".", "zmax", &d->zmax, 1); + H5LTset_attribute_double(group, ".", "dx", &d->dx, 1); + H5LTset_attribute_double(group, ".", "dy", &d->dy, 1); + H5LTset_attribute_double(group, ".", "dz", &d->dz, 1); + H5LTset_attribute_double(group, ".", "endTime", &d->endTime, 1); + H5LTset_attribute_double(group, ".", "dt", &d->dt, 1); + + hsize_t length(d->nx); + H5LTmake_dataset_double(group, "x", 1, &length, &d->x[d->Ng]); + + if (d->ny) { + length = d->ny; + H5LTmake_dataset_double(group, "y", 1, &length, &d->y[d->Ng]); + } + if (d->nz) { + length = d->nz; + H5LTmake_dataset_double(group, "z", 1, &length, &d->z[d->Ng]); + } + H5Gclose(group); +} + + +/*! + * /brief Save constants + */ +void SerialSaveDataHDF5::saveConsts() +{ + H5LTset_attribute_double(this->file, ".", "cfl", &d->cfl, 1); + H5LTset_attribute_double(this->file, ".", "gamma", &d->gamma, 1); + H5LTset_attribute_double(this->file, ".", "sigma", &d->sigma, 1); + H5LTset_attribute_double(this->file, ".", "cp", &d->cp, 1); + H5LTset_attribute_double(this->file, ".", "t", &d->t, 1); +} + + +/*! + * /brief Save a single variable to a checkpoint file + * + * Saves variables for debug or animation purposes. + * Finds what data index and array the variable name corresponds to, + * then opens a checkpoint file and saves to it. 
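The domain and constants writers above lean on the HDF5 "lite" (H5LT) convenience layer: one call per attribute, one call per dataset. A self-contained sketch of those two calls, with a placeholder file name and made-up values, looks like this:

// Sketch only: a root attribute and a 1-D dataset inside a group, via H5LT.
// Build with the HDF5 high-level library linked in, e.g. through h5c++ (flags vary by install).
#include "hdf5.h"
#include "hdf5_hl.h"

int main() {
  hid_t file = H5Fcreate("demo.hdf5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);

  double cfl = 0.1;                                       // root-level attribute, as in saveConsts()
  H5LTset_attribute_double(file, ".", "cfl", &cfl, 1);

  hid_t group = H5Gcreate(file, "Domain", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  double x[4] = {0.0, 0.25, 0.5, 0.75};                   // a small axis dataset, as in saveDomain()
  hsize_t length = 4;
  H5LTmake_dataset_double(group, "x", 1, &length, x);     // rank-1 dataset named "x"

  H5Gclose(group);
  H5Fclose(file);
  return 0;
}
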
+ * + * @param variable The name of the variable + * @param num The number of variables to save; not used in HDF5 version + */ +void SerialSaveDataHDF5::saveVar(string variable, int num) +{ + int found_var(-1); // Variable number + double *data; // Pointer to the data array containing the variable + + // Determine which variable the user wants saved + for (int var(0); var < d->Ncons; var++) { + if (strcmp(d->consLabels[var].c_str(), variable.c_str()) == 0) { + found_var=var; + data = d->cons; + break; + } + } + + if (found_var < 0) { + for (int var(0); var < d->Nprims; var++) { + if (strcmp(d->primsLabels[var].c_str(), variable.c_str()) == 0) { + found_var=var; + data = d->prims; + break; + } + } + } + + if (found_var < 0) { + for (int var(0); var < d->Naux; var++) { + if (strcmp(d->auxLabels[var].c_str(), variable.c_str()) == 0) { + found_var=var; + data = d->aux; + break; + } + } + } + + if (found_var < 0) { + printf("Error: Could not find user specified variable '%s'\n", variable.c_str()); + exit(1); + } + + this->openCheckpointFile(); + hid_t user_def = H5Gopen1(this->file, "UserDef"); + writeDataSetDouble(&user_def, variable.c_str(), &found_var, data); + H5Gclose(user_def); +} From 490764b72d2c5cca6962692402a948b86925facb Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Mon, 11 Jan 2021 15:22:38 +0000 Subject: [PATCH 40/56] continuing to modify gpu version for hdf5 and checkpoint restart --- Project/GPU/Include/initFunc.h | 3 +- Project/GPU/Include/initFuncFromCheckpoint.h | 21 +++ Project/GPU/Include/parallelBoundaryConds.h | 10 +- Project/GPU/Include/parallelEnv.h | 1 + .../Include/parallelInitFuncFromCheckpoint.h | 22 +++ Project/GPU/Include/saveData.h | 2 +- Project/GPU/Include/simData.h | 33 ++++- Project/GPU/Makefile | 46 +++++- Project/GPU/Src/initFuncFromCheckpoint.cu | 96 +++++++++++++ Project/GPU/Src/main.cu | 12 +- Project/GPU/Src/parallelEnv.cu | 28 ++-- .../GPU/Src/parallelInitFuncFromCheckpoint.cu | 134 ++++++++++++++++++ Project/GPU/Src/parallelSaveDataHDF5.cu | 1 + Project/GPU/Src/serialSaveDataHDF5.cu | 1 + Project/GPU/Src/simData.cu | 38 ++++- Project/GPU/Src/simulation.cu | 3 +- Project/GPU/Src/srrmhd.cu | 2 +- 17 files changed, 420 insertions(+), 33 deletions(-) create mode 100644 Project/GPU/Include/initFuncFromCheckpoint.h create mode 100644 Project/GPU/Include/parallelInitFuncFromCheckpoint.h create mode 100644 Project/GPU/Src/initFuncFromCheckpoint.cu create mode 100644 Project/GPU/Src/parallelInitFuncFromCheckpoint.cu diff --git a/Project/GPU/Include/initFunc.h b/Project/GPU/Include/initFunc.h index e36b3909..a6899572 100644 --- a/Project/GPU/Include/initFunc.h +++ b/Project/GPU/Include/initFunc.h @@ -13,10 +13,11 @@ class InitialFunc { private: - Data * data; //!< Pointer to Data class containing global simulation data public: + Data * data; //!< Pointer to Data class containing global simulation data + //! Constructor /*! 
Stores a pointer to the Data class for reference in its methods and diff --git a/Project/GPU/Include/initFuncFromCheckpoint.h b/Project/GPU/Include/initFuncFromCheckpoint.h new file mode 100644 index 00000000..4c93ff7a --- /dev/null +++ b/Project/GPU/Include/initFuncFromCheckpoint.h @@ -0,0 +1,21 @@ +#ifndef INITFUNCFROMCHECKPOINT_H +#define INITFUNCFROMCHCKPOINT_H + +#include "simData.h" +#include "initFunc.h" +#include "hdf5.h" +#include "hdf5_hl.h" + +class CheckpointRestart : public InitialFunc +{ + public: + CheckpointRestart(Data * data, const char* name); + + virtual ~CheckpointRestart() { } //!< Destructor + + virtual void readDataSetDouble(const hid_t *group, const char *name, const int *var, double *varData); +}; + + + +#endif diff --git a/Project/GPU/Include/parallelBoundaryConds.h b/Project/GPU/Include/parallelBoundaryConds.h index 85978654..636192a8 100644 --- a/Project/GPU/Include/parallelBoundaryConds.h +++ b/Project/GPU/Include/parallelBoundaryConds.h @@ -30,9 +30,9 @@ class ParallelBcs : public Bcs @param[in] *env pointer to ParallelEnv class @sa Bcs::Bcs */ - ParallelBcs(Data *data, ParallelEnv *env, int xPeriodic=1, int yPeriodic=1, int zPeriodic=1) : Bcs(data), env(env) + ParallelBcs(Data *data, ParallelEnv *env) : Bcs(data), env(env) { - env->setParallelDecomposition(xPeriodic, yPeriodic, zPeriodic); + } virtual ~ParallelBcs() { } //!< Destructor @@ -165,7 +165,7 @@ class ParallelOutflow : public ParallelBcs @param[in] *data pointer to Data class @sa ParallelBcs::ParallelBcs */ - ParallelOutflow(Data * data, ParallelEnv *env) : ParallelBcs(data, env, xPeriodic=0, yPeriodic=0, zPeriodic=0) { } + ParallelOutflow(Data * data, ParallelEnv *env) : ParallelBcs(data, env) { } virtual ~ParallelOutflow() { } //!< Destructor @@ -243,7 +243,7 @@ class ParallelPeriodic : public ParallelBcs @param[in] *env pointer to ParallelEnv class @sa ParallelBcs::ParallelBcs */ - ParallelPeriodic(Data * data, ParallelEnv * env) : ParallelBcs(data, env, xPeriodic=1, yPeriodic=1, zPeriodic=1) { } + ParallelPeriodic(Data * data, ParallelEnv * env) : ParallelBcs(data, env) { } virtual ~ParallelPeriodic() { } //!< Destructor @@ -276,7 +276,7 @@ class ParallelFlow : public ParallelBcs @param[in] *data pointer to Data class @sa ParallelBcs::ParallelBcs */ - ParallelFlow(Data * data, ParallelEnv *env) : ParallelBcs(data, env, xPeriodic=1, yPeriodic=0, zPeriodic=0) { } + ParallelFlow(Data * data, ParallelEnv *env) : ParallelBcs(data, env) { } virtual ~ParallelFlow() { } //!< Destructor diff --git a/Project/GPU/Include/parallelEnv.h b/Project/GPU/Include/parallelEnv.h index 7c67181e..0642fe46 100644 --- a/Project/GPU/Include/parallelEnv.h +++ b/Project/GPU/Include/parallelEnv.h @@ -20,6 +20,7 @@ class ParallelEnv : public PlatformEnv { public: MPI_Comm mpiCartesianComm; //!< Cartesian MPI communicator that maps processes to the simulation grid + MPI_Info mpiInfo; //!< MPI information channel //! 
Constructor -- Initialize global MPI communicator ParallelEnv(int *argcP, char **argvP[], int nxRanks, int nyRanks, int nzRanks, int testing=0); diff --git a/Project/GPU/Include/parallelInitFuncFromCheckpoint.h b/Project/GPU/Include/parallelInitFuncFromCheckpoint.h new file mode 100644 index 00000000..a4832095 --- /dev/null +++ b/Project/GPU/Include/parallelInitFuncFromCheckpoint.h @@ -0,0 +1,22 @@ +#ifndef PARALLELINITFUNCFROMCHECKPOINT_H +#define PARALLELINITFUNCFROMCHCKPOINT_H + +#include "simData.h" +#include "initFunc.h" +#include "hdf5.h" +#include "hdf5_hl.h" +#include "parallelEnv.h" + +class ParallelCheckpointRestart : public InitialFunc +{ + public: + ParallelCheckpointRestart(Data * data, const char* name, ParallelEnv *env); + + virtual ~ParallelCheckpointRestart() { } //!< Destructor + + virtual void readDataSetDouble(const hid_t *group, const char *name, const int *var, double *varData, ParallelEnv *env); +}; + + + +#endif diff --git a/Project/GPU/Include/saveData.h b/Project/GPU/Include/saveData.h index 95734cc2..20d97fcd 100644 --- a/Project/GPU/Include/saveData.h +++ b/Project/GPU/Include/saveData.h @@ -58,7 +58,7 @@ class SaveData in the Project folder. @param *data pointer to the Data class - @param test integar flagging if we are in the 'Examples' directory or not, + @param test integer flagging if we are in the 'Examples' directory or not, Only used for running the given examples, can ignore otherwise. */ SaveData(Data * data, int test=0) : d(data), Nouts(0), Ncount(0), test(test) diff --git a/Project/GPU/Include/simData.h b/Project/GPU/Include/simData.h index 1111ab3b..05936292 100644 --- a/Project/GPU/Include/simData.h +++ b/Project/GPU/Include/simData.h @@ -4,6 +4,7 @@ #include #include #include "platformEnv.h" +#include "checkpointArgs.h" /*! @@ -104,7 +105,7 @@ class Data //@{ alphaX, alphaY, alphaZ,//!< Max wave speed in specified direction. As we are evolving EM fields, this is always the speed of light. //@} - t, //!< Current time + t=-1, //!< Current time dt, //!< Width of current timestep //@{ dx, dy, dz; //!< Witdth of specified spatial step @@ -122,6 +123,10 @@ class Data //@} int dims, //!< Number of dimensions of simulation + //@{ + is, js, ks, + ie, je, ke, //!< Cell IDs for interior grid points + //@} GPUcount; //!< Number of NVIDIA devices detected cudaDeviceProp prop; //!< Properties of NVIDIA device (assuming all are same) @@ -144,6 +149,18 @@ class Data return var * this->Nx * this->Ny * this->Nz + i * this->Ny * this->Nz + j * this->Nz + k; } + //! Initialiser + /*! + @par + Allocates the memory required for the state arrays and sets the simulation + constants to the given values. Does not set initial state, thats done by + the initialFunc object. Called automatically from constructors after setting object vars. + This is separated from the constructor to avoid duplicated code between the two available + constructors for Data. + */ + void initData(PlatformEnv *env); + + //! Constructor /*! @par @@ -179,6 +196,20 @@ class Data double mu1=-1.0e4, double mu2=1.0e4, int frameskip=10); + //! Constructor + /*! + @par + Allocates the memory required for the state arrays and sets the simulation + constants to the given values. Does not set initial state, thats done by + the initialFunc object. 
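The comment above explains why the allocation logic moves into a separate initData(): both the explicit-argument constructor and the new CheckpointArgs constructor need the same setup. A cut-down sketch of that pattern, with illustrative names only, is:

// Sketch only: two constructors sharing one initialiser, as described above.
#include <cstdio>

struct Args { int nx; double endTime; };        // stand-in for the checkpoint-args object

class Grid {
 public:
  Grid(int nx, double endTime) : nx(nx), endTime(endTime) { init(); }
  Grid(const Args &args) : nx(args.nx), endTime(args.endTime) { init(); }
 private:
  int nx;
  double endTime;
  void init() {                                 // common setup/allocation lives here once
    std::printf("grid: nx=%d endTime=%g\n", nx, endTime);
  }
};

int main() {
  Grid fromArguments(64, 3.0);
  Grid fromCheckpoint(Args{128, 0.01});
  return 0;
}
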
+ @param args simulation arguments such as cfl, sigma etc, as read from checkpoint restart file + @param mu1 charge mass ratio of species 1 + @param mu2 charge mass ratio of species 2 + */ + Data(CheckpointArgs args, PlatformEnv *env, double mu1=-1.0e4, double mu2=1.0e4, + int frameskip=10, int reportItersPeriod=1, int functionalSigma=false, double gam=12); + + }; #endif diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index b3e0c821..981d7fc9 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -4,10 +4,23 @@ # -------------- PARAMETERS FOR USERS TO EDIT -------------------- # if USE_MPI=1, need to use parallel versions of objects, such as ParallelEnv, ParallelSaveData etc -USE_MPI=0 +USE_MPI=1 +USE_HDF=1 -# find location of MPI libraries to link on your local system using 'mpicc -show' -MPI_FLAGS = -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu -lmpichcxx -lmpich +# Compiler used for all compilation objects. This should be the version of the hdf5 +# compiler available on your system that links the correct mpi libraries if required. Should +# be one of h5pcc, h5pcc.openmpi or h5pcc.mpich if using MPI. Should be h5cc otherwise. +#CC = h5pcc +CC = mpic++ + +# Compiler used by hdf5 for c++. Shouldn't need to change this +#export HDF5_CXX := mpic++ +#export HDF5_CLINKER := mpic++ + +HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl + +# this should no longer be needed but leaving them in just in case +MPI_FLAGS = # -------------- END PARAMETERS FOR USERS TO EDIT -------------------- @@ -37,6 +50,11 @@ CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 -lineinfo # NVIDIA compiler flags NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 -Xcompiler -fopenmp -Xcompiler -Wall -lineinfo + +ifeq ($(USE_MPI), 1) + NVFLAGS += -ccbin ${CC} +endif + # Sources SRCS = main.cu \ simulation.cu \ @@ -58,12 +76,21 @@ SRCS = main.cu \ srrmhd.cu \ C2PArgs.cu + +SERIAL_SRCS = serialSaveData.cu \ + serialEnv.cu + +ifeq ($(USE_HDF), 1) + SERIAL_SRCS += serialSaveDataHDF5.cu +endif + PARALLEL_SRCS = parallelSaveData.cu \ parallelEnv.cu \ parallelBoundaryConds.cu -SERIAL_SRCS = serialSaveData.cu \ - serialEnv.cu +ifeq ($(USE_HDF), 1) + PARALLEL_SRCS += parallelSaveDataHDF5.cu +endif # Headers HDRS = ${SRCS:.cu=.h} cudaErrorCheck.h @@ -144,7 +171,7 @@ simulation.o : $(MODULE_DIR)/simulation.cu $(INC_DIR)/simulation.h $(INC_DIR)/mo $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) main.o : $(MODULE_DIR)/main.cu $(INC_DIR)/simulation.h $(INC_DIR)/model.h $(INC_DIR)/simData.h $(INC_DIR)/initFunc.h - $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) $(MPI_FLAGS) + $(CC_GPU) $< -c $(HDF5_FLAGS) $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) $(MPI_FLAGS) weno.o : $(MODULE_DIR)/weno.cu $(INC_DIR)/weno.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) @@ -179,6 +206,8 @@ boundaryConds.o : $(MODULE_DIR)/boundaryConds.cu $(INC_DIR)/boundaryConds.h serialSaveData.o : $(MODULE_DIR)/serialSaveData.cu $(INC_DIR)/serialSaveData.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) +serialSaveDataHDF5.o : $(MODULE_DIR)/serialSaveDataHDF5.cu $(INC_DIR)/serialSaveDataHDF5.h + $(CC_GPU) $< -c $(HDF5_FLAGS) $(NVFLAGS) -I$(INC_DIR) serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) @@ -191,6 +220,9 @@ fluxVectorSplitting.o : $(MODULE_DIR)/fluxVectorSplitting.cu $(INC_DIR)/fluxVect parallelSaveData.o 
: $(MODULE_DIR)/parallelSaveData.cu $(INC_DIR)/parallelSaveData.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) +parallelSaveDataHDF5.o : $(MODULE_DIR)/parallelSaveDataHDF5.cu $(INC_DIR)/parallelSaveDataHDF5.h + $(CC_GPU) $< -c $(HDF5_FLAGS) $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) + parallelBoundaryConds.o : $(MODULE_DIR)/parallelBoundaryConds.cu $(INC_DIR)/parallelBoundaryConds.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) @@ -202,7 +234,7 @@ parallelEnv.o : $(MODULE_DIR)/parallelEnv.cu $(INC_DIR)/parallelEnv.h # Executable main : $(RTFIND_OBJS) $(OBJS) $(ENV_OBJS) - $(CC_GPU) $^ -o $@ $(NVFLAGS) $(MPI_FLAGS) + $(CC_GPU) $^ -o $@ $(HDF5_FLAGS) $(NVFLAGS) $(MPI_FLAGS) buildRootfinder: @cd $(RTFIND_DIR) && $(MAKE) objects diff --git a/Project/GPU/Src/initFuncFromCheckpoint.cu b/Project/GPU/Src/initFuncFromCheckpoint.cu new file mode 100644 index 00000000..d714b1aa --- /dev/null +++ b/Project/GPU/Src/initFuncFromCheckpoint.cu @@ -0,0 +1,96 @@ +#include "initFuncFromCheckpoint.h" +#include +#include +#include +#include +#include +#include +#include "hdf5.h" +#include "hdf5_hl.h" + +/*! + * /brief Writes an HDF5 dataset to file + * + * Prepares the buffer for writing to file, and writes a dataset. + * + * @param group The group within the file (or the file itself for root datasets) + * @param name The name the dataset should have + * @param var Data is stored in 4-d arrays for each class of data (conserved/primitive/auxiliary), + * with the 1st dimension being the variable. This argument indicates which variable is being output. + * @param data The pointer to the data array. + */ +void CheckpointRestart::readDataSetDouble(const hid_t *group, const char *name, const int *var, + double *varData) { + // Syntax + Data * d(data); + + // So now, we set the data-space size. We also need to create a buffer to write to, that excludes the ghost cells. + // So we calculate the size it needs to be, excluding ghost cells. + hsize_t lengths[d->dims]; + + lengths[0] = d->ie - d->is; + unsigned long buffer_size = lengths[0]; // The length of the buffer + + if(d->dims > 1) { + lengths[1] = d->je - d->js; + buffer_size *= lengths[1]; + } + if(d->dims > 2) { + lengths[2] = d->ke - d->ks; + buffer_size = lengths[2]; + } + + // Now create the buffer to store the data in + double buffer[buffer_size]; + + H5LTread_dataset_double(*group, name, buffer); + + int buffer_position(0); + + // Consider the efficiency of this! std::copy would probably be better but maybe the compiler + // will vectorise this. I prefer the consistency of a single set of loops over having 1 per dimension. + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { + varData[ID(*var, i, j, k)] = buffer[buffer_position++]; + } + } + } +} + +CheckpointRestart::CheckpointRestart(Data * data, const char *name) : InitialFunc(data) +{ + // Syntax + Data * d(data); + + herr_t error=0; + hid_t file = H5Fopen(name, H5F_ACC_RDONLY, H5P_DEFAULT); + + if (file<0) throw std::runtime_error("Could not open checkpoint restart file. Does it exist? 
CheckpointArgs requires path to file and extension"); + + // Read number of vars and check against the number set by the model we are using + // We we check both cons and prims first, so if there is an error we know before we've wasted time + // reading in any data + int NconsFile=0, NprimsFile=0; + hid_t groupCons = H5Gopen(file, "Conserved", H5P_DEFAULT); + error = H5LTget_attribute_int(groupCons, ".", "Ncons", &(NconsFile)); + if (error<0 || NconsFile < d->Ncons) throw std::runtime_error("Too few conserved vars recorded in checkpoint restart file for this model"); + + hid_t groupPrims = H5Gopen(file, "Primitive", H5P_DEFAULT); + error = H5LTget_attribute_int(groupPrims, ".", "Nprims", &(NprimsFile)); + if (error<0 || NconsFile < d->Nprims) throw std::runtime_error("Too few primitive vars recorded in checkpoint restart file for this model"); + + // Read all cons vars + for(int var(0); var < d->Ncons; var++) { + readDataSetDouble(&groupCons, d->consLabels[var].c_str(), &var, d->cons); + } + H5Gclose(groupCons); + + // Read all prims vars + for(int var(0); var < d->Nprims; var++) { + readDataSetDouble(&groupPrims, d->primsLabels[var].c_str(), &var, d->prims); + } + H5Gclose(groupPrims); + + H5Fclose(file); +} diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 7b2ffe50..9ef7f103 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -5,9 +5,10 @@ #include "srmhd.h" #include "srrmhd.h" #include "boundaryConds.h" +#include "parallelBoundaryConds.h" #include "rkSplit.h" #include "SSP2.h" -#include "serialSaveData.h" +#include "parallelSaveDataHDF5.h" #include "fluxVectorSplitting.h" #include "serialEnv.h" @@ -42,6 +43,9 @@ int main(int argc, char *argv[]) { double sigma(0); bool output(true); int safety(180); + int nxRanks(2); + int nyRanks(2); + int nzRanks(1); char * ptr(0); //! Overwrite any variables that have been passed in as main() arguments @@ -51,7 +55,7 @@ int main(int argc, char *argv[]) { } } - SerialEnv env(&argc, &argv, 1, 1, 1); + ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, cfl, Ng, gamma, sigma); @@ -61,7 +65,7 @@ int main(int argc, char *argv[]) { FVS fluxMethod(&data, &model); - Flow bcs(&data); + ParallelFlow bcs(&data, &env); Simulation sim(&data, &env); @@ -69,7 +73,7 @@ int main(int argc, char *argv[]) { RK2 timeInt(&data, &model, &bcs, &fluxMethod); - SerialSaveData save(&data, &env); + ParallelSaveDataHDF5 save(&data, &env, "data_parallel", ParallelSaveDataHDF5::OUTPUT_ALL); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); diff --git a/Project/GPU/Src/parallelEnv.cu b/Project/GPU/Src/parallelEnv.cu index 0c8141a8..694e81ea 100644 --- a/Project/GPU/Src/parallelEnv.cu +++ b/Project/GPU/Src/parallelEnv.cu @@ -12,20 +12,30 @@ ParallelEnv::ParallelEnv(int *argcP, char **argvP[], int nxRanks, int nyRanks, int nzRanks, int testing) : PlatformEnv(testing) { + int initialized; MPI_Initialized(&initialized); - if (!initialized && !testing) MPI_Init(argcP, argvP); - - MPI_Comm_size(MPI_COMM_WORLD, &nProc); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + // TODO -- is testing required? 
Won't initialize if already initialised anyway + if (!initialized && !testing) MPI_Init(argcP, argvP); + + MPI_Comm_size(MPI_COMM_WORLD, &nProc); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Info_create(&this->mpiInfo); - if (rank==0){ + if (rank==0){ printf("Running in multi-process mode with %d processes\n", nProc); } - - this->nxRanks = nxRanks; - this->nyRanks = nyRanks; - this->nzRanks = nzRanks; + + this->nxRanks = nxRanks; + this->nyRanks = nyRanks; + this->nzRanks = nzRanks; + + // NOTE: We always set the parallel decomposition to be periodic in all dimensions here, rather than determining + // periodicity based on the Bcs object. This is very slightly less efficient for eg Flow bcs, as external processes will + // exchange a small amount of data which is not used, but makes the order in which bcs are created relative to + // PlatformEnv and Data much less strict. This is necessary as parallel checkpoint restart requires the cartesian + // mpi communicator set below to exist before being able to create Data, but bcs require Data to be created first. + setParallelDecomposition(1,1,1); } ParallelEnv::~ParallelEnv() diff --git a/Project/GPU/Src/parallelInitFuncFromCheckpoint.cu b/Project/GPU/Src/parallelInitFuncFromCheckpoint.cu new file mode 100644 index 00000000..17122142 --- /dev/null +++ b/Project/GPU/Src/parallelInitFuncFromCheckpoint.cu @@ -0,0 +1,134 @@ +#include "parallelInitFuncFromCheckpoint.h" +#include +#include +#include +#include +#include +#include +#include "hdf5.h" +#include "hdf5_hl.h" + +/*! + * /brief Writes an HDF5 dataset to file + * + * Prepares the buffer for writing to file, and writes a dataset. + * + * @param group The group within the file (or the file itself for root datasets) + * @param name The name the dataset should have + * @param var Data is stored in 4-d arrays for each class of data (conserved/primitive/auxiliary), + * with the 1st dimension being the variable. This argument indicates which variable is being output. + * @param data The pointer to the data array. + */ +void ParallelCheckpointRestart::readDataSetDouble(const hid_t *group, const char *name, const int *var, + double *varData, ParallelEnv* env) { + // Syntax + Data * d(data); + + // So now, we set the total data-space size, and the offset the local data-space has from it. + // The local data dimensions Nx/Ny/Nz include ghost cells, whilst the total ones (nx/ny/nz) do not. + // The data-spaces to be read should not include ghost cells + hsize_t lengths_local[d->dims]; + hsize_t lengths_total[d->dims]; + hsize_t offsets[d->dims]; + + lengths_total[0] = d->nx; + lengths_local[0] = (d->Nx - 2 * d->Ng); + offsets[0] = (d->Nx - 2 * d->Ng) * env->xRankId; + unsigned long buffer_size = lengths_local[0]; // The length of the buffer + + if(d->dims > 1) { + lengths_total[1] = d->ny; + lengths_local[1] = (d->Ny - 2 * d->Ng); + offsets[1] = (d->Ny - 2 * d->Ng) * env->yRankId; + buffer_size *= lengths_local[1]; + } + if(d->dims > 2) { + lengths_total[2] = d->nz; + lengths_local[2] = (d->Nz - 2 * d->Ng); + offsets[2] = (d->Nz - 2 * d->Ng) * env->zRankId; + buffer_size = lengths_local[2]; + } + + // Now create the buffer to store the data in + double buffer[buffer_size]; + + + // Define the total dataspace for this dataset, and create the dataset + hid_t dataspace_total = H5Screate_simple(d->dims, lengths_total, nullptr); + hid_t dataset = H5Dopen( + *group, name, H5P_DEFAULT + ); + + // Define the dataspace that describes the fraction of the total dataspace + // accessed by this process. 
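Before the hyperslab selection that follows, each process works out which slice of the global, ghost-free dataset it owns: the local interior extent along an axis, offset by that extent times the process's rank index along the same axis. A tiny sketch of that arithmetic with hypothetical sizes:

// Sketch only: per-rank extent and offset into the global dataset (made-up sizes).
#include <cstdio>

int main() {
  int nx = 256, Ng = 4, nxRanks = 4;        // global interior cells, ghost cells, ranks along x
  int Nx = nx / nxRanks + 2 * Ng;           // local array size including ghost cells

  for (int xRankId = 0; xRankId < nxRanks; xRankId++) {
    unsigned long local  = Nx - 2 * Ng;     // interior cells this rank owns
    unsigned long offset = local * xRankId; // where they start in the file's dataspace
    std::printf("rank %d reads cells [%lu, %lu)\n", xRankId, offset, offset + local);
  }
  return 0;
}

Each rank then selects exactly this slab of the file dataspace with H5Sselect_hyperslab before the collective H5Dread, which is what the code below does.
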
+ hid_t dataspace_local = H5Screate_simple(d->dims, lengths_local, nullptr); + + // Create an access property list that tells the write to use MPI + hid_t dataset_access_property_list = H5Pcreate(H5P_DATASET_XFER); + H5Pset_dxpl_mpio(dataset_access_property_list, H5FD_MPIO_COLLECTIVE); + + // Select the 'hyperslab', i.e. the subset of the total dataspace to write to + // This bit is per process + H5Sselect_hyperslab( + dataspace_total, H5S_SELECT_SET, offsets, nullptr, lengths_local, nullptr + ); + + // Read this processes hyperslab into the buffer + H5Dread( + dataset, H5T_NATIVE_DOUBLE, + dataspace_local, dataspace_total, + dataset_access_property_list, buffer + ); + + int buffer_position(0); + + // Consider the efficiency of this! std::copy would probably be better but maybe the compiler + // will vectorise this. I prefer the consistency of a single set of loops over having 1 per dimension. + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { + varData[ID(*var, i, j, k)] = buffer[buffer_position++]; + } + } + } +} + +ParallelCheckpointRestart::ParallelCheckpointRestart(Data * data, const char *name, ParallelEnv *env) : InitialFunc(data) +{ + // Syntax + Data * d(data); + + herr_t error=0; + + hid_t file_access_property_list = H5Pcreate(H5P_FILE_ACCESS); + H5Pset_fapl_mpio(file_access_property_list, env->mpiCartesianComm, env->mpiInfo); + hid_t file = H5Fopen(name, H5F_ACC_RDONLY, file_access_property_list); + + if (file<0) throw std::runtime_error("Could not open checkpoint restart file. Does it exist? CheckpointArgs requires path to file and extension"); + + // Read number of vars and check against the number set by the model we are using + // We we check both cons and prims first, so if there is an error we know before we've wasted time + // reading in any data + int NconsFile=0, NprimsFile=0; + hid_t groupCons = H5Gopen(file, "Conserved", H5P_DEFAULT); + error = H5LTget_attribute_int(groupCons, ".", "Ncons", &(NconsFile)); + if (error<0 || NconsFile < d->Ncons) throw std::runtime_error("Too few conserved vars recorded in checkpoint restart file for this model"); + + hid_t groupPrims = H5Gopen(file, "Primitive", H5P_DEFAULT); + error = H5LTget_attribute_int(groupPrims, ".", "Nprims", &(NprimsFile)); + if (error<0 || NconsFile < d->Nprims) throw std::runtime_error("Too few primitive vars recorded in checkpoint restart file for this model"); + + // Read all cons vars + for(int var(0); var < d->Ncons; var++) { + readDataSetDouble(&groupCons, d->consLabels[var].c_str(), &var, d->cons, env); + } + H5Gclose(groupCons); + + // Read all prims vars + for(int var(0); var < d->Nprims; var++) { + readDataSetDouble(&groupPrims, d->primsLabels[var].c_str(), &var, d->prims, env); + } + H5Gclose(groupPrims); + + H5Fclose(file); +} diff --git a/Project/GPU/Src/parallelSaveDataHDF5.cu b/Project/GPU/Src/parallelSaveDataHDF5.cu index 712465d4..c045e432 100644 --- a/Project/GPU/Src/parallelSaveDataHDF5.cu +++ b/Project/GPU/Src/parallelSaveDataHDF5.cu @@ -7,6 +7,7 @@ using namespace std; +#define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) /*! 
* /brief Opens a HDF5 file diff --git a/Project/GPU/Src/serialSaveDataHDF5.cu b/Project/GPU/Src/serialSaveDataHDF5.cu index 7b5463d4..45853d50 100644 --- a/Project/GPU/Src/serialSaveDataHDF5.cu +++ b/Project/GPU/Src/serialSaveDataHDF5.cu @@ -7,6 +7,7 @@ using namespace std; +#define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) /*! * /brief Opens a HDF5 file diff --git a/Project/GPU/Src/simData.cu b/Project/GPU/Src/simData.cu index 40e79c13..58bfc73f 100644 --- a/Project/GPU/Src/simData.cu +++ b/Project/GPU/Src/simData.cu @@ -25,8 +25,31 @@ Data::Data(int nx, int ny, int nz, Ncons(0), Nprims(0), Naux(0), cp(cp), mu1(mu1), mu2(mu2), - frameSkip(frameSkip) + frameSkip(frameSkip), t(0) { + initData(env); +} + +Data::Data(CheckpointArgs args, PlatformEnv *env, double mu1, double mu2, + int frameSkip) + : + nx(args.nx), ny(args.ny), nz(args.nz), + xmin(args.xmin), xmax(args.xmax), + ymin(args.ymin), ymax(args.ymax), + zmin(args.zmin), zmax(args.zmax), + endTime(args.endTime), cfl(args.cfl), Ng(args.Ng), + gamma(args.gamma), sigma(args.sigma), + memSet(0), bcsSet(0), + Ncons(0), Nprims(0), Naux(0), + cp(args.cp), + mu1(mu1), mu2(mu2), + frameSkip(frameSkip), + t(args.t) +{ + initData(env); +} + +void Data::initData(PlatformEnv *env){ // TODO -- handle nx not dividing perfectly into nxRanks // Set Nx to be nx per MPI process + ghost cells @@ -51,6 +74,17 @@ Data::Data(int nx, int ny, int nz, dims = 1; } + // Set some variables that define the interior cells + is = Ng; ie = Nx-Ng; // i-start, i-end + js = Ng; je = Ny-Ng; // j-start, j-end + ks = Ng; ke = Nz-Ng; // k-start, k-end + if (dims<3) { + ks = 0; ke = 1; + } + if (dims<2) { + js = 0; je = 1; + } + // Total number of cells Ncells = Nx * Ny * Nz; @@ -95,5 +129,5 @@ Data::Data(int nx, int ny, int nz, } // cudaDeviceSetCacheConfig(cudaFuncCachePreferShared); - } + diff --git a/Project/GPU/Src/simulation.cu b/Project/GPU/Src/simulation.cu index 46d36208..ebbbe636 100644 --- a/Project/GPU/Src/simulation.cu +++ b/Project/GPU/Src/simulation.cu @@ -56,7 +56,6 @@ Simulation::Simulation(Data * data, PlatformEnv *env) : data(data), env(env) d->dy = (d->ymax - d->ymin) / d->ny; d->dz = (d->zmax - d->zmin) / d->nz; d->iters = 0; - d->t = 0; d->alphaX = 1.0; d->alphaY = 1.0; d->alphaZ = 1.0; @@ -193,7 +192,7 @@ void Simulation::evolve(bool output, int safety) } if (safety>0 && d->iters%safety==0) { - this->save->saveAll(); + this->save->saveAll(true); if (env->rank==0) printf("Data saved...\n"); } diff --git a/Project/GPU/Src/srrmhd.cu b/Project/GPU/Src/srrmhd.cu index 43027d0d..5c703831 100644 --- a/Project/GPU/Src/srrmhd.cu +++ b/Project/GPU/Src/srrmhd.cu @@ -72,7 +72,7 @@ SRRMHD::SRRMHD(Data * data) : Model(data) this->data->consLabels.push_back("D"); this->data->consLabels.push_back("Sx"); - this->data->consLabels.push_back("Sy"); this->data->consLabels.push_back("Sx"); + this->data->consLabels.push_back("Sy"); this->data->consLabels.push_back("Sz"); this->data->consLabels.push_back("tau"); this->data->consLabels.push_back("Bx"); this->data->consLabels.push_back("By"); this->data->consLabels.push_back("Bz"); this->data->consLabels.push_back("Ex"); this->data->consLabels.push_back("Ey"); From 478fa0610c1cb5d1585d4e31c58a2836db4ee89e Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Mon, 11 Jan 2021 15:40:02 +0000 Subject: [PATCH 41/56] updating project makefile for checkpoint restart --- Project/GPU/Include/simData.h | 2 +- Project/GPU/Makefile | 20 +++++++++++++++++-- 
Project/GPU/Src/initFuncFromCheckpoint.cu | 2 ++ Project/GPU/Src/main.cu | 13 ++++++++++-- .../GPU/Src/parallelInitFuncFromCheckpoint.cu | 2 ++ 5 files changed, 34 insertions(+), 5 deletions(-) diff --git a/Project/GPU/Include/simData.h b/Project/GPU/Include/simData.h index 05936292..24665c62 100644 --- a/Project/GPU/Include/simData.h +++ b/Project/GPU/Include/simData.h @@ -207,7 +207,7 @@ class Data @param mu2 charge mass ratio of species 2 */ Data(CheckpointArgs args, PlatformEnv *env, double mu1=-1.0e4, double mu2=1.0e4, - int frameskip=10, int reportItersPeriod=1, int functionalSigma=false, double gam=12); + int frameskip=10); }; diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 981d7fc9..c7b284a5 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -81,7 +81,9 @@ SERIAL_SRCS = serialSaveData.cu \ serialEnv.cu ifeq ($(USE_HDF), 1) - SERIAL_SRCS += serialSaveDataHDF5.cu + SERIAL_SRCS += serialSaveDataHDF5.cu \ + initFuncFromCheckpoint.cu \ + checkpointArgs.cu endif PARALLEL_SRCS = parallelSaveData.cu \ @@ -89,7 +91,9 @@ PARALLEL_SRCS = parallelSaveData.cu \ parallelBoundaryConds.cu ifeq ($(USE_HDF), 1) - PARALLEL_SRCS += parallelSaveDataHDF5.cu + PARALLEL_SRCS += parallelSaveDataHDF5.cu \ + parallelInitFuncFromCheckpoint.cu \ + parallelCheckpointArgs.cu endif # Headers @@ -152,9 +156,18 @@ clean : simData.o : $(MODULE_DIR)/simData.cu $(INC_DIR)/simData.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) +serialCheckpointArgs.o : $(MODULE_DIR)/serialCheckpointArgs.cu $(INC_DIR)/serialCheckpointArgs.h + $(CC_GPU) $< -c $(HDF5_FLAGS) $(NVFLAGS) -I$(INC_DIR) + +parallelCheckpointArgs.o : $(MODULE_DIR)/parallelCheckpointArgs.cu $(INC_DIR)/parallelCheckpointArgs.h + $(CC_GPU) $< -c $(HDF5_FLAGS) $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) + initFunc.o : $(MODULE_DIR)/initFunc.cu $(INC_DIR)/initFunc.h $(INC_DIR)/simData.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) +initFuncFromCheckpoint.o : $(MODULE_DIR)/initFuncFromCheckpoint.cu $(INC_DIR)/initFuncFromCheckpoint.h $(INC_DIR)/simData.h + $(CC_GPU) $< -c $(HDF5_FLAGS) $(NVFLAGS) -I$(INC_DIR) + C2PArgs.o : $(MODULE_DIR)/C2PArgs.cu $(INC_DIR)/C2PArgs.h $(INC_DIR)/simData.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) @@ -229,6 +242,9 @@ parallelBoundaryConds.o : $(MODULE_DIR)/parallelBoundaryConds.cu $(INC_DIR)/para parallelEnv.o : $(MODULE_DIR)/parallelEnv.cu $(INC_DIR)/parallelEnv.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) +parallelInitFuncFromCheckpoint.o : $(MODULE_DIR)/parallelInitFuncFromCheckpoint.cu $(INC_DIR)/parallelInitFuncFromCheckpoint.h $(INC_DIR)/simData.h + $(CC_GPU) $< -c $(HDF5_FLAGS) $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) + #end parallel sources diff --git a/Project/GPU/Src/initFuncFromCheckpoint.cu b/Project/GPU/Src/initFuncFromCheckpoint.cu index d714b1aa..11dfb5ca 100644 --- a/Project/GPU/Src/initFuncFromCheckpoint.cu +++ b/Project/GPU/Src/initFuncFromCheckpoint.cu @@ -8,6 +8,8 @@ #include "hdf5.h" #include "hdf5_hl.h" +#define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) + /*! 
* /brief Writes an HDF5 dataset to file * diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 9ef7f103..75bce683 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -1,7 +1,9 @@ // Serial main #include "simData.h" +#include "parallelCheckpointArgs.h" #include "simulation.h" #include "initFunc.h" +#include "initFuncFromCheckpoint.h" #include "srmhd.h" #include "srrmhd.h" #include "boundaryConds.h" @@ -57,8 +59,15 @@ int main(int argc, char *argv[]) { ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); - Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, - cfl, Ng, gamma, sigma); + const char* filename = "data_t0.checkpoint.hdf5"; + + ParallelCheckpointArgs checkpointArgs(filename, &env); + checkpointArgs.endTime=3.0; + + Data data(checkpointArgs, &env); + + //Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, + //cfl, Ng, gamma, sigma); // Choose particulars of simulation SRMHD model(&data); diff --git a/Project/GPU/Src/parallelInitFuncFromCheckpoint.cu b/Project/GPU/Src/parallelInitFuncFromCheckpoint.cu index 17122142..f9533624 100644 --- a/Project/GPU/Src/parallelInitFuncFromCheckpoint.cu +++ b/Project/GPU/Src/parallelInitFuncFromCheckpoint.cu @@ -8,6 +8,8 @@ #include "hdf5.h" #include "hdf5_hl.h" +#define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) + /*! * /brief Writes an HDF5 dataset to file * From 919f4830ac4a02afb5e32cfd86ce12b7c0a66b65 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 12 Jan 2021 13:26:22 +0000 Subject: [PATCH 42/56] small fix to project makefile with checkpoint restart --- Project/GPU/Makefile | 2 +- Project/GPU/Src/main.cu | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index c7b284a5..3d160bb4 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -83,7 +83,7 @@ SERIAL_SRCS = serialSaveData.cu \ ifeq ($(USE_HDF), 1) SERIAL_SRCS += serialSaveDataHDF5.cu \ initFuncFromCheckpoint.cu \ - checkpointArgs.cu + serialCheckpointArgs.cu endif PARALLEL_SRCS = parallelSaveData.cu \ diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 75bce683..7158bcba 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -39,7 +39,8 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.0005); + //double endTime(0.0005); + double endTime(0.01); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); @@ -62,7 +63,7 @@ int main(int argc, char *argv[]) { const char* filename = "data_t0.checkpoint.hdf5"; ParallelCheckpointArgs checkpointArgs(filename, &env); - checkpointArgs.endTime=3.0; + checkpointArgs.endTime=0.1; Data data(checkpointArgs, &env); From de16673a04a834a3acd244082809436c8a3ba6ed Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Thu, 14 Jan 2021 09:49:24 +0000 Subject: [PATCH 43/56] adding example checkpoint file; cleaning up makefile --- Project/GPU/Makefile | 11 +++-------- Project/GPU/Src/main.cu | 2 +- Project/GPU/data_t0.checkpoint.hdf5 | Bin 0 -> 266408 bytes 3 files changed, 4 insertions(+), 9 deletions(-) create mode 100644 Project/GPU/data_t0.checkpoint.hdf5 diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 3d160bb4..f72611e3 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -7,16 +7,11 @@ USE_MPI=1 USE_HDF=1 -# Compiler used for all compilation objects. 
This should be the version of the hdf5
-# compiler available on your system that links the correct mpi libraries if required. Should
-# be one of h5pcc, h5pcc.openmpi or h5pcc.mpich if using MPI. Should be h5cc otherwise.
-#CC = h5pcc
 CC = mpic++
-# Compiler used by hdf5 for c++. Shouldn't need to change this
-#export HDF5_CXX := mpic++
-#export HDF5_CLINKER := mpic++
-
+# HDF5 libraries must be linked explicitly like this rather than using the hdf5 compiler h5pcc.
+# h5pcc should wrap mpicc with the hdf5 libraries included, but appears to interact badly with nvcc
+# The library paths below are found using h5pcc -show
 HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl
 # this should no longer be needed but leaving them in just in case
diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu
index 7158bcba..60e4f941 100644
--- a/Project/GPU/Src/main.cu
+++ b/Project/GPU/Src/main.cu
@@ -63,7 +63,7 @@ int main(int argc, char *argv[]) {
   const char* filename = "data_t0.checkpoint.hdf5";
   ParallelCheckpointArgs checkpointArgs(filename, &env);
-  checkpointArgs.endTime=0.1;
+  checkpointArgs.endTime=endTime;
   Data data(checkpointArgs, &env);
diff --git a/Project/GPU/data_t0.checkpoint.hdf5 b/Project/GPU/data_t0.checkpoint.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..c3c39aeb5902e4b9491ff662222cf322ce0cab1e
GIT binary patch
literal 266408
[266408 bytes of base85-encoded binary checkpoint data omitted]
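For reference, the restart side of this workflow opens the checkpoint read-only and pulls the global attributes back out with the matching H5LT getters, as CheckpointArgs does above; a minimal serial sketch (placeholder file name, no MPI file-access property list) is:

// Sketch only: reading restart attributes back from a checkpoint file.
#include <cstdio>
#include "hdf5.h"
#include "hdf5_hl.h"

int main() {
  hid_t file = H5Fopen("demo.hdf5", H5F_ACC_RDONLY, H5P_DEFAULT);
  if (file < 0) { std::fprintf(stderr, "could not open restart file\n"); return 1; }

  double cfl = 0.0, t = 0.0;
  herr_t err = 0;
  if (H5LTget_attribute_double(file, ".", "cfl", &cfl) < 0) err = -1;  // written by saveConsts()
  if (H5LTget_attribute_double(file, ".", "t",   &t)   < 0) err = -1;

  H5Fclose(file);
  if (err < 0) { std::fprintf(stderr, "missing attributes\n"); return 1; }
  std::printf("restarting from t=%g with cfl=%g\n", t, cfl);
  return 0;
}
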
z`F(ufW8U0!{9)xM7RvCUmgin;T=#$Dx??w=0@d;8pgLY>_0_qv{O=BW?EXgnmpd$Y z&wNv$I-Xppj@McJ<2|zc!(Vve=Z*aFy?stbXFtS^f?Oy#M$n_59&8F244%^?ay~Cl{*Y(Lr@Qx^}!Z z%RhRBf!j3lPuk=7=a#>IGOCXcymno$TszW-_Yd#rf)Zoz*WoB+Fm_u-7i#sGfiQ*asH8 z`wx>*9WTyT$Lp;Akma-dJ<9LS)5u?Vre9ajyLvLJ4SRL6_+)$uy3zwh=ef1{Ci{-}}v#Ae@px>r3Ps^i7^>UeZe z9WU0uaBY_V^En?{sF8o#Wiy|1-sO`~9q-?*zxVPi|MNcglp6UzS!Ca*ht~6wAz$ zUvz#bsxLY}71bA=AB*aX&d){lMb8gK_0C1kNzP4B9WP!#)bZ$`I$o@w-sex}a_4mC zcBqaQ|NQEBbWk1do2H-M{dZruPuw?99WU;mIvyQV$BXqvpP!=oqU%RdebM!^sJ`g_ zQB+@a|17F6IzJTE7oDGq>Wj{gMfK)lbF#S^s^i7$hdLe|RL6_;Mb8gK^+nH5MfF9` zk45#<>-^rOG-Iht<1ZR$`&R>&Zu@Hox^&G7mA0!=saL@Gx?5@9HfisEHF zA6KV-liNvalOAeEb-e5cnox%7c%9XcnYH0Gp}f_ObFS3DZol8}<}If{bv(IH9j~+cRTjUeUYI$me>cdnV`FTC#BvvsNG|Lvmz-|gLXGOFXrh3a^n z)xR}7%Rlc|*Z!fAU%KJA54Z0&8P)ORLUp{(>X$ex%OAM^xAyB=&p+jUeUYI$me>eOt2pD>pcH?(X&c*GHAQeXr+aRL7GG)$uy3f8>BH|J?y+ z|Fe<*@|?>YJM*lQQ5{b%RLASAeuLj<`Q5e~`S6VO{9)H^z1FR>O-6M*xlkRiv-+c+ z%JR=y_0PYUshd&2it?NP_se>qdjsl6tn zI-Xppj@McJcNfX>_c(9w@s0e`o}b~eGkQ-(bv(IH9j~+ck5^0z(m{+34mgf*@@@ptunsE#KWs^if?bv!z#jzWi+QMfFAZkD~gb`)5&o(fOgMzUcf^R9|#{ zEUGU$KNr;(JwFuH7d<}})fYWK7S$I$KmV`mi}tUmzUci^RA2P@DXK5JeiYRgT|bNJ zi|!vq^+os3qWYrqLs5Ov`KhSB==@kzUvz#hsxNweD5@`dek!UjdVVabcP@5Lc5a61 zc+SmG9ghyGl*=clN?==xDqUv&K}sxP{K z6xA2qKa1*%&JRWPMdzoY`l9n=QGL<*xv0MA`Jt%3==rIrzUcX}sDApM-?Lpf{X2Ca zbs%*hbs%*hbs%*hbs%*hbs%*hbs%*hbs%*hbs%*hbs%*hbs%*hbs%*hbs%*hbs%*h zbs%*hb>M%q1Bc9c<+?A8Il7!M^&#B{={ZR2Kw1aVI*`_Zv<{?oAgu#w9Z2gyS_jfP zkk)~;4y1J;tpjNtNb5ja2huu_)`7GRq;(*z18E&d>p)ru(mL?JyAJf&Y{^$YUVq8T znph=wmtQ@uP;7igKK0xO-u`N?pF-l}=`0YMB|8U95vb`5r<%TYOD-VBu;K3v6`QyH`bcesc zIvyP#zH;Qk3*X4{XPWi>AN{kQ|J|3D-Sm!)maJU!_~ZAyIz!*e)!iQZ*2sGP@~3y4 zuZ~B@hp!yD@S$~=YP%1vn0@SHpVjlPIH=c`_iVak<)7Wp_{m2-`c_U@e&t^K*YkhY zdzlV@e|0=MK78fKg+IPN%dhoc`iw8?`Je8%@x&opELpj%&%1lH&epfm|DzXwGOC{c z$weLKtK-q};VVZjTw?j9+kXF#e|p7nlj`|TZTaO+>ukMbWz4Z_em3hIeJg{HzNqIx z_56p|TeidBUmcH*4_`TQ;n1tI{PB;!_Ny-S`@YMf>z#Mt_Dfcl7+ah15A*h|tn}gd zM-Hy%-}F((`RaIdeE7!IVy4Ca7x&5U5t2-@OdAR3sON{K>w=&b; zZyR@5J%8qL{X6{q)$!=~@RcJM?tXoizj&`(?(SaC|LcXXJvMRJl9l(KTl1EA7Vlem z{ier998u4IcZ&fX=Bwk;@!=~+E?jQ;W!rxL^e|AVVL&R55yJEW%&pFa)xbYtLOK;=EC!~I-vgNu=CQ3 zKC)Wh%57&H@!LlJ36FN1uZ~B@hp!yDP#uqs4_`U!Nkix@jG?u3Bg1LUlYkK78fKh4uyegnh$4VqdY( z$c5^7bbR>AkqhmM_DTDuebl~cpOp*M@#y&Ql_M9L3(N`T26KeD!ki%&s^ih|;VVZj zG#8nZ%uVJfbCo$uE>y>(10k>*Nsrd+6wN5_Y+9J$b3Y)&>eo1@Lu z=4`o89gmI=UpaE2bAfY$bAxk)bA@w;T&Rvm$A_;RxzM@DImx-nIm)@pIZG~7$D`xJ zSB_lhT7%~tv%W)hJUTvn z<>)(fU${@)H|`_%mHSLCRL7&^!&i=6=(+Hmcy2sLo-5CpT&Rvm$A_;RxzM^`ov?0L zN31K>8M#m$kB$#tIdY+O(K>0}w2oR=t+R5WIvyP#zH;P3`+|MKzF{A+uh?hgLUlYk zK78fKh4w}JqAkqgZQ<^*$tIl^3F&X5b$@#y&Ql_M9Li_A&p zCUcaz%A6$^s^ih|;VVZjG#8o^&5h`E>y>(Akqg!F==kuJBNwXU z(edFcM=sPCebP65)K`6$3)S)H`0$k@7rHOpC+-{fk^9PhCKsyX(edFcM=tbScuqVw zo+Hnd=S(hC$D`xJSB_k0U9e7AH>@Mp73+*#sE$X+hp!yD(7I@yv~F5Qt*h2qxlkRC zjt^fsa-n^}K4IUmkJwl2GjgFi9vvUPa^yn$qJ7f7X&<$(+GpiLbv!ygeC5c6<^pqq zxxpM^t}tiFh3a^8eE7Akqf8S`TPF?LT(QL literal 0 HcmV?d00001 From d89ce8bd55f09a8697b3a5c13318190823b78c3d Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 19 Jan 2021 09:56:31 +0000 Subject: [PATCH 44/56] updating tests makefile to use mpicc rather than hard code mpi library path --- Tests/GPU/Makefile | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 303dd2a4..83419fef 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -17,9 +17,15 @@ # Compiler CC = nvcc +MPI_CC = mpic++ -#use `mpic++ -show` to find library and include flags -MPI_FLAGS = -I/local/software/mpich/3.2.1/gcc/include 
-L/local/software/mpich/3.2.1/gcc/lib -lmpi -lmpicxx +# HDF5 libraries must be linked explicitly like this rather than using the hdf5 compiler h5pcc. +# h5pcc should wrap mpicc with the hdf5 libraries included, but appears to interact badly with nvcc +# The library paths below are found using h5pcc -show +HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl + +# this should no longer be needed but leaving them in just in case +MPI_FLAGS = # Points to the root of Google Test, relative to where this file is. # Remember to tweak this if you move this file. @@ -43,12 +49,13 @@ STDFLAGS = -std=c++11 CXXFLAGS = -D_MWAITXINTRIN_H_INCLUDED \ -D_FORCE_INLINES \ -D__STRICT_ANSI__ \ - -Wno-deprecated-gpu-targets \ + -Wno-deprecated-gpu-targets NVFLAGS = -std=c++11 \ -rdc=true \ -fmad=false \ - -Wno-deprecated-gpu-targets + -Wno-deprecated-gpu-targets \ + -ccbin ${MPI_CC} @@ -67,7 +74,9 @@ TESTS = test_simulation \ test_imex -PARALLEL_TESTS = test_parallel_rk2 \ +PARALLEL_TESTS = test_parallel_rk2 + +HDF5_TESTS = test_hdf5_rk2 # All Google Test headers. Usually you shouldn't change this # definition. @@ -98,7 +107,7 @@ buildRootfinder: clean : - rm -f $(TESTS) gtest.a gtest_main.a *.o + rm -f $(TESTS) $(PARALLEL_TESTS) gtest.a gtest_main.a *.o # Builds gtest.a and gtest_main.a. From 8e623a34740571dea17bcc5fcf210615e01bdc83 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 19 Jan 2021 11:43:58 +0000 Subject: [PATCH 45/56] adding serial hdf5 to gpu tests --- Tests/GPU/Makefile | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 83419fef..9ea7fdff 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -49,7 +49,7 @@ STDFLAGS = -std=c++11 CXXFLAGS = -D_MWAITXINTRIN_H_INCLUDED \ -D_FORCE_INLINES \ -D__STRICT_ANSI__ \ - -Wno-deprecated-gpu-targets + -Wno-deprecated-gpu-targets \ NVFLAGS = -std=c++11 \ -rdc=true \ @@ -181,6 +181,13 @@ test_rk2.o : $(TEST_DIR)/test_rk2.cu $(INC_DIR)/RK2.h \ test_rk2 : test_rk2.o C2PArgs.o srmhd.o srrmhd.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ +test_hdf5_rk2.o : $(TEST_DIR)/test_hdf5_rk2.cu $(INC_DIR)/RK2.h \ + $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) + @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_rk2.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) + +test_hdf5_rk2 : test_hdf5_rk2.o C2PArgs.o srmhd.o srrmhd.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o serialSaveDataHDF5.o serialEnv.o $(RTFIND_OBJS) gtest_main.a + @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) -lpthread $^ -o $@ + # Explicit RK split integrator rkSplit.o : $(MODULE_DIR)/rkSplit.cu $(INC_DIR)/rkSplit.h $(INC_DIR)/RK2.h @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/rkSplit.cu -I$(INC_DIR) @@ -204,6 +211,9 @@ test_fvs : srmhd.o C2PArgs.o twoFluidEMHD.o test_fvs.o fluxVectorSplitting.o bou serialSaveData.o : $(MODULE_DIR)/serialSaveData.cu $(INC_DIR)/serialSaveData.h @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/serialSaveData.cu -I$(INC_DIR) +serialSaveDataHDF5.o : $(MODULE_DIR)/serialSaveDataHDF5.cu $(INC_DIR)/serialSaveDataHDF5.h + @$(CC) $(CPPFLAGS) $(CXXFLAGS) 
$(HDF5_FLAGS) $(NVFLAGS) -c $(MODULE_DIR)/serialSaveDataHDF5.cu -I$(INC_DIR) + parallelSaveData.o : $(MODULE_DIR)/parallelSaveData.cu $(INC_DIR)/parallelSaveData.h @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(MODULE_DIR)/parallelSaveData.cu -I$(INC_DIR) From a19f45a8d283b2f8f3997e9dae7dd93c3089aaa8 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 19 Jan 2021 11:44:26 +0000 Subject: [PATCH 46/56] adding serial hdf5 to gpu tests --- Tests/GPU/Src/test_hdf5_rk2.cu | 195 +++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 Tests/GPU/Src/test_hdf5_rk2.cu diff --git a/Tests/GPU/Src/test_hdf5_rk2.cu b/Tests/GPU/Src/test_hdf5_rk2.cu new file mode 100644 index 00000000..d46684af --- /dev/null +++ b/Tests/GPU/Src/test_hdf5_rk2.cu @@ -0,0 +1,195 @@ +#include "gtest/gtest.h" +#include "srrmhd.h" +#include "srmhd.h" +#include "simulation.h" +#include "serialSaveDataHDF5.h" +#include "simData.h" +#include "initFunc.h" +#include "RK2.h" +#include "fluxVectorSplitting.h" +#include + + +/* + Assumptions: + RKRandomInstabilitySingleFluid is tested in 2D only + BrioWuSingleFluid is tested in 1D only +*/ + + +// RKOTVSingleFluidPeriodic +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicOTVSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Periodic bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdPeriodicOTVSF", SerialSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + // Save data in test directory + // This currently needs to be set in the save() function above as well + strcpy(save.dir, "../TestData/GPUHDF5"); + strcpy(save.app, "RK2SrmhdPeriodicOTVSF"); + + save.saveAll(); +} +// RKOTVSingleFluidOutflow +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowOTVSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Outflow bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdOutflowOTVSF", SerialSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + // Save data in test directory + // This currently needs to be set in the save() function above as well + strcpy(save.dir, "../TestData/GPUHDF5"); + strcpy(save.app, "RK2SrmhdOutflowOTVSF"); + + save.saveAll(); + +} + + + + +// BrioWuSingleFluid + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. 
No tests are run in the serial version of this test + */ + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Outflow bcs(&d); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + + SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdOutflowBrioWuSF", SerialSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + // Save data in test directory + // This currently needs to be set in the save() function above as well + strcpy(save.dir, "../TestData/GPUHDF5"); + strcpy(save.app, "RK2SrmhdOutflowBrioWuSF"); + + save.saveAll(); + +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Periodic bcs(&d); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + + SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdPeriodicBrioWuSF", SerialSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + // Save data in test directory + // This currently needs to be set in the save() function above as well + strcpy(save.dir, "../TestData/GPUHDF5"); + strcpy(save.app, "RK2SrmhdPeriodicBrioWuSF"); + + save.saveAll(); +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdFlowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Flow bcs(&d); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + + SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdFlowBrioWuSF", SerialSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + // Save data in test directory + // This currently needs to be set in the save() function above as well + strcpy(save.dir, "../TestData/GPUHDF5"); + strcpy(save.app, "RK2SrmhdFlowBrioWuSF"); + + save.saveAll(); +} + + + From ea5548eec66965549a24ff9239a01f77abddea12 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 19 Jan 2021 14:57:12 +0000 Subject: [PATCH 47/56] updating test makefile to run hdf5 on ubuntu --- Tests/GPU/Makefile | 21 +++++++++++++-------- Tests/GPU/Src/main.cu | 2 +- makePaths.sh | 8 ++++++++ 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 9ea7fdff..4a7830ee 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -17,19 +17,24 @@ # Compiler CC = nvcc -MPI_CC = mpic++ +MPI_CC = mpicxx.mpich +GPU_COMPUTE_CAPABILITY = 61 + +##DOCKER_ENV = --allow-run-as-root +DOCKER_ENV = # HDF5 libraries must be linked explicitly like this rather than using the hdf5 compiler h5pcc. 
# h5pcc should wrap mpicc with the hdf5 libraries included, but appears to interact badly with nvcc # The library paths below are found using h5pcc -show -HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl +#HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl +HDF5_FLAGS = -I/usr/include/hdf5/mpich -L/usr/lib/x86_64-linux-gnu/hdf5/mpich /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5_hl.a /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5.a -lsz -lz -lm # this should no longer be needed but leaving them in just in case MPI_FLAGS = # Points to the root of Google Test, relative to where this file is. # Remember to tweak this if you move this file. -GTEST_DIR = ../../../GoogleTest +GTEST_DIR = ../../GoogleTest # Where to find user code. MODULE_DIR = ./../../Project/GPU/Src @@ -55,7 +60,8 @@ NVFLAGS = -std=c++11 \ -rdc=true \ -fmad=false \ -Wno-deprecated-gpu-targets \ - -ccbin ${MPI_CC} + -ccbin ${MPI_CC} \ + -arch=sm_${GPU_COMPUTE_CAPABILITY} @@ -70,13 +76,12 @@ TESTS = test_simulation \ test_srrmhd \ test_fvs \ test_id \ - test_rk2 \ + test_hdf5_rk2 \ test_imex PARALLEL_TESTS = test_parallel_rk2 -HDF5_TESTS = test_hdf5_rk2 # All Google Test headers. Usually you shouldn't change this # definition. @@ -96,7 +101,7 @@ gpu_test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) # Run all tests @$(foreach exe, $(TESTS), ./$(exe);) # Run all parallel tests - $(foreach exe, $(PARALLEL_TESTS), mpirun -np 4 ./$(exe);) + $(foreach exe, $(PARALLEL_TESTS), mpirun.mpich -np 4 ${DOCKER_ENV} ./$(exe);) test : gpu_test compare_mpi_test @@ -183,7 +188,7 @@ test_rk2 : test_rk2.o C2PArgs.o srmhd.o srrmhd.o fluxVectorSplitting.o boundaryC test_hdf5_rk2.o : $(TEST_DIR)/test_hdf5_rk2.cu $(INC_DIR)/RK2.h \ $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) - @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_rk2.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) + @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_hdf5_rk2.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) test_hdf5_rk2 : test_hdf5_rk2.o C2PArgs.o srmhd.o srrmhd.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o serialSaveDataHDF5.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) -lpthread $^ -o $@ diff --git a/Tests/GPU/Src/main.cu b/Tests/GPU/Src/main.cu index c6ff0a86..fb01812d 100644 --- a/Tests/GPU/Src/main.cu +++ b/Tests/GPU/Src/main.cu @@ -7,6 +7,6 @@ int main(int argc, char** argv) // Create env here to ensure MPI initialisation is handled. 
Will need to create this object again inside each test // -- mpi init will only be called the first time - ParallelEnv env(0, NULL, 1, 1, 1); + ParallelEnv env(0, NULL, 2, 2, 1); return RUN_ALL_TESTS(); } diff --git a/makePaths.sh b/makePaths.sh index d2125bf5..b58b2b4b 100644 --- a/makePaths.sh +++ b/makePaths.sh @@ -33,12 +33,16 @@ mkdir Tests/TestData/GPU/Conserved mkdir Tests/TestData/GPU/Constants mkdir Tests/TestData/GPU/Primitive +mkdir Tests/TestData/GPUHDF5 + mkdir Tests/TestData/MPIGPU mkdir Tests/TestData/MPIGPU/Auxiliary mkdir Tests/TestData/MPIGPU/Conserved mkdir Tests/TestData/MPIGPU/Constants mkdir Tests/TestData/MPIGPU/Primitive +mkdir Tests/TestData/MPIGPUHDF5 + mkdir Tests/TestData mkdir Tests/TestData/CPU mkdir Tests/TestData/CPU/Auxiliary @@ -46,6 +50,8 @@ mkdir Tests/TestData/CPU/Conserved mkdir Tests/TestData/CPU/Constants mkdir Tests/TestData/CPU/Primitive +mkdir Tests/TestData/CPUHDF5 + mkdir Tests/TestData/SerialHDF5 mkdir Tests/TestData/SerialTextToHDF5 mkdir Tests/TestData/CPUHDF5 @@ -57,6 +63,8 @@ mkdir Tests/TestData/Serial/Conserved mkdir Tests/TestData/Serial/Constants mkdir Tests/TestData/Serial/Primitive +mkdir Tests/TestData/SerialHDF5 + mkdir Examples/Data mkdir Examples/Data/Final mkdir Examples/Data/Final/Auxiliary From d8af18d5f9aae8bcb21e623faa322243179cf29f Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 19 Jan 2021 15:37:54 +0000 Subject: [PATCH 48/56] use hdf5 comparison script in gpu tests --- Tests/GPU/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 4a7830ee..33be5794 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -95,7 +95,7 @@ RTFIND = buildRootfinder compare_mpi_test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) gpu_test # Run tests that compare outputs of TestData/GPU and TestData/MPIGPU - py.test -v Src/compareParallelAndSerial.py + py.test -v Src/compareParallelAndSerialHDF5.py gpu_test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) # Run all tests From d87d993b280bf6f80c1808c93025fd3b9e575146 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Wed, 20 Jan 2021 15:05:02 +0000 Subject: [PATCH 49/56] adding parallel HDF5 to gpu tests --- Scripts/compareHDF5.py | 10 +- Tests/CPU/Src/compareParallelHDF5.py | 3 + Tests/CPU/Src/compareSerialHDF5.py | 3 + Tests/GPU/Makefile | 12 +- Tests/GPU/Src/compareParallelAndSerialHDF5.py | 37 ++++ Tests/GPU/Src/test_hdf5_parallel_rk2.cu | 186 ++++++++++++++++++ Tests/GPU/Src/test_hdf5_rk2.cu | 12 +- 7 files changed, 255 insertions(+), 8 deletions(-) create mode 100644 Tests/GPU/Src/compareParallelAndSerialHDF5.py create mode 100644 Tests/GPU/Src/test_hdf5_parallel_rk2.cu diff --git a/Scripts/compareHDF5.py b/Scripts/compareHDF5.py index dc5e981c..51171ed9 100644 --- a/Scripts/compareHDF5.py +++ b/Scripts/compareHDF5.py @@ -96,7 +96,15 @@ def compare(file1, file2): for attribute_name, a_attribute in a_group.attrs.items(): if attribute_name not in whitelist_attributes: b_attribute = b_group.attrs[attribute_name] - if not np.allclose(a_attribute, b_attribute): + if a_attribute.dtype.char == 'S': + if not a_attribute == b_attribute: + warnings_found = True + print( + "Warning: "+group_name+" attribute '"+attribute_name+"' values differ!\n" + " - "+file1+": "+a_attribute+"\n" + " - "+file2+": "+b_attribute + ) + elif not np.allclose(a_attribute, b_attribute): warnings_found = True print( "Warning: "+group_name+" attribute '"+attribute_name+"' values differ!\n" diff --git a/Tests/CPU/Src/compareParallelHDF5.py 
b/Tests/CPU/Src/compareParallelHDF5.py index 5e302821..6ce3ae4b 100644 --- a/Tests/CPU/Src/compareParallelHDF5.py +++ b/Tests/CPU/Src/compareParallelHDF5.py @@ -21,6 +21,9 @@ def test_compareParallelHDF5(): directory2: Path = Path("../TestData/CPUHDF5/") print("Running tests...") + # Double check that the previous steps have actually generated the files we expect + assert(len(list(directory2.glob("*")))>0) + assert(len(list(directory1.glob("*")))>0) # For each file, determine the appendix and use the CompareHDF5 script for serfile in directory2.glob("*"): diff --git a/Tests/CPU/Src/compareSerialHDF5.py b/Tests/CPU/Src/compareSerialHDF5.py index 0c34d59e..ffd0fb11 100644 --- a/Tests/CPU/Src/compareSerialHDF5.py +++ b/Tests/CPU/Src/compareSerialHDF5.py @@ -21,6 +21,9 @@ def test_compareSerialHDF5(): directory2: Path = Path("../TestData/SerialHDF5/") print("Running tests...") + # Double check that the previous steps have actually generated the files we expect + assert(len(list(directory2.glob("*")))>0) + assert(len(list(directory1.glob("*")))>0) # For each file, determine the appendix and use the CompareHDF5 script for serfile in directory2.glob("*"): diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 33be5794..e5f79c0d 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -80,7 +80,7 @@ TESTS = test_simulation \ test_imex -PARALLEL_TESTS = test_parallel_rk2 +PARALLEL_TESTS = test_hdf5_parallel_rk2 # All Google Test headers. Usually you shouldn't change this @@ -222,6 +222,9 @@ serialSaveDataHDF5.o : $(MODULE_DIR)/serialSaveDataHDF5.cu $(INC_DIR)/serialSave parallelSaveData.o : $(MODULE_DIR)/parallelSaveData.cu $(INC_DIR)/parallelSaveData.h @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(MODULE_DIR)/parallelSaveData.cu -I$(INC_DIR) +parallelSaveDataHDF5.o : $(MODULE_DIR)/parallelSaveDataHDF5.cu $(INC_DIR)/parallelSaveDataHDF5.h + @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(MODULE_DIR)/parallelSaveDataHDF5.cu -I$(INC_DIR) + # Platform env serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/serialEnv.cu -I$(INC_DIR) @@ -350,6 +353,13 @@ test_parallel_rk2.o : $(TEST_DIR)/test_parallel_rk2.cu $(INC_DIR)/RK2.h \ test_parallel_rk2 : main.o C2PArgs.o test_parallel_rk2.o srmhd.o srrmhd.o fluxVectorSplitting.o parallelBoundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o parallelEnv.o parallelSaveData.o $(RTFIND_OBJS) gtest.a $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -lpthread $^ -o $@ +test_hdf5_parallel_rk2.o : $(TEST_DIR)/test_hdf5_parallel_rk2.cu $(INC_DIR)/RK2.h \ + $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(TEST_DIR)/test_hdf5_parallel_rk2.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) + +test_hdf5_parallel_rk2 : main.o C2PArgs.o test_hdf5_parallel_rk2.o srmhd.o srrmhd.o fluxVectorSplitting.o parallelBoundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o parallelEnv.o parallelSaveDataHDF5.o $(RTFIND_OBJS) gtest.a + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) $(MPI_FLAGS) -lpthread $^ -o $@ + test_parallel_rkSplit.o : $(TEST_DIR)/test_parallel_rkSplit.cu $(INC_DIR)/rkSplit.h \ $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(TEST_DIR)/test_parallel_rkSplit.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) diff --git 
a/Tests/GPU/Src/compareParallelAndSerialHDF5.py b/Tests/GPU/Src/compareParallelAndSerialHDF5.py new file mode 100644 index 00000000..cd70ee82 --- /dev/null +++ b/Tests/GPU/Src/compareParallelAndSerialHDF5.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mon 26 Oct 2020 + +@author: ania + +Tests the precision of the serial plaintext and HDF5 version of METHOD to within +some tolerance. To execute these tests, `make test` from the Tests/CPU directory +""" + +import sys +from glob import glob +from pathlib import Path + +from compareHDF5 import compare + + +def test_compareParallelAndSerialHDF5(): + directory1: Path = Path("../TestData/GPUHDF5/") + directory2: Path = Path("../TestData/MPIGPUHDF5/") + + print("Running tests...") + + # Double check that the previous steps have actually generated the files we expect + assert(len(list(directory2.glob("*")))>0) + assert(len(list(directory1.glob("*")))>0) + + # For each file, determine the appendix and use the CompareHDF5 script + for serfile in directory2.glob("*"): + appendix = serfile.stem + # TODO -- is this still necessary? + appendix = appendix.strip('aux') + file1 = directory1 / (appendix + ".hdf5") + file2 = directory2 / (appendix + ".hdf5") + print(file1, file2) + assert(compare(str(file1), str(file2))) diff --git a/Tests/GPU/Src/test_hdf5_parallel_rk2.cu b/Tests/GPU/Src/test_hdf5_parallel_rk2.cu new file mode 100644 index 00000000..385bf228 --- /dev/null +++ b/Tests/GPU/Src/test_hdf5_parallel_rk2.cu @@ -0,0 +1,186 @@ +#include "gtest/gtest.h" +#include "srrmhd.h" +#include "srmhd.h" +#include "simulation.h" +#include "simData.h" +#include "parallelSaveDataHDF5.h" +#include "parallelBoundaryConds.h" +#include "initFunc.h" +#include "RK2.h" +#include "fluxVectorSplitting.h" +#include "parallelEnv.h" +#include + +/* + Assumptions: + RKRandomInstabilitySingleFluid is tested in 2D only + BrioWuSingleFluid is tested in 1D only +*/ + + +#if 1 +// RKOTVSingleFluidPeriodic +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicOTVSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelPeriodic bcs(&d, &env); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveDataHDF5 save(&d, &env, "../TestData/MPIGPUHDF5/RK2SrmhdPeriodicOTVSF", ParallelSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPUHDF5"); + strcpy(save.app, "RK2SrmhdPeriodicOTVSF"); + + save.saveAll(); +} +// RKOTVSingleFluidOutflow +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowOTVSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. 
No tests are run in the serial version of this test + */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelOutflow bcs(&d, &env); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveDataHDF5 save(&d, &env, "../TestData/MPIGPUHDF5/RK2SrmhdOutflowOTVSF", ParallelSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPUHDF5"); + strcpy(save.app, "RK2SrmhdOutflowOTVSF"); + + save.saveAll(); +} + + + + +// BrioWuSingleFluid + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelOutflow bcs(&d, &env); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveDataHDF5 save(&d, &env, "../TestData/MPIGPUHDF5/RK2SrmhdOutflowBrioWuSF", ParallelSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPUHDF5"); + strcpy(save.app, "RK2SrmhdOutflowBrioWuSF"); + + save.saveAll(); +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelPeriodic bcs(&d, &env); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveDataHDF5 save(&d, &env, "../TestData/MPIGPUHDF5/RK2SrmhdPeriodicBrioWuSF", ParallelSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPUHDF5"); + strcpy(save.app, "RK2SrmhdPeriodicBrioWuSF"); + + save.saveAll(); +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdFlowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. 
No tests are run in the serial version of this test + */ + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelFlow bcs(&d, &env); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveDataHDF5 save(&d, &env, "../TestData/MPIGPUHDF5/RK2SrmhdFlowBrioWuSF", ParallelSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPUHDF5"); + strcpy(save.app, "RK2SrmhdFlowBrioWuSF"); + + save.saveAll(); +} +#endif + + diff --git a/Tests/GPU/Src/test_hdf5_rk2.cu b/Tests/GPU/Src/test_hdf5_rk2.cu index d46684af..f5c4cf67 100644 --- a/Tests/GPU/Src/test_hdf5_rk2.cu +++ b/Tests/GPU/Src/test_hdf5_rk2.cu @@ -114,8 +114,8 @@ TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowBrioWuSF) SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdOutflowBrioWuSF", SerialSaveDataHDF5::OUTPUT_ALL); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); - // sim.evolve(); - sim.updateTime(); + sim.evolve(); + //sim.updateTime(); // sim.updateTime(); // Save data in test directory @@ -147,8 +147,8 @@ TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicBrioWuSF) SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdPeriodicBrioWuSF", SerialSaveDataHDF5::OUTPUT_ALL); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); - // sim.evolve(); - sim.updateTime(); + sim.evolve(); + //sim.updateTime(); // sim.updateTime(); // Save data in test directory @@ -179,8 +179,8 @@ TEST(RK2OutputConsistentWithSerial, RK2SrmhdFlowBrioWuSF) SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdFlowBrioWuSF", SerialSaveDataHDF5::OUTPUT_ALL); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); - // sim.evolve(); - sim.updateTime(); + sim.evolve(); + //sim.updateTime(); // sim.updateTime(); // Save data in test directory From 6e6f5c7ee970ec8f44e004bbe4f823b22c2aa064 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 22 Jan 2021 11:56:41 +0000 Subject: [PATCH 50/56] fixing a typo which lead to the latest cpu and gpu instructions for iridis being stored in the wrong folder --- Scrips/IridisEnv/tests_instructions.md | 46 ------------------- Scripts/IridisEnv/tests_instructions.md | 20 ++++++-- Scripts/IridisEnv/tests_job.sh | 22 --------- .../IridisEnv/tests_job_cpu.sh | 0 .../IridisEnv/tests_job_gpu.sh | 0 5 files changed, 17 insertions(+), 71 deletions(-) delete mode 100644 Scrips/IridisEnv/tests_instructions.md delete mode 100644 Scripts/IridisEnv/tests_job.sh rename {Scrips => Scripts}/IridisEnv/tests_job_cpu.sh (100%) rename {Scrips => Scripts}/IridisEnv/tests_job_gpu.sh (100%) diff --git a/Scrips/IridisEnv/tests_instructions.md b/Scrips/IridisEnv/tests_instructions.md deleted file mode 100644 index 3569ca88..00000000 --- a/Scrips/IridisEnv/tests_instructions.md +++ /dev/null @@ -1,46 +0,0 @@ -## Tests Instructions - -These are instructions to run GPU or CPU unit tests as a batch job on Iridis 5 - -## Setting up python env - -In the root METHOD folder, create a python venv using - -``` -module purge -module load gcc/6.4.0 -module load python/3.6.4 -module load hdf5/1.10.2/gcc/parallel -``` - -Optionally also type `module load cuda/8.0` if using gpu, - -Finish creating and activating the python venv with: - -``` -python3 -m venv venv -source venv/bin/activate -``` - -Then install python modules using - -``` 
-python -m pip install -r Scripts/IridisEnv/requirements.txt -``` - -## Runing unit tests as a batch job - -For GPU: - -From `Tests/GPU` run `sbatch ../../Scripts/IridisEnv/tests_job_gpu.sh` - -This will run all GPU tests - -For CPU: - -From `Tests/CPU` run `sbatch ../../Scripts/IridisEnv/tests_job_cpu.sh` - - - - - diff --git a/Scripts/IridisEnv/tests_instructions.md b/Scripts/IridisEnv/tests_instructions.md index 7b00163d..3569ca88 100644 --- a/Scripts/IridisEnv/tests_instructions.md +++ b/Scripts/IridisEnv/tests_instructions.md @@ -1,6 +1,6 @@ ## Tests Instructions -These are instructions to run CPU unit tests as a batch job on Iridis 5 +These are instructions to run GPU or CPU unit tests as a batch job on Iridis 5 ## Setting up python env @@ -11,6 +11,13 @@ module purge module load gcc/6.4.0 module load python/3.6.4 module load hdf5/1.10.2/gcc/parallel +``` + +Optionally also type `module load cuda/8.0` if using gpu, + +Finish creating and activating the python venv with: + +``` python3 -m venv venv source venv/bin/activate ``` @@ -23,9 +30,16 @@ python -m pip install -r Scripts/IridisEnv/requirements.txt ## Runing unit tests as a batch job -From `Tests/CPU` run `sbatch ../../Scripts/IridisEnv/tests_job.sh` +For GPU: + +From `Tests/GPU` run `sbatch ../../Scripts/IridisEnv/tests_job_gpu.sh` + +This will run all GPU tests + +For CPU: + +From `Tests/CPU` run `sbatch ../../Scripts/IridisEnv/tests_job_cpu.sh` -This will run all CPU tests including tests of the hdf5 serial and parallel writers diff --git a/Scripts/IridisEnv/tests_job.sh b/Scripts/IridisEnv/tests_job.sh deleted file mode 100644 index 8dcaa48e..00000000 --- a/Scripts/IridisEnv/tests_job.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -#SBATCH --ntasks-per-node=4 # Tasks per node -#SBATCH --nodes=1 # Number of nodes requested -#SBATCH --time=00:10:00 # walltime - -module purge -module load gcc/6.4.0 -module load python/3.6.4 -module load hdf5/1.10.2/gcc/parallel -#module load hdf5/1.10.2/gcc/serial - -module list - -source ../../venv/bin/activate - -export PYTHONPATH=$PYTHONPATH:../../Scripts:/home/amb1u19/METHOD_branches/METHOD_dev_hdf5/Scripts - -gcc --version -make clean -make test - diff --git a/Scrips/IridisEnv/tests_job_cpu.sh b/Scripts/IridisEnv/tests_job_cpu.sh similarity index 100% rename from Scrips/IridisEnv/tests_job_cpu.sh rename to Scripts/IridisEnv/tests_job_cpu.sh diff --git a/Scrips/IridisEnv/tests_job_gpu.sh b/Scripts/IridisEnv/tests_job_gpu.sh similarity index 100% rename from Scrips/IridisEnv/tests_job_gpu.sh rename to Scripts/IridisEnv/tests_job_gpu.sh From 3a03cbf36ef0eb891ee38108256175b94725a072 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 22 Jan 2021 11:56:41 +0000 Subject: [PATCH 51/56] fixing a typo which lead to the latest cpu and gpu instructions for iridis being stored in the wrong folder --- Scrips/IridisEnv/tests_instructions.md | 46 ------------------- Scripts/IridisEnv/tests_instructions.md | 20 ++++++-- Scripts/IridisEnv/tests_job.sh | 22 --------- .../IridisEnv/tests_job_cpu.sh | 0 .../IridisEnv/tests_job_gpu.sh | 0 5 files changed, 17 insertions(+), 71 deletions(-) delete mode 100644 Scrips/IridisEnv/tests_instructions.md delete mode 100644 Scripts/IridisEnv/tests_job.sh rename {Scrips => Scripts}/IridisEnv/tests_job_cpu.sh (100%) rename {Scrips => Scripts}/IridisEnv/tests_job_gpu.sh (100%) diff --git a/Scrips/IridisEnv/tests_instructions.md b/Scrips/IridisEnv/tests_instructions.md deleted file mode 100644 index 3569ca88..00000000 --- a/Scrips/IridisEnv/tests_instructions.md +++ 
/dev/null @@ -1,46 +0,0 @@ -## Tests Instructions - -These are instructions to run GPU or CPU unit tests as a batch job on Iridis 5 - -## Setting up python env - -In the root METHOD folder, create a python venv using - -``` -module purge -module load gcc/6.4.0 -module load python/3.6.4 -module load hdf5/1.10.2/gcc/parallel -``` - -Optionally also type `module load cuda/8.0` if using gpu, - -Finish creating and activating the python venv with: - -``` -python3 -m venv venv -source venv/bin/activate -``` - -Then install python modules using - -``` -python -m pip install -r Scripts/IridisEnv/requirements.txt -``` - -## Runing unit tests as a batch job - -For GPU: - -From `Tests/GPU` run `sbatch ../../Scripts/IridisEnv/tests_job_gpu.sh` - -This will run all GPU tests - -For CPU: - -From `Tests/CPU` run `sbatch ../../Scripts/IridisEnv/tests_job_cpu.sh` - - - - - diff --git a/Scripts/IridisEnv/tests_instructions.md b/Scripts/IridisEnv/tests_instructions.md index 7b00163d..3569ca88 100644 --- a/Scripts/IridisEnv/tests_instructions.md +++ b/Scripts/IridisEnv/tests_instructions.md @@ -1,6 +1,6 @@ ## Tests Instructions -These are instructions to run CPU unit tests as a batch job on Iridis 5 +These are instructions to run GPU or CPU unit tests as a batch job on Iridis 5 ## Setting up python env @@ -11,6 +11,13 @@ module purge module load gcc/6.4.0 module load python/3.6.4 module load hdf5/1.10.2/gcc/parallel +``` + +Optionally also type `module load cuda/8.0` if using gpu, + +Finish creating and activating the python venv with: + +``` python3 -m venv venv source venv/bin/activate ``` @@ -23,9 +30,16 @@ python -m pip install -r Scripts/IridisEnv/requirements.txt ## Runing unit tests as a batch job -From `Tests/CPU` run `sbatch ../../Scripts/IridisEnv/tests_job.sh` +For GPU: + +From `Tests/GPU` run `sbatch ../../Scripts/IridisEnv/tests_job_gpu.sh` + +This will run all GPU tests + +For CPU: + +From `Tests/CPU` run `sbatch ../../Scripts/IridisEnv/tests_job_cpu.sh` -This will run all CPU tests including tests of the hdf5 serial and parallel writers diff --git a/Scripts/IridisEnv/tests_job.sh b/Scripts/IridisEnv/tests_job.sh deleted file mode 100644 index 8dcaa48e..00000000 --- a/Scripts/IridisEnv/tests_job.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -#SBATCH --ntasks-per-node=4 # Tasks per node -#SBATCH --nodes=1 # Number of nodes requested -#SBATCH --time=00:10:00 # walltime - -module purge -module load gcc/6.4.0 -module load python/3.6.4 -module load hdf5/1.10.2/gcc/parallel -#module load hdf5/1.10.2/gcc/serial - -module list - -source ../../venv/bin/activate - -export PYTHONPATH=$PYTHONPATH:../../Scripts:/home/amb1u19/METHOD_branches/METHOD_dev_hdf5/Scripts - -gcc --version -make clean -make test - diff --git a/Scrips/IridisEnv/tests_job_cpu.sh b/Scripts/IridisEnv/tests_job_cpu.sh similarity index 100% rename from Scrips/IridisEnv/tests_job_cpu.sh rename to Scripts/IridisEnv/tests_job_cpu.sh diff --git a/Scrips/IridisEnv/tests_job_gpu.sh b/Scripts/IridisEnv/tests_job_gpu.sh similarity index 100% rename from Scrips/IridisEnv/tests_job_gpu.sh rename to Scripts/IridisEnv/tests_job_gpu.sh From 59a08096639f2d5bbefd196d283949051ee68321 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 22 Jan 2021 14:50:06 +0000 Subject: [PATCH 52/56] standardising gpu project and test makefile --- Project/GPU/Makefile | 39 +++++++++++++++++++++++++++++++++------ Tests/GPU/Makefile | 36 +++++++++++++++++++++++------------- 2 files changed, 56 insertions(+), 19 deletions(-) diff --git a/Project/GPU/Makefile 
b/Project/GPU/Makefile index f72611e3..91d92563 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -7,21 +7,39 @@ USE_MPI=1 USE_HDF=1 -CC = mpic++ +# The compute capability of the GPU +GPU_COMPUTE_CAPABILITY = 52 +# --- IF USE_MPI --- +# The c++ capable mpi compiler. In systems with multiple versions of MPI, the particular version may need to be specified with eg +# mpicxx.mpich +MPI_CC = mpic++ + +# --- IF USE_HDF --- # HDF5 libraries must be linked explicitly like this rather than using the hdf5 compiler h5pcc. # h5pcc should wrap mpicc with the hdf5 libraries included, but appears to interact badly with nvcc # The library paths below are found using h5pcc -show HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl +# Ubuntu 18.04 mpich example +#HDF5_FLAGS = -I/usr/include/hdf5/mpich -L/usr/lib/x86_64-linux-gnu/hdf5/mpich /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5_hl.a /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5.a -lsz -lz -lm -# this should no longer be needed but leaving them in just in case -MPI_FLAGS = +# Points to the root of Google Test, relative to where this file is. +# Remember to tweak this if you move this file. +GTEST_DIR = ../../../GoogleTest -# -------------- END PARAMETERS FOR USERS TO EDIT -------------------- +# -------------- END PARAMETERS USERS ARE LIKELY TO NEED TO EDIT -------------------- # Compiler CC_GPU = nvcc +# this should no longer be needed but leaving them in just in case +MPI_FLAGS = + +ifneq ($(USE_HDF), 1) + HDF5_FLAGS = +endif + + # Module directory MODULE_DIR = ./Src @@ -43,11 +61,20 @@ RTFIND_SRC_DIR = ./CminpackLibrary/Src CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 -lineinfo # NVIDIA compiler flags -NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 -Xcompiler -fopenmp -Xcompiler -Wall -lineinfo +NVFLAGS = -std=c++11 \ + -rdc=true \ + -fmad=false \ + -O3 \ + -Wno-deprecated-gpu-targets \ + -ccbin ${MPI_CC} \ + -arch=sm_${GPU_COMPUTE_CAPABILITY} \ + -Xcompiler -Wall \ + -Xcompiler -fopenmp \ + -lineinfo ifeq ($(USE_MPI), 1) - NVFLAGS += -ccbin ${CC} + NVFLAGS += -ccbin ${MPI_CC} endif # Sources diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index e5f79c0d..a2dd60e9 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -15,26 +15,36 @@ # project, except GTEST_HEADERS, which you can use in your own targets # but shouldn't modify. -# Compiler -CC = nvcc -MPI_CC = mpicxx.mpich -GPU_COMPUTE_CAPABILITY = 61 -##DOCKER_ENV = --allow-run-as-root -DOCKER_ENV = +# -------------- PARAMETERS FOR USERS TO EDIT -------------------- + +# The c++ capable mpi compiler. In systems with multiple versions of MPI, the particular version may need to be specified with eg +# mpicxx.mpich +MPI_CC = mpic++ +# The script used to launch mpi programs. In systems with multiple versions of MPI, the particular version may need to be +# specified with eg mpirun.mpich +MPIEXEC = mpirun +# The compute capability of the GPU +GPU_COMPUTE_CAPABILITY = 52 # HDF5 libraries must be linked explicitly like this rather than using the hdf5 compiler h5pcc. 
# h5pcc should wrap mpicc with the hdf5 libraries included, but appears to interact badly with nvcc # The library paths below are found using h5pcc -show -#HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl -HDF5_FLAGS = -I/usr/include/hdf5/mpich -L/usr/lib/x86_64-linux-gnu/hdf5/mpich /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5_hl.a /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5.a -lsz -lz -lm - -# this should no longer be needed but leaving them in just in case -MPI_FLAGS = +HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl +# Ubuntu 18.04 mpich example +#HDF5_FLAGS = -I/usr/include/hdf5/mpich -L/usr/lib/x86_64-linux-gnu/hdf5/mpich /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5_hl.a /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5.a -lsz -lz -lm # Points to the root of Google Test, relative to where this file is. # Remember to tweak this if you move this file. -GTEST_DIR = ../../GoogleTest +GTEST_DIR = ../../../GoogleTest + +# -------------- END PARAMETERS USERS ARE LIKELY TO NEED TO EDIT -------------------- + +# Compiler +CC = nvcc + +# this should no longer be needed but leaving them in just in case +MPI_FLAGS = # Where to find user code. MODULE_DIR = ./../../Project/GPU/Src @@ -101,7 +111,7 @@ gpu_test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) # Run all tests @$(foreach exe, $(TESTS), ./$(exe);) # Run all parallel tests - $(foreach exe, $(PARALLEL_TESTS), mpirun.mpich -np 4 ${DOCKER_ENV} ./$(exe);) + $(foreach exe, $(PARALLEL_TESTS), ${MPIEXEC} -np 4 ./$(exe);) test : gpu_test compare_mpi_test From 0920b8cc0d4b1c572a84717fdc45cc7d35a29c63 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 22 Jan 2021 15:13:54 +0000 Subject: [PATCH 53/56] add Scripts to gpu test python path --- Scrips/IridisEnv/requirements.txt | 5 ----- Scripts/IridisEnv/tests_job_gpu.sh | 2 ++ 2 files changed, 2 insertions(+), 5 deletions(-) delete mode 100644 Scrips/IridisEnv/requirements.txt diff --git a/Scrips/IridisEnv/requirements.txt b/Scrips/IridisEnv/requirements.txt deleted file mode 100644 index af599da1..00000000 --- a/Scrips/IridisEnv/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -numpy -matplotlib -scipy -pytest -h5py diff --git a/Scripts/IridisEnv/tests_job_gpu.sh b/Scripts/IridisEnv/tests_job_gpu.sh index 2e701818..534f3a04 100644 --- a/Scripts/IridisEnv/tests_job_gpu.sh +++ b/Scripts/IridisEnv/tests_job_gpu.sh @@ -15,6 +15,8 @@ module list source ../../venv/bin/activate +export PYTHONPATH=$PYTHONPATH:../../Scripts:/home/amb1u19/METHOD_branches/METHOD_dev_hdf5/Scripts + make clean make gpu_test From 8827b78eca084d1bf52bbc91527904d7622c25c0 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 29 Jan 2021 15:43:26 +0000 Subject: [PATCH 54/56] remove unecessary line from makefile --- Project/GPU/Makefile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 91d92563..8d6aa1d5 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -23,10 +23,6 @@ HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/ # Ubuntu 18.04 mpich example #HDF5_FLAGS = -I/usr/include/hdf5/mpich -L/usr/lib/x86_64-linux-gnu/hdf5/mpich /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5_hl.a /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5.a 
-lsz -lz -lm -# Points to the root of Google Test, relative to where this file is. -# Remember to tweak this if you move this file. -GTEST_DIR = ../../../GoogleTest - # -------------- END PARAMETERS USERS ARE LIKELY TO NEED TO EDIT -------------------- # Compiler From ce306cd5c1b9355f08fc30f258fd413ae99d7e8e Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Thu, 18 Feb 2021 15:08:03 +0000 Subject: [PATCH 55/56] fixing small error in Project makefile --- Project/GPU/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 8d6aa1d5..1aa1817e 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -62,7 +62,6 @@ NVFLAGS = -std=c++11 \ -fmad=false \ -O3 \ -Wno-deprecated-gpu-targets \ - -ccbin ${MPI_CC} \ -arch=sm_${GPU_COMPUTE_CAPABILITY} \ -Xcompiler -Wall \ -Xcompiler -fopenmp \ From 5f5fa1a3fd2bb28a4c85eea5394f425e23df35bb Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 26 Mar 2021 14:00:22 +0000 Subject: [PATCH 56/56] removing hard coded path from iridis scripts --- Scripts/IridisEnv/tests_job_cpu.sh | 14 ++++++++++++-- Scripts/IridisEnv/tests_job_gpu.sh | 13 ++++++++++++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/Scripts/IridisEnv/tests_job_cpu.sh b/Scripts/IridisEnv/tests_job_cpu.sh index 583b9043..34ed55f3 100644 --- a/Scripts/IridisEnv/tests_job_cpu.sh +++ b/Scripts/IridisEnv/tests_job_cpu.sh @@ -1,5 +1,8 @@ #!/bin/bash +# This script submits a Southampton Iridis5 batch job for the cpu tests +# in Tests/CPU + #SBATCH --ntasks-per-node=4 # Tasks per node #SBATCH --nodes=1 # Number of nodes requested #SBATCH --time=00:10:00 # walltime @@ -11,10 +14,17 @@ module load hdf5/1.10.2/gcc/parallel #module load hdf5/1.10.2/gcc/serial module list - source ../../venv/bin/activate -export PYTHONPATH=$PYTHONPATH:../../Scripts:/home/amb1u19/METHOD_branches/METHOD_dev_hdf5/Scripts +# -------------- PARAMETERS USERS NEED TO EDIT ------------------- + +# Enter absolute path to METHOD/Scripts directory here +SCRIPT_DIR=/absolute/path/to/method/root/Scripts + +# ----------------------------------------------------------------- + +# Let python find the scripts for comparing hdf5 files +export PYTHONPATH=$PYTHONPATH:$SCRIPT_DIR gcc --version make clean diff --git a/Scripts/IridisEnv/tests_job_gpu.sh b/Scripts/IridisEnv/tests_job_gpu.sh index 534f3a04..f6b84fcf 100644 --- a/Scripts/IridisEnv/tests_job_gpu.sh +++ b/Scripts/IridisEnv/tests_job_gpu.sh @@ -1,5 +1,8 @@ #!/bin/bash +# This script submits a Southampton Iridis5 batch job for the gpu tests +# in Tests/GPU + #SBATCH --ntasks-per-node=2 # Tasks per node #SBATCH --nodes=1 # Number of nodes requested #SBATCH --partition=gtx1080 @@ -15,7 +18,15 @@ module list source ../../venv/bin/activate -export PYTHONPATH=$PYTHONPATH:../../Scripts:/home/amb1u19/METHOD_branches/METHOD_dev_hdf5/Scripts +# -------------- PARAMETERS USERS NEED TO EDIT ------------------- + +# Enter absolute path to METHOD/Scripts directory here +SCRIPT_DIR=/absolute/path/to/method/root/Scripts + +# ----------------------------------------------------------------- + +# Let python find the scripts for comparing hdf5 files +export PYTHONPATH=$PYTHONPATH:$SCRIPT_DIR make clean make gpu_test
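
Note on the `HDF5_FLAGS` values used throughout the Makefile patches above: the include and library paths are site-specific (an Iridis 5 module tree and an Ubuntu 18.04 mpich install are the two examples shown). As the Makefile comments state, the flags are taken from the HDF5 compiler wrapper rather than compiling with it, because `h5pcc` interacts badly with `nvcc`. A minimal sketch of that workflow is below; the paths in the example output are illustrative only and are not part of the patch series.

```bash
# Print the compile/link line h5pcc would pass to mpicc, without compiling anything.
# The -I, -L and -l entries are then copied by hand into HDF5_FLAGS in
# Project/GPU/Makefile and Tests/GPU/Makefile.
h5pcc -show

# Illustrative output on an Ubuntu-style mpich install (paths will differ per system):
#   mpicc -I/usr/include/hdf5/mpich -L/usr/lib/x86_64-linux-gnu/hdf5/mpich \
#         -lhdf5_hl -lhdf5 -lsz -lz -ldl -lm
```

The same caveat applies to `GPU_COMPUTE_CAPABILITY`: it must match the target card (52 and 61 both appear in the patches above), since it is passed straight through to `nvcc` as `-arch=sm_${GPU_COMPUTE_CAPABILITY}`.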