From 585b8ed30e6f51d2f721f0d00b289f4fc9f20eb7 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 14 Jul 2020 11:38:31 +0100 Subject: [PATCH 01/56] changing cpu to use same example as gpu in Project folder. Changing gpu saveData to not write halo regions --- Project/CPU/Src/main.cc | 105 ++++++++++++++++++++---------------- Project/GPU/Makefile | 2 +- Project/GPU/Src/saveData.cu | 101 ++++++++++++++++++++++++++-------- Project/compare.py | 34 ++++++++++++ 4 files changed, 172 insertions(+), 70 deletions(-) create mode 100644 Project/compare.py diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index 231d0af1..941c5d1e 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -1,88 +1,101 @@ // Serial main -#include "parallelBoundaryConds.h" -#include "fluxVectorSplitting.h" -#include "parallelSaveData.h" +#include "simData.h" #include "simulation.h" #include "initFunc.h" -#include "simData.h" -#include "RKPlus.h" -#include "hybrid.h" +#include "srmhd.h" +#include "srrmhd.h" +#include "boundaryConds.h" +#include "rkSplit.h" +#include "SSP2.h" +#include "serialSaveData.h" +#include "fluxVectorSplitting.h" #include "weno.h" +#include +#include #include +#include #include +#include + using namespace std; int main(int argc, char *argv[]) { + const double MU(1000); // Set up domain - int Ng(7); - int nx(800); - int ny(0); + int Ng(4); + int nx(256); + int ny(512); int nz(0); - double xmin(0.0); - double xmax(1.0); + double xmin(-0.5); + double xmax(0.5); double ymin(-1.0); double ymax(1.0); - double zmin(0.0); - double zmax(1.0); - double endTime(0.4); - double gamma(2.0); - double cfl(0.5); - double cp(1); - double mu1(-1); - double mu2(1); - int frameSkip(1); - int reportItersPeriod(1); - - double sigma(40); - bool functionalSigma(true); - double gam(6); - - double nxRanks(4); - double nyRanks(1); - double nzRanks(1); - - ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); + double zmin(-1.5); + double zmax(1.5); + double endTime(3.0); + double cfl(0.1); + double gamma(4.0/3.0); + double sigma(300); + double cp(1.0); + double mu1(-MU); + double mu2(MU); + int frameSkip(180); + bool output(true); + int safety(180); + + + char * ptr(0); + //! 
Overwrite any variables that have been passed in as main() arguments + for (int i(0); i < argc; i++) { + if (strcmp(argv[i], "sigma") == 0) { + sigma = (double)strtol(argv[i+1], &ptr, 10); + } + } + + SerialEnv env(&argc, &argv, 1, 1, 1); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, - cfl, Ng, gamma, sigma, cp, mu1, mu2, frameSkip, reportItersPeriod, functionalSigma, gam); + cfl, Ng, gamma, sigma, cp, mu1, mu2, frameSkip); + // Choose particulars of simulation - Hybrid model(&data); + SRRMHD model(&data); - Weno7 weno(&data); + Weno3 weno(&data); FVS fluxMethod(&data, &weno, &model); - model.setupREGIME(&fluxMethod); - - ParallelOutflow bcs(&data, &env); + Flow bcs(&data); Simulation sim(&data, &env); - BrioWuSingleFluid init(&data); + KHInstabilitySingleFluid init(&data, 1); - RK4 timeInt(&data, &model, &bcs, &fluxMethod); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); - ParallelSaveData save(&data, &env, 0); + SerialSaveData save(&data, &env, 0); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); - // Time execution of programme - clock_t startTime(clock()); + //double startTime(omp_get_wtime()); // Run until end time and save results - sim.evolve(); - // sim.updateTime(); + // sim.evolve(output, safety); + sim.updateTime(); + sim.updateTime(); + sim.updateTime(); + sim.updateTime(); + sim.updateTime(); - double timeTaken(double(clock() - startTime)/(double)CLOCKS_PER_SEC); + //double timeTaken(omp_get_wtime()- startTime); save.saveAll(); - if (env.rank==0) printf("\nRuntime: %.5fs\nCompleted %d iterations.\n", timeTaken, data.iters); + //printf("\nRuntime: %.5fs\nCompleted %d iterations.\n", timeTaken, data.iters); return 0; diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index a84ac2e0..666dc027 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -23,7 +23,7 @@ RTFIND_SRC_DIR = ./CminpackLibrary/Src CXXFLAGS = -Xcompiler -fopenmp -Xcompiler -Wall # NVIDIA compiler flags -NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 +NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 # Sources SRCS = main.cu \ diff --git a/Project/GPU/Src/saveData.cu b/Project/GPU/Src/saveData.cu index 95798573..c87bbccb 100644 --- a/Project/GPU/Src/saveData.cu +++ b/Project/GPU/Src/saveData.cu @@ -35,7 +35,7 @@ void SaveData::saveCons() { FILE * f; - char fname[60]; + char fname[120]; strcpy(fname, dir); strcat(fname, "/Conserved/cons"); strcat(fname, app); @@ -43,7 +43,6 @@ void SaveData::saveCons() f = fopen(fname, "w"); // Ensure file is open - printf("Writing into %s\n", fname); if (f == NULL) { printf("Error: could not open 'cons.dat' for writing.\n"); exit(1); @@ -56,17 +55,35 @@ void SaveData::saveCons() } fprintf(f, "%s\n", d->consLabels[d->Ncons-1].c_str()); - - for (int var(0); var < d->Ncons; var++) { - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - fprintf(f, "%.16f ", d->cons[ID(var, i, j, k)]); + if (d->dims==3){ + for (int var(0); var < d->Ncons; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + for (int j(0); j < d->Ny-(2*d->Ng); j++) { + for (int k(0); k < d->Nz-(2*d->Ng); k++) { + fprintf(f, "%.16f ", d->cons[ID(var, i + d->Ng, j + d->Ng, k + d->Ng)]); + } + fprintf(f, "\n"); } + } + } + } else if (d->dims==2){ + for (int var(0); var < d->Ncons; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + for (int j(0); j < d->Ny-(2*d->Ng); j++) { + fprintf(f, 
"%.16f ", d->cons[ID(var, i + d->Ng, j + d->Ng, 0)]); + fprintf(f, "\n"); + } + } + } + } else { + for (int var(0); var < d->Ncons; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + fprintf(f, "%.16f ", d->cons[ID(var, i + d->Ng, 0, 0)]); fprintf(f, "\n"); } } } + fclose(f); } @@ -75,7 +92,7 @@ void SaveData::saveCons() void SaveData::savePrims() { FILE * f; - char fname[60]; + char fname[120]; strcpy(fname, dir); strcat(fname, "/Primitive/prims"); strcat(fname, app); @@ -91,12 +108,31 @@ void SaveData::savePrims() fprintf(f, "prims = "); for (int i(0); i < d->Nprims-1; i++) fprintf(f, "%s, ", d->primsLabels[i].c_str()); fprintf(f, "%s\n", d->primsLabels[d->Nprims-1].c_str()); - for (int var(0); var < d->Nprims; var++) { - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - fprintf(f, "%.16f ", d->prims[ID(var, i, j, k)]); + + if (d->dims==3){ + for (int var(0); var < d->Nprims; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + for (int j(0); j < d->Ny-(2*d->Ng); j++) { + for (int k(0); k < d->Nz-(2*d->Ng); k++) { + fprintf(f, "%.16f ", d->prims[ID(var, i + d->Ng, j + d->Ng, k + d->Ng)]); + } + fprintf(f, "\n"); + } + } + } + } else if (d->dims==2){ + for (int var(0); var < d->Nprims; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + for (int j(0); j < d->Ny-(2*d->Ng); j++) { + fprintf(f, "%.16f ", d->prims[ID(var, i + d->Ng, j + d->Ng, 0)]); + fprintf(f, "\n"); } + } + } + } else { + for (int var(0); var < d->Nprims; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + fprintf(f, "%.16f ", d->prims[ID(var, i + d->Ng, 0, 0)]); fprintf(f, "\n"); } } @@ -109,7 +145,7 @@ void SaveData::savePrims() void SaveData::saveAux() { FILE * f; - char fname[60]; + char fname[120]; strcpy(fname, dir); strcat(fname, "/Auxiliary/aux"); strcat(fname, app); @@ -125,12 +161,31 @@ void SaveData::saveAux() fprintf(f, "aux = "); for (int i(0); i < d->Naux-1; i++) fprintf(f, "%s, ", d->auxLabels[i].c_str()); fprintf(f, "%s\n", d->auxLabels[d->Naux-1].c_str()); - for (int var(0); var < d->Naux; var++) { - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - fprintf(f, "%.16f ", d->aux[ID(var, i, j, k)]); + + if (d->dims==3){ + for (int var(0); var < d->Naux; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + for (int j(0); j < d->Ny-(2*d->Ng); j++) { + for (int k(0); k < d->Nz-(2*d->Ng); k++) { + fprintf(f, "%.16f ", d->aux[ID(var, i + d->Ng, j + d->Ng, k + d->Ng)]); + } + fprintf(f, "\n"); } + } + } + } else if (d->dims==2){ + for (int var(0); var < d->Naux; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + for (int j(0); j < d->Ny-(2*d->Ng); j++) { + fprintf(f, "%.16f ", d->aux[ID(var, i + d->Ng, j + d->Ng, 0)]); + fprintf(f, "\n"); + } + } + } + } else { + for (int var(0); var < d->Naux; var++) { + for (int i(0); i < d->Nx-(2*d->Ng); i++) { + fprintf(f, "%.16f ", d->aux[ID(var, i + d->Ng, 0, 0)]); fprintf(f, "\n"); } } @@ -144,7 +199,7 @@ void SaveData::saveAux() void SaveData::saveDomain() { FILE * f; - char fname[60]; + char fname[120]; strcpy(fname, dir); strcat(fname, "/Domain/domain"); strcat(fname, app); @@ -176,7 +231,7 @@ void SaveData::saveDomain() void SaveData::saveConsts() { FILE * f; - char fname[60]; + char fname[120]; strcpy(fname, dir); strcat(fname, "/Constants/constants"); strcat(fname, app); @@ -204,7 +259,7 @@ void SaveData::saveVar(string variable, int num) int cpa(0); // cons=1,prims=2,aux=3 int Nvar(0); // Variable number FILE * f; - char fname[60]; + 
char fname[120]; // Determine which variable the user wants saved for (int var(0); var < d->Ncons; var++) { diff --git a/Project/compare.py b/Project/compare.py new file mode 100644 index 00000000..99fa52ad --- /dev/null +++ b/Project/compare.py @@ -0,0 +1,34 @@ +TOL=10e-15 + +time_format_folder="Final" +vars_folders=["Conserved", "Auxiliary", "Primitive"] +vars_files=["cons", "aux", "prims"] +extension=".dat" + +for index in range(len(vars_folders)): + serial_filename = "/".join(["CPU", "Data", time_format_folder, vars_folders[index], vars_files[index]]) + parallel_filename = "/".join(["GPU", "Data", time_format_folder, vars_folders[index], vars_files[index]]) + serial_filename = serial_filename+extension + parallel_filename = parallel_filename+extension + print("Processing: " + serial_filename + ", " + parallel_filename) + + try: + with open(serial_filename, 'r') as serial_dat_file: + with open(parallel_filename, 'r') as parallel_dat_file: + skip_header = 1 + line_number = 0 + for serial_line, parallel_line in zip(serial_dat_file, parallel_dat_file): + if skip_header: + skip_header = 0 + continue + serial_val = float(serial_line) + parallel_val = float(parallel_line) + line_number = line_number + 1 + if (abs(serial_val-parallel_val) > TOL): + print("\n\n!! Error in {} (val={}, line={}), {}, (val={})\n\n".format(serial_filename, serial_val, line_number, parallel_filename, parallel_val)) + break + + except IOError: + print("Could not read file:", filename) + + From 175cdadb222437943f759dbf32e4c08a7f762815 Mon Sep 17 00:00:00 2001 From: AlexJamesWright Date: Wed, 5 Aug 2020 14:10:28 +0100 Subject: [PATCH 02/56] only for now --- Project/CPU/Src/interactivePlotCPU.py | 757 ++++++++++++++++++++++++++ Tests/GPU/repeat.sh | 8 + Tests/play.py | 24 + 3 files changed, 789 insertions(+) create mode 100644 Project/CPU/Src/interactivePlotCPU.py create mode 100755 Tests/GPU/repeat.sh create mode 100644 Tests/play.py diff --git a/Project/CPU/Src/interactivePlotCPU.py b/Project/CPU/Src/interactivePlotCPU.py new file mode 100644 index 00000000..d8a288ec --- /dev/null +++ b/Project/CPU/Src/interactivePlotCPU.py @@ -0,0 +1,757 @@ +""" + Script gathers the state vectors stored in the Data directory and offers + functionality to plot various elements. +""" + + +import numpy as np +from matplotlib import pyplot as plt +from scipy.special import erf +from matplotlib import cm +import warnings +from contextlib import suppress + +warnings.filterwarnings('ignore', "No labelled objects found. ") + +# Change this to the relative path to the data you want to plot +# File names must start with e.g. `primitive`, anything between this +# and `.dat` should be stored in appendix +# By default, this script will gather data for the final condition of the +# simulation at t=t_end. To gather different data, add arguments to the +# constructor to include the path to the directory and any appendages. +FinalDirectory = '../Data/Final/' +appendix = '' + +class InteractivePlot(object): + + def __init__(self, DatDirectory=None, append=None, states=True): + if DatDirectory is None: + self.DatDir = FinalDirectory + else: + self.DatDir = DatDirectory + if append is None: + self.appendix = appendix + else: + self.appendix = append + self.gatherData(states) + print("Ready!") + + def gatherData(self, states): + """ + Collects and stores all the data required for plotting the final state of + the system. + + Parameters + ---------- + states : bool + Load all of the state arrays. 
If false, only the constants are + loaded to save time for animation. + + Notes + ----- + Stores the following public variables: + + cons : array of float + (Ncons, nx, ny, nz) Array containing the conserved vector + consLabels : array of string + (Ncons,) The labels of the conserved elements + prims : array of float + (Nprims, nx, ny, nz) Array containing the primitive vector + primLabels : array of string + (Nprims,) The labels of the primitive elements + aux : array of float + (Naux, nx, ny, nz) Array containing the auxiliary vector + auxLabels : array of string + (Naux,) The labels of the auxiliary elements + c : dictionary + Dictionary containing all constant data saved in simData. Access + elements by typing as an argument the constant you want as a string. + E.g. to get zmax, enter --> c['zmax'] + All links are the same as the constant name in the SimData class. + + """ + + # Dictionary to hold constants + self.c = {} + c = self.c + # Get constants first + print("Fetching constants...") + with open(self.DatDir + 'Constants/constants' + self.appendix + '.dat', 'r') as f: + for i, line in enumerate(f): + if not i==0: + line=line.split() + c['nx'] = int(line[0]) + c['ny'] = int(line[1]) + if c['ny'] == 0: + c['ny'] = 1 + c['nz'] = int(line[2]) + if c['nz'] == 0: + c['nz'] = 1 + c['Nx'] = int(line[3]) + c['Ny'] = int(line[4]) + c['Nz'] = int(line[5]) + c['xmin'] = float(line[6]) + c['xmax'] = float(line[7]) + c['ymin'] = float(line[8]) + c['ymax'] = float(line[9]) + c['zmin'] = float(line[10]) + c['zmax'] = float(line[11]) + c['endTime'] = float(line[12]) + c['cfl'] = float(line[13]) + c['Ng'] = int(line[14]) + c['gamma'] = float(line[15]) + c['sigma'] = float(line[16]) + c['Ncons'] = int(line[17]) + c['Nprims'] = int(line[18]) + c['Naux'] = int(line[19]) + c['cp'] = float(line[20]) + c['dt'] = float(line[21]) + c['t'] = float(line[22]) + c['dx'] = float(line[23]) + c['dy'] = float(line[24]) + c['dz'] = float(line[25]) + + print("{} conserved vectors".format(c['Ncons'])) + print("{} primitive vectors".format(c['Nprims'])) + print("{} auxiliary vectors".format(c['Naux'])) + + if states: + # Now gather conserved data + self.cons = np.zeros([c['Ncons'], c['nx'], c['ny'], c['nz']]) + print("Fetching conserved variables...") + with open(self.DatDir + 'Conserved/cons' + self.appendix + '.dat', 'r') as f: + for i, line in enumerate(f): + # Get cons var labels + if i==0: + consLabels = line.split()[2:] + # Get cons var data + else: + temp = line.split() + for k in range(c['nz']): + self.cons[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) + + + # Clean up labels (remove the commas) + self.cleanConsLabels = [] + for i in range(len(consLabels)-1): + self.cleanConsLabels.append(consLabels[i][:-1]) + self.cleanConsLabels.append(consLabels[-1]) + + with suppress(FileNotFoundError): + # Now get primitive variables if and store the data in array... 
+ self.prims = np.zeros([c['Nprims'], c['nx'], c['ny'], c['nz']]) + print("Fetching primitive variables...") + with open(self.DatDir + 'Primitive/prims' + self.appendix + '.dat', 'r') as f: + for i, line in enumerate(f): + # Get primitive var labels + if i==0: + primLabels = line.split()[2:] + # Get primitive var data + else: + temp = line.split() + for k in range(c['nz']): + self.prims[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) + + # Clean up labels (remove the commas) + self.cleanPrimLabels = [] + for i in range(len(primLabels)-1): + self.cleanPrimLabels.append(primLabels[i][:-1]) + self.cleanPrimLabels.append(primLabels[-1]) + + with suppress(FileNotFoundError): + # And finally the aux vars if available + self.aux = np.zeros([c['Naux'], c['nx'], c['ny'], c['nz']]) + print("Fetching auxiliary variables...") + with open(self.DatDir + 'Auxiliary/aux' + self.appendix +'.dat', 'r') as f: + for i, line in enumerate(f): + # Get cons var labels + if i==0: + auxLabels = line.split()[2:] + # Get cons var data + else: + temp = line.split() + for k in range(c['nz']): + self.aux[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) + + # Clean up labels (remove the commas) + self.cleanAuxLabels = [] + for i in range(len(auxLabels)-1): + self.cleanAuxLabels.append(auxLabels[i][:-1]) + self.cleanAuxLabels.append(auxLabels[-1]) + + with suppress(FileNotFoundError): + # Grab domain data + self.x = np.zeros(c['nx']) + self.y = np.zeros(c['ny']) + self.z = np.zeros(c['nz']) + coords = [self.x, self.y, self.z] + print("Fetching domain coordinates...") + with open(self.DatDir + 'Domain/domain' + self.appendix +'.dat', 'r') as f: + for coord, (i, line) in zip(coords, enumerate(f)): + temp = line.split() + print(len(temp)) + for k, val in enumerate(temp): + coord[k] = float(val) + + + + def _getVarFromLine(self, line, nx, ny): + """ + Given the line number that the iterator is on, and the size of the x-domain, + returns the index of the primitive variable this data belongs to. + + Parameters + ---------- + line: int + The line number the file pointer is pointing to. We want to know which + primitive variable this line's data corresponds to. + nx: int + The total number (incl ghost cells) of domain cells in the x-direction. + ny: int + The total number (incl ghost cells) of domain cells in the y-direction. + + Returns + ------- + var: + The primitive variable index of this line's data. + + Other + ----- + Function will throw a ValueError if trying to get the primitive index + of the first (zero'th) line. + """ + if line == 0: + raise ValueError('Line zero does not contain any data') + else: + return ((line-1)//ny)//nx + + + def _getXIndexFromLine(self, line, nx, ny): + """ + Given the line number that the iterator is on, and the size of the x-domain, + returns the x-index of this line's data. + + Parameters + ---------- + line: int + The line number the file pointer is pointing to. We want to know which + primitive variable this line's data corresponds to. + nx: int + The total number (incl ghost cells) of domain cells in the x-direction. + ny: int + The total number (incl ghost cells) of domain cells in the y-direction. + + Returns + ------- + index: + The x-index of the current line's data. 
+ """ + return ((line-1)//ny)%nx + + def _getYIndexFromLine(self, line, nx, ny): + """ + Given the line number that the iterator is on, and the size of the y-domain, + returns the y-index of this line's data. + + Parameters + ---------- + line: int + The line number the file pointer is pointing to. We want to know which + primitive variable this line's data corresponds to. + nx: int + The total number (incl ghost cells) of domain cells in the x-direction. + ny: int + The total number (incl ghost cells) of domain cells in the y-direction. + + Returns + ------- + index: + The y-index of the current line's data. + """ + return (line-1)%ny + + + + + ############################################################################### + # Plotting Functions # + ############################################################################### + + + + + def plotHeatMaps(self, data='prims', color=None, axis=2): + """ + Plots the 2D heatmap of the given data. The axes to be plotted can be + selected via the axis parameter---this corresponds to the axis you want + to ignore. + + Parameters + ---------- + data: string + Describes which variables the user wants to plot. Choose from + 'prims', 'cons', 'aux' or 'primitive', 'conserved' and 'auxiliary' + color: matplotlib color map + The colour theme to be plotting in. This can take string arguments + but best to stick to variants of cm.somecolourscheme + E.g. cm.magma + axis: int + The axis the user wants to ignore. + (0, 1, 2) = (x, y, z) + """ + if data=='prims' or data=='primitive': + data = self.prims + dataLabels = self.cleanPrimLabels + elif data=='cons' or data=='conserved': + data = self.cons + dataLabels = self.cleanConsLabels + elif data=='aux' or data=='auxiliary': + data = self.aux + data = self.cleanAuxLabels + else: + raise ValueError("Variable type not recognised, please try again") + c = self.c + + for i in range(data.shape[0]): + fig, ax = plt.subplots(1) + if (axis == 0): + plotVars = data[i, c['Nx']//2, :, :] + axisLabel1 = r'$y$' + axisLabel2 = r'$z$' + if (axis == 1): + plotVars = data[i, :, c['Ny']//2, :] + axisLabel1 = r'$x$' + axisLabel2 = r'$z$' + if (axis == 2): + plotVars = data[i, :, :, c['Nz']//2] + axisLabel1 = r'$x$' + axisLabel2 = r'$y$' + + if color==None: + color = cm.afmhot + surf = ax.imshow(plotVars.T, cmap=color, interpolation='bicubic', aspect='auto') + ax.set_title(r'Time Evolution for {}: $t = {}$'.format(dataLabels[i], c['t'])) + ax.set_xlim([0, self.c['nx']]) + ax.set_ylim([0, self.c['ny']]) + ax.set_xlabel(axisLabel1) + ax.set_ylabel(axisLabel2) + fig.colorbar(surf, shrink=0.5, aspect=5) + plt.show() + return ax + + def plotSlice(self, data='prims', axis=0): + """ + Plots the variation of data in the `axis` direction. + + Parameters + ---------- + data: string + Describes which variables the user wants to plot. Choose from + 'prims', 'cons', 'aux' or 'primitive', 'conserved' and 'auxiliary' + color: matplotlib color map + The colour theme to be plotting in. This can take string arguments + but best to stick to variants of cm.somecolourscheme + E.g. cm.magma + axis: int, optional + The axis the user wants to plot in. + (0, 1, 2) = (x, y, z) + Defaults to axis=0, x-direction. 
+ """ + if data=='prims' or data=='primitive': + data = self.prims + dataLabels = self.cleanPrimLabels + elif data=='cons' or data=='conserved': + data = self.cons + dataLabels = self.cleanConsLabels + elif data=='aux' or data=='auxiliary': + data = self.aux + dataLabels = self.cleanAuxLabels + else: + raise ValueError("Variable type not recognised, please try again") + c = self.c + + Nx, Ny, Nz = c['Nx'], c['Ny'], c['Nz'] + + for i in range(len(data)): + plt.figure() + if (axis == 0): + plotVars = data[i, :, Ny//2, Nz//2] + axisLabel = r'$x$' + step = c['dx'] + n = c['nx'] + left, right = c['xmin'], c['xmax'] + if (axis == 1): + plotVars = data[i, Nx//2, :, Nz//2] + axisLabel = r'$y$' + step = c['dy'] + n = c['ny'] + left, right = c['ymin'], c['ymax'] + if (axis == 2): + plotVars = data[i, Nx//2, Ny//2, :] + axisLabel = r'$z$' + step = c['dz'] + n = c['nz'] + left, right = c['zmin'], c['zmax'] + + ymin = np.min(plotVars) + ymax = np.max(plotVars) + rangeY = ymax - ymin + ylower = ymin - 0.025 * rangeY + yupper = ymax + 0.025 * rangeY + xs = np.linspace(left + step/2, right - step/2, n) + plt.plot(xs, plotVars, label='{}'.format(dataLabels[i])) + plt.title(r'Time Evolution for {}: $t = {}$'.format(dataLabels[i], c['t'])) + plt.xlabel(axisLabel) + plt.ylabel(r'$q_{}(x)$'.format(i+1)) + plt.xlim([c['xmin'], c['xmax']]) +# plt.ylim((ylower, yupper)) + plt.legend(loc='lower center', fontsize=10) + plt.show() + + + def plotTwoFluidSlice(self): + """ + Plots the variation of total data in the x-direction of the two fluids. + + """ + + c = self.c + Ny, Nz = c['Ny'], c['Nz'] + + rho = self.prims[0, :, Ny//2, Nz//2] + self.prims[5, :, Ny//2, Nz//2] + p = self.prims[4, :, Ny//2, Nz//2] + self.prims[9, :, Ny//2, Nz//2] + var = [rho, *self.aux[31:34, :, Ny//2, Nz//2], p, *self.prims[10:, :, Ny//2, Nz//2]] + varLab = [r'$\rho$', r'$u_x$', r'$u_y$', r'$u_z$', r'$p$', r'$B_x$', r'$B_y$', r'$B_z$', r'$E_x$', r'$E_y$', r'$E_z$'] + + xs = np.linspace(c['xmin'] + c['dx']/2, c['xmax'] - c['dx']/2, c['nx']) + + for i, v in enumerate(var): + plt.figure() + plt.plot(xs, v) + plt.title(varLab[i]) + ymin = np.min(v) + ymax = np.max(v) + rangeY = ymax - ymin + ylower = ymin - 0.025 * rangeY + yupper = ymax + 0.025 * rangeY + plt.title(r'Time Evolution for {}: $t = {}$'.format(varLab[i], c['t'])) + plt.xlabel(r'$x$') + plt.ylabel(r'$q_{}(x)$'.format(i+1)) + plt.ylim((ylower, yupper)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend(loc='lower center', fontsize=10) + plt.show() + + def plotTwoFluidCurrentSheetAgainstExact(self): + """ + The current sheet has an analytical solution for the y-direction magnetic + field. This is plotted against the given B-field. + """ + By = self.cons[11] + c = self.c + plt.figure() + xs = np.linspace(c['xmin'], c['xmax'], c['nx']) + exact = np.sign(xs)*erf(0.5 * np.sqrt(c['sigma'] * xs ** 2 / (c['t']+1))) + plt.plot(xs, By[:, 0, 0], label='Numerical') + plt.plot(xs, exact, label='Exact') + plt.xlim([c['xmin'], c['xmax']]) + plt.ylim([-1.2, 1.2]) + plt.xlabel(r'$x$') + plt.ylabel(r'$B_y$') + plt.title(r'Comparison of exact and numerical $B_y$ at $t={:.4f}$'.format(c['t']+1)) + plt.legend(loc='upper left') + plt.show() + #return np.linalg.norm(exact - By[:, 0, 0]) + + + def plotSingleFluidCurrentSheetAgainstExact(self, direction=0): + """ + The current sheet has an analytical solution for the y-direction magnetic + field. This is plotted against the given B-field. 
+ """ + c = self.c + plt.figure() + nx = self.c['Nx'] // 2 + ny = self.c['Ny'] // 2 + nz = self.c['Nz'] // 2 + + if direction == 0: + B = self.cons[6, :, ny, nz] + x = np.linspace(c['xmin'], c['xmax'], c['nx']) + elif direction == 1: + B = self.cons[7, nx, :, nz] + x = np.linspace(c['ymin'], c['ymax'], c['ny']) + else: + B = self.cons[5, nx, ny, :] + x = np.linspace(c['zmin'], c['zmax'], c['nz']) + + exact = np.sign(x)*erf(0.5 * np.sqrt(c['sigma'] * x ** 2 / (c['t']+1))) + initial = np.sign(x)*erf(0.5 * np.sqrt(c['sigma'] * x ** 2 )) + plt.plot(x, B, label='Numerical') + plt.plot(x, exact, 'k--', label='Exact') + plt.plot(x, initial, label='Initial') + plt.xlim([c['xmin'], c['xmax']]) + plt.ylim([-1.2, 1.2]) + plt.xlabel(r'$x$') + plt.ylabel(r'$B_y$') + plt.title(r'Comparison of exact and numerical $B_y$ at $t={:.4f}$'.format(c['t']+1)) + plt.legend(loc='upper left') + plt.show() + + def plotTwoFluidCPAlfvenWaveAgainstExact(self): + """ + The cirularly polarized alfven wave has an exact solution, see Amano 2016 + for details. This method plots all non-trivial prims against their exact + values for case 3. + """ + + rho1, vx1, vy1, vz1, p1, rho2, vx2, vy2, vz2, p2, Bx, By, Bz, Ex, Ey, Ez = self.prims[:] + c = self.c + xs = np.linspace(c['xmin'], c['xmax'], c['nx']) + t = c['t'] + + h = 1.04 + B0 = h + omegaBar1 = -np.sqrt(1.04) + omegaBar2 = -omegaBar1 + kx = 1.0/4.0 + + omega = 5.63803828148e-1 + Wp = 5.19940020571e-6 + 1 + We = 6.68453076522e-5 + 1 + xsi = 0.01 + + U1 = -xsi * omega * omegaBar1 / (kx * (omega + omegaBar1 * We)) + U2 = -xsi * omega * omegaBar2 / (kx * (omega + omegaBar2 * Wp)) + + phi = kx * xs - omega * t + + BySol = xsi * B0 * np.cos(phi) + BzSol = -xsi * B0 * np.sin(phi) + EySol = -(omega/kx)*xsi*B0*np.sin(phi) + EzSol = -(omega/kx)*xsi*B0*np.cos(phi) + vy1sol = U1 * np.cos(phi) + vz1sol = -U1 * np.sin(phi) + vy2sol = U2 * np.cos(phi) + vz2sol = -U2 * np.sin(phi) + + # Bx + BxSol = np.zeros_like(BySol) + BxSol[:] = B0 + plt.figure() + plt.plot(xs, Bx[:, 0, 0], label='Numerical') + plt.plot(xs, BxSol, '--', label='Exact') + plt.title(r'Exact comparison for $B_x$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # By + plt.figure() + plt.plot(xs, By[:, 0, 0], label='Numerical') + plt.plot(xs, BySol, '--', label='Exact') + plt.title(r'Exact comparison for $B_y$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # By + plt.figure() + plt.plot(xs, Bz[:, 0, 0], label='Numerical') + plt.plot(xs, BzSol, '--', label='Exact') + plt.title(r'Exact comparison for $B_z$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # Ex + plt.figure() + plt.plot(xs, Ex[:, 0, 0], label='Numerical') + plt.plot(xs, np.zeros_like(xs), '--', label='Exact') + plt.title(r'Exact comparison for $E_x$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + minn = min(np.min(Ex), 0) + maxx = max(np.max(Ex), 0) + sep = maxx - minn + plt.ylim([minn-0.1*sep, maxx+0.1*sep]) + plt.legend() + # Ey + plt.figure() + plt.plot(xs, Ey[:, 0, 0], label='Numerical') + plt.plot(xs, EySol, '--', label='Exact') + plt.title(r'Exact comparison for $E_y$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # Ez + plt.figure() + plt.plot(xs, Ez[:, 0, 0], label='Numerical') + plt.plot(xs, EzSol, '--', label='Exact') + plt.title(r'Exact comparison for $E_z$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # vx1 + plt.figure() + plt.plot(xs, vx1[:, 0, 0], label='Numerical') + plt.plot(xs, np.zeros_like(xs), '--', 
label='Exact') + plt.title(r'Exact comparison for $v_x1$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + minn = min(np.min(vx1), 0) + maxx = max(np.max(vx1), 0) + sep = maxx - minn + plt.ylim([minn-0.1*sep, maxx+0.1*sep]) + plt.legend() + # vy1 + plt.figure() + plt.plot(xs, vy1[:, 0, 0], label='Numerical') + plt.plot(xs, vy1sol, '--', label='Exact') + plt.title(r'Exact comparison for $v_y1$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # vz1 + plt.figure() + plt.plot(xs, vz1[:, 0, 0], label='Numerical') + plt.plot(xs, vz1sol, '--', label='Exact') + plt.title(r'Exact comparison for $v_z1$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # vx2 + plt.figure() + plt.plot(xs, vx2[:, 0, 0], label='Numerical') + plt.plot(xs, np.zeros_like(xs), '--', label='Exact') + plt.title(r'Exact comparison for $v_x2$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + minn = min(np.min(vx2), 0) + maxx = max(np.max(vx2), 0) + sep = maxx - minn + plt.ylim([minn-0.1*sep, maxx+0.1*sep]) + plt.legend() + # vy2 + plt.figure() + plt.plot(xs, vy2[:, 0, 0], label='Numerical') + plt.plot(xs, vy2sol, '--', label='Exact') + plt.title(r'Exact comparison for $v_y2$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + # vz2 + plt.figure() + plt.plot(xs, vz2[:, 0, 0], label='Numerical') + plt.plot(xs, vz2sol, '--', label='Exact') + plt.title(r'Exact comparison for $v_z2$ at $t={}$'.format(t)) + plt.xlim([c['xmin'], c['xmax']]) + plt.legend() + + + + def plot2DBrioWu(self, diag=0): + """ + Plots the main diagonal of the 2D Brio-Wu problem + + Parameters + ---------- + diag : int + The diagonal to plot the slice + """ + + nx = self.c['nx'] +# Ny = self.c['Ny'] + midZ = self.c['Nz'] // 2 + Ng = self.c['Ng'] + + if diag == 0: + LB = -Ng + RB = Ng + step = -1 + else: + LB = Ng + RB = -Ng + step = 1 + + + dens = self.prims[0, :, LB:RB:step, midZ].diagonal() + vx = self.prims[1, :, LB:RB:step, midZ].diagonal() + vy = self.prims[2, :, LB:RB:step, midZ].diagonal() + + + p = self.prims[4, :, LB:RB:step, midZ].diagonal() + B = self.prims[5, :, LB:RB:step, midZ].diagonal() / np.sqrt(2) + \ + self.prims[6, :, LB:RB:step, midZ].diagonal() / np.sqrt(2) + + # rho + plt.figure() + plt.plot(np.linspace(0, 1, nx), dens) + plt.ylabel(r'$\rho$') + plt.xlim([0, 1]) + plt.show() + # vx + plt.figure() + plt.plot(np.linspace(0, 1, nx), vx) + plt.ylabel(r'$vx$') + plt.xlim([0, 1]) + plt.show() + # vy + plt.figure() + plt.plot(np.linspace(0, 1, nx), vy) + plt.ylabel(r'$vy$') + plt.xlim([0, 1]) + plt.show() + # v rel + plt.figure() + plt.plot(np.linspace(0, 1, nx),(vx-vy)/(1-vx*vy)) + plt.ylabel(r'$v (rel)$') + plt.xlim([0, 1]) + plt.show() + # v non-rel + plt.figure() + plt.plot(np.linspace(0, 1, nx), vx/np.sqrt(2) - vy/np.sqrt(2)) + plt.ylabel(r'$v (non-rel)$') + plt.xlim([0, 1]) + plt.show() + # p + plt.figure() + plt.plot(np.linspace(0, 1, nx), p) + plt.ylabel(r'$p$') + plt.xlim([0, 1]) + plt.show() + # B + plt.figure() + plt.plot(np.linspace(0, 1, nx), B) + plt.ylabel(r'$B$') + plt.xlim([0, 1]) + plt.show() + + return B + + def plotAdvectionAgainstInitial(self): + xs = np.linspace(Plot.c['dx']/2, 1-Plot.c['dx']/2, Plot.c['nx']) + initialRho = np.ones_like(xs)*0.1 + initialRho += 0.4*np.exp(-(10 * (xs - 0.5))**2) + + fig, axs = plt.subplots(2) + fig.set_size_inches(8, 6) + axs[0].plot(xs, initialRho, 'k-', linewidth=5, alpha=0.3, label='initial') + axs[0].plot(xs, Plot.prims[0, :, 0, 0], 'b:', label='rho') + axs[0].set_xlim(xs[0], xs[-1]) + 
axs[0].set_xlabel(r'$x$') + axs[0].set_ylabel(r'$\rho$') + axs[0].legend() + + error = np.abs(initialRho-Plot.prims[0, :, 0, 0]) + errorNorm = np.sum(error)/len(error) + axs[1].semilogy(xs, error, label=rf'Mean = ${errorNorm:.1e}$') + axs[1].set_xlabel(r"$x$") + axs[1].set_ylabel('Error') + axs[1].set_xlim(xs[0], xs[-1]) + axs[1].legend() + plt.show() + + +# Function declarations over, access data and plot! + + +if __name__ == '__main__': + + Plot = InteractivePlot() + +# Plot.plotSlice() +# Plot.plotSingleFluidCurrentSheetAgainstExact() +# Plot.plotAdvectionAgainstInitial() +# Plot.plotHeatMaps() + + plt.figure() + plt.imshow(np.log(Plot.prims[4, :, :, 0].T), extent=[0, 8, 0, 4], origin='lower') + plt.show() + diff --git a/Tests/GPU/repeat.sh b/Tests/GPU/repeat.sh new file mode 100755 index 00000000..2716a559 --- /dev/null +++ b/Tests/GPU/repeat.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +cd ../CPU +make test_rk2 +./test_rk2 +cd ../GPU +make test_rk2 +./test_rk2 diff --git a/Tests/play.py b/Tests/play.py new file mode 100644 index 00000000..8f110abd --- /dev/null +++ b/Tests/play.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Aug 5 10:56:02 2020 + +@author: alex +""" + +import sys +sys.path.append('../Project/GPU/Src') +sys.path.append('../Project/CPU/Src') +from interactivePlotGPU import InteractivePlot as PlotGPU +from interactivePlotCPU import InteractivePlot as PlotCPU + + +parallel = PlotGPU("TestData/GPU/", "RK2") +#serial = PlotCPU("TestData/Serial/", "RK2") + +pp = parallel.prims +sp = serial.prims + + +#for sv, pv in zip(serial.prims, parallel.prims): +# print(f"{np.sum(np.abs(sv-pv) > 1e-15)}/{30**3} failures") \ No newline at end of file From eb8bb9e03cd69b560e486bcbb404ca54849a05f3 Mon Sep 17 00:00:00 2001 From: AlexJamesWright Date: Wed, 5 Aug 2020 15:13:05 +0100 Subject: [PATCH 03/56] GPU should now be match CPU --- Project/CPU/Src/interactivePlotCPU.py | 757 ---------------------- Project/GPU/Src/interactivePlot.py | 7 +- Tests/CPU/Src/test_fvs.cc | 8 +- Tests/CPU/Src/test_rk2.cc | 14 +- Tests/GPU/Src/compareParallelAndSerial.py | 25 +- Tests/GPU/repeat.sh | 8 - Tests/play.py | 24 - 7 files changed, 25 insertions(+), 818 deletions(-) delete mode 100644 Project/CPU/Src/interactivePlotCPU.py delete mode 100755 Tests/GPU/repeat.sh delete mode 100644 Tests/play.py diff --git a/Project/CPU/Src/interactivePlotCPU.py b/Project/CPU/Src/interactivePlotCPU.py deleted file mode 100644 index d8a288ec..00000000 --- a/Project/CPU/Src/interactivePlotCPU.py +++ /dev/null @@ -1,757 +0,0 @@ -""" - Script gathers the state vectors stored in the Data directory and offers - functionality to plot various elements. -""" - - -import numpy as np -from matplotlib import pyplot as plt -from scipy.special import erf -from matplotlib import cm -import warnings -from contextlib import suppress - -warnings.filterwarnings('ignore', "No labelled objects found. ") - -# Change this to the relative path to the data you want to plot -# File names must start with e.g. `primitive`, anything between this -# and `.dat` should be stored in appendix -# By default, this script will gather data for the final condition of the -# simulation at t=t_end. To gather different data, add arguments to the -# constructor to include the path to the directory and any appendages. 
-FinalDirectory = '../Data/Final/' -appendix = '' - -class InteractivePlot(object): - - def __init__(self, DatDirectory=None, append=None, states=True): - if DatDirectory is None: - self.DatDir = FinalDirectory - else: - self.DatDir = DatDirectory - if append is None: - self.appendix = appendix - else: - self.appendix = append - self.gatherData(states) - print("Ready!") - - def gatherData(self, states): - """ - Collects and stores all the data required for plotting the final state of - the system. - - Parameters - ---------- - states : bool - Load all of the state arrays. If false, only the constants are - loaded to save time for animation. - - Notes - ----- - Stores the following public variables: - - cons : array of float - (Ncons, nx, ny, nz) Array containing the conserved vector - consLabels : array of string - (Ncons,) The labels of the conserved elements - prims : array of float - (Nprims, nx, ny, nz) Array containing the primitive vector - primLabels : array of string - (Nprims,) The labels of the primitive elements - aux : array of float - (Naux, nx, ny, nz) Array containing the auxiliary vector - auxLabels : array of string - (Naux,) The labels of the auxiliary elements - c : dictionary - Dictionary containing all constant data saved in simData. Access - elements by typing as an argument the constant you want as a string. - E.g. to get zmax, enter --> c['zmax'] - All links are the same as the constant name in the SimData class. - - """ - - # Dictionary to hold constants - self.c = {} - c = self.c - # Get constants first - print("Fetching constants...") - with open(self.DatDir + 'Constants/constants' + self.appendix + '.dat', 'r') as f: - for i, line in enumerate(f): - if not i==0: - line=line.split() - c['nx'] = int(line[0]) - c['ny'] = int(line[1]) - if c['ny'] == 0: - c['ny'] = 1 - c['nz'] = int(line[2]) - if c['nz'] == 0: - c['nz'] = 1 - c['Nx'] = int(line[3]) - c['Ny'] = int(line[4]) - c['Nz'] = int(line[5]) - c['xmin'] = float(line[6]) - c['xmax'] = float(line[7]) - c['ymin'] = float(line[8]) - c['ymax'] = float(line[9]) - c['zmin'] = float(line[10]) - c['zmax'] = float(line[11]) - c['endTime'] = float(line[12]) - c['cfl'] = float(line[13]) - c['Ng'] = int(line[14]) - c['gamma'] = float(line[15]) - c['sigma'] = float(line[16]) - c['Ncons'] = int(line[17]) - c['Nprims'] = int(line[18]) - c['Naux'] = int(line[19]) - c['cp'] = float(line[20]) - c['dt'] = float(line[21]) - c['t'] = float(line[22]) - c['dx'] = float(line[23]) - c['dy'] = float(line[24]) - c['dz'] = float(line[25]) - - print("{} conserved vectors".format(c['Ncons'])) - print("{} primitive vectors".format(c['Nprims'])) - print("{} auxiliary vectors".format(c['Naux'])) - - if states: - # Now gather conserved data - self.cons = np.zeros([c['Ncons'], c['nx'], c['ny'], c['nz']]) - print("Fetching conserved variables...") - with open(self.DatDir + 'Conserved/cons' + self.appendix + '.dat', 'r') as f: - for i, line in enumerate(f): - # Get cons var labels - if i==0: - consLabels = line.split()[2:] - # Get cons var data - else: - temp = line.split() - for k in range(c['nz']): - self.cons[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) - - - # Clean up labels (remove the commas) - self.cleanConsLabels = [] - for i in range(len(consLabels)-1): - self.cleanConsLabels.append(consLabels[i][:-1]) - self.cleanConsLabels.append(consLabels[-1]) - - with suppress(FileNotFoundError): - # Now get primitive variables if and store 
the data in array... - self.prims = np.zeros([c['Nprims'], c['nx'], c['ny'], c['nz']]) - print("Fetching primitive variables...") - with open(self.DatDir + 'Primitive/prims' + self.appendix + '.dat', 'r') as f: - for i, line in enumerate(f): - # Get primitive var labels - if i==0: - primLabels = line.split()[2:] - # Get primitive var data - else: - temp = line.split() - for k in range(c['nz']): - self.prims[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) - - # Clean up labels (remove the commas) - self.cleanPrimLabels = [] - for i in range(len(primLabels)-1): - self.cleanPrimLabels.append(primLabels[i][:-1]) - self.cleanPrimLabels.append(primLabels[-1]) - - with suppress(FileNotFoundError): - # And finally the aux vars if available - self.aux = np.zeros([c['Naux'], c['nx'], c['ny'], c['nz']]) - print("Fetching auxiliary variables...") - with open(self.DatDir + 'Auxiliary/aux' + self.appendix +'.dat', 'r') as f: - for i, line in enumerate(f): - # Get cons var labels - if i==0: - auxLabels = line.split()[2:] - # Get cons var data - else: - temp = line.split() - for k in range(c['nz']): - self.aux[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) - - # Clean up labels (remove the commas) - self.cleanAuxLabels = [] - for i in range(len(auxLabels)-1): - self.cleanAuxLabels.append(auxLabels[i][:-1]) - self.cleanAuxLabels.append(auxLabels[-1]) - - with suppress(FileNotFoundError): - # Grab domain data - self.x = np.zeros(c['nx']) - self.y = np.zeros(c['ny']) - self.z = np.zeros(c['nz']) - coords = [self.x, self.y, self.z] - print("Fetching domain coordinates...") - with open(self.DatDir + 'Domain/domain' + self.appendix +'.dat', 'r') as f: - for coord, (i, line) in zip(coords, enumerate(f)): - temp = line.split() - print(len(temp)) - for k, val in enumerate(temp): - coord[k] = float(val) - - - - def _getVarFromLine(self, line, nx, ny): - """ - Given the line number that the iterator is on, and the size of the x-domain, - returns the index of the primitive variable this data belongs to. - - Parameters - ---------- - line: int - The line number the file pointer is pointing to. We want to know which - primitive variable this line's data corresponds to. - nx: int - The total number (incl ghost cells) of domain cells in the x-direction. - ny: int - The total number (incl ghost cells) of domain cells in the y-direction. - - Returns - ------- - var: - The primitive variable index of this line's data. - - Other - ----- - Function will throw a ValueError if trying to get the primitive index - of the first (zero'th) line. - """ - if line == 0: - raise ValueError('Line zero does not contain any data') - else: - return ((line-1)//ny)//nx - - - def _getXIndexFromLine(self, line, nx, ny): - """ - Given the line number that the iterator is on, and the size of the x-domain, - returns the x-index of this line's data. - - Parameters - ---------- - line: int - The line number the file pointer is pointing to. We want to know which - primitive variable this line's data corresponds to. - nx: int - The total number (incl ghost cells) of domain cells in the x-direction. - ny: int - The total number (incl ghost cells) of domain cells in the y-direction. - - Returns - ------- - index: - The x-index of the current line's data. 
- """ - return ((line-1)//ny)%nx - - def _getYIndexFromLine(self, line, nx, ny): - """ - Given the line number that the iterator is on, and the size of the y-domain, - returns the y-index of this line's data. - - Parameters - ---------- - line: int - The line number the file pointer is pointing to. We want to know which - primitive variable this line's data corresponds to. - nx: int - The total number (incl ghost cells) of domain cells in the x-direction. - ny: int - The total number (incl ghost cells) of domain cells in the y-direction. - - Returns - ------- - index: - The y-index of the current line's data. - """ - return (line-1)%ny - - - - - ############################################################################### - # Plotting Functions # - ############################################################################### - - - - - def plotHeatMaps(self, data='prims', color=None, axis=2): - """ - Plots the 2D heatmap of the given data. The axes to be plotted can be - selected via the axis parameter---this corresponds to the axis you want - to ignore. - - Parameters - ---------- - data: string - Describes which variables the user wants to plot. Choose from - 'prims', 'cons', 'aux' or 'primitive', 'conserved' and 'auxiliary' - color: matplotlib color map - The colour theme to be plotting in. This can take string arguments - but best to stick to variants of cm.somecolourscheme - E.g. cm.magma - axis: int - The axis the user wants to ignore. - (0, 1, 2) = (x, y, z) - """ - if data=='prims' or data=='primitive': - data = self.prims - dataLabels = self.cleanPrimLabels - elif data=='cons' or data=='conserved': - data = self.cons - dataLabels = self.cleanConsLabels - elif data=='aux' or data=='auxiliary': - data = self.aux - data = self.cleanAuxLabels - else: - raise ValueError("Variable type not recognised, please try again") - c = self.c - - for i in range(data.shape[0]): - fig, ax = plt.subplots(1) - if (axis == 0): - plotVars = data[i, c['Nx']//2, :, :] - axisLabel1 = r'$y$' - axisLabel2 = r'$z$' - if (axis == 1): - plotVars = data[i, :, c['Ny']//2, :] - axisLabel1 = r'$x$' - axisLabel2 = r'$z$' - if (axis == 2): - plotVars = data[i, :, :, c['Nz']//2] - axisLabel1 = r'$x$' - axisLabel2 = r'$y$' - - if color==None: - color = cm.afmhot - surf = ax.imshow(plotVars.T, cmap=color, interpolation='bicubic', aspect='auto') - ax.set_title(r'Time Evolution for {}: $t = {}$'.format(dataLabels[i], c['t'])) - ax.set_xlim([0, self.c['nx']]) - ax.set_ylim([0, self.c['ny']]) - ax.set_xlabel(axisLabel1) - ax.set_ylabel(axisLabel2) - fig.colorbar(surf, shrink=0.5, aspect=5) - plt.show() - return ax - - def plotSlice(self, data='prims', axis=0): - """ - Plots the variation of data in the `axis` direction. - - Parameters - ---------- - data: string - Describes which variables the user wants to plot. Choose from - 'prims', 'cons', 'aux' or 'primitive', 'conserved' and 'auxiliary' - color: matplotlib color map - The colour theme to be plotting in. This can take string arguments - but best to stick to variants of cm.somecolourscheme - E.g. cm.magma - axis: int, optional - The axis the user wants to plot in. - (0, 1, 2) = (x, y, z) - Defaults to axis=0, x-direction. 
- """ - if data=='prims' or data=='primitive': - data = self.prims - dataLabels = self.cleanPrimLabels - elif data=='cons' or data=='conserved': - data = self.cons - dataLabels = self.cleanConsLabels - elif data=='aux' or data=='auxiliary': - data = self.aux - dataLabels = self.cleanAuxLabels - else: - raise ValueError("Variable type not recognised, please try again") - c = self.c - - Nx, Ny, Nz = c['Nx'], c['Ny'], c['Nz'] - - for i in range(len(data)): - plt.figure() - if (axis == 0): - plotVars = data[i, :, Ny//2, Nz//2] - axisLabel = r'$x$' - step = c['dx'] - n = c['nx'] - left, right = c['xmin'], c['xmax'] - if (axis == 1): - plotVars = data[i, Nx//2, :, Nz//2] - axisLabel = r'$y$' - step = c['dy'] - n = c['ny'] - left, right = c['ymin'], c['ymax'] - if (axis == 2): - plotVars = data[i, Nx//2, Ny//2, :] - axisLabel = r'$z$' - step = c['dz'] - n = c['nz'] - left, right = c['zmin'], c['zmax'] - - ymin = np.min(plotVars) - ymax = np.max(plotVars) - rangeY = ymax - ymin - ylower = ymin - 0.025 * rangeY - yupper = ymax + 0.025 * rangeY - xs = np.linspace(left + step/2, right - step/2, n) - plt.plot(xs, plotVars, label='{}'.format(dataLabels[i])) - plt.title(r'Time Evolution for {}: $t = {}$'.format(dataLabels[i], c['t'])) - plt.xlabel(axisLabel) - plt.ylabel(r'$q_{}(x)$'.format(i+1)) - plt.xlim([c['xmin'], c['xmax']]) -# plt.ylim((ylower, yupper)) - plt.legend(loc='lower center', fontsize=10) - plt.show() - - - def plotTwoFluidSlice(self): - """ - Plots the variation of total data in the x-direction of the two fluids. - - """ - - c = self.c - Ny, Nz = c['Ny'], c['Nz'] - - rho = self.prims[0, :, Ny//2, Nz//2] + self.prims[5, :, Ny//2, Nz//2] - p = self.prims[4, :, Ny//2, Nz//2] + self.prims[9, :, Ny//2, Nz//2] - var = [rho, *self.aux[31:34, :, Ny//2, Nz//2], p, *self.prims[10:, :, Ny//2, Nz//2]] - varLab = [r'$\rho$', r'$u_x$', r'$u_y$', r'$u_z$', r'$p$', r'$B_x$', r'$B_y$', r'$B_z$', r'$E_x$', r'$E_y$', r'$E_z$'] - - xs = np.linspace(c['xmin'] + c['dx']/2, c['xmax'] - c['dx']/2, c['nx']) - - for i, v in enumerate(var): - plt.figure() - plt.plot(xs, v) - plt.title(varLab[i]) - ymin = np.min(v) - ymax = np.max(v) - rangeY = ymax - ymin - ylower = ymin - 0.025 * rangeY - yupper = ymax + 0.025 * rangeY - plt.title(r'Time Evolution for {}: $t = {}$'.format(varLab[i], c['t'])) - plt.xlabel(r'$x$') - plt.ylabel(r'$q_{}(x)$'.format(i+1)) - plt.ylim((ylower, yupper)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend(loc='lower center', fontsize=10) - plt.show() - - def plotTwoFluidCurrentSheetAgainstExact(self): - """ - The current sheet has an analytical solution for the y-direction magnetic - field. This is plotted against the given B-field. - """ - By = self.cons[11] - c = self.c - plt.figure() - xs = np.linspace(c['xmin'], c['xmax'], c['nx']) - exact = np.sign(xs)*erf(0.5 * np.sqrt(c['sigma'] * xs ** 2 / (c['t']+1))) - plt.plot(xs, By[:, 0, 0], label='Numerical') - plt.plot(xs, exact, label='Exact') - plt.xlim([c['xmin'], c['xmax']]) - plt.ylim([-1.2, 1.2]) - plt.xlabel(r'$x$') - plt.ylabel(r'$B_y$') - plt.title(r'Comparison of exact and numerical $B_y$ at $t={:.4f}$'.format(c['t']+1)) - plt.legend(loc='upper left') - plt.show() - #return np.linalg.norm(exact - By[:, 0, 0]) - - - def plotSingleFluidCurrentSheetAgainstExact(self, direction=0): - """ - The current sheet has an analytical solution for the y-direction magnetic - field. This is plotted against the given B-field. 
- """ - c = self.c - plt.figure() - nx = self.c['Nx'] // 2 - ny = self.c['Ny'] // 2 - nz = self.c['Nz'] // 2 - - if direction == 0: - B = self.cons[6, :, ny, nz] - x = np.linspace(c['xmin'], c['xmax'], c['nx']) - elif direction == 1: - B = self.cons[7, nx, :, nz] - x = np.linspace(c['ymin'], c['ymax'], c['ny']) - else: - B = self.cons[5, nx, ny, :] - x = np.linspace(c['zmin'], c['zmax'], c['nz']) - - exact = np.sign(x)*erf(0.5 * np.sqrt(c['sigma'] * x ** 2 / (c['t']+1))) - initial = np.sign(x)*erf(0.5 * np.sqrt(c['sigma'] * x ** 2 )) - plt.plot(x, B, label='Numerical') - plt.plot(x, exact, 'k--', label='Exact') - plt.plot(x, initial, label='Initial') - plt.xlim([c['xmin'], c['xmax']]) - plt.ylim([-1.2, 1.2]) - plt.xlabel(r'$x$') - plt.ylabel(r'$B_y$') - plt.title(r'Comparison of exact and numerical $B_y$ at $t={:.4f}$'.format(c['t']+1)) - plt.legend(loc='upper left') - plt.show() - - def plotTwoFluidCPAlfvenWaveAgainstExact(self): - """ - The cirularly polarized alfven wave has an exact solution, see Amano 2016 - for details. This method plots all non-trivial prims against their exact - values for case 3. - """ - - rho1, vx1, vy1, vz1, p1, rho2, vx2, vy2, vz2, p2, Bx, By, Bz, Ex, Ey, Ez = self.prims[:] - c = self.c - xs = np.linspace(c['xmin'], c['xmax'], c['nx']) - t = c['t'] - - h = 1.04 - B0 = h - omegaBar1 = -np.sqrt(1.04) - omegaBar2 = -omegaBar1 - kx = 1.0/4.0 - - omega = 5.63803828148e-1 - Wp = 5.19940020571e-6 + 1 - We = 6.68453076522e-5 + 1 - xsi = 0.01 - - U1 = -xsi * omega * omegaBar1 / (kx * (omega + omegaBar1 * We)) - U2 = -xsi * omega * omegaBar2 / (kx * (omega + omegaBar2 * Wp)) - - phi = kx * xs - omega * t - - BySol = xsi * B0 * np.cos(phi) - BzSol = -xsi * B0 * np.sin(phi) - EySol = -(omega/kx)*xsi*B0*np.sin(phi) - EzSol = -(omega/kx)*xsi*B0*np.cos(phi) - vy1sol = U1 * np.cos(phi) - vz1sol = -U1 * np.sin(phi) - vy2sol = U2 * np.cos(phi) - vz2sol = -U2 * np.sin(phi) - - # Bx - BxSol = np.zeros_like(BySol) - BxSol[:] = B0 - plt.figure() - plt.plot(xs, Bx[:, 0, 0], label='Numerical') - plt.plot(xs, BxSol, '--', label='Exact') - plt.title(r'Exact comparison for $B_x$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # By - plt.figure() - plt.plot(xs, By[:, 0, 0], label='Numerical') - plt.plot(xs, BySol, '--', label='Exact') - plt.title(r'Exact comparison for $B_y$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # By - plt.figure() - plt.plot(xs, Bz[:, 0, 0], label='Numerical') - plt.plot(xs, BzSol, '--', label='Exact') - plt.title(r'Exact comparison for $B_z$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # Ex - plt.figure() - plt.plot(xs, Ex[:, 0, 0], label='Numerical') - plt.plot(xs, np.zeros_like(xs), '--', label='Exact') - plt.title(r'Exact comparison for $E_x$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - minn = min(np.min(Ex), 0) - maxx = max(np.max(Ex), 0) - sep = maxx - minn - plt.ylim([minn-0.1*sep, maxx+0.1*sep]) - plt.legend() - # Ey - plt.figure() - plt.plot(xs, Ey[:, 0, 0], label='Numerical') - plt.plot(xs, EySol, '--', label='Exact') - plt.title(r'Exact comparison for $E_y$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # Ez - plt.figure() - plt.plot(xs, Ez[:, 0, 0], label='Numerical') - plt.plot(xs, EzSol, '--', label='Exact') - plt.title(r'Exact comparison for $E_z$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # vx1 - plt.figure() - plt.plot(xs, vx1[:, 0, 0], label='Numerical') - plt.plot(xs, np.zeros_like(xs), '--', 
label='Exact') - plt.title(r'Exact comparison for $v_x1$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - minn = min(np.min(vx1), 0) - maxx = max(np.max(vx1), 0) - sep = maxx - minn - plt.ylim([minn-0.1*sep, maxx+0.1*sep]) - plt.legend() - # vy1 - plt.figure() - plt.plot(xs, vy1[:, 0, 0], label='Numerical') - plt.plot(xs, vy1sol, '--', label='Exact') - plt.title(r'Exact comparison for $v_y1$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # vz1 - plt.figure() - plt.plot(xs, vz1[:, 0, 0], label='Numerical') - plt.plot(xs, vz1sol, '--', label='Exact') - plt.title(r'Exact comparison for $v_z1$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # vx2 - plt.figure() - plt.plot(xs, vx2[:, 0, 0], label='Numerical') - plt.plot(xs, np.zeros_like(xs), '--', label='Exact') - plt.title(r'Exact comparison for $v_x2$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - minn = min(np.min(vx2), 0) - maxx = max(np.max(vx2), 0) - sep = maxx - minn - plt.ylim([minn-0.1*sep, maxx+0.1*sep]) - plt.legend() - # vy2 - plt.figure() - plt.plot(xs, vy2[:, 0, 0], label='Numerical') - plt.plot(xs, vy2sol, '--', label='Exact') - plt.title(r'Exact comparison for $v_y2$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - # vz2 - plt.figure() - plt.plot(xs, vz2[:, 0, 0], label='Numerical') - plt.plot(xs, vz2sol, '--', label='Exact') - plt.title(r'Exact comparison for $v_z2$ at $t={}$'.format(t)) - plt.xlim([c['xmin'], c['xmax']]) - plt.legend() - - - - def plot2DBrioWu(self, diag=0): - """ - Plots the main diagonal of the 2D Brio-Wu problem - - Parameters - ---------- - diag : int - The diagonal to plot the slice - """ - - nx = self.c['nx'] -# Ny = self.c['Ny'] - midZ = self.c['Nz'] // 2 - Ng = self.c['Ng'] - - if diag == 0: - LB = -Ng - RB = Ng - step = -1 - else: - LB = Ng - RB = -Ng - step = 1 - - - dens = self.prims[0, :, LB:RB:step, midZ].diagonal() - vx = self.prims[1, :, LB:RB:step, midZ].diagonal() - vy = self.prims[2, :, LB:RB:step, midZ].diagonal() - - - p = self.prims[4, :, LB:RB:step, midZ].diagonal() - B = self.prims[5, :, LB:RB:step, midZ].diagonal() / np.sqrt(2) + \ - self.prims[6, :, LB:RB:step, midZ].diagonal() / np.sqrt(2) - - # rho - plt.figure() - plt.plot(np.linspace(0, 1, nx), dens) - plt.ylabel(r'$\rho$') - plt.xlim([0, 1]) - plt.show() - # vx - plt.figure() - plt.plot(np.linspace(0, 1, nx), vx) - plt.ylabel(r'$vx$') - plt.xlim([0, 1]) - plt.show() - # vy - plt.figure() - plt.plot(np.linspace(0, 1, nx), vy) - plt.ylabel(r'$vy$') - plt.xlim([0, 1]) - plt.show() - # v rel - plt.figure() - plt.plot(np.linspace(0, 1, nx),(vx-vy)/(1-vx*vy)) - plt.ylabel(r'$v (rel)$') - plt.xlim([0, 1]) - plt.show() - # v non-rel - plt.figure() - plt.plot(np.linspace(0, 1, nx), vx/np.sqrt(2) - vy/np.sqrt(2)) - plt.ylabel(r'$v (non-rel)$') - plt.xlim([0, 1]) - plt.show() - # p - plt.figure() - plt.plot(np.linspace(0, 1, nx), p) - plt.ylabel(r'$p$') - plt.xlim([0, 1]) - plt.show() - # B - plt.figure() - plt.plot(np.linspace(0, 1, nx), B) - plt.ylabel(r'$B$') - plt.xlim([0, 1]) - plt.show() - - return B - - def plotAdvectionAgainstInitial(self): - xs = np.linspace(Plot.c['dx']/2, 1-Plot.c['dx']/2, Plot.c['nx']) - initialRho = np.ones_like(xs)*0.1 - initialRho += 0.4*np.exp(-(10 * (xs - 0.5))**2) - - fig, axs = plt.subplots(2) - fig.set_size_inches(8, 6) - axs[0].plot(xs, initialRho, 'k-', linewidth=5, alpha=0.3, label='initial') - axs[0].plot(xs, Plot.prims[0, :, 0, 0], 'b:', label='rho') - axs[0].set_xlim(xs[0], xs[-1]) - 
axs[0].set_xlabel(r'$x$') - axs[0].set_ylabel(r'$\rho$') - axs[0].legend() - - error = np.abs(initialRho-Plot.prims[0, :, 0, 0]) - errorNorm = np.sum(error)/len(error) - axs[1].semilogy(xs, error, label=rf'Mean = ${errorNorm:.1e}$') - axs[1].set_xlabel(r"$x$") - axs[1].set_ylabel('Error') - axs[1].set_xlim(xs[0], xs[-1]) - axs[1].legend() - plt.show() - - -# Function declarations over, access data and plot! - - -if __name__ == '__main__': - - Plot = InteractivePlot() - -# Plot.plotSlice() -# Plot.plotSingleFluidCurrentSheetAgainstExact() -# Plot.plotAdvectionAgainstInitial() -# Plot.plotHeatMaps() - - plt.figure() - plt.imshow(np.log(Plot.prims[4, :, :, 0].T), extent=[0, 8, 0, 4], origin='lower') - plt.show() - diff --git a/Project/GPU/Src/interactivePlot.py b/Project/GPU/Src/interactivePlot.py index 6021e9ce..f34461dc 100644 --- a/Project/GPU/Src/interactivePlot.py +++ b/Project/GPU/Src/interactivePlot.py @@ -104,9 +104,10 @@ def gatherData(self): print("{} conserved vectors".format(c['Ncons'])) print("{} primitive vectors".format(c['Nprims'])) print("{} auxiliary vectors".format(c['Naux'])) + print(f"Domain extent is {c['nx']}, {c['ny']}, {c['nz']}") # Now gather conserved data - self.cons = np.zeros([c['Ncons'], c['Nx'], c['Ny'], c['Nz']]) + self.cons = np.zeros([c['Ncons'], c['nx'], c['ny'], c['nz']]) print("Fetching conserved variables...") with open(self.DatDir + 'Conserved/cons' + self.appendix + '.dat', 'r') as f: for i, line in enumerate(f): @@ -128,7 +129,7 @@ def gatherData(self): with suppress(FileNotFoundError): # Now get primitive variables if and store the data in array... - self.prims = np.zeros([c['Nprims'], c['Nx'], c['Ny'], c['Nz']]) + self.prims = np.zeros([c['Nprims'], c['nx'], c['ny'], c['nz']]) print("Fetching primitive variables...") with open(self.DatDir + 'Primitive/prims' + self.appendix + '.dat', 'r') as f: for i, line in enumerate(f): @@ -149,7 +150,7 @@ def gatherData(self): with suppress(FileNotFoundError): # And finally the aux vars if available - self.aux = np.zeros([c['Naux'], c['Nx'], c['Ny'], c['Nz']]) + self.aux = np.zeros([c['Naux'], c['nx'], c['ny'], c['nz']]) print("Fetching auxiliary variables...") with open(self.DatDir + 'Auxiliary/aux' + self.appendix +'.dat', 'r') as f: for i, line in enumerate(f): diff --git a/Tests/CPU/Src/test_fvs.cc b/Tests/CPU/Src/test_fvs.cc index 973c5b95..77d72eb2 100644 --- a/Tests/CPU/Src/test_fvs.cc +++ b/Tests/CPU/Src/test_fvs.cc @@ -19,7 +19,7 @@ TEST(FVS, SameFnetAsSerial) */ { SerialEnv env(0, NULL, 1, 1, 1); - Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); + Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); Weno3 weno(&d); FVS fluxMethod(&d, &weno, &model); @@ -56,7 +56,7 @@ TEST(FVS, SameFnetAsSerial) TEST(FVS, SameXReconstructionAsSerial) { SerialEnv env(0, NULL, 1, 1, 1); - Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); + Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); Weno3 weno(&d); FVS fluxMethod(&d, &weno, &model); @@ -95,7 +95,7 @@ TEST(FVS, SameXReconstructionAsSerial) TEST(FVS, SameYReconstructionAsSerial) { SerialEnv env(0, NULL, 1, 1, 1); - Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); + Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); Weno3 weno(&d); FVS fluxMethod(&d, &weno, &model); @@ -134,7 +134,7 @@ TEST(FVS, SameYReconstructionAsSerial) TEST(FVS, SameZReconstructionAsSerial) { SerialEnv env(0, NULL, 1, 1, 1); - Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); + Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); Weno3 weno(&d); 
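// Note (a sketch, inferred from the Data constructor calls in the Project main
// files in this series; not authoritative): the positional arguments appear to be
//   Data d(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env);
// so moving the third argument from 0 to 20 promotes these FVS comparison tests
// from a 2D 20x20 domain to a full 3D 20x20x20 one.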
FVS fluxMethod(&d, &weno, &model); diff --git a/Tests/CPU/Src/test_rk2.cc b/Tests/CPU/Src/test_rk2.cc index 840040a2..7cdcdb91 100644 --- a/Tests/CPU/Src/test_rk2.cc +++ b/Tests/CPU/Src/test_rk2.cc @@ -322,10 +322,8 @@ TEST(RK2OutputConsistentWithSerial, RK2SrmhdFlowBrioWuSF) save.saveAux(); save.saveConsts(); } -#endif -#if 0 -TEST(RK2OutputConsistentWithSerial, RK2SrrmhdOutflowOTVortexSingleFluidRK2) +TEST(RK2, RK2OutputConsistentWithSerial) { /* @@ -334,9 +332,10 @@ TEST(RK2OutputConsistentWithSerial, RK2SrrmhdOutflowOTVortexSingleFluidRK2) */ SerialEnv env(0, NULL, 1, 1, 1, 1); - Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + Data d(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.004, &env); + Weno3 weno(&d); SRRMHD model(&d); - FVS fluxMethod(&d, &model); + FVS fluxMethod(&d, &weno, &model); Outflow bcs(&d); Simulation sim(&d, &env); OTVortexSingleFluid init(&d); @@ -349,11 +348,12 @@ TEST(RK2OutputConsistentWithSerial, RK2SrrmhdOutflowOTVortexSingleFluidRK2) // Save data in test directory strcpy(save.dir, "../TestData/Serial"); - strcpy(save.app, "RK2SrrmhdOutflowOTVortexSingleFluid"); - + strcpy(save.app, "RK2"); save.saveCons(); save.savePrims(); save.saveAux(); save.saveConsts(); + } + #endif diff --git a/Tests/GPU/Src/compareParallelAndSerial.py b/Tests/GPU/Src/compareParallelAndSerial.py index bd05205f..286fccac 100644 --- a/Tests/GPU/Src/compareParallelAndSerial.py +++ b/Tests/GPU/Src/compareParallelAndSerial.py @@ -45,9 +45,9 @@ class CompareParallelAndSerial(object): Ncons = [] Nprims = [] Naux = [] - Nx = [] - Ny = [] - Nz = [] + nx = [] + ny = [] + nz = [] Ng = [] xbounds = [] ybounds = [] @@ -75,20 +75,15 @@ def getFiles(self): self.Ncons.append(self.Serials[i].c['Ncons']) self.Nprims.append(self.Serials[i].c['Nprims']) self.Naux.append(self.Serials[i].c['Naux']) - self.Nx.append(self.Serials[i].c['Nx']) - self.Ny.append(self.Serials[i].c['Ny']) - self.Nz.append(self.Serials[i].c['Nz']) + self.nx.append(self.Serials[i].c['nx']) + self.ny.append(self.Serials[i].c['ny']) + self.nz.append(self.Serials[i].c['nz']) self.Ng.append(self.Serials[i].c['Ng']) - self.xbounds.append((self.Ng[-1], self.Nx[-1] - self.Ng[-1])) - if (self.Ny[-1] > 1): - self.ybounds.append((self.Ng[-1], self.Ny[-1] - self.Ng[-1])) - else: - self.ybounds.append((0, 1)) - if (self.Nz[-1] > 1): - self.zbounds.append((self.Ng[-1], self.Nz[-1] - self.Ng[-1])) - else: - self.zbounds.append((0, 1)) + # Bounds within arrays which do not include ghost cells + self.xbounds.append((0, self.nx[-1])) + self.ybounds.append((0, self.ny[-1])) + self.zbounds.append((0, self.nz[-1])) diff --git a/Tests/GPU/repeat.sh b/Tests/GPU/repeat.sh deleted file mode 100755 index 2716a559..00000000 --- a/Tests/GPU/repeat.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh - -cd ../CPU -make test_rk2 -./test_rk2 -cd ../GPU -make test_rk2 -./test_rk2 diff --git a/Tests/play.py b/Tests/play.py deleted file mode 100644 index 8f110abd..00000000 --- a/Tests/play.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Wed Aug 5 10:56:02 2020 - -@author: alex -""" - -import sys -sys.path.append('../Project/GPU/Src') -sys.path.append('../Project/CPU/Src') -from interactivePlotGPU import InteractivePlot as PlotGPU -from interactivePlotCPU import InteractivePlot as PlotCPU - - -parallel = PlotGPU("TestData/GPU/", "RK2") -#serial = PlotCPU("TestData/Serial/", "RK2") - -pp = parallel.prims -sp = serial.prims - - -#for sv, pv in zip(serial.prims, parallel.prims): -# print(f"{np.sum(np.abs(sv-pv) > 1e-15)}/{30**3} failures") 
\ No newline at end of file From ba80bd07002d96f9dad20e497841ac137dda65ec Mon Sep 17 00:00:00 2001 From: AlexJamesWright Date: Wed, 5 Aug 2020 15:22:35 +0100 Subject: [PATCH 04/56] Ready --- Project/CPU/Src/main.cc | 25 +++++++++++-------------- Project/GPU/Src/main.cu | 7 +------ 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index d82aff43..c1caf758 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -1,5 +1,7 @@ -// Serial main -#include "simData.h" +// CPU main +#include "parallelBoundaryConds.h" +#include "fluxVectorSplitting.h" +#include "parallelSaveData.h" #include "simulation.h" #include "initFunc.h" #include "simData.h" @@ -7,20 +9,14 @@ #include "Euler.h" #include "weno.h" -#include -#include #include -#include #include -#include - using namespace std; int main(int argc, char *argv[]) { - const double MU(1000); // Set up domain int Ng(5); int nx(800); @@ -55,7 +51,7 @@ int main(int argc, char *argv[]) { // Choose particulars of simulation Euler model(&data); - Weno3 weno(&data); + Weno7 weno(&data); FVS fluxMethod(&data, &weno, &model); @@ -65,23 +61,24 @@ int main(int argc, char *argv[]) { FancyMETHODData init(&data); - SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + RK4 timeInt(&data, &model, &bcs, &fluxMethod); - SerialSaveData save(&data, &env, 0); + ParallelSaveData save(&data, &env, 0); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + // Time execution of programme - //double startTime(omp_get_wtime()); + clock_t startTime(clock()); // Run until end time and save results sim.evolve(output, safety); - //double timeTaken(omp_get_wtime()- startTime); + double timeTaken(double(clock() - startTime)/(double)CLOCKS_PER_SEC); save.saveAll(); - //printf("\nRuntime: %.5fs\nCompleted %d iterations.\n", timeTaken, data.iters); + if (env.rank==0) printf("\nRuntime: %.5fs\nCompleted %d iterations.\n", timeTaken, data.iters); return 0; diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 50efa978..a5642b3a 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -80,12 +80,7 @@ int main(int argc, char *argv[]) { double startTime(omp_get_wtime()); // Run until end time and save results - // sim.evolve(output, safety); - sim.updateTime(); - sim.updateTime(); - sim.updateTime(); - sim.updateTime(); - sim.updateTime(); + sim.evolve(output, safety); double timeTaken(omp_get_wtime()- startTime); From c2951d0a7480b1f79afeacea2fb080aa534a213e Mon Sep 17 00:00:00 2001 From: "A.M.Brown" Date: Mon, 24 Aug 2020 11:53:16 +0100 Subject: [PATCH 05/56] updated makefile to compile gtest with std=c++11 to work on more cuda versions --- Tests/GPU/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 603ae7d4..8f8bfd1a 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -31,6 +31,7 @@ INC_DIR = ./../../Project/GPU/Include # Set Google Test's header directory as a system directory, such that # the compiler doesn't generate warnings in Google Test headers. CPPFLAGS = -isystem $(GTEST_DIR)/include +STDFLAGS = -std=c++11 # Flags passed to the C++ compiler. 
# c++11 is required for the vector looping srmhd @@ -120,11 +121,11 @@ RTFIND_OBJS = $(RTFIND_SRC_DIR)/dogleg.o \ gtest-all.o : $(GTEST_SRCS_) - @$(CXX) $(CPPFLAGS) -I$(GTEST_DIR) $(CXXFLAGS) -c \ + @$(CXX) $(CPPFLAGS) $(STDFLAGS) -I$(GTEST_DIR) $(CXXFLAGS) -c \ $(GTEST_DIR)/src/gtest-all.cc gtest_main.o : $(GTEST_SRCS_) - @$(CXX) $(CPPFLAGS) -I$(GTEST_DIR) $(CXXFLAGS) -c \ + @$(CXX) $(CPPFLAGS) $(STDFLAGS) -I$(GTEST_DIR) $(CXXFLAGS) -c \ $(GTEST_DIR)/src/gtest_main.cc gtest.a : gtest-all.o From 0ab3caea72fa6e7c3e2eba572e394f902ef0e478 Mon Sep 17 00:00:00 2001 From: "A.M.Brown" Date: Mon, 24 Aug 2020 16:22:53 +0100 Subject: [PATCH 06/56] added FlowKHSingleFluid unit test. Changed project to use KHSingleFluidRandom example --- Project/CPU/Src/main.cc | 53 +++++++-------- Project/GPU/Src/main.cu | 48 ++++++-------- Tests/CPU/Src/test_imex.cc | 80 +++++++++++++++++++++++ Tests/GPU/Src/compareParallelAndSerial.py | 40 ++++++++++++ Tests/GPU/Src/test_imex.cu | 74 +++++++++++++++++++++ 5 files changed, 237 insertions(+), 58 deletions(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index c1caf758..b362e58e 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -5,7 +5,7 @@ #include "simulation.h" #include "initFunc.h" #include "simData.h" -#include "RKPlus.h" +#include "SSP2.h" #include "Euler.h" #include "weno.h" @@ -15,53 +15,48 @@ using namespace std; int main(int argc, char *argv[]) { - - + const double MU(1000); // Set up domain - int Ng(5); - int nx(800); - int ny(400); + int Ng(4); + int nx(64); + int ny(16); int nz(0); - double xmin(0.0); - double xmax(8.0); - double ymin(0.0); - double ymax(4.0); - double zmin(0.0); - double zmax(1.0); - double endTime(30.0); - double gamma(2.0); - double cfl(0.5); - double cp(1); - double mu1(-1); - double mu2(1); + double xmin(-0.5); + double xmax(0.5); + double ymin(-1.0); + double ymax(1.0); + double zmin(-1.5); + double zmax(1.5); + double endTime(0.5); + double cfl(0.1); + double gamma(4.0/3.0); + double sigma(0); bool output(true); - int frameSkip(50); - int safety(frameSkip); - int reportItersPeriod(1); - double sigma(50); - double nxRanks(4); + int safety(180); + + double nxRanks(1); double nyRanks(1); double nzRanks(1); ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, - cfl, Ng, gamma, sigma, cp, mu1, mu2, frameSkip, reportItersPeriod); + cfl, Ng, gamma, sigma); // Choose particulars of simulation - Euler model(&data); + SRRMHD model(&data); - Weno7 weno(&data); + Weno3 weno(&data); FVS fluxMethod(&data, &weno, &model); - ParallelOutflow bcs(&data, &env); + ParallelFlow bcs(&data, &env); Simulation sim(&data, &env); - FancyMETHODData init(&data); + KHInstabilitySingleFluid init(&data); - RK4 timeInt(&data, &model, &bcs, &fluxMethod); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); ParallelSaveData save(&data, &env, 0); diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index a5642b3a..005a4199 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -23,44 +23,34 @@ using namespace std; int main(int argc, char *argv[]) { - - const double MU(1000); // Set up domain int Ng(4); - int nx(256); - int ny(512); + int nx(64); + int ny(64); int nz(0); - double xmin(-0.5); - double xmax(0.5); - double ymin(-1.0); + double xmin(0.0); + double xmax(1.0); + double ymin(0.0); double ymax(1.0); - double zmin(-1.5); - double zmax(1.5); + double zmin(0.0); + double zmax(1.0); double endTime(3.0); - double cfl(0.1); + 
double cfl(0.6); double gamma(4.0/3.0); - double sigma(300); + double sigma(10); double cp(1.0); - double mu1(-MU); - double mu2(MU); - int frameSkip(180); + double mu1(-100); + double mu2(100); + int frameSkip(10); bool output(true); - int safety(180); - - - char * ptr(0); - //! Overwrite any variables that have been passed in as main() arguments - for (int i(0); i < argc; i++) { - if (strcmp(argv[i], "sigma") == 0) { - sigma = (double)strtol(argv[i+1], &ptr, 10); - } - } + if (argc != 2) throw std::invalid_argument("Expected ./main seed!\n"); + int seed(atoi(argv[1])); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, cfl, Ng, gamma, sigma, cp, mu1, mu2, frameSkip); // Choose particulars of simulation - SRRMHD model(&data); + SRMHD model(&data); FVS fluxMethod(&data, &model); @@ -68,21 +58,21 @@ int main(int argc, char *argv[]) { KHInstabilitySingleFluid init(&data, 1); - Flow bcs(&data); + Periodic bcs(&data); - SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + RKSplit timeInt(&data, &model, &bcs, &fluxMethod); SaveData save(&data); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); // Time execution of programme - double startTime(omp_get_wtime()); + //double startTime(omp_get_wtime()); // Run until end time and save results sim.evolve(output, safety); - double timeTaken(omp_get_wtime()- startTime); + //double timeTaken(omp_get_wtime()- startTime); save.saveAll(); printf("\nRuntime: %.5fs\nCompleted %d iterations.\n", timeTaken, data.iters); diff --git a/Tests/CPU/Src/test_imex.cc b/Tests/CPU/Src/test_imex.cc index 8c65d3fa..2ff2c566 100644 --- a/Tests/CPU/Src/test_imex.cc +++ b/Tests/CPU/Src/test_imex.cc @@ -51,6 +51,86 @@ TEST(SSP2, IMEX2BenchmarkForParallelCode) save.saveConsts(); } +TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared with the parallel output. + */ + + double sigma(0); + + SerialEnv env(0, NULL, 1, 1, 1); + Data data(64, 16, 0, -0.5, 0.5, -1, 1, 0, 1, 0.05, &env, + 0.5, 4, 4.0/3.0, sigma); + + // Choose particulars of simulation + SRRMHD model(&data); + Weno3 weno(&data); + FVS fluxMethod(&data, &weno, &model); + Flow bcs(&data); + Simulation sim(&data, &env); + KHInstabilitySingleFluid init(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SerialSaveData save(&data, &env); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/Serial"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#if 0 +TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared with the parallel output. 
+ */ + const double MU(1000); + double sigma(300); + double cp(1.0); + double mu1(-MU); + double mu2(MU); + int nx(256); + int ny(512); + double endTime(0.01); + + SerialEnv env(0, NULL, 1, 1, 1); + Data data(nx, ny, 0, -0.5, 0.5, -1, 1, 0, 1, endTime, &env, + 0.1, 4, 4.0/3.0, sigma, cp, mu1, mu2); + + // Choose particulars of simulation + SRRMHD model(&data); + Weno3 weno(&data); + FVS fluxMethod(&data, &weno, &model); + Flow bcs(&data); + Simulation sim(&data, &env); + KHInstabilitySingleFluid init(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SerialSaveData save(&data, &env); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/Serial"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#endif TEST(SSP3, IMEX3BenchmarkForParallelCode) { diff --git a/Tests/GPU/Src/compareParallelAndSerial.py b/Tests/GPU/Src/compareParallelAndSerial.py index 286fccac..808c5340 100644 --- a/Tests/GPU/Src/compareParallelAndSerial.py +++ b/Tests/GPU/Src/compareParallelAndSerial.py @@ -174,6 +174,46 @@ def test_AuxEquivalentForSSP2(): print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) +def test_ConsEquivalentForSSP2FlowKHSingleFluid(): + Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + for Nv in range(Compare.Ncons[Obj]): + for i in range(*Compare.xbounds[Obj]): + for j in range(*Compare.ybounds[Obj]): + for k in range(*Compare.zbounds[Obj]): + try: + assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) + except AssertionError: + print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) + assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) + +def test_PrimsEquivalentForSSP2FlowKHSingleFluid(): + Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + for Nv in range(Compare.Nprims[Obj]): + for i in range(*Compare.xbounds[Obj]): + for j in range(*Compare.ybounds[Obj]): + for k in range(*Compare.zbounds[Obj]): + try: + assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) + except AssertionError: + print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) + assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) + +def test_AuxEquivalentForSSP2FlowKHSingleFluid(): + Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + for Nv in range(Compare.Naux[Obj]): + for i in range(*Compare.xbounds[Obj]): + for j in range(*Compare.ybounds[Obj]): + for k in range(*Compare.zbounds[Obj]): + try: + assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) + except AssertionError: + print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) + assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) + + # RK2 def test_ConsEquivalentForRK2(): Obj = Compare.Appendicies.index('RK2') diff --git a/Tests/GPU/Src/test_imex.cu b/Tests/GPU/Src/test_imex.cu index aa695e0b..830da06e 100644 --- a/Tests/GPU/Src/test_imex.cu +++ b/Tests/GPU/Src/test_imex.cu @@ -45,6 +45,80 @@ TEST(SSP2, IMEX2ConsistentWithSerialVersion) save.saveConsts(); } +TEST(SSP2FlowKHSingleFluid, 
IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared to the serial output. + */ + double sigma(0); + + Data data(64, 16, 0, -0.5, 0.5, -1, 1, 0, 1, 0.05, + 0.5, 4, 4.0/3.0, sigma); + + // Choose particulars of simulation + SRRMHD model(&data); + FVS fluxMethod(&data, &model); + Simulation sim(&data); + KHInstabilitySingleFluid init(&data); + Flow bcs(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SaveData save(&data); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} + +#if 0 +TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared to the serial output. + */ + const double MU(1000); + double sigma(300); + double cp(1.0); + double mu1(-MU); + double mu2(MU); + int nx(256); + int ny(512); + double endTime(0.01); + + Data data(nx, ny, 0, -0.5, 0.5, -1, 1, 0, 1, endTime, + 0.1, 4, 4.0/3.0, sigma, cp, mu1, mu2); + + // Choose particulars of simulation + SRRMHD model(&data); + FVS fluxMethod(&data, &model); + Simulation sim(&data); + KHInstabilitySingleFluid init(&data); + Flow bcs(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SaveData save(&data); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#endif TEST(SSP3, IMEX3ConsistentWithSerialVersion) { From 1c5ff45cc32377b2c2d35e0d2f04e3a7fd5f66d1 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 25 Aug 2020 10:39:47 +0100 Subject: [PATCH 07/56] updated main functions in Project/CPU and Project/GPU to simulate the same systems --- Project/CPU/Src/main.cc | 61 ++++++++--------- Project/GPU/Makefile | 2 +- Project/GPU/Src/main.cu | 14 ++-- Tests/CPU/Src/test_imex.cc | 80 +++++++++++++++++++++++ Tests/GPU/Src/compareParallelAndSerial.py | 40 ++++++++++++ Tests/GPU/Src/test_imex.cu | 74 +++++++++++++++++++++ 6 files changed, 228 insertions(+), 43 deletions(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index c1caf758..4cd28f9e 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -1,11 +1,11 @@ // CPU main -#include "parallelBoundaryConds.h" +#include "boundaryConds.h" #include "fluxVectorSplitting.h" -#include "parallelSaveData.h" +#include "serialSaveData.h" #include "simulation.h" #include "initFunc.h" #include "simData.h" -#include "RKPlus.h" +#include "SSP2.h" #include "Euler.h" #include "weno.h" @@ -15,55 +15,50 @@ using namespace std; int main(int argc, char *argv[]) { - - + const double MU(1000); // Set up domain - int Ng(5); - int nx(800); - int ny(400); + int Ng(4); + int nx(64); + int ny(16); int nz(0); - double xmin(0.0); - double xmax(8.0); - double ymin(0.0); - double ymax(4.0); - double zmin(0.0); - double zmax(1.0); - double endTime(30.0); - double gamma(2.0); - double cfl(0.5); - double cp(1); - double mu1(-1); - double mu2(1); + double xmin(-0.5); + double xmax(0.5); + double 
ymin(-1.0); + double ymax(1.0); + double zmin(-1.5); + double zmax(1.5); + double endTime(0.05); + double cfl(0.1); + double gamma(4.0/3.0); + double sigma(0); bool output(true); - int frameSkip(50); - int safety(frameSkip); - int reportItersPeriod(1); - double sigma(50); - double nxRanks(4); + int safety(180); + + double nxRanks(1); double nyRanks(1); double nzRanks(1); - ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); + SerialEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, - cfl, Ng, gamma, sigma, cp, mu1, mu2, frameSkip, reportItersPeriod); + cfl, Ng, gamma, sigma); // Choose particulars of simulation - Euler model(&data); + SRRMHD model(&data); - Weno7 weno(&data); + Weno3 weno(&data); FVS fluxMethod(&data, &weno, &model); - ParallelOutflow bcs(&data, &env); + Flow bcs(&data); Simulation sim(&data, &env); - FancyMETHODData init(&data); + KHInstabilitySingleFluid init(&data); - RK4 timeInt(&data, &model, &bcs, &fluxMethod); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); - ParallelSaveData save(&data, &env, 0); + SerialSaveData save(&data, &env, 0); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 666dc027..a84ac2e0 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -23,7 +23,7 @@ RTFIND_SRC_DIR = ./CminpackLibrary/Src CXXFLAGS = -Xcompiler -fopenmp -Xcompiler -Wall # NVIDIA compiler flags -NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 +NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 # Sources SRCS = main.cu \ diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index a5642b3a..10a10340 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -27,8 +27,8 @@ int main(int argc, char *argv[]) { const double MU(1000); // Set up domain int Ng(4); - int nx(256); - int ny(512); + int nx(64); + int ny(16); int nz(0); double xmin(-0.5); double xmax(0.5); @@ -36,14 +36,10 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(3.0); + double endTime(0.05); double cfl(0.1); double gamma(4.0/3.0); - double sigma(300); - double cp(1.0); - double mu1(-MU); - double mu2(MU); - int frameSkip(180); + double sigma(0); bool output(true); int safety(180); @@ -57,7 +53,7 @@ int main(int argc, char *argv[]) { } Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, - cfl, Ng, gamma, sigma, cp, mu1, mu2, frameSkip); + cfl, Ng, gamma, sigma); // Choose particulars of simulation SRRMHD model(&data); diff --git a/Tests/CPU/Src/test_imex.cc b/Tests/CPU/Src/test_imex.cc index 8c65d3fa..2ff2c566 100644 --- a/Tests/CPU/Src/test_imex.cc +++ b/Tests/CPU/Src/test_imex.cc @@ -51,6 +51,86 @@ TEST(SSP2, IMEX2BenchmarkForParallelCode) save.saveConsts(); } +TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared with the parallel output. 
+ */ + + double sigma(0); + + SerialEnv env(0, NULL, 1, 1, 1); + Data data(64, 16, 0, -0.5, 0.5, -1, 1, 0, 1, 0.05, &env, + 0.5, 4, 4.0/3.0, sigma); + + // Choose particulars of simulation + SRRMHD model(&data); + Weno3 weno(&data); + FVS fluxMethod(&data, &weno, &model); + Flow bcs(&data); + Simulation sim(&data, &env); + KHInstabilitySingleFluid init(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SerialSaveData save(&data, &env); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/Serial"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#if 0 +TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared with the parallel output. + */ + const double MU(1000); + double sigma(300); + double cp(1.0); + double mu1(-MU); + double mu2(MU); + int nx(256); + int ny(512); + double endTime(0.01); + + SerialEnv env(0, NULL, 1, 1, 1); + Data data(nx, ny, 0, -0.5, 0.5, -1, 1, 0, 1, endTime, &env, + 0.1, 4, 4.0/3.0, sigma, cp, mu1, mu2); + + // Choose particulars of simulation + SRRMHD model(&data); + Weno3 weno(&data); + FVS fluxMethod(&data, &weno, &model); + Flow bcs(&data); + Simulation sim(&data, &env); + KHInstabilitySingleFluid init(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SerialSaveData save(&data, &env); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/Serial"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#endif TEST(SSP3, IMEX3BenchmarkForParallelCode) { diff --git a/Tests/GPU/Src/compareParallelAndSerial.py b/Tests/GPU/Src/compareParallelAndSerial.py index 286fccac..808c5340 100644 --- a/Tests/GPU/Src/compareParallelAndSerial.py +++ b/Tests/GPU/Src/compareParallelAndSerial.py @@ -174,6 +174,46 @@ def test_AuxEquivalentForSSP2(): print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) +def test_ConsEquivalentForSSP2FlowKHSingleFluid(): + Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + for Nv in range(Compare.Ncons[Obj]): + for i in range(*Compare.xbounds[Obj]): + for j in range(*Compare.ybounds[Obj]): + for k in range(*Compare.zbounds[Obj]): + try: + assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) + except AssertionError: + print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) + assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) + +def test_PrimsEquivalentForSSP2FlowKHSingleFluid(): + Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + for Nv in range(Compare.Nprims[Obj]): + for i in range(*Compare.xbounds[Obj]): + for j in range(*Compare.ybounds[Obj]): + for k in range(*Compare.zbounds[Obj]): + try: + assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) + except AssertionError: + print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) + assert(abs((Serial.prims[Nv, i, j, k] - 
Parallel.prims[Nv, i, j, k]) < TOL)) + +def test_AuxEquivalentForSSP2FlowKHSingleFluid(): + Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + for Nv in range(Compare.Naux[Obj]): + for i in range(*Compare.xbounds[Obj]): + for j in range(*Compare.ybounds[Obj]): + for k in range(*Compare.zbounds[Obj]): + try: + assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) + except AssertionError: + print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) + assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) + + # RK2 def test_ConsEquivalentForRK2(): Obj = Compare.Appendicies.index('RK2') diff --git a/Tests/GPU/Src/test_imex.cu b/Tests/GPU/Src/test_imex.cu index aa695e0b..830da06e 100644 --- a/Tests/GPU/Src/test_imex.cu +++ b/Tests/GPU/Src/test_imex.cu @@ -45,6 +45,80 @@ TEST(SSP2, IMEX2ConsistentWithSerialVersion) save.saveConsts(); } +TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared to the serial output. + */ + double sigma(0); + + Data data(64, 16, 0, -0.5, 0.5, -1, 1, 0, 1, 0.05, + 0.5, 4, 4.0/3.0, sigma); + + // Choose particulars of simulation + SRRMHD model(&data); + FVS fluxMethod(&data, &model); + Simulation sim(&data); + KHInstabilitySingleFluid init(&data); + Flow bcs(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SaveData save(&data); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} + +#if 0 +TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) +{ + /* + Run a resistive MHD problem to test the IMEX2 scheme. This test will run + the simulation and save the output data in the TestData directory, ready + to be compared to the serial output. 
+ */ + const double MU(1000); + double sigma(300); + double cp(1.0); + double mu1(-MU); + double mu2(MU); + int nx(256); + int ny(512); + double endTime(0.01); + + Data data(nx, ny, 0, -0.5, 0.5, -1, 1, 0, 1, endTime, + 0.1, 4, 4.0/3.0, sigma, cp, mu1, mu2); + + // Choose particulars of simulation + SRRMHD model(&data); + FVS fluxMethod(&data, &model); + Simulation sim(&data); + KHInstabilitySingleFluid init(&data); + Flow bcs(&data); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + SaveData save(&data); + + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "SSP2FlowKHSingleFluid"); + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#endif TEST(SSP3, IMEX3ConsistentWithSerialVersion) { From 6c89018936779847b0e2237e1fb97b7952f47294 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 25 Aug 2020 11:50:24 +0100 Subject: [PATCH 08/56] switching back to Project/main with implemented model etc, to better test update to new API --- Project/CPU/Src/main.cc | 12 +++++------ Project/GPU/Src/main.cu | 48 +++++++++++++++++++++++------------------ 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index b362e58e..4cd28f9e 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -1,7 +1,7 @@ // CPU main -#include "parallelBoundaryConds.h" +#include "boundaryConds.h" #include "fluxVectorSplitting.h" -#include "parallelSaveData.h" +#include "serialSaveData.h" #include "simulation.h" #include "initFunc.h" #include "simData.h" @@ -27,7 +27,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.5); + double endTime(0.05); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); @@ -38,7 +38,7 @@ int main(int argc, char *argv[]) { double nyRanks(1); double nzRanks(1); - ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); + SerialEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, cfl, Ng, gamma, sigma); @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) { FVS fluxMethod(&data, &weno, &model); - ParallelFlow bcs(&data, &env); + Flow bcs(&data); Simulation sim(&data, &env); @@ -58,7 +58,7 @@ int main(int argc, char *argv[]) { SSP2 timeInt(&data, &model, &bcs, &fluxMethod); - ParallelSaveData save(&data, &env, 0); + SerialSaveData save(&data, &env, 0); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 005a4199..10a10340 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -23,34 +23,40 @@ using namespace std; int main(int argc, char *argv[]) { + + const double MU(1000); // Set up domain int Ng(4); int nx(64); - int ny(64); + int ny(16); int nz(0); - double xmin(0.0); - double xmax(1.0); - double ymin(0.0); + double xmin(-0.5); + double xmax(0.5); + double ymin(-1.0); double ymax(1.0); - double zmin(0.0); - double zmax(1.0); - double endTime(3.0); - double cfl(0.6); + double zmin(-1.5); + double zmax(1.5); + double endTime(0.05); + double cfl(0.1); double gamma(4.0/3.0); - double sigma(10); - double cp(1.0); - double mu1(-100); - double mu2(100); - int frameSkip(10); + double sigma(0); bool output(true); - if (argc != 2) throw std::invalid_argument("Expected ./main seed!\n"); - int seed(atoi(argv[1])); 
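// Note on the "sigma" command-line override a few lines below: strtol parses an
// integer in base 10, so a fractional value such as "0.5" would silently become 0
// before the cast to double. A minimal alternative sketch, assuming <cstdlib> is
// available and fractional conductivities should be accepted:
//
//   for (int i(0); i < argc; i++) {
//     if (strcmp(argv[i], "sigma") == 0) {
//       sigma = strtod(argv[i+1], &ptr);   // strtod parses "0.5" as well as "300"
//     }
//   }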
+ int safety(180); + + + char * ptr(0); + //! Overwrite any variables that have been passed in as main() arguments + for (int i(0); i < argc; i++) { + if (strcmp(argv[i], "sigma") == 0) { + sigma = (double)strtol(argv[i+1], &ptr, 10); + } + } Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, - cfl, Ng, gamma, sigma, cp, mu1, mu2, frameSkip); + cfl, Ng, gamma, sigma); // Choose particulars of simulation - SRMHD model(&data); + SRRMHD model(&data); FVS fluxMethod(&data, &model); @@ -58,21 +64,21 @@ int main(int argc, char *argv[]) { KHInstabilitySingleFluid init(&data, 1); - Periodic bcs(&data); + Flow bcs(&data); - RKSplit timeInt(&data, &model, &bcs, &fluxMethod); + SSP2 timeInt(&data, &model, &bcs, &fluxMethod); SaveData save(&data); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); // Time execution of programme - //double startTime(omp_get_wtime()); + double startTime(omp_get_wtime()); // Run until end time and save results sim.evolve(output, safety); - //double timeTaken(omp_get_wtime()- startTime); + double timeTaken(omp_get_wtime()- startTime); save.saveAll(); printf("\nRuntime: %.5fs\nCompleted %d iterations.\n", timeTaken, data.iters); From aa846704a850e5f97e9a5347796107acd03c437e Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 25 Aug 2020 11:52:58 +0100 Subject: [PATCH 09/56] adding platformEnv files --- Project/GPU/Include/parallelEnv.h | 50 +++++++++++++ Project/GPU/Include/platformEnv.h | 65 ++++++++++++++++ Project/GPU/Include/serialEnv.h | 48 ++++++++++++ Project/GPU/Src/parallelEnv.cu | 118 ++++++++++++++++++++++++++++++ Project/GPU/Src/serialEnv.cu | 59 +++++++++++++++ 5 files changed, 340 insertions(+) create mode 100644 Project/GPU/Include/parallelEnv.h create mode 100644 Project/GPU/Include/platformEnv.h create mode 100644 Project/GPU/Include/serialEnv.h create mode 100644 Project/GPU/Src/parallelEnv.cu create mode 100644 Project/GPU/Src/serialEnv.cu diff --git a/Project/GPU/Include/parallelEnv.h b/Project/GPU/Include/parallelEnv.h new file mode 100644 index 00000000..7c67181e --- /dev/null +++ b/Project/GPU/Include/parallelEnv.h @@ -0,0 +1,50 @@ +#ifndef PARALLEL_ENV_H +#define PARALLEL_ENV_H + +#include +#include "platformEnv.h" + +//! ParallelEnv +/*! + @par + For keeping track of parameters related to the platform that the code is running on -- + currently serial on a single core or multi-core using MPI. For the MPI version, processes are mapped onto a + cartesian grid with the number of processes in each dimension specified by the user. + + For a 2D problem, specify nzRanks = 1 + For a 1D problem, specify nzRanks = 1, nyRanks = 1 + + The number of ranks in each dimension must be a factor of the number of cells in the dimension +*/ +class ParallelEnv : public PlatformEnv +{ + public: + MPI_Comm mpiCartesianComm; //!< Cartesian MPI communicator that maps processes to the simulation grid + + //! Constructor -- Initialize global MPI communicator + ParallelEnv(int *argcP, char **argvP[], int nxRanks, int nyRanks, int nzRanks, int testing=0); + + //! Destructor + virtual ~ParallelEnv(); + + //! Check for external boundary + /*! + @par + Returns true if a subdomain is on the external boundary of the simulation grid in a particular direction + @param[in] dimension {x=0, y=1, z=2} + @param[in] direction direction to look for the external boundary in a particular direction {low=0, high=1} + */ + int isNeighbourExternal(int dimension, int direction); + + //! 
Create cartesian grid of processes and calculate neighbours along that grid for each process + /*! + @par + Creates the cartesian grid of processes that are responsible for the corresponding subdomains in the simulation grid + @param[in] xPeriodic whether the x dimension has periodic boundary conditions + @param[in] yPeriodic whether the y dimension has periodic boundary conditions + @param[in] zPeriodic whether the z dimension has periodic boundary conditions + */ + void setParallelDecomposition(int xPeriodic, int yPeriodic, int zPeriodic); +}; + +#endif diff --git a/Project/GPU/Include/platformEnv.h b/Project/GPU/Include/platformEnv.h new file mode 100644 index 00000000..3f3c7e11 --- /dev/null +++ b/Project/GPU/Include/platformEnv.h @@ -0,0 +1,65 @@ +#ifndef PLATFORM_ENV_H +#define PLATFORM_ENV_H + +//! PlatformEnv +/*! + @par + For keeping track of parameters related to the platform that the code is running on -- + currently serial on a single core or multi-core using MPI. For the MPI version, processes are mapped onto a + cartesian grid with the number of processes in each dimension specified by the user. + + For a 2D problem, specify nzRanks = 1 + For a 1D problem, specify nzRanks = 1, nyRanks = 1 + + The number of ranks in each dimension must be a factor of the number of cells in the dimension +*/ +class PlatformEnv +{ + public: + int + nProc, //!< Number of MPI processes in total (1 for serial job) + rank, //!< Global id of this MPI process (0 for serial job) + //@{ + nxRanks, nyRanks, nzRanks, //!< Number of processes in each dimension of the cartesian grid of processes + //@} + //@{ + xRankId, yRankId, zRankId, //!< Id of this MPI process in each dimension of the cartesian grid of processes + //@} + //@{ + leftXNeighbourRank, rightXNeighbourRank, //!< Global ids of this process's left and right neighbours + //@} + //@{ + leftYNeighbourRank, rightYNeighbourRank, //!< Global ids of this process's front and back neighbours + //@} + //@{ + leftZNeighbourRank, rightZNeighbourRank, //!< Global ids of this process's bottom and top neighbour + //@} + testing; //!< boolean flag used to disable MPI init/finalise during unit testing + + //! Constructor -- Initialize global MPI communicator + PlatformEnv(int testing=0) : testing(testing) {} + + //! Destructor + virtual ~PlatformEnv() {} + + //! Check for external boundary + /*! + @par + Returns true if a subdomain is on the external boundary of the simulation grid in a particular direction + @param[in] dimension {x=0, y=1, z=2} + @param[in] direction direction to look for the external boundary in a particular direction {low=0, high=1} + */ + virtual int isNeighbourExternal(int dimension, int direction) = 0; + + //! Create cartesian grid of processes + /*! + @par + Creates the cartesian grid of processes that are responsible for the corresponding subdomains in the simulation grid + @param[in] xPeriodic whether the x dimension has periodic boundary conditions + @param[in] yPeriodic whether the y dimension has periodic boundary conditions + @param[in] zPeriodic whether the z dimension has periodic boundary conditions + */ + virtual void setParallelDecomposition(int xPeriodic, int yPeriodic, int zPeriodic) = 0; +}; + +#endif diff --git a/Project/GPU/Include/serialEnv.h b/Project/GPU/Include/serialEnv.h new file mode 100644 index 00000000..f1cae491 --- /dev/null +++ b/Project/GPU/Include/serialEnv.h @@ -0,0 +1,48 @@ +#ifndef SERIAL_ENV_H +#define SERIAL_ENV_H + +#include "platformEnv.h" + +//! SerialEnv +/*! 
+ @par + For keeping track of parameters related to the platform that the code is running on -- + currently serial on a single core or multi-core using MPI. For the MPI version, processes are mapped onto a + cartesian grid with the number of processes in each dimension specified by the user. + + For a 2D problem, specify nzRanks = 1 + For a 1D problem, specify nzRanks = 1, nyRanks = 1 + + The number of ranks in each dimension must be a factor of the number of cells in the dimension +*/ +class SerialEnv : public PlatformEnv +{ + public: + + //! Constructor -- Initialize global MPI communicator + SerialEnv(int *argcP, char **argvP[], int nxRanks, int nyRanks, int nzRanks, int testing=0); + + //! Destructor + virtual ~SerialEnv(); + + //! Check for external boundary + /*! + @par + Returns true if a subdomain is on the external boundary of the simulation grid in a particular direction + @param[in] dimension {x=0, y=1, z=2} + @param[in] direction direction to look for the external boundary in a particular direction {low=0, high=1} + */ + int isNeighbourExternal(int dimension, int direction); + + //! Create cartesian grid of processes and calculate neighbours along that grid for each process + /*! + @par + Creates the cartesian grid of processes that are responsible for the corresponding subdomains in the simulation grid + @param[in] xPeriodic whether the x dimension has periodic boundary conditions + @param[in] yPeriodic whether the y dimension has periodic boundary conditions + @param[in] zPeriodic whether the z dimension has periodic boundary conditions + */ + void setParallelDecomposition(int xPeriodic, int yPeriodic, int zPeriodic); +}; + +#endif diff --git a/Project/GPU/Src/parallelEnv.cu b/Project/GPU/Src/parallelEnv.cu new file mode 100644 index 00000000..0c8141a8 --- /dev/null +++ b/Project/GPU/Src/parallelEnv.cu @@ -0,0 +1,118 @@ +#include "parallelEnv.h" +#include +#include "simData.h" +#include "parallelBoundaryConds.h" +#include +#include +#include + +#include + +// TODO -- rename setParallelDecomposition and split it out into more functions + +ParallelEnv::ParallelEnv(int *argcP, char **argvP[], int nxRanks, int nyRanks, int nzRanks, int testing) : PlatformEnv(testing) +{ + int initialized; + MPI_Initialized(&initialized); + if (!initialized && !testing) MPI_Init(argcP, argvP); + + MPI_Comm_size(MPI_COMM_WORLD, &nProc); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + if (rank==0){ + printf("Running in multi-process mode with %d processes\n", nProc); + } + + this->nxRanks = nxRanks; + this->nyRanks = nyRanks; + this->nzRanks = nzRanks; +} + +ParallelEnv::~ParallelEnv() +{ + // TODO -- Free cartesian communicator + + int finalized; + MPI_Finalized(&finalized); + if (!finalized && !testing) MPI_Finalize(); +} + +int ParallelEnv::isNeighbourExternal(int dimension, int direction) +{ + int isExternal = 0; + int dimRank = 0; + int maxRank = 0; + + if (dimension==0) { + dimRank = xRankId; + maxRank = nxRanks; + } else if (dimension==1) { + dimRank = yRankId; + maxRank = nyRanks; + } else { + dimRank = zRankId; + maxRank = nzRanks; + } + + if (direction==0){ + isExternal = (dimRank==0); + } else { + isExternal = (dimRank==maxRank-1); + } + + return isExternal; +} + +void ParallelEnv::setParallelDecomposition(int xPeriodic, int yPeriodic, int zPeriodic) +{ + // number of dimensions in process grid + int ndims=1; + // number of ranks in each dimension of the grid + int dims[3]; + // bool: whether grid is periodic in each dimension + int periods[3]; + // bool: whether reordering of processes is 
allowed + int reorder=0; + + // TODO -- Could choose best nxRanks, nyRanks, nzRanks proportionate to nx, ny, nz, with errors if nRanks is prime + + // TODO -- Could use properties on bcs to set whether grid is periodic + + // TODO -- We are setting up a 3D topology even when nyRanks, nzRanks == 1, as we may want to find + // neighbours in y even when there is only one process if ny>1 and boundary conditions are periodic. + // Does this introduce too much overhead? Could also send through nx, ny, nz from data. + + dims[0] = nxRanks; + periods[0] = xPeriodic; + dims[1] = nyRanks; + periods[1] = yPeriodic; + dims[2] = nzRanks; + periods[2] = zPeriodic; + ndims = 3; + + // Create MPI communicator in a cartesian grid that matches the domain + MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, &mpiCartesianComm); + + int coords[3]; + + // Get (x,y,z) coords of rank in grid and set on object + // This is a 3D topology regardless of how many processes we use in each dimension + MPI_Cart_coords(mpiCartesianComm, rank, ndims, coords); + xRankId = coords[0]; + yRankId = coords[1]; + zRankId = coords[2]; + + // Get neighbour rank + int direction = 0; + int displacement = 1; + MPI_Cart_shift(mpiCartesianComm, direction, displacement, + &(leftXNeighbourRank), &(rightXNeighbourRank)); + direction = 1; + MPI_Cart_shift(mpiCartesianComm, direction, displacement, + &(leftYNeighbourRank), &(rightYNeighbourRank)); + direction = 2; + MPI_Cart_shift(mpiCartesianComm, direction, displacement, + &(leftZNeighbourRank), &(rightZNeighbourRank)); +} + + diff --git a/Project/GPU/Src/serialEnv.cu b/Project/GPU/Src/serialEnv.cu new file mode 100644 index 00000000..7daf1799 --- /dev/null +++ b/Project/GPU/Src/serialEnv.cu @@ -0,0 +1,59 @@ +#include "serialEnv.h" +#include +#include "simData.h" +#include "boundaryConds.h" +#include +#include +#include + +// TODO -- rename setParallelDecomposition and split it out into more functions + +SerialEnv::SerialEnv(int *argcP, char **argvP[], int nxRanks, int nyRanks, int nzRanks, int testing) : PlatformEnv(testing) +{ + this->nxRanks = 1; + this->nyRanks = 1; + this->nzRanks = 1; + this->xRankId = 0; + this->yRankId = 0; + this->zRankId = 0; + this->rank = 0; + this->nProc = 1; +} + +SerialEnv::~SerialEnv() +{ + +} + +int SerialEnv::isNeighbourExternal(int dimension, int direction) +{ + int isExternal = 0; + int dimRank = 0; + int maxRank = 0; + + if (dimension==0) { + dimRank = xRankId; + maxRank = nxRanks; + } else if (dimension==1) { + dimRank = yRankId; + maxRank = nyRanks; + } else { + dimRank = zRankId; + maxRank = nzRanks; + } + + if (direction==0){ + isExternal = (dimRank==0); + } else { + isExternal = (dimRank==maxRank-1); + } + + return isExternal; +} + +void SerialEnv::setParallelDecomposition(int xPeriodic, int yPeriodic, int zPeriodic) +{ + +} + + From f984cb417ce5bd0ed5cac487327c589dee71b19d Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 25 Aug 2020 15:06:57 +0100 Subject: [PATCH 10/56] adding platformEnv to Data --- Project/GPU/Include/simData.h | 5 ++++- Project/GPU/Makefile | 4 ++++ Project/GPU/Src/main.cu | 9 +++++++-- Project/GPU/Src/simData.cu | 15 ++++++++++----- 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/Project/GPU/Include/simData.h b/Project/GPU/Include/simData.h index dfe0e8e9..7e0056d2 100644 --- a/Project/GPU/Include/simData.h +++ b/Project/GPU/Include/simData.h @@ -3,6 +3,7 @@ #include #include +#include "platformEnv.h" /*! 
Currently (and possibly permanently) a very hacky way of keeping singleCell cons2prims function @@ -76,6 +77,7 @@ class Data sigma; //!< Resistivity int memSet, //!< Indicator that memory has been allocated for state vectors + bcsSet, //!< Indicator that boundary conditions have been created (before this information about the domain decomposition used in MPI version will not be correct). //@{ Ncons, Nprims, Naux; //!< Number of specified variables //@} @@ -169,7 +171,8 @@ class Data double xmin, double xmax, double ymin, double ymax, double zmin, double zmax, - double endTime, double cfl=0.5, int Ng=4, + double endTime, PlatformEnv *env, + double cfl=0.5, int Ng=4, double gamma=5.0/3.0, double sigma=1e3, double cp=0.1, double mu1=-1.0e4, double mu2=1.0e4, diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 666dc027..28576504 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -43,6 +43,7 @@ SRCS = main.cu \ IMEX3Args.cu \ boundaryConds.cu \ saveData.cu \ + serialEnv.cu \ fluxVectorSplitting.cu \ srrmhd.cu \ C2PArgs.cu @@ -153,6 +154,9 @@ boundaryConds.o : $(MODULE_DIR)/boundaryConds.cu $(INC_DIR)/boundaryConds.h saveData.o : $(MODULE_DIR)/saveData.cu $(INC_DIR)/saveData.h $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) +serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h + $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + fluxVectorSplitting.o : $(MODULE_DIR)/fluxVectorSplitting.cu $(INC_DIR)/fluxVectorSplitting.h $(INC_DIR)/weno.h $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 10a10340..996e8dc6 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -9,6 +9,7 @@ #include "SSP2.h" #include "saveData.h" #include "fluxVectorSplitting.h" +#include "serialEnv.h" #include #include @@ -42,7 +43,9 @@ int main(int argc, char *argv[]) { double sigma(0); bool output(true); int safety(180); - + int nxRanks(4); + int nyRanks(1); + int nzRanks(1); char * ptr(0); //! 
Overwrite any variables that have been passed in as main() arguments @@ -52,7 +55,9 @@ int main(int argc, char *argv[]) { } } - Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, + SerialEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); + + Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, cfl, Ng, gamma, sigma); // Choose particulars of simulation diff --git a/Project/GPU/Src/simData.cu b/Project/GPU/Src/simData.cu index 581224fe..84896524 100644 --- a/Project/GPU/Src/simData.cu +++ b/Project/GPU/Src/simData.cu @@ -1,4 +1,5 @@ #include "simData.h" +#include "platformEnv.h" #include "cudaErrorCheck.h" #include #include @@ -7,7 +8,8 @@ Data::Data(int nx, int ny, int nz, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax, - double endTime, double cfl, int Ng, + double endTime, PlatformEnv *env, + double cfl, int Ng, double gamma, double sigma, double cp, double mu1, double mu2, @@ -19,16 +21,19 @@ Data::Data(int nx, int ny, int nz, zmin(zmin), zmax(zmax), endTime(endTime), cfl(cfl), Ng(Ng), gamma(gamma), sigma(sigma), - memSet(0), + memSet(0), bcsSet(0), Ncons(0), Nprims(0), Naux(0), cp(cp), mu1(mu1), mu2(mu2), frameSkip(frameSkip) { + // TODO -- handle nx not dividing perfectly into nxRanks + + // Set Nx to be nx per MPI process + ghost cells + this->Nx = nx/env->nxRanks + 2 * Ng; + this->Ny = ny/env->nyRanks + 2 * Ng; + this->Nz = nz/env->nzRanks + 2 * Ng; - this->Nx = nx + 2 * Ng; - this->Ny = ny + 2 * Ng; - this->Nz = nz + 2 * Ng; dims = 3; // Catch 2D case From 86cd8be4fcf69df9640b5dee67f60ae39ddef1fb Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 25 Aug 2020 16:49:22 +0100 Subject: [PATCH 11/56] added env to Simulation --- Project/GPU/Include/simulation.h | 8 ++++++-- Project/GPU/Src/main.cu | 3 +-- Project/GPU/Src/simulation.cu | 29 ++++++++++++++++++++--------- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/Project/GPU/Include/simulation.h b/Project/GPU/Include/simulation.h index 9dd9f4bc..83802731 100644 --- a/Project/GPU/Include/simulation.h +++ b/Project/GPU/Include/simulation.h @@ -8,6 +8,7 @@ #include "boundaryConds.h" #include "flux.h" #include "saveData.h" +#include "platformEnv.h" //! The Simulation interface for the programme @@ -36,6 +37,8 @@ class Simulation SaveData * save; //!< Pointer to SaveData object + PlatformEnv *env; //!< Pointer to PlatformEnv object + public: Data * data; //!< Pointer to Data class containing global simulation data @@ -50,11 +53,12 @@ class Simulation and once this has been completed, the initial function class may be implemented. @param[in] *data pointer to Data class containing global simulation data + @param[in] *env pointer to the PlatformEnv object */ - Simulation(Data * data); + Simulation(Data * data, PlatformEnv *env); //! 
Destructor frees alloc'd memory - ~Simulation(); + virtual ~Simulation(); diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 996e8dc6..ddf37c4b 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -25,7 +25,6 @@ using namespace std; int main(int argc, char *argv[]) { - const double MU(1000); // Set up domain int Ng(4); int nx(64); @@ -65,7 +64,7 @@ int main(int argc, char *argv[]) { FVS fluxMethod(&data, &model); - Simulation sim(&data); + Simulation sim(&data, &env); KHInstabilitySingleFluid init(&data, 1); diff --git a/Project/GPU/Src/simulation.cu b/Project/GPU/Src/simulation.cu index ff868076..d692a0a0 100644 --- a/Project/GPU/Src/simulation.cu +++ b/Project/GPU/Src/simulation.cu @@ -1,4 +1,5 @@ #include "simulation.h" +#include "platformEnv.h" #include "cudaErrorCheck.h" #include #include @@ -7,7 +8,7 @@ #define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) -Simulation::Simulation(Data * data) : data(data) +Simulation::Simulation(Data * data, PlatformEnv *env) : data(data), env(env) { // Simplify syntax Data * d; @@ -16,7 +17,10 @@ Simulation::Simulation(Data * data) : data(data) // Allocate memory for state arrays int Ntot(d->Nx * d->Ny * d->Nz); - if (d->Ncons == 0) throw std::runtime_error("Must set model before constructing simulation"); + if (env->rank==0){ + if (d->Ncons == 0) throw std::runtime_error("Must set model before constructing simulation"); + if (d->bcsSet != 1) throw std::runtime_error("Must construct boundary condition class before implementing simulation. Need to set domain decomposition parameters including periodicity."); + } gpuErrchk( cudaHostAlloc((void **)&d->cons, sizeof(double) * Ntot * d->Ncons, @@ -62,17 +66,20 @@ Simulation::Simulation(Data * data) : data(data) d->dt = (dtX < dtY && dtX < dtZ) ? dtX : ((dtY < dtZ) ? 
dtY : dtZ); d->memSet = 1; + int iOffset = (d->Nx - 2*d->Ng)*env->xRankId; + int jOffset = (d->Ny - 2*d->Ng)*env->yRankId; + int kOffset = (d->Nz - 2*d->Ng)*env->zRankId; + // Set axes for (int i(0); i < d->Nx; i++) { - d->x[i] = d->xmin + (i + 0.5 - d->Ng) * d->dx; + d->x[i] = d->xmin + (i + iOffset + 0.5 - d->Ng) * d->dx; } for (int j(0); j < d->Ny; j++) { - d->y[j] = d->ymin + (j + 0.5 - d->Ng) * d->dy; + d->y[j] = d->ymin + (j + jOffset + 0.5 - d->Ng) * d->dy; } for (int k(0); k < d->Nz; k++) { - d->z[k] = d->zmin + (k + 0.5 - d->Ng) * d->dz; + d->z[k] = d->zmin + (k + kOffset + 0.5 - d->Ng) * d->dz; } - } Simulation::~Simulation() @@ -116,7 +123,9 @@ void Simulation::updateTime() // Syntax Data * d(this->data); - printf("t = %f\n", d->t); + if (env->rank == 0){ + printf("t = %f\n", d->t); + } // Calculate the size of the next timestep double dtX(d->cfl * d->dx / (d->alphaX * sqrt(d->dims))); @@ -183,7 +192,7 @@ void Simulation::evolve(bool output, int safety) if (safety>0 && d->iters%safety==0) { this->save->saveAll(); - printf("Data saved...\n"); + if (env->rank==0) printf("Data saved...\n"); } } @@ -205,6 +214,8 @@ void Simulation::evolve(bool output, int safety) this->save->saveVar("Ez", 11); } - printf("\n"); + if (env->rank == 0){ + printf("\n"); + } } From 6b265a73b0525cb9df8b8c2b8ad234badef3bb9a Mon Sep 17 00:00:00 2001 From: AlexJamesWright Date: Tue, 25 Aug 2020 19:12:47 +0100 Subject: [PATCH 12/56] Update the initFunc for KHI (also some stuff related to interactive plot that wasnt related to #26) --- Project/CPU/Src/interactivePlot.py | 44 ++-- Project/CPU/Src/main.cc | 4 +- Project/GPU/Src/initFunc.cu | 8 + Project/GPU/Src/interactivePlot.py | 401 ++++++++++++++++++----------- Project/GPU/Src/main.cu | 2 +- Project/compare.py | 6 +- 6 files changed, 281 insertions(+), 184 deletions(-) diff --git a/Project/CPU/Src/interactivePlot.py b/Project/CPU/Src/interactivePlot.py index d8a288ec..cd6deb73 100644 --- a/Project/CPU/Src/interactivePlot.py +++ b/Project/CPU/Src/interactivePlot.py @@ -179,19 +179,18 @@ def gatherData(self, states): self.cleanAuxLabels.append(auxLabels[i][:-1]) self.cleanAuxLabels.append(auxLabels[-1]) - with suppress(FileNotFoundError): - # Grab domain data - self.x = np.zeros(c['nx']) - self.y = np.zeros(c['ny']) - self.z = np.zeros(c['nz']) - coords = [self.x, self.y, self.z] - print("Fetching domain coordinates...") - with open(self.DatDir + 'Domain/domain' + self.appendix +'.dat', 'r') as f: - for coord, (i, line) in zip(coords, enumerate(f)): - temp = line.split() - print(len(temp)) - for k, val in enumerate(temp): - coord[k] = float(val) +# with suppress(FileNotFoundError): +# # Grab domain data +# self.x = np.zeros(c['nx']) +# self.y = np.zeros(c['ny']) +# self.z = np.zeros(c['nz']) +# coords = [self.x, self.y, self.z] +# print("Fetching domain coordinates...") +# with open(self.DatDir + 'Domain/domain' + self.appendix +'.dat', 'r') as f: +# for coord, (i, line) in zip(coords, enumerate(f)): +# temp = line.split() +# for k, val in enumerate(temp): +# coord[k] = float(val) @@ -259,7 +258,7 @@ def _getYIndexFromLine(self, line, nx, ny): The line number the file pointer is pointing to. We want to know which primitive variable this line's data corresponds to. nx: int - The total number (incl ghost cells) of domain cells in the x-direction. + The total number (incl ghost cells)n of domain cells in the x-direction. ny: int The total number (incl ghost cells) of domain cells in the y-direction. 
@@ -329,10 +328,11 @@ def plotHeatMaps(self, data='prims', color=None, axis=2): if color==None: color = cm.afmhot - surf = ax.imshow(plotVars.T, cmap=color, interpolation='bicubic', aspect='auto') + ext = [self.c['xmin'], self.c['xmax'], self.c['ymin'], self.c['ymax']] + surf = ax.imshow(plotVars.T, cmap=color, interpolation='bicubic', aspect='auto', origin='lower', extent=ext) ax.set_title(r'Time Evolution for {}: $t = {}$'.format(dataLabels[i], c['t'])) - ax.set_xlim([0, self.c['nx']]) - ax.set_ylim([0, self.c['ny']]) + ax.set_xlim([self.c['xmin'], self.c['xmax']]) + ax.set_ylim([self.c['ymin'], self.c['ymax']]) ax.set_xlabel(axisLabel1) ax.set_ylabel(axisLabel2) fig.colorbar(surf, shrink=0.5, aspect=5) @@ -746,12 +746,4 @@ def plotAdvectionAgainstInitial(self): Plot = InteractivePlot() -# Plot.plotSlice() -# Plot.plotSingleFluidCurrentSheetAgainstExact() -# Plot.plotAdvectionAgainstInitial() -# Plot.plotHeatMaps() - - plt.figure() - plt.imshow(np.log(Plot.prims[4, :, :, 0].T), extent=[0, 8, 0, 4], origin='lower') - plt.show() - +# Plot.plotHeatMaps() \ No newline at end of file diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index 4cd28f9e..8f76b365 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -27,7 +27,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.05); + double endTime(0.0005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); @@ -54,7 +54,7 @@ int main(int argc, char *argv[]) { Simulation sim(&data, &env); - KHInstabilitySingleFluid init(&data); + KHInstabilitySingleFluid init(&data, 1); SSP2 timeInt(&data, &model, &bcs, &fluxMethod); diff --git a/Project/GPU/Src/initFunc.cu b/Project/GPU/Src/initFunc.cu index 2941769d..c9a99601 100644 --- a/Project/GPU/Src/initFunc.cu +++ b/Project/GPU/Src/initFunc.cu @@ -441,6 +441,14 @@ KHInstabilitySingleFluid::KHInstabilitySingleFluid(Data * data, int mag) : Initi d->prims[ID(1, i, j, k)] = - vShear * tanh((d->y[j] + 0.5)/a); d->prims[ID(2, i, j, k)] = - A0 * vShear * sin(2*PI*d->x[i]) * (exp(-pow((d->y[j] + 0.5), 2)/(sig*sig))); } + + // If we have electric fields, set to the ideal values + if (d->Ncons > 9) + { + d->prims[ID(8, i, j, k)] = -(d->prims[ID(2, i, j, k)] * d->prims[ID(7, i, j, k)] - d->prims[ID(3, i, j, k)] * d->prims[ID(6, i, j, k)]); + d->prims[ID(9, i, j, k)] = -(d->prims[ID(3, i, j, k)] * d->prims[ID(5, i, j, k)] - d->prims[ID(1, i, j, k)] * d->prims[ID(7, i, j, k)]); + d->prims[ID(10, i, j, k)] = -(d->prims[ID(1, i, j, k)] * d->prims[ID(6, i, j, k)] - d->prims[ID(2, i, j, k)] * d->prims[ID(5, i, j, k)]); + } } } } diff --git a/Project/GPU/Src/interactivePlot.py b/Project/GPU/Src/interactivePlot.py index f34461dc..9015edbf 100644 --- a/Project/GPU/Src/interactivePlot.py +++ b/Project/GPU/Src/interactivePlot.py @@ -5,7 +5,7 @@ import numpy as np -import pylab as plt +from matplotlib import pyplot as plt from scipy.special import erf from matplotlib import cm import warnings @@ -24,7 +24,7 @@ class InteractivePlot(object): - def __init__(self, DatDirectory=None, append=None): + def __init__(self, DatDirectory=None, append=None, states=True): if DatDirectory is None: self.DatDir = FinalDirectory else: @@ -33,28 +33,34 @@ def __init__(self, DatDirectory=None, append=None): self.appendix = appendix else: self.appendix = append - self.gatherData() + self.gatherData(states) print("Ready!") - def gatherData(self): + def gatherData(self, states): """ Collects and stores all the data required for plotting the final state of 
the system. + + Parameters + ---------- + states : bool + Load all of the state arrays. If false, only the constants are + loaded to save time for animation. Notes ----- Stores the following public variables: cons : array of float - (Ncons, Nx, Ny, Nz) Array containing the conserved vector + (Ncons, nx, ny, nz) Array containing the conserved vector consLabels : array of string (Ncons,) The labels of the conserved elements prims : array of float - (Nprims, Nx, Ny, Nz) Array containing the primitive vector + (Nprims, nx, ny, nz) Array containing the primitive vector primLabels : array of string (Nprims,) The labels of the primitive elements aux : array of float - (Naux, Nx, Ny, Nz) Array containing the auxiliary vector + (Naux, nx, ny, nz) Array containing the auxiliary vector auxLabels : array of string (Naux,) The labels of the auxiliary elements c : dictionary @@ -76,7 +82,11 @@ def gatherData(self): line=line.split() c['nx'] = int(line[0]) c['ny'] = int(line[1]) + if c['ny'] == 0: + c['ny'] = 1 c['nz'] = int(line[2]) + if c['nz'] == 0: + c['nz'] = 1 c['Nx'] = int(line[3]) c['Ny'] = int(line[4]) c['Nz'] = int(line[5]) @@ -104,92 +114,87 @@ def gatherData(self): print("{} conserved vectors".format(c['Ncons'])) print("{} primitive vectors".format(c['Nprims'])) print("{} auxiliary vectors".format(c['Naux'])) - print(f"Domain extent is {c['nx']}, {c['ny']}, {c['nz']}") - - # Now gather conserved data - self.cons = np.zeros([c['Ncons'], c['nx'], c['ny'], c['nz']]) - print("Fetching conserved variables...") - with open(self.DatDir + 'Conserved/cons' + self.appendix + '.dat', 'r') as f: - for i, line in enumerate(f): - # Get cons var labels - if i==0: - consLabels = line.split()[2:] - # Get cons var data - else: - temp = line.split() - for k in range(c['Nz']-2*c['Ng']): - self.cons[self._getVarFromLine(i, c['Nx'], c['Ny'], c['Ng'])][self._getXIndexFromLine(i, c['Nx'], c['Ny'], c['Ng'])][self._getYIndexFromLine(i, c['Nx'], c['Ny'], c['Ng'])][k] = float(temp[k]) - - # Clean up labels (remove the commas) - self.cleanConsLabels = [] - for i in range(len(consLabels)-1): - self.cleanConsLabels.append(consLabels[i][:-1]) - self.cleanConsLabels.append(consLabels[-1]) - - - with suppress(FileNotFoundError): - # Now get primitive variables if and store the data in array... 
- self.prims = np.zeros([c['Nprims'], c['nx'], c['ny'], c['nz']]) - print("Fetching primitive variables...") - with open(self.DatDir + 'Primitive/prims' + self.appendix + '.dat', 'r') as f: - for i, line in enumerate(f): - # Get primitive var labels - if i==0: - primLabels = line.split()[2:] - # Get primitive var data - else: - temp = line.split() - for k in range(c['Nz']-2*c['Ng']): - self.prims[self._getVarFromLine(i, c['Nx'], c['Ny'], c['Ng'])][self._getXIndexFromLine(i, c['Nx'], c['Ny'], c['Ng'])][self._getYIndexFromLine(i, c['Nx'], c['Ny'], c['Ng'])][k] = float(temp[k]) - # Clean up labels (remove the commas) - self.cleanPrimLabels = [] - for i in range(len(primLabels)-1): - self.cleanPrimLabels.append(primLabels[i][:-1]) - self.cleanPrimLabels.append(primLabels[-1]) - - with suppress(FileNotFoundError): - # And finally the aux vars if available - self.aux = np.zeros([c['Naux'], c['nx'], c['ny'], c['nz']]) - print("Fetching auxiliary variables...") - with open(self.DatDir + 'Auxiliary/aux' + self.appendix +'.dat', 'r') as f: + if states: + # Now gather conserved data + self.cons = np.zeros([c['Ncons'], c['nx'], c['ny'], c['nz']]) + print("Fetching conserved variables...") + with open(self.DatDir + 'Conserved/cons' + self.appendix + '.dat', 'r') as f: for i, line in enumerate(f): # Get cons var labels if i==0: - auxLabels = line.split()[2:] - # Get cons var data + consLabels = line.split()[2:] + # Get cons var data else: temp = line.split() - for k in range(c['Nz']-2*c['Ng']): - self.aux[self._getVarFromLine(i, c['Nx'], c['Ny'], c['Ng'])][self._getXIndexFromLine(i, c['Nx'], c['Ny'], c['Ng'])][self._getYIndexFromLine(i, c['Nx'], c['Ny'], c['Ng'])][k] = float(temp[k]) - + for k in range(c['nz']): + self.cons[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) + + # Clean up labels (remove the commas) - self.cleanAuxLabels = [] - for i in range(len(auxLabels)-1): - self.cleanAuxLabels.append(auxLabels[i][:-1]) - self.cleanAuxLabels.append(auxLabels[-1]) - - with suppress(FileNotFoundError): - # Grab domain data - self.x = np.zeros(c['Nx']) - self.y = np.zeros(c['Ny']) - self.z = np.zeros(c['Nz']) - coords = [self.x, self.y, self.z] - print("Fetching domain coordinates...") - with open(self.DatDir + 'Domain/domain' + self.appendix +'.dat', 'r') as f: - for coord, (i, line) in zip(coords, enumerate(f)): - temp = line.split() - for k, val in enumerate(temp): - coord[k] = float(val) - - # Clean up labels (remove the commas) - self.cleanAuxLabels = [] - for i in range(len(auxLabels)-1): - self.cleanAuxLabels.append(auxLabels[i][:-1]) - self.cleanAuxLabels.append(auxLabels[-1]) - - - def _getVarFromLine(self, line, Nx, Ny, Ng): + self.cleanConsLabels = [] + for i in range(len(consLabels)-1): + self.cleanConsLabels.append(consLabels[i][:-1]) + self.cleanConsLabels.append(consLabels[-1]) + + with suppress(FileNotFoundError): + # Now get primitive variables if and store the data in array... 
+ self.prims = np.zeros([c['Nprims'], c['nx'], c['ny'], c['nz']]) + print("Fetching primitive variables...") + with open(self.DatDir + 'Primitive/prims' + self.appendix + '.dat', 'r') as f: + for i, line in enumerate(f): + # Get primitive var labels + if i==0: + primLabels = line.split()[2:] + # Get primitive var data + else: + temp = line.split() + for k in range(c['nz']): + self.prims[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) + + # Clean up labels (remove the commas) + self.cleanPrimLabels = [] + for i in range(len(primLabels)-1): + self.cleanPrimLabels.append(primLabels[i][:-1]) + self.cleanPrimLabels.append(primLabels[-1]) + + with suppress(FileNotFoundError): + # And finally the aux vars if available + self.aux = np.zeros([c['Naux'], c['nx'], c['ny'], c['nz']]) + print("Fetching auxiliary variables...") + with open(self.DatDir + 'Auxiliary/aux' + self.appendix +'.dat', 'r') as f: + for i, line in enumerate(f): + # Get cons var labels + if i==0: + auxLabels = line.split()[2:] + # Get cons var data + else: + temp = line.split() + for k in range(c['nz']): + self.aux[self._getVarFromLine(i, c['nx'], c['ny'])][self._getXIndexFromLine(i, c['nx'], c['ny'])][self._getYIndexFromLine(i, c['nx'], c['ny'])][k] = float(temp[k]) + + # Clean up labels (remove the commas) + self.cleanAuxLabels = [] + for i in range(len(auxLabels)-1): + self.cleanAuxLabels.append(auxLabels[i][:-1]) + self.cleanAuxLabels.append(auxLabels[-1]) + +# with suppress(FileNotFoundError): +# # Grab domain data +# self.x = np.zeros(c['nx']) +# self.y = np.zeros(c['ny']) +# self.z = np.zeros(c['nz']) +# coords = [self.x, self.y, self.z] +# print("Fetching domain coordinates...") +# with open(self.DatDir + 'Domain/domain' + self.appendix +'.dat', 'r') as f: +# for coord, (i, line) in zip(coords, enumerate(f)): +# temp = line.split() +# for k, val in enumerate(temp): +# coord[k] = float(val) + + + + def _getVarFromLine(self, line, nx, ny): """ Given the line number that the iterator is on, and the size of the x-domain, returns the index of the primitive variable this data belongs to. @@ -199,9 +204,9 @@ def _getVarFromLine(self, line, Nx, Ny, Ng): line: int The line number the file pointer is pointing to. We want to know which primitive variable this line's data corresponds to. - Nx: int + nx: int The total number (incl ghost cells) of domain cells in the x-direction. - Ny: int + ny: int The total number (incl ghost cells) of domain cells in the y-direction. Returns @@ -217,14 +222,10 @@ def _getVarFromLine(self, line, Nx, Ny, Ng): if line == 0: raise ValueError('Line zero does not contain any data') else: - # Remove ghost cells from count as these are no longer included in output - Nx = Nx - 2*Ng - if Ny > 1: - Ny = Ny - 2*Ng - return ((line-1)//Ny)//Nx + return ((line-1)//ny)//nx - def _getXIndexFromLine(self, line, Nx, Ny, Ng): + def _getXIndexFromLine(self, line, nx, ny): """ Given the line number that the iterator is on, and the size of the x-domain, returns the x-index of this line's data. @@ -234,9 +235,9 @@ def _getXIndexFromLine(self, line, Nx, Ny, Ng): line: int The line number the file pointer is pointing to. We want to know which primitive variable this line's data corresponds to. - Nx: int + nx: int The total number (incl ghost cells) of domain cells in the x-direction. - Ny: int + ny: int The total number (incl ghost cells) of domain cells in the y-direction. 
Returns @@ -244,14 +245,9 @@ def _getXIndexFromLine(self, line, Nx, Ny, Ng): index: The x-index of the current line's data. """ - # Remove ghost cells from count as these are no longer included in output - Nx = Nx - 2*Ng - if Ny > 1: - Ny = Ny - 2*Ng + return ((line-1)//ny)%nx - return ((line-1)//Ny)%Nx - - def _getYIndexFromLine(self, line, Nx, Ny, Ng): + def _getYIndexFromLine(self, line, nx, ny): """ Given the line number that the iterator is on, and the size of the y-domain, returns the y-index of this line's data. @@ -261,9 +257,9 @@ def _getYIndexFromLine(self, line, Nx, Ny, Ng): line: int The line number the file pointer is pointing to. We want to know which primitive variable this line's data corresponds to. - Nx: int - The total number (incl ghost cells) of domain cells in the x-direction. - Ny: int + nx: int + The total number (incl ghost cells)n of domain cells in the x-direction. + ny: int The total number (incl ghost cells) of domain cells in the y-direction. Returns @@ -271,12 +267,7 @@ def _getYIndexFromLine(self, line, Nx, Ny, Ng): index: The y-index of the current line's data. """ - # Remove ghost cells from count as these are no longer included in output - Nx = Nx - 2*Ng - if Ny > 1: - Ny = Ny - 2*Ng - - return (line-1)%Ny + return (line-1)%ny @@ -315,36 +306,38 @@ def plotHeatMaps(self, data='prims', color=None, axis=2): dataLabels = self.cleanConsLabels elif data=='aux' or data=='auxiliary': data = self.aux - dataLabels = self.cleanAuxLabels + data = self.cleanAuxLabels else: raise ValueError("Variable type not recognised, please try again") c = self.c for i in range(data.shape[0]): - fig = plt.figure() + fig, ax = plt.subplots(1) if (axis == 0): - plotVars = data[i, c['Nx']//2, c['Ng']:-c['Ng'], c['Ng']:-c['Ng']] + plotVars = data[i, c['Nx']//2, :, :] axisLabel1 = r'$y$' axisLabel2 = r'$z$' if (axis == 1): - plotVars = data[i, c['Ng']:-c['Ng'], c['Ny']//2, c['Ng']:-c['Ng']] + plotVars = data[i, :, c['Ny']//2, :] axisLabel1 = r'$x$' axisLabel2 = r'$z$' if (axis == 2): - plotVars = data[i, c['Ng']:-c['Ng'], c['Ng']:-c['Ng'], c['Nz']//2] + plotVars = data[i, :, :, c['Nz']//2] axisLabel1 = r'$x$' axisLabel2 = r'$y$' if color==None: color = cm.afmhot - surf = plt.imshow(plotVars.T, cmap=color, interpolation='bicubic', aspect='auto', - extent=[c['xmin'], c['xmax'], c['ymin'], c['ymax']]) - plt.title(r'Time Evolution for {}: $t = {}$'.format(dataLabels[i], c['t'])) - plt.xlabel(axisLabel2) - plt.ylabel(axisLabel1) + ext = [self.c['xmin'], self.c['xmax'], self.c['ymin'], self.c['ymax']] + surf = ax.imshow(plotVars.T, cmap=color, interpolation='bicubic', aspect='auto', origin='lower', extent=ext) + ax.set_title(r'Time Evolution for {}: $t = {}$'.format(dataLabels[i], c['t'])) + ax.set_xlim([self.c['xmin'], self.c['xmax']]) + ax.set_ylim([self.c['ymin'], self.c['ymax']]) + ax.set_xlabel(axisLabel1) + ax.set_ylabel(axisLabel2) fig.colorbar(surf, shrink=0.5, aspect=5) plt.show() - + return ax def plotSlice(self, data='prims', axis=0): """ @@ -377,24 +370,24 @@ def plotSlice(self, data='prims', axis=0): raise ValueError("Variable type not recognised, please try again") c = self.c - Nx, Ny, Nz, Ng= c['Nx'], c['Ny'], c['Nz'], c['Ng'] + Nx, Ny, Nz = c['Nx'], c['Ny'], c['Nz'] for i in range(len(data)): plt.figure() if (axis == 0): - plotVars = data[i, Ng:-Ng, Ny//2, Nz//2] + plotVars = data[i, :, Ny//2, Nz//2] axisLabel = r'$x$' step = c['dx'] n = c['nx'] left, right = c['xmin'], c['xmax'] if (axis == 1): - plotVars = data[i, Nx//2, Ng:-Ng, Nz//2] + plotVars = data[i, Nx//2, :, Nz//2] 
axisLabel = r'$y$' step = c['dy'] n = c['ny'] left, right = c['ymin'], c['ymax'] if (axis == 2): - plotVars = data[i, Nx//2, Ny//2, Ng:-Ng] + plotVars = data[i, Nx//2, Ny//2, :] axisLabel = r'$z$' step = c['dz'] n = c['nz'] @@ -411,7 +404,7 @@ def plotSlice(self, data='prims', axis=0): plt.xlabel(axisLabel) plt.ylabel(r'$q_{}(x)$'.format(i+1)) plt.xlim([c['xmin'], c['xmax']]) - plt.ylim((ylower, yupper)) +# plt.ylim((ylower, yupper)) plt.legend(loc='lower center', fontsize=10) plt.show() @@ -423,11 +416,11 @@ def plotTwoFluidSlice(self): """ c = self.c - Ny, Nz, Ng = c['Ny'], c['Nz'], c['Ng'] + Ny, Nz = c['Ny'], c['Nz'] - rho = self.prims[0, Ng:-Ng, Ny//2, Nz//2] + self.prims[5, Ng:-Ng, Ny//2, Nz//2] - p = self.prims[4, Ng:-Ng, Ny//2, Nz//2] + self.prims[9, Ng:-Ng, Ny//2, Nz//2] - var = [rho, *self.aux[31:34, Ng:-Ng, Ny//2, Nz//2], p, *self.prims[10:, Ng:-Ng, Ny//2, Nz//2]] + rho = self.prims[0, :, Ny//2, Nz//2] + self.prims[5, :, Ny//2, Nz//2] + p = self.prims[4, :, Ny//2, Nz//2] + self.prims[9, :, Ny//2, Nz//2] + var = [rho, *self.aux[31:34, :, Ny//2, Nz//2], p, *self.prims[10:, :, Ny//2, Nz//2]] varLab = [r'$\rho$', r'$u_x$', r'$u_y$', r'$u_z$', r'$p$', r'$B_x$', r'$B_y$', r'$B_z$', r'$E_x$', r'$E_y$', r'$E_z$'] xs = np.linspace(c['xmin'] + c['dx']/2, c['xmax'] - c['dx']/2, c['nx']) @@ -459,7 +452,7 @@ def plotTwoFluidCurrentSheetAgainstExact(self): plt.figure() xs = np.linspace(c['xmin'], c['xmax'], c['nx']) exact = np.sign(xs)*erf(0.5 * np.sqrt(c['sigma'] * xs ** 2 / (c['t']+1))) - plt.plot(xs, By[c['Ng']:-c['Ng'], 0, 0], label='Numerical') + plt.plot(xs, By[:, 0, 0], label='Numerical') plt.plot(xs, exact, label='Exact') plt.xlim([c['xmin'], c['xmax']]) plt.ylim([-1.2, 1.2]) @@ -468,7 +461,7 @@ def plotTwoFluidCurrentSheetAgainstExact(self): plt.title(r'Comparison of exact and numerical $B_y$ at $t={:.4f}$'.format(c['t']+1)) plt.legend(loc='upper left') plt.show() - #return np.linalg.norm(exact - By[c['Ng']:-c['Ng'], 0, 0]) + #return np.linalg.norm(exact - By[:, 0, 0]) def plotSingleFluidCurrentSheetAgainstExact(self, direction=0): @@ -483,13 +476,13 @@ def plotSingleFluidCurrentSheetAgainstExact(self, direction=0): nz = self.c['Nz'] // 2 if direction == 0: - B = self.cons[6, c['Ng']:-c['Ng'], ny, nz] + B = self.cons[6, :, ny, nz] x = np.linspace(c['xmin'], c['xmax'], c['nx']) elif direction == 1: - B = self.cons[7, nx, c['Ng']:-c['Ng'], nz] + B = self.cons[7, nx, :, nz] x = np.linspace(c['ymin'], c['ymax'], c['ny']) else: - B = self.cons[5, nx, ny, c['Ng']:-c['Ng']] + B = self.cons[5, nx, ny, :] x = np.linspace(c['zmin'], c['zmax'], c['nz']) exact = np.sign(x)*erf(0.5 * np.sqrt(c['sigma'] * x ** 2 / (c['t']+1))) @@ -516,7 +509,6 @@ def plotTwoFluidCPAlfvenWaveAgainstExact(self): c = self.c xs = np.linspace(c['xmin'], c['xmax'], c['nx']) t = c['t'] - Ng = c['Ng'] h = 1.04 B0 = h @@ -547,28 +539,28 @@ def plotTwoFluidCPAlfvenWaveAgainstExact(self): BxSol = np.zeros_like(BySol) BxSol[:] = B0 plt.figure() - plt.plot(xs, Bx[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, Bx[:, 0, 0], label='Numerical') plt.plot(xs, BxSol, '--', label='Exact') plt.title(r'Exact comparison for $B_x$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # By plt.figure() - plt.plot(xs, By[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, By[:, 0, 0], label='Numerical') plt.plot(xs, BySol, '--', label='Exact') plt.title(r'Exact comparison for $B_y$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # By plt.figure() - plt.plot(xs, Bz[Ng:-Ng, 0, 0], label='Numerical') + 
plt.plot(xs, Bz[:, 0, 0], label='Numerical') plt.plot(xs, BzSol, '--', label='Exact') plt.title(r'Exact comparison for $B_z$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # Ex plt.figure() - plt.plot(xs, Ex[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, Ex[:, 0, 0], label='Numerical') plt.plot(xs, np.zeros_like(xs), '--', label='Exact') plt.title(r'Exact comparison for $E_x$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) @@ -579,21 +571,21 @@ def plotTwoFluidCPAlfvenWaveAgainstExact(self): plt.legend() # Ey plt.figure() - plt.plot(xs, Ey[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, Ey[:, 0, 0], label='Numerical') plt.plot(xs, EySol, '--', label='Exact') plt.title(r'Exact comparison for $E_y$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # Ez plt.figure() - plt.plot(xs, Ez[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, Ez[:, 0, 0], label='Numerical') plt.plot(xs, EzSol, '--', label='Exact') plt.title(r'Exact comparison for $E_z$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # vx1 plt.figure() - plt.plot(xs, vx1[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, vx1[:, 0, 0], label='Numerical') plt.plot(xs, np.zeros_like(xs), '--', label='Exact') plt.title(r'Exact comparison for $v_x1$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) @@ -604,21 +596,21 @@ def plotTwoFluidCPAlfvenWaveAgainstExact(self): plt.legend() # vy1 plt.figure() - plt.plot(xs, vy1[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, vy1[:, 0, 0], label='Numerical') plt.plot(xs, vy1sol, '--', label='Exact') plt.title(r'Exact comparison for $v_y1$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # vz1 plt.figure() - plt.plot(xs, vz1[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, vz1[:, 0, 0], label='Numerical') plt.plot(xs, vz1sol, '--', label='Exact') plt.title(r'Exact comparison for $v_z1$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # vx2 plt.figure() - plt.plot(xs, vx2[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, vx2[:, 0, 0], label='Numerical') plt.plot(xs, np.zeros_like(xs), '--', label='Exact') plt.title(r'Exact comparison for $v_x2$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) @@ -629,23 +621,130 @@ def plotTwoFluidCPAlfvenWaveAgainstExact(self): plt.legend() # vy2 plt.figure() - plt.plot(xs, vy2[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, vy2[:, 0, 0], label='Numerical') plt.plot(xs, vy2sol, '--', label='Exact') plt.title(r'Exact comparison for $v_y2$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() # vz2 plt.figure() - plt.plot(xs, vz2[Ng:-Ng, 0, 0], label='Numerical') + plt.plot(xs, vz2[:, 0, 0], label='Numerical') plt.plot(xs, vz2sol, '--', label='Exact') plt.title(r'Exact comparison for $v_z2$ at $t={}$'.format(t)) plt.xlim([c['xmin'], c['xmax']]) plt.legend() + + + def plot2DBrioWu(self, diag=0): + """ + Plots the main diagonal of the 2D Brio-Wu problem + + Parameters + ---------- + diag : int + The diagonal to plot the slice + """ + + nx = self.c['nx'] +# Ny = self.c['Ny'] + midZ = self.c['Nz'] // 2 + Ng = self.c['Ng'] + + if diag == 0: + LB = -Ng + RB = Ng + step = -1 + else: + LB = Ng + RB = -Ng + step = 1 + + + dens = self.prims[0, :, LB:RB:step, midZ].diagonal() + vx = self.prims[1, :, LB:RB:step, midZ].diagonal() + vy = self.prims[2, :, LB:RB:step, midZ].diagonal() + + + p = self.prims[4, :, LB:RB:step, midZ].diagonal() + B = self.prims[5, :, LB:RB:step, midZ].diagonal() / np.sqrt(2) + \ + self.prims[6, :, LB:RB:step, midZ].diagonal() / 
np.sqrt(2) + + # rho + plt.figure() + plt.plot(np.linspace(0, 1, nx), dens) + plt.ylabel(r'$\rho$') + plt.xlim([0, 1]) + plt.show() + # vx + plt.figure() + plt.plot(np.linspace(0, 1, nx), vx) + plt.ylabel(r'$vx$') + plt.xlim([0, 1]) + plt.show() + # vy + plt.figure() + plt.plot(np.linspace(0, 1, nx), vy) + plt.ylabel(r'$vy$') + plt.xlim([0, 1]) + plt.show() + # v rel + plt.figure() + plt.plot(np.linspace(0, 1, nx),(vx-vy)/(1-vx*vy)) + plt.ylabel(r'$v (rel)$') + plt.xlim([0, 1]) + plt.show() + # v non-rel + plt.figure() + plt.plot(np.linspace(0, 1, nx), vx/np.sqrt(2) - vy/np.sqrt(2)) + plt.ylabel(r'$v (non-rel)$') + plt.xlim([0, 1]) + plt.show() + # p + plt.figure() + plt.plot(np.linspace(0, 1, nx), p) + plt.ylabel(r'$p$') + plt.xlim([0, 1]) + plt.show() + # B + plt.figure() + plt.plot(np.linspace(0, 1, nx), B) + plt.ylabel(r'$B$') + plt.xlim([0, 1]) + plt.show() + + return B + + def plotAdvectionAgainstInitial(self): + xs = np.linspace(Plot.c['dx']/2, 1-Plot.c['dx']/2, Plot.c['nx']) + initialRho = np.ones_like(xs)*0.1 + initialRho += 0.4*np.exp(-(10 * (xs - 0.5))**2) + + fig, axs = plt.subplots(2) + fig.set_size_inches(8, 6) + axs[0].plot(xs, initialRho, 'k-', linewidth=5, alpha=0.3, label='initial') + axs[0].plot(xs, Plot.prims[0, :, 0, 0], 'b:', label='rho') + axs[0].set_xlim(xs[0], xs[-1]) + axs[0].set_xlabel(r'$x$') + axs[0].set_ylabel(r'$\rho$') + axs[0].legend() + + error = np.abs(initialRho-Plot.prims[0, :, 0, 0]) + errorNorm = np.sum(error)/len(error) + axs[1].semilogy(xs, error, label=rf'Mean = ${errorNorm:.1e}$') + axs[1].set_xlabel(r"$x$") + axs[1].set_ylabel('Error') + axs[1].set_xlim(xs[0], xs[-1]) + axs[1].legend() + plt.show() + + # Function declarations over, access data and plot! + if __name__ == '__main__': Plot = InteractivePlot() - Plot.plotHeatMaps() +# Plot.plotHeatMaps() + diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 10a10340..8d4536aa 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -36,7 +36,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.05); + double endTime(0.0005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); diff --git a/Project/compare.py b/Project/compare.py index 99fa52ad..47f74f90 100644 --- a/Project/compare.py +++ b/Project/compare.py @@ -1,4 +1,4 @@ -TOL=10e-15 +TOL=1e-15 time_format_folder="Final" vars_folders=["Conserved", "Auxiliary", "Primitive"] @@ -25,10 +25,8 @@ parallel_val = float(parallel_line) line_number = line_number + 1 if (abs(serial_val-parallel_val) > TOL): - print("\n\n!! 
Error in {} (val={}, line={}), {}, (val={})\n\n".format(serial_filename, serial_val, line_number, parallel_filename, parallel_val)) + print("\tError in {} (val={}, line={}), {}, (val={})\n".format(serial_filename, serial_val, line_number, parallel_filename, parallel_val)) break except IOError: print("Could not read file:", filename) - - From 0568b6fa0ac546809be661932c27a6d832c70312 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Wed, 26 Aug 2020 13:21:45 +0100 Subject: [PATCH 13/56] added new API for saving data --- Project/GPU/Include/parallelSaveData.h | 129 ++++ Project/GPU/Include/saveData.h | 28 +- Project/GPU/Include/serialSaveData.h | 82 +++ Project/GPU/Include/simData.h | 1 + Project/GPU/Makefile | 4 +- Project/GPU/Src/main.cu | 4 +- Project/GPU/Src/parallelSaveData.cu | 550 ++++++++++++++++++ .../Src/{saveData.cu => serialSaveData.cu} | 17 +- 8 files changed, 787 insertions(+), 28 deletions(-) create mode 100644 Project/GPU/Include/parallelSaveData.h create mode 100644 Project/GPU/Include/serialSaveData.h create mode 100644 Project/GPU/Src/parallelSaveData.cu rename Project/GPU/Src/{saveData.cu => serialSaveData.cu} (96%) diff --git a/Project/GPU/Include/parallelSaveData.h b/Project/GPU/Include/parallelSaveData.h new file mode 100644 index 00000000..3fb33dfd --- /dev/null +++ b/Project/GPU/Include/parallelSaveData.h @@ -0,0 +1,129 @@ +#ifndef PARALLELSAVEDATA_H +#define PARALLELSAVEDATA_H + +#include +#include +#include +#include +#include +#include "simData.h" +#include "saveData.h" +#include "parallelEnv.h" + +using namespace std; + +//! Class used to save simulation data using multiple processes +/*! + @par + Write outputs through the simple system of collecting all simulation data onto process 0 + and writing out from process 0. This is easy to code but has the downside of limiting + the problem size to one that will fit onto one node. + + Class is initialized with the data that is to be saved. Saves the simulation + data in the Data directory, located within the Project folder. All data is + saved automatically, including all constant data (xmin, ymax, endTime etc) and + and the values of all prims, aux and cons variables. +*/ +class ParallelSaveData : public SaveData +{ + public: + ParallelEnv * env; //!< Pointer to PlatformEnv class containing platform specific info such as MPI details + + private: + + /*! + For each particular state vector (cons, prims, aux) packs a buffer containing all cells in a subdomain + (not including ghost values) to be sent to process 0 + @param[out] *buffer pointer to the buffer to pack + @param[in] *stateVector pointer to cons, prims or aux array + @param[in] nVars number of variables in the cons, prims or aux array + */ + void packStateVectorBuffer(double *buffer, double *stateVector, int nVars); + + /*! + For each subdomain, send a buffer containing the non-ghost cells in that subdomain to a buffer on process 0. + @param[in, out] *buffer pointer to the buffer to send or receive + @param[in] numCellsSent number of cells in the buffer + @param[in] rank global id of the process sending its buffer to process 0 + */ + void sendStateVectorBufferToMaster(double *buffer, int numCellsSent, int rank); + + /*! 
+ For each particular state vector (cons, prims, aux) unpacks a buffer containing all cells + (not including ghost values) received from a particular subdomain into a vector containing + the full simulation domain + @param[in] *buffer pointer to the buffer to unpack + @param[in, out] *stateVector pointer to cons, prims or aux array of size equal to the full simulation domain + @param[in] rank global id of the process that sent its buffer to process 0 + */ + void unpackStateVectorBuffer(double *buffer, double *stateVector, int nVars, int rank); + + /*! + Process 0 already holds the values for its own subdomain, so does not need to send them anywhere. + Instead, it needs to copy its subdomain values (cons, prims, aux) to the vector containing + the full simulation domain + @param[in, out] *fullStateVector pointer to cons, prims or aux array of size equal to the full simulation domain + @param[in] *stateVector pointer to cons, prims or aux array for process 0's subdomain + @param[in] nVars number of variables in the cons, prims or aux array + */ + void copyMasterStateVectorToFullStateVector(double *fullStateVector, double *stateVector, int nVars); + + // TODO -- docstring + void writeStateVectorToFile(FILE *f, double *fullStateVector, int nVars); + + public: + + //! Saves the conserved vector state + void saveCons(); + + //! Saves the primitive vector state + void savePrims(); + + //! Saves the auxiliary vector state + void saveAux(); + + //! Saves the domain coordinates + void saveDomain(); + + //! Saves the constant data + void saveConsts(); + + + //! Constructor + /*! + @par + The constructor take a pointer to the data class which the user wants + to save. All this data is automatically saved in the Data directory, located + in the Project folder. + + @param *data pointer to the Data class + @param test integar flagging if we are in the 'Examples' directory or not, + Only used for running the given examples, can ignore otherwise. + */ + ParallelSaveData(Data * data, ParallelEnv * env, int test=0) : SaveData(data, test), env(env) { } + + virtual ~ParallelSaveData() { } //!< Destructor + + //! Saves all cons, prims, aux and constant data + /*! + @par + This calls the other member functions to save their respective + simulation data. + + @param[in] timeSeries flags whether the saved data is final or transient + */ + void saveAll(bool timeSeries=false); + + //! Saves user specified variable + /*! + @par + Function saves the data for the variable specified by the string `var` + + @param[in] variable Defines the variable the user wants to save. Should match a variable label + @param[in] num number of user-specified variables to save in total (required for consistent numbering of files) + */ + void saveVar(string variable, int num=1); + +}; + +#endif diff --git a/Project/GPU/Include/saveData.h b/Project/GPU/Include/saveData.h index c737e1ab..95734cc2 100644 --- a/Project/GPU/Include/saveData.h +++ b/Project/GPU/Include/saveData.h @@ -7,13 +7,15 @@ #include #include #include "simData.h" +#include "platformEnv.h" using namespace std; //! Class used to save simulation data /*! @par - Class is initialized with the data that is to be saved. Saves the simulation + Abstract base class to allow for different output schemes in a parallel environment. + Class is initialized with the data that is to be saved. Saves the simulation data in the Data directory, located within the Project folder. 
All data is saved automatically, including all constant data (xmin, ymax, endTime etc) and and the values of all prims, aux and cons variables. @@ -24,34 +26,29 @@ class SaveData public: Data * d; //!< Pointer to Data class containing global simulation data - private: - int Nouts, //!< Number of output files Ncount, //!< Which user defined variable is this? test; //!< Flags if we are running one of the given examples - public: - //! Saves the conserved vector state - void saveCons(); + virtual void saveCons() = 0; //! Saves the primitive vector state - void savePrims(); + virtual void savePrims() = 0; //! Saves the auxiliary vector state - void saveAux(); + virtual void saveAux() = 0; //! Saves the domain coordinates - void saveDomain(); + virtual void saveDomain() = 0; //! Saves the constant data - void saveConsts(); + virtual void saveConsts() = 0; char dir[50], //!< String path to the directory in which to write files - app[10]; //!< String appendix to add to end of file names - + app[50]; //!< String appendix to add to end of file names //! Constructor /*! @@ -73,6 +70,7 @@ class SaveData } } + virtual ~SaveData() { } //!< Destructor //! Saves all cons, prims, aux and constant data /*! @@ -82,17 +80,17 @@ class SaveData @param[in] timeSeries flags whether the saved data is final or transient */ - void saveAll(bool timeSeries=false); + virtual void saveAll(bool timeSeries=false) = 0; //! Saves user specified variable /*! @par Function saves the data for the variable specified by the string `var` - @param[in] var Defines the variable the user wants to save. Should match a variable label + @param[in] variable Defines the variable the user wants to save. Should match a variable label @param[in] num number of user-specified variables to save in total (required for consistent numbering of files) */ - void saveVar(string variable, int num=1); + virtual void saveVar(string variable, int num=1) = 0; }; diff --git a/Project/GPU/Include/serialSaveData.h b/Project/GPU/Include/serialSaveData.h new file mode 100644 index 00000000..7900ccba --- /dev/null +++ b/Project/GPU/Include/serialSaveData.h @@ -0,0 +1,82 @@ +#ifndef SERIALSAVEDATA_H +#define SERIALSAVEDATA_H + +#include +#include +#include +#include +#include +#include "simData.h" +#include "saveData.h" +#include "serialEnv.h" + +using namespace std; + +//! Class used to save simulation data using a single process +/*! + @par + Class is initialized with the data that is to be saved. Saves the simulation + data in the Data directory, located within the Project folder. All data is + saved automatically, including all constant data (xmin, ymax, endTime etc) and + and the values of all prims, aux and cons variables. +*/ +class SerialSaveData : public SaveData +{ + + public: + + SerialEnv * env; //!< Pointer to PlatformEnv class containing platform specific info such as MPI details + + //! Saves the conserved vector state + void saveCons(); + + //! Saves the primitive vector state + void savePrims(); + + //! Saves the auxiliary vector state + void saveAux(); + + //! Saves the domain coordinates + void saveDomain(); + + //! Saves the constant data + void saveConsts(); + + //! Constructor + /*! + @par + The constructor take a pointer to the data class which the user wants + to save. All this data is automatically saved in the Data directory, located + in the Project folder. 
+ + @param *data pointer to the Data class + @param test integar flagging if we are in the 'Examples' directory or not, + Only used for running the given examples, can ignore otherwise. + */ + SerialSaveData(Data * data, SerialEnv * env, int test=0) : SaveData(data, test), env(env) { } + + virtual ~SerialSaveData() { } //!< Destructor + + //! Saves all cons, prims, aux and constant data + /*! + @par + This calls the other member functions to save their respective + simulation data. + + @param[in] timeSeries flags whether the saved data is final or transient + */ + void saveAll(bool timeSeries=false); + + //! Saves user specified variable + /*! + @par + Function saves the data for the variable specified by the string `var` + + @param[in] variable Defines the variable the user wants to save. Should match a variable label + @param[in] num number of user-specified variables to save in total (required for consistent numbering of files) + */ + void saveVar(string variable, int num=1); + +}; + +#endif diff --git a/Project/GPU/Include/simData.h b/Project/GPU/Include/simData.h index 7e0056d2..1111ab3b 100644 --- a/Project/GPU/Include/simData.h +++ b/Project/GPU/Include/simData.h @@ -5,6 +5,7 @@ #include #include "platformEnv.h" + /*! Currently (and possibly permanently) a very hacky way of keeping singleCell cons2prims function general for the benefit of the IMEX integrator. diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 28576504..9028e8b3 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -42,7 +42,7 @@ SRCS = main.cu \ SSP3.cu \ IMEX3Args.cu \ boundaryConds.cu \ - saveData.cu \ + serialSaveData.cu \ serialEnv.cu \ fluxVectorSplitting.cu \ srrmhd.cu \ @@ -151,7 +151,7 @@ IMEX3Args.o : $(MODULE_DIR)/IMEX3Args.cu $(INC_DIR)/IMEX3Args.h $(INC_DIR)/IMEX2 boundaryConds.o : $(MODULE_DIR)/boundaryConds.cu $(INC_DIR)/boundaryConds.h $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp -saveData.o : $(MODULE_DIR)/saveData.cu $(INC_DIR)/saveData.h +serialSaveData.o : $(MODULE_DIR)/serialSaveData.cu $(INC_DIR)/serialSaveData.h $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index ddf37c4b..4be013f4 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -7,7 +7,7 @@ #include "boundaryConds.h" #include "rkSplit.h" #include "SSP2.h" -#include "saveData.h" +#include "serialSaveData.h" #include "fluxVectorSplitting.h" #include "serialEnv.h" @@ -72,7 +72,7 @@ int main(int argc, char *argv[]) { SSP2 timeInt(&data, &model, &bcs, &fluxMethod); - SaveData save(&data); + SerialSaveData save(&data, &env); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); diff --git a/Project/GPU/Src/parallelSaveData.cu b/Project/GPU/Src/parallelSaveData.cu new file mode 100644 index 00000000..0c592d5c --- /dev/null +++ b/Project/GPU/Src/parallelSaveData.cu @@ -0,0 +1,550 @@ +#include "parallelSaveData.h" +#include +#include +#include +#include + +using namespace std; + +// Id in a state vector that does not include ghost cells +// TODO -- Should probably just define a variable on Data that is (Nz-2*Ng or 1 if nz=0) to avoid having a copy for each dimension +#define ID_PHYS_3D(variable, idx, jdx, kdx) ((variable)*(d->Nx-(d->Ng*2))*(d->Ny-(d->Ng*2))*(d->Nz-(d->Ng*2)) + (idx)*(d->Ny-(d->Ng*2))*(d->Nz-(d->Ng*2)) + (jdx)*(d->Nz-(d->Ng*2)) + (kdx)) +#define ID_PHYS_2D(variable, idx, jdx) 
((variable)*(d->Nx-(d->Ng*2))*(d->Ny-(d->Ng*2)) + (idx)*(d->Ny-(d->Ng*2)) + (jdx)) +#define ID_PHYS_1D(variable, idx) ((variable)*(d->Nx-(d->Ng*2)) + (idx)) + +#define ID_FULL_3D(variable, idx, jdx, kdx) ((variable)*(d->nx)*(d->ny)*(d->nz) + (idx)*(d->ny)*(d->nz) + (jdx)*(d->nz) + (kdx)) +#define ID_FULL_2D(variable, idx, jdx) ((variable)*(d->nx)*(d->ny) + (idx)*(d->ny) + (jdx)) +#define ID_FULL_1D(variable, idx) ((variable)*(d->nx) + (idx)) +#define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) + +void ParallelSaveData::saveAll(bool timeSeries) +{ + // Clean directory variable + dir[0] = '\0'; + // Determine the directory to write files to + if (test) + strcpy(dir, "../../"); + if (!timeSeries && strcmp(dir, "Data/Final")!=0) { + strcat(dir, "Data/Final"); + app[0]=0; + } + else { + strcat(dir, "Data/TimeSeries"); + sprintf(app, "%d", Nouts++); + } + + // Cons + this->saveCons(); + + // Prims + this->savePrims(); + + // Aux + this->saveAux(); + + // TODO -- could gather this to proc0 like for the other state vectors but not sure if it is required + //this->saveDomain(); + + // TODO -- Nx, Ny are per process -- may need to print out a global version as well (nx, ny don't include ghost cells) + this->saveConsts(); + +} + +void ParallelSaveData::packStateVectorBuffer(double *buffer, double *stateVector, int nVars){ + // Prepare send buffer, which doesn't include ghost cells, by copying from local state vectors + if (d->dims==3){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + for (int j(0); j < d->Ny-(d->Ng*2); j++) { + for (int k(0); k < d->Nz-(d->Ng*2); k++) { + buffer[ID_PHYS_3D(var, i, j, k)] = stateVector[ID(var, i + d->Ng, j + d->Ng, k + d->Ng)]; + } + } + } + } + } else if (d->dims==2){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + for (int j(0); j < d->Ny-(d->Ng*2); j++) { + buffer[ID_PHYS_2D(var, i, j)] = stateVector[ID(var, i + d->Ng, j + d->Ng, 0)]; + } + } + } + } else { + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + buffer[ID_PHYS_1D(var, i)] = stateVector[ID(var, i + d->Ng, 0, 0)]; + } + } + } +} + +void ParallelSaveData::copyMasterStateVectorToFullStateVector(double *fullStateVector, double *stateVector, int nVars){ + // This requires proc0 to have xRankId=yRankId=zRankId=0 + if (d->dims==3){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + for (int j(0); j < d->Ny-(d->Ng*2); j++) { + for (int k(0); k < d->Nz-(d->Ng*2); k++) { + fullStateVector[ID_FULL_3D(var, i, j, k)] = stateVector[ID(var, i + d->Ng, j + d->Ng, k + d->Ng)]; + } + } + } + } + } else if (d->dims==2){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + for (int j(0); j < d->Ny-(d->Ng*2); j++) { + //printf("nx: %d, ny: %d\n", d->nx, d->ny); + //printf("var: %d i: %d j: %d, id: %d, id_full: %d\n", var, i, j, ID(var, i+d->Ng, j+d->Ng, 0), + //ID_FULL_2D(var, i, j)); + fullStateVector[ID_FULL_2D(var, i, j)] = stateVector[ID(var, i + d->Ng, j + d->Ng, 0)]; + } + } + } + } else { + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + fullStateVector[ID_FULL_1D(var, i)] = stateVector[ID(var, i + d->Ng, 0, 0)]; + } + } + } +} + +void ParallelSaveData::sendStateVectorBufferToMaster(double *buffer, int numCellsSent, int rank){ + // MPI message vars + int tag = 101; + MPI_Status status; + if (env->rank == rank){ + 
MPI_Send(buffer, numCellsSent, MPI_DOUBLE, 0, tag, env->mpiCartesianComm); + } else if (env->rank == 0){ + MPI_Recv(buffer, numCellsSent, MPI_DOUBLE, rank, tag, env->mpiCartesianComm, &status); + } +} + +void ParallelSaveData::unpackStateVectorBuffer(double *buffer, double *stateVector, int nVars, int rank){ + // Unpack send buffer, which don't include ghost cells, into the global state vector + + // Get (x,y,z) coords of rank that sent data to proc0 + int rankCoords[3]; + int ndims = 3; // rank grid is always 3D + MPI_Cart_coords(env->mpiCartesianComm, rank, ndims, rankCoords); + + int iOffset, jOffset, kOffset; + iOffset = rankCoords[0] * (d->Nx - (d->Ng*2)); + if (d->dims > 1) { + jOffset = rankCoords[1] * (d->Ny - (d->Ng*2)); + } else jOffset = 0; + + if (d->dims > 2) { + kOffset = rankCoords[2] * (d->Nz - (d->Ng*2)); + } else kOffset = 0; + + if (d->dims==3){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + for (int j(0); j < d->Ny-(d->Ng*2); j++) { + for (int k(0); k < d->Nz-(d->Ng*2); k++) { + stateVector[ID_FULL_3D(var, i + iOffset, j + jOffset, k + kOffset)] = buffer[ID_PHYS_3D(var, i, j, k)]; + } + } + } + } + } else if (d->dims==2){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + for (int j(0); j < d->Ny-(d->Ng*2); j++) { + stateVector[ID_FULL_2D(var, i + iOffset, j + jOffset)] = buffer[ID_PHYS_2D(var, i, j)]; + } + } + } + } else { + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx-(d->Ng*2); i++) { + stateVector[ID_FULL_1D(var, i + iOffset)] = buffer[ID_PHYS_1D(var, i)]; + } + } + } +} + +void ParallelSaveData::writeStateVectorToFile(FILE *f, double *fullStateVector, int nVars){ + if (d->dims==3){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->nx; i++) { + for (int j(0); j < d->ny; j++) { + for (int k(0); k < d->nz; k++) { + fprintf(f, "%.16f ", fullStateVector[ID_FULL_3D(var, i, j, k)]); + } + fprintf(f, "\n"); + } + } + } + } else if (d->dims==2){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->nx; i++) { + for (int j(0); j < d->ny; j++) { + fprintf(f, "%.16f ", fullStateVector[ID_FULL_2D(var, i, j)]); + fprintf(f, "\n"); + } + } + } + } else { + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->nx; i++) { + fprintf(f, "%.16f ", fullStateVector[ID_FULL_1D(var, i)]); + fprintf(f, "\n"); + } + } + } +} + +void ParallelSaveData::saveCons() +{ + FILE * f; + + char fname[120]; + strcpy(fname, dir); + strcat(fname, "/Conserved/cons"); + strcat(fname, app); + strcat(fname, ".dat\0"); + + // Allocate buffers for gathering distributed state vectors onto master process + // We do this here rather than in saveAll to allow saveCons to be called independently + // We don't want to do this in the ParallelSaveData constructor as we don't want to use up this large + // amount of memory until it's needed. + int numCellsInBuffer = d->Ncons * (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsInBuffer *= (d->Ny - (2*d->Ng)); + if (d->dims > 2) numCellsInBuffer *= (d->Nz - (2*d->Ng)); + double *buffer = (double*) malloc(numCellsInBuffer * sizeof(double)); + int numCellsInFullStateVector = numCellsInBuffer * env->nProc; + double *fullStateVector = (double*) malloc(numCellsInFullStateVector * sizeof(double)); + + // For all procs other than proc0, copy local statevector to a buffer that does not include ghost cells + // for sending to proc0. 
Proc0 can copy directly from its local statevector to the fullstatevector + if (env->rank != 0) packStateVectorBuffer(buffer, d->cons, d->Ncons); + else copyMasterStateVectorToFullStateVector(fullStateVector, d->cons, d->Ncons); + + for (int r(1); r < env->nProc; r++){ + int numCellsSent = d->Ncons * (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsSent *= (d->Ny-(2*d->Ng)); + if (d->dims > 2) numCellsSent *= (d->Nz-(2*d->Ng)); + sendStateVectorBufferToMaster(buffer, numCellsSent, r); + if (env->rank == 0) unpackStateVectorBuffer(buffer, fullStateVector, d->Ncons, r); + } + + if (env->rank == 0){ + f = fopen(fname, "w"); + // Ensure file is open + if (f == NULL) { + printf("Error: could not open 'cons.dat' for writing.\n"); + exit(1); + } + + // File is open, write data + fprintf(f, "cons = "); + for (int i(0); i < d->Ncons-1; i++) { + fprintf(f, "%s, ", d->consLabels[i].c_str()); + } + fprintf(f, "%s\n", d->consLabels[d->Ncons-1].c_str()); + + writeStateVectorToFile(f, fullStateVector, d->Ncons); + + fclose(f); + } + + free(buffer); + free(fullStateVector); +} + +void ParallelSaveData::savePrims() +{ + FILE * f; + char fname[120]; + strcpy(fname, dir); + strcat(fname, "/Primitive/prims"); + strcat(fname, app); + strcat(fname, ".dat\0"); f = fopen(fname, "w"); + + // Allocate buffers for gathering distributed state vectors onto master process + // We do this here rather than in saveAll to allow savePrims to be called independently + // We don't want to do this in the ParallelSaveData constructor as we don't want to use up this large + // amount of memory until it's needed. + int numCellsInBuffer = d->Nprims * (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsInBuffer *= (d->Ny - (2*d->Ng)); + if (d->dims > 2) numCellsInBuffer *= (d->Nz - (2*d->Ng)); + double *buffer = (double*) malloc(numCellsInBuffer * sizeof(double)); + int numCellsInFullStateVector = numCellsInBuffer * env->nProc; + double *fullStateVector = (double*) malloc(numCellsInFullStateVector * sizeof(double)); + + if (env->rank != 0) packStateVectorBuffer(buffer, d->prims, d->Nprims); + else copyMasterStateVectorToFullStateVector(fullStateVector, d->prims, d->Nprims); + for (int r(1); r < env->nProc; r++){ + int numCellsSent = d->Nprims * (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsSent *= (d->Ny-(2*d->Ng)); + if (d->dims > 2) numCellsSent *= (d->Nz-(2*d->Ng)); + sendStateVectorBufferToMaster(buffer, numCellsSent, r); + if (env->rank == 0) unpackStateVectorBuffer(buffer, fullStateVector, d->Nprims, r); + } + + if (env->rank == 0){ + // Ensure file is open + if (f == NULL) { + printf("Error: could not open 'prims.dat' for writing.\n"); + exit(1); + } + + // File is open, write data + fprintf(f, "prims = "); + for (int i(0); i < d->Nprims-1; i++) fprintf(f, "%s, ", d->primsLabels[i].c_str()); + fprintf(f, "%s\n", d->primsLabels[d->Nprims-1].c_str()); + + writeStateVectorToFile(f, fullStateVector, d->Nprims); + fclose(f); + } + + free(buffer); + free(fullStateVector); +} + +void ParallelSaveData::saveAux() +{ + FILE * f; + char fname[120]; + strcpy(fname, dir); + strcat(fname, "/Auxiliary/aux"); + strcat(fname, app); + strcat(fname, ".dat\0"); f = fopen(fname, "w"); + + // Allocate buffers for gathering distributed state vectors onto master process + // We do this here rather than in saveAll to allow saveAux to be called independently + // We don't want to do this in the ParallelSaveData constructor as we don't want to use up this large + // amount of memory until it's needed. 
+ int numCellsInBuffer = d->Naux * (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsInBuffer *= (d->Ny - (2*d->Ng)); + if (d->dims > 2) numCellsInBuffer *= (d->Nz - (2*d->Ng)); + double *buffer = (double*) malloc(numCellsInBuffer * sizeof(double)); + int numCellsInFullStateVector = numCellsInBuffer * env->nProc; + double *fullStateVector = (double*) malloc(numCellsInFullStateVector * sizeof(double)); + + if (env->rank != 0) packStateVectorBuffer(buffer, d->aux, d->Naux); + else copyMasterStateVectorToFullStateVector(fullStateVector, d->aux, d->Naux); + for (int r(1); r < env->nProc; r++){ + int numCellsSent = d->Naux * (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsSent *= (d->Ny-(2*d->Ng)); + if (d->dims > 2) numCellsSent *= (d->Nz-(2*d->Ng)); + sendStateVectorBufferToMaster(buffer, numCellsSent, r); + if (env->rank == 0) unpackStateVectorBuffer(buffer, fullStateVector, d->Naux, r); + } + + if (env->rank == 0){ + // Ensure file is open + if (f == NULL) { + printf("Error: could not open 'aux.dat' for writing.\n"); + exit(1); + } + + // File is open, write data + fprintf(f, "aux = "); + for (int i(0); i < d->Naux-1; i++) fprintf(f, "%s, ", d->auxLabels[i].c_str()); + fprintf(f, "%s\n", d->auxLabels[d->Naux-1].c_str()); + + writeStateVectorToFile(f, fullStateVector, d->Naux); + fclose(f); + } + + free(buffer); + free(fullStateVector); + +} + + +void ParallelSaveData::saveDomain() +{ + FILE * f; + char fname[120]; + strcpy(fname, dir); + strcat(fname, "/Domain/domain"); + strcat(fname, app); + strcat(fname, ".dat\0"); f = fopen(fname, "w"); + + // Ensure file is open + if (f == NULL) { + printf("Error: could not open 'domain.dat' for writing.\n"); + exit(1); + } + + // File is open, write data + for (int i(0); i < d->Nx; i++) + fprintf(f, "%.16f ", d->x[i]); + fprintf(f, "\n"); + for (int j(0); j < d->Ny; j++) + fprintf(f, "%.16f ", d->y[j]); + fprintf(f, "\n"); + for (int k(0); k < d->Nz; k++) + fprintf(f, "%.16f ", d->z[k]); + fprintf(f, "\n"); + + + fclose(f); + +} + + +void ParallelSaveData::saveConsts() +{ + FILE * f; + char fname[120]; + strcpy(fname, dir); + strcat(fname, "/Constants/constants"); + strcat(fname, app); + strcat(fname, ".dat\0"); f = fopen(fname, "w"); + + if (env->rank == 0){ + // Ensure file is open + if (f == NULL) { + printf("Error: could not open 'constants.dat' for writing.\n"); + exit(1); + } + + fprintf(f, "constants = nx, ny, nz, Nx, Ny, Nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, cfl, Ng, gamma, sigma, "); + fprintf(f, "Ncons, Nprims, Naux, cp, dt, t, dx, dy, dz\n"); + fprintf(f, "%d %d %d %d %d %d %.16f %.16f %.16f %.16f %.16f %.16f %.16f %.16f %d %.16f %.16f %d %d %d %.16f %.16f %.16f %.16f %.16f %.16f\n", + d->nx, d->ny, d->nz, d->Nx, d->Ny, d->Nz, d->xmin, d->xmax, d->ymin, d->ymax, d->zmin, d->zmax, d->endTime, d->cfl, d->Ng, + d->gamma, d->sigma, d->Ncons, d->Nprims, d->Naux, d->cp, d->dt, d->t, d->dx, d->dy, d->dz); + + fclose(f); + } +} + + +void ParallelSaveData::saveVar(string variable, int num) +{ + int cpa(0); // cons=1,prims=2,aux=3 + int Nvar(0); // Variable number + FILE * f; + char fname[120]; + double * sendVec; // Pointer to the array to send to master and save + + // Determine which variable the user wants saved + for (int var(0); var < d->Ncons; var++) { + if (strcmp(d->consLabels[var].c_str(), variable.c_str()) == 0) { + cpa=1; Nvar=var; + break; + } + } + + if (!cpa) { + for (int var(0); var < d->Nprims; var++) { + if (strcmp(d->primsLabels[var].c_str(), variable.c_str()) == 0) { + cpa=2; Nvar=var; + break; + } + } + } + + if (!cpa) { + 
for (int var(0); var < d->Naux; var++) { + if (strcmp(d->auxLabels[var].c_str(), variable.c_str()) == 0) { + cpa=3; Nvar=var; + break; + } + } + } + + if (!cpa) { + printf("Error: Could not find user specified variable '%s'\n", variable.c_str()); + exit(1); + } + + if (cpa==1) sendVec = &d->cons[ID(Nvar, 0, 0, 0)]; + else if (cpa==2) sendVec = &d->prims[ID(Nvar, 0, 0, 0)]; + else sendVec = &d->aux[ID(Nvar, 0, 0, 0)]; + + // Allocate buffers for gathering distributed state vectors onto master process + // We do this here rather than in saveAll to allow savePrims to be called independently + // We don't want to do this in the ParallelSaveData constructor as we don't want to use up this large + // amount of memory until it's needed. + int numCellsInBuffer = (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsInBuffer *= (d->Ny - (2*d->Ng)); + if (d->dims > 2) numCellsInBuffer *= (d->Nz - (2*d->Ng)); + double *buffer = (double*) malloc(numCellsInBuffer * sizeof(double)); + int numCellsInFullStateVector = numCellsInBuffer * env->nProc; + double *fullStateVector = (double*) malloc(numCellsInFullStateVector * sizeof(double)); + + if (env->rank != 0) packStateVectorBuffer(buffer, sendVec, 1); + else copyMasterStateVectorToFullStateVector(fullStateVector, sendVec, 1); + for (int r(1); r < env->nProc; r++){ + int numCellsSent = 1 * (d->Nx-(2*d->Ng)); + if (d->dims > 1) numCellsSent *= (d->Ny-(2*d->Ng)); + if (d->dims > 2) numCellsSent *= (d->Nz-(2*d->Ng)); + sendStateVectorBufferToMaster(buffer, numCellsSent, r); + if (env->rank == 0) unpackStateVectorBuffer(buffer, fullStateVector, 1, r); + } + + + + + + + if (env->rank == 0){ + + // Directory + if (this->test) + strcpy(fname, "../../Data/TimeSeries/UserDef/"); + else + strcpy(fname, "Data/TimeSeries/UserDef/"); + sprintf(app, "%d", Nouts); + + // Location of output file + strcat(fname, variable.c_str()); + strcat(fname, app); + strcat(fname, ".dat\0"); + f = fopen(fname, "w"); + + // Ensure file is open + if (f == NULL) { + printf("Error: could not open user-defined file for writing.\n"); + exit(1); + } + + // File is open, write data + fprintf(f, "var = %s, t = %18.16f\n", variable.c_str(), d->t); + + writeStateVectorToFile(f, fullStateVector, 1); + + + fclose(f); + + + // For first output add the variables we are saving + if (Nouts==0) { + if (Ncount==0) { + ofstream info; + if (this->test) + strcpy(fname, "../../Data/TimeSeries/UserDef/"); + else + strcpy(fname, "Data/TimeSeries/UserDef/"); + strcat(fname, "info"); + info.open(fname); + info << variable << endl; + info.close(); + } + else { + ofstream info; + info.open("Data/TimeSeries/UserDef/info", ios::app); + info << variable << endl; + info.close(); + } + } + Ncount++; + // Increment if this is the last variable to save in this timestep + if (Ncount == num) { + Ncount = 0; + Nouts++; + } + } + + free(buffer); + free(fullStateVector); + +} diff --git a/Project/GPU/Src/saveData.cu b/Project/GPU/Src/serialSaveData.cu similarity index 96% rename from Project/GPU/Src/saveData.cu rename to Project/GPU/Src/serialSaveData.cu index c87bbccb..a0115466 100644 --- a/Project/GPU/Src/saveData.cu +++ b/Project/GPU/Src/serialSaveData.cu @@ -1,14 +1,13 @@ -#include "saveData.h" +#include "serialSaveData.h" #include #include #include using namespace std; -// Macro for getting array index #define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) -void SaveData::saveAll(bool timeSeries) +void SerialSaveData::saveAll(bool timeSeries) { // Clean directory 
variable dir[0] = '\0'; @@ -31,7 +30,7 @@ void SaveData::saveAll(bool timeSeries) this->saveConsts(); } -void SaveData::saveCons() +void SerialSaveData::saveCons() { FILE * f; @@ -89,7 +88,7 @@ void SaveData::saveCons() } -void SaveData::savePrims() +void SerialSaveData::savePrims() { FILE * f; char fname[120]; @@ -142,7 +141,7 @@ void SaveData::savePrims() } -void SaveData::saveAux() +void SerialSaveData::saveAux() { FILE * f; char fname[120]; @@ -196,7 +195,7 @@ void SaveData::saveAux() } -void SaveData::saveDomain() +void SerialSaveData::saveDomain() { FILE * f; char fname[120]; @@ -228,7 +227,7 @@ void SaveData::saveDomain() } -void SaveData::saveConsts() +void SerialSaveData::saveConsts() { FILE * f; char fname[120]; @@ -254,7 +253,7 @@ void SaveData::saveConsts() } -void SaveData::saveVar(string variable, int num) +void SerialSaveData::saveVar(string variable, int num) { int cpa(0); // cons=1,prims=2,aux=3 int Nvar(0); // Variable number From e879fb8896edd8400ab25f3916b22eab39338025 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 1 Sep 2020 09:40:26 +0100 Subject: [PATCH 14/56] split out compiling and final linking into nvcc/g++, to prepare for adding MPI to build system --- Project/GPU/Makefile | 75 +++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 9028e8b3..abde3e2c 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -2,7 +2,12 @@ # and links for main.cc # Compiler -CC = nvcc + +# We will compile most object files with (NVCC), other than those object files that use MPI. If using MPI, set CC_CPU=mpicc. In that case, the object files which use MPI will be compiled with mpicc. For linking, CC_CPU will be used (this should be set to mpicc if using MPI) +CC_CPU = g++ +CC_GPU = nvcc + +USE_MPI=0 # Module directory MODULE_DIR = ./Src @@ -20,10 +25,10 @@ RTFIND_INC_DIR = ./CminpackLibrary/Include RTFIND_SRC_DIR = ./CminpackLibrary/Src # C++ compiler flags -CXXFLAGS = -Xcompiler -fopenmp -Xcompiler -Wall +CXXFLAGS = -fopenmp -Wall # NVIDIA compiler flags -NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 +NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_61 -Xcompiler -fopenmp -Xcompiler -Wall # Sources SRCS = main.cu \ @@ -42,16 +47,27 @@ SRCS = main.cu \ SSP3.cu \ IMEX3Args.cu \ boundaryConds.cu \ - serialSaveData.cu \ - serialEnv.cu \ fluxVectorSplitting.cu \ srrmhd.cu \ C2PArgs.cu +PARALLEL_SRCS = parallelSaveData.cu \ + parallelEnv.cu \ + parallelBoundaryConds.cu + +SERIAL_SRCS = serialSaveData.cu \ + serialEnv.cu # Headers HDRS = ${SRCS:.cu=.h} cudaErrorCheck.h + +ifeq ($(USE_MPI), 1) + SRCS += ${PARALLEL_SRCS} +else + SRCS += ${SERIAL_SRCS} +endif + # Objects OBJS = ${SRCS:.cu=.o} @@ -98,72 +114,75 @@ clean : ################# simData.o : $(MODULE_DIR)/simData.cu $(INC_DIR)/simData.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) initFunc.o : $(MODULE_DIR)/initFunc.cu $(INC_DIR)/initFunc.h $(INC_DIR)/simData.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) C2PArgs.o : $(MODULE_DIR)/C2PArgs.cu $(INC_DIR)/C2PArgs.h $(INC_DIR)/simData.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) twoFluidEMHD.o : $(MODULE_DIR)/twoFluidEMHD.cu $(INC_DIR)/twoFluidEMHD.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) srmhd.o : $(MODULE_DIR)/srmhd.cu $(INC_DIR)/srmhd.h - $(CC) $< 
-c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) -Xcompiler -fopenmp + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) -Xcompiler -fopenmp srrmhd.o : $(MODULE_DIR)/srrmhd.cu $(INC_DIR)/srrmhd.h $(INC_DIR)/C2PArgs.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) simulation.o : $(MODULE_DIR)/simulation.cu $(INC_DIR)/simulation.h $(INC_DIR)/model.h $(INC_DIR)/simData.h $(INC_DIR)/saveData.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) main.o : $(MODULE_DIR)/main.cu $(INC_DIR)/simulation.h $(INC_DIR)/model.h $(INC_DIR)/simData.h $(INC_DIR)/initFunc.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) $(CXXFLAGS) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) weno.o : $(MODULE_DIR)/weno.cu $(INC_DIR)/weno.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) RK2.o : $(MODULE_DIR)/RK2.cu $(INC_DIR)/RK2.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp rkSplit.o : $(MODULE_DIR)/rkSplit.cu $(INC_DIR)/rkSplit.h $(INC_DIR)/RK2.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp backwardsRK.o : $(MODULE_DIR)/backwardsRK.cu $(INC_DIR)/backwardsRK.h $(INC_DIR)/backRKArgs.h $(INC_DIR)/rkSplit.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) backRKArgs.o : $(MODULE_DIR)/backRKArgs.cu $(INC_DIR)/backRKArgs.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) SSP2.o : $(MODULE_DIR)/SSP2.cu $(INC_DIR)/SSP2.h $(INC_DIR)/IMEX2Args.h $(INC_DIR)/timeInt.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) -Xcompiler -fopenmp + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) -Xcompiler -fopenmp IMEX2Args.o : $(MODULE_DIR)/IMEX2Args.cu $(INC_DIR)/IMEX2Args.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) SSP3.o : $(MODULE_DIR)/SSP3.cu $(INC_DIR)/SSP3.h $(INC_DIR)/IMEX3Args.h $(INC_DIR)/timeInt.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) IMEX3Args.o : $(MODULE_DIR)/IMEX3Args.cu $(INC_DIR)/IMEX3Args.h $(INC_DIR)/IMEX2Args.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) boundaryConds.o : $(MODULE_DIR)/boundaryConds.cu $(INC_DIR)/boundaryConds.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp serialSaveData.o : $(MODULE_DIR)/serialSaveData.cu $(INC_DIR)/serialSaveData.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) fluxVectorSplitting.o : $(MODULE_DIR)/fluxVectorSplitting.cu $(INC_DIR)/fluxVectorSplitting.h $(INC_DIR)/weno.h - $(CC) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp +# link device code using CC_GPU (g++/mpi++ can't do this) +gpu_link.o : $(RTFIND_OBJS) $(OBJS) + $(CC_GPU) $^ -dlink -o $@ $(NVFLAGS) -lcudadevrt # Executable -main : $(RTFIND_OBJS) $(OBJS) - @$(CC) $^ -o $@ $(CXXFLAGS) $(NVFLAGS) +main : gpu_link.o 
$(RTFIND_OBJS) $(OBJS) + $(CC_CPU) $^ -o $@ $(CXXFLAGS) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 buildRootfinder: @cd $(RTFIND_DIR) && $(MAKE) objects From 30ff5774f3ab42adcbb7901c52ba43951589e34c Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 1 Sep 2020 11:01:31 +0100 Subject: [PATCH 15/56] adding parallel BCS files --- Project/GPU/Include/parallelBoundaryConds.h | 314 +++++++++ Project/GPU/Src/parallelBoundaryConds.cu | 725 ++++++++++++++++++++ 2 files changed, 1039 insertions(+) create mode 100644 Project/GPU/Include/parallelBoundaryConds.h create mode 100644 Project/GPU/Src/parallelBoundaryConds.cu diff --git a/Project/GPU/Include/parallelBoundaryConds.h b/Project/GPU/Include/parallelBoundaryConds.h new file mode 100644 index 00000000..85978654 --- /dev/null +++ b/Project/GPU/Include/parallelBoundaryConds.h @@ -0,0 +1,314 @@ +#ifndef PARALLEL_BOUNDARYCONDS_H +#define PARALLEL_BOUNDARYCONDS_H + +#include "simData.h" +#include "boundaryConds.h" +#include "parallelEnv.h" + +//! Boundary Conditions for a data structure that has been distributed across ranks +/*! + @par + Base class for implementations of different boundary conditions across a distributed data structure. Contains common functions + used by more than one Boundary Condition type. + The fields to which the boundary conditions are applied are those passed into + the function apply, not those in the SimData class. +*/ +class ParallelBcs : public Bcs +{ + + public: + + ParallelEnv * env; //!< Pointer to ParallelEnv class containing platform specific info such as MPI details + + int xPeriodic, yPeriodic, zPeriodic; + + //! Constructor + /*! + Calls constructor of base class to store the pointer to the Data class and ParallelEnv class. + + @param[in] *data pointer to Data class + @param[in] *env pointer to ParallelEnv class + @sa Bcs::Bcs + */ + ParallelBcs(Data *data, ParallelEnv *env, int xPeriodic=1, int yPeriodic=1, int zPeriodic=1) : Bcs(data), env(env) + { + env->setParallelDecomposition(xPeriodic, yPeriodic, zPeriodic); + } + + virtual ~ParallelBcs() { } //!< Destructor + + /*! + Exchanges buffers packed with ghost cells with neighbouring subdomains using MPI. + + @param[in] *sendToLeftBuf pointer to the buffer contaning ghost cells at the left (front, bottom) face, + to be sent to the left (front, bottom) neighbour process + @param[in] *sendToRightBuf pointer to the buffer contaning ghost cells at the right (back, top) face, + to be sent to the right (back, top) neighbour process + @param[out] *recvFromLeftBuf buffer for receiving ghost cells from the left (front, bottom) process + @param[out] *recvFromRightBuf buffer for receiving ghost cells from the right (back, top) process + @param[in] leftNeighbour id of the left (front, bottom) process in the global MPI communicator + @param[in] rightNeighbour id of the right (back, top) process in the global MPI communicator + @param[in] numCellsSent number of cells in the ghost region + */ + void swapGhostBuffers(double *sendToLeftBuf, double *sendToRightBuf, double *recvFromLeftBuf, + double *recvFromRightBuf, int leftNeighbour, int rightNeighbour, int numCellsSent); + + /*! + For a particular state vector (cons, prims, aux) copies cells along the left and right faces + of the physical (non-ghost) cells in a subdomain and packs them into buffers for MPI communication to + another process. 
+ + @param[out] *sendToLeftBuf pointer to the buffer to pack with cells at the left face, + to be sent to the left neighbour process + @param[out] *sendToRightBuf pointer to the buffer to pack with cells at the right face, + to be sent to the right neighbour process + @param[in] *stateVector pointer to cons, prims or aux array + @param[in] nVars number of variables in the cons, prims or aux array + */ + void packXBuffer(double *sendToLeftBuf, double *sendToRightBuf, double *stateVector, int nVars); + + /*! + For a particular state vector (cons, prims, aux) copies cells received from a neighbour process into the ghost + cell region at the left and right faces of a subdomain. + + @param[out] *sendToLeftBuf pointer to the buffer to pack with cells at the left face, + to be sent to the left neighbour process + @param[out] *sendToRightBuf pointer to the buffer to pack with cells at the right face, + to be sent to the right neighbour process + @param[in] *stateVector pointer to cons, prims or aux array + @param[in] nVars number of variables in the cons, prims or aux array + */ + void unpackXBuffer(double *recvFromLeftBuf, double *recfFromRightBuf, double *stateVector, int nVars); + + /*! + For a particular state vector (cons, prims, aux) copies cells along the front and back faces + of the physical (non-ghost) cells in a subdomain and packs them into buffers for MPI communication to + another process. + + @param[out] *sendToLeftBuf pointer to the buffer to pack with cells at the front face, + to be sent to the front neighbour process + @param[out] *sendToRightBuf pointer to the buffer to pack with cells at the back face, + to be sent to the back neighbour process + @param[in] *stateVector pointer to cons, prims or aux array + @param[in] nVars number of variables in the cons, prims or aux array + */ + void packYBuffer(double *sendToLeftBuf, double *sendToRightBuf, double *stateVector, int nVars); + + /*! + For a particular state vector (cons, prims, aux) copies cells received from a neighbour process into the ghost + cell region at the front and back faces of a subdomain. + + @param[out] *sendToLeftBuf pointer to the buffer to pack with cells at the front face, + to be sent to the front neighbour process + @param[out] *sendToRightBuf pointer to the buffer to pack with cells at the back face, + to be sent to the back neighbour process + @param[in] *stateVector pointer to cons, prims or aux array + @param[in] nVars number of variables in the cons, prims or aux array + */ + void unpackYBuffer(double *recvFromLeftBuf, double *recfFromRightBuf, double *stateVector, int nVars); + + /*! + For a particular state vector (cons, prims, aux) copies cells received from a neighbour process into the ghost + cell region at the bottom and top faces of a subdomain. + + @param[out] *sendToLeftBuf pointer to the buffer to pack with cells at the bottom face, + to be sent to the bottom neighbour process + @param[out] *sendToRightBuf pointer to the buffer to pack with cells at the top face, + to be sent to the top neighbour process + @param[in] *stateVector pointer to cons, prims or aux array + @param[in] nVars number of variables in the cons, prims or aux array + */ + void packZBuffer(double *sendToLeftBuf, double *sendToRightBuf, double *stateVector, int nVars); + +/*! + For a particular state vector (cons, prims, aux) copies cells received from a neighbour process into the ghost + cell region at the bottom and top faces of a subdomain. 
+ + @param[out] *sendToLeftBuf pointer to the buffer to pack with cells at the bottom face, + to be sent to the bottom neighbour process + @param[out] *sendToRightBuf pointer to the buffer to pack with cells at the top face, + to be sent to the top neighbour process + @param[in] *stateVector pointer to cons, prims or aux array + @param[in] nVars number of variables in the cons, prims or aux array + */ + void unpackZBuffer(double *recvFromLeftBuf, double *recfFromRightBuf, double *stateVector, int nVars); + +}; + +//! Outflow boundary conditions for a data structure that has been distributed across ranks +/*! + Imposes flows that exit the domain freely at all boundaries, analogous to a + domain that extends to infinity in each direction. + All ghost cells are identical to their nearest physical cell.
+  For left-right reconstruction:
+  Before...
+  ______________________________
+  |0|1|2|3|4||5|6|..... |12||13||14|15|16|17|
+  |0|1|2|3|4||5|6|..... |12||13||14|15|16|17|
+
+
+  After....
+  ______________________________
+  |4|4|4|4||4|5|6|..... |12||13||13|13|13|13|
+  |4|4|4|4||4|5|6|..... |12||13||13|13|13|13|
+
+
+ ..and similar in other directions. +*/ +class ParallelOutflow : public ParallelBcs +{ + public: + //! Constructor + /*! + Calls constructor of base class to store the pointer to the Data class. + + @param[in] *data pointer to Data class + @sa ParallelBcs::ParallelBcs + */ + ParallelOutflow(Data * data, ParallelEnv *env) : ParallelBcs(data, env, xPeriodic=0, yPeriodic=0, zPeriodic=0) { } + + virtual ~ParallelOutflow() { } //!< Destructor + + //! Application function + /*! + Applies the Outflow boundary conditions to the ghost cells. + + @param[in, out] *cons pointer to the conservative (sized) vector + @param[in, out] *prims optional pointer to the primitive vector + @param[in, out] *aux optional pointer to the primitive vector + @sa Bcs::apply + */ + void apply(double * cons, double * prims = NULL, double * aux = NULL); + + /*! + Applies the Outflow boundary conditions to the ghost cells of subdomains that have an external face along + the x dimension. + + @param[in, out] *stateVector pointer to one of cons, prims, aux + @param[in] nVars number of variables in the cons, prims or aux array + */ + void setXBoundary(double *stateVector, int nVars); + + /*! + Applies the Outflow boundary conditions to the ghost cells of subdomains that have an external face along + the y dimension. + + @param[in, out] *stateVector pointer to one of cons, prims, aux + @param[in] nVars number of variables in the cons, prims or aux array + */ + void setYBoundary(double *stateVector, int nVars); + + /*! + Applies the Outflow boundary conditions to the ghost cells of subdomains that have an external face along + the z dimension. + + @param[in, out] *stateVector pointer to one of cons, prims, aux + @param[in] nVars number of variables in the cons, prims or aux array + */ + void setZBoundary(double *stateVector, int nVars); +}; + + +//! Periodic boundary conditions for a data structure that has been distributed across ranks +/*! + Flows that exit across one domain boundary re-enter at the opposing + end. I.e. the N ghost cells at one edge of the domain are set to the values + of the N physical cells before the ghost cells at the opposing edge. + + For left-right reconstruction:
+  (Note that the lower and upper halves of each row will lie on different ranks)
+  Before...
+  ____________________________
+  |0|1|2|3||4|5|6|..... |13||14|15|16|17|
+  |0|1|2|3||4|5|6|..... |13||14|15|16|17|
+
+  After....
+  ____________________________
+  |10|11|12|13||4|5|6|..... |13||4|5|6|7|
+  |10|11|12|13||4|5|6|..... |13||4|5|6|7|
+
+ ..and similar in other directions. + +*/ +class ParallelPeriodic : public ParallelBcs +{ + + public: + + //! Constructor + /*! + Calls constructor of base class to store the pointer to the Data class and ParallelEnv class. + + @param[in] *data pointer to Data class + @param[in] *env pointer to ParallelEnv class + @sa ParallelBcs::ParallelBcs + */ + ParallelPeriodic(Data * data, ParallelEnv * env) : ParallelBcs(data, env, xPeriodic=1, yPeriodic=1, zPeriodic=1) { } + + virtual ~ParallelPeriodic() { } //!< Destructor + + //! Application function + /*! + Applies the Periodic boundary conditions to the ghost cells. + + @param[in, out] *cons pointer to the conservative (sized) vector + @param[in, out] *prims optional pointer to the primitive vector + @param[in, out] *aux optional pointer to the primitive vector + @sa Bcs::apply + */ + void apply(double * cons, double * prims = NULL, double * aux = NULL); + +}; + +//! Flow boundary conditions +/*! + Boundary conditions used for the Kelvin Helmholtz instability. The + x-direction is periodic and y- and z-directions are outflow. +*/ + +class ParallelFlow : public ParallelBcs +{ + public: + //! Constructor + /*! + Calls constructor of base class to store the pointer to the Data class. + + @param[in] *data pointer to Data class + @sa ParallelBcs::ParallelBcs + */ + ParallelFlow(Data * data, ParallelEnv *env) : ParallelBcs(data, env, xPeriodic=1, yPeriodic=0, zPeriodic=0) { } + + virtual ~ParallelFlow() { } //!< Destructor + + //! Application function + /*! + Applies the Outflow boundary conditions to the ghost cells. + + @param[in, out] *cons pointer to the conservative (sized) vector + @param[in, out] *prims optional pointer to the primitive vector + @param[in, out] *aux optional pointer to the primitive vector + @sa Bcs::apply + */ + void apply(double * cons, double * prims = NULL, double * aux = NULL); + + /*! + Applies the Outflow boundary conditions to the ghost cells of subdomains that have an external face along + the y dimension. + + @param[in, out] *stateVector pointer to one of cons, prims, aux + @param[in] nVars number of variables in the cons, prims or aux array + */ + void setYBoundary(double *stateVector, int nVars); + + /*! + Applies the Outflow boundary conditions to the ghost cells of subdomains that have an external face along + the z dimension. + + @param[in, out] *stateVector pointer to one of cons, prims, aux + @param[in] nVars number of variables in the cons, prims or aux array + */ + void setZBoundary(double *stateVector, int nVars); +}; + + +#endif diff --git a/Project/GPU/Src/parallelBoundaryConds.cu b/Project/GPU/Src/parallelBoundaryConds.cu new file mode 100644 index 00000000..219d589b --- /dev/null +++ b/Project/GPU/Src/parallelBoundaryConds.cu @@ -0,0 +1,725 @@ +#include "parallelBoundaryConds.h" +#include "mpi.h" +#include "platformEnv.h" +#include + +// TODO -- Using three arrays here means we can keep the same (i,j,k) order for each neighbour direction. Decide if this is worth it. 
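The helpers declared in parallelBoundaryConds.h above implement a standard halo exchange: pack the outermost physical slices into contiguous buffers, swap them with the neighbouring ranks via MPI_Sendrecv, then unpack the received buffers into the ghost slices. A minimal stand-alone sketch of that pattern, for one variable on a 1-D subdomain with Ng ghost cells per side, is given below; the names exchangeX and nxLocal are invented for the illustration and do not appear in this patch.

#include <mpi.h>
#include <vector>

// Minimal sketch of the pack / swap / unpack halo exchange for one variable on a
// 1-D subdomain. u holds Ng ghost cells, nxLocal-2*Ng physical cells, Ng ghost cells.
// All names here are illustrative only.
void exchangeX(std::vector<double> &u, int Ng, MPI_Comm cart)
{
  int left, right;
  MPI_Cart_shift(cart, 0, 1, &left, &right);      // neighbour ranks in the x direction

  const int nxLocal = static_cast<int>(u.size());
  std::vector<double> sendL(Ng), sendR(Ng), recvL(Ng), recvR(Ng);

  // Pack: first and last Ng physical cells
  for (int i(0); i < Ng; i++) {
    sendL[i] = u[Ng + i];
    sendR[i] = u[nxLocal - 2*Ng + i];
  }

  // Swap: send left / receive from right, then send right / receive from left
  MPI_Status status;
  MPI_Sendrecv(sendL.data(), Ng, MPI_DOUBLE, left,  100,
               recvR.data(), Ng, MPI_DOUBLE, right, 100, cart, &status);
  MPI_Sendrecv(sendR.data(), Ng, MPI_DOUBLE, right, 100,
               recvL.data(), Ng, MPI_DOUBLE, left,  100, cart, &status);

  // Unpack: fill the ghost cells at each end
  for (int i(0); i < Ng; i++) {
    u[i] = recvL[i];
    u[nxLocal - Ng + i] = recvR[i];
  }
}

The functions that follow have the same structure, with the extra loops over variables and over the y and z planes, and the outflow variants additionally overwrite the ghost cells on any face that has no neighbouring rank.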
+#define ID_XBUFF(variable, gdx, jdx, kdx) ((variable)*(d->Ng)*(d->Ny)*(d->Nz) + (gdx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) +#define ID_YBUFF(variable, idx, gdx, kdx) ((variable)*(d->Nx)*(d->Ng)*(d->Nz) + (idx)*(d->Ng)*(d->Nz) + (gdx)*(d->Nz) + (kdx)) +#define ID_ZBUFF(variable, idx, jdx, gdx) ((variable)*(d->Nx)*(d->Ny)*(d->Ng) + (idx)*(d->Ny)*(d->Ng) + (jdx)*(d->Ng) + (gdx)) + +void ParallelBcs::swapGhostBuffers(double *sendToLeftBuf, double *sendToRightBuf, double *recvFromLeftBuf, + double *recvFromRightBuf, int leftNeighbour, int rightNeighbour, int numCellsSent){ + + // MPI message vars + int tag = 100; + MPI_Status status; + + // Send to left and receive from right neighbour process + MPI_Sendrecv(sendToLeftBuf, numCellsSent, MPI_DOUBLE, + leftNeighbour, tag, + recvFromRightBuf, numCellsSent, MPI_DOUBLE, + rightNeighbour, tag, + env->mpiCartesianComm, &status); + // Send to right and receive from left neighbour process + MPI_Sendrecv(sendToRightBuf, numCellsSent, MPI_DOUBLE, + rightNeighbour, tag, + recvFromLeftBuf, numCellsSent, MPI_DOUBLE, + leftNeighbour, tag, + env->mpiCartesianComm, &status); +} + +void ParallelBcs::packXBuffer(double *sendToLeftBuf, double *sendToRightBuf, double *stateVector, int nVars){ + Data * d(this->data); + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Ng; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + // Prepare buffer to send left + sendToLeftBuf[ID_XBUFF(var, i, j, k)] = stateVector[ID(var, d->Ng + i, j, k)]; + // Prepare buffer to send right + sendToRightBuf[ID_XBUFF(var, i, j, k)] = stateVector[ID(var, d->Nx-(2*d->Ng) + i, j, k)]; + } + } + } + } +} + +void ParallelBcs::unpackXBuffer(double *recvFromLeftBuf, double *recvFromRightBuf, double *stateVector, int nVars){ + Data * d(this->data); + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Ng; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + // Unpack buffer from right neighbour + stateVector[ID(var, d->Nx - d->Ng + i, j, k)] = recvFromRightBuf[ID_XBUFF(var, i, j, k)]; + // Unpack buffer from left neighbour + stateVector[ID(var, i, j, k)] = recvFromLeftBuf[ID_XBUFF(var, i, j, k)]; + } + } + } + } +} + +void ParallelBcs::packYBuffer(double *sendToLeftBuf, double *sendToRightBuf, double *stateVector, int nVars){ + Data * d(this->data); + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ng; j++) { + for (int k(0); k < d->Nz; k++) { + // Prepare buffer to send left + sendToLeftBuf[ID_YBUFF(var, i, j, k)] = stateVector[ID(var, i, d->Ng + j, k)]; + // Prepare buffer to send right + sendToRightBuf[ID_YBUFF(var, i, j, k)] = stateVector[ID(var, i, d->Ny-(2*d->Ng) + j, k)]; + } + } + } + } +} + +void ParallelBcs::unpackYBuffer(double *recvFromLeftBuf, double *recvFromRightBuf, double *stateVector, int nVars){ + Data * d(this->data); + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ng; j++) { + for (int k(0); k < d->Nz; k++) { + // Unpack buffer from right neighbour + stateVector[ID(var, i, d->Ny - d->Ng + j, k)] = recvFromRightBuf[ID_YBUFF(var, i, j, k)]; + // Unpack buffer from left neighbour + stateVector[ID(var, i, j, k)] = recvFromLeftBuf[ID_YBUFF(var, i, j, k)]; + } + } + } + } +} + +void ParallelBcs::packZBuffer(double *sendToLeftBuf, double *sendToRightBuf, double *stateVector, int nVars){ + Data * d(this->data); + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < 
d->Ny; j++) { + for (int k(0); k < d->Ng; k++) { + // Prepare buffer to send left + sendToLeftBuf[ID_ZBUFF(var, i, j, k)] = stateVector[ID(var, i, j, d->Ng + k)]; + // Prepare buffer to send right + sendToRightBuf[ID_ZBUFF(var, i, j, k)] = stateVector[ID(var, i, j, d->Nz-(2*d->Ng) + k)]; + } + } + } + } +} + +void ParallelBcs::unpackZBuffer(double *recvFromLeftBuf, double *recvFromRightBuf, double *stateVector, int nVars){ + Data * d(this->data); + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ng; j++) { + for (int k(0); k < d->Nz; k++) { + // Unpack buffer from right neighbour + stateVector[ID(var, i, j, d->Nz - d->Ng + k)] = recvFromRightBuf[ID_ZBUFF(var, i, j, k)]; + // Unpack buffer from left neighbour + stateVector[ID(var, i, j, k)] = recvFromLeftBuf[ID_ZBUFF(var, i, j, k)]; + } + } + } + } +} + +void ParallelPeriodic::apply(double * cons, double * prims, double * aux) +{ + // Syntax + Data * d(this->data); + + // Allocate one ghost region buffer array the size of the largest ghost region + int maxSendBufSize = std::max(std::max(d->Ncons, d->Nprims), d->Naux) * d->Ng; + if (d->Ny > 1) { + maxSendBufSize *= std::max(d->Nx, d->Ny); + } + if (d->Nz > 1) { + maxSendBufSize *= std::max(std::min(d->Nx, d->Ny), (d->Nz)); + } + + // TODO -- Could do left and right halo exchange separately and allocate half as many buffers but this would + // add twice as many loops + + // Allocate temporary buffers for ghost region exchange + // TODO -- should allocate this once at beginning of run + double *sendToLeftBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *sendToRightBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *recvFromRightBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *recvFromLeftBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + + int numCellsSent; + + // x dimension + + // Cons + numCellsSent = d->Ncons * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + } + + if (d->Ny > 1) { + // y dimension + + // Cons + numCellsSent = d->Ncons * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + 
swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + } + } + + + if (d->Nz > 1) { + // y dimension + + // Cons + numCellsSent = d->Ncons * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + } + } + + // Todo -- allocate and free in constructor/destructor + free(sendToLeftBuf); + free(sendToRightBuf); + free(recvFromRightBuf); + free(recvFromLeftBuf); + +} + +void ParallelOutflow::setXBoundary(double *stateVector, int nVars){ + // Syntax + Data * d(this->data); + + // Left boundary + // TODO -- could technically only check this once per [cons, aux, prims] but time to check should be negligible + if (env->isNeighbourExternal(0, 0)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Ng; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + stateVector[ID(var, i, j, k)] = stateVector[ID(var, d->Ng, j, k)]; + } + } + } + } + } + + // Right boundary + if (env->isNeighbourExternal(0, 1)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Ng; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + stateVector[ID(var, d->Nx - d->Ng + i, j, k)] = stateVector[ID(var, d->Nx - d->Ng - 1, j, k)]; + } + } + } + } + } +} + + +void ParallelOutflow::setYBoundary(double *stateVector, int nVars){ + // Syntax + Data * d(this->data); + + // Front boundary + if (env->isNeighbourExternal(1, 0)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ng; j++) { + for (int k(0); k < d->Nz; k++) { + // Front + stateVector[ID(var, i, j, k)] = stateVector[ID(var, i, d->Ng, k)]; + } + } + } + } + } + + // Back boundary + if (env->isNeighbourExternal(1, 1)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ng; j++) { + for (int k(0); k < d->Nz; k++) { + // Back + stateVector[ID(var, i, d->Ny - d->Ng + j, k)] = stateVector[ID(var, i, d->Ny - d->Ng - 1, k)]; + } + } + } + } + } +} + +void ParallelOutflow::setZBoundary(double *stateVector, int nVars){ + // 
Syntax + Data * d(this->data); + + // Bottom boundary + if (env->isNeighbourExternal(2, 0)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Ng; k++) { + // Bottom + stateVector[ID(var, i, j, k)] = stateVector[ID(var, i, j, d->Ng)]; + } + } + } + } + } + + // Top boundary + if (env->isNeighbourExternal(2, 1)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Ng; k++) { + // Top + stateVector[ID(var, i, j, d->Nz - d->Ng + k)] = stateVector[ID(var, i, j, d->Nz - d->Ng - 1)]; + } + } + } + } + } +} + + + +void ParallelOutflow::apply(double * cons, double * prims, double * aux) +{ + // Syntax + Data * d(this->data); + + // Allocate one ghost region buffer array the size of the largest ghost region + int maxSendBufSize = std::max(std::max(d->Ncons, d->Nprims), d->Naux) * d->Ng; + if (d->Ny > 1) { + maxSendBufSize *= std::max(d->Nx, d->Ny); + } + if (d->Nz > 1) { + maxSendBufSize *= std::max(std::min(d->Nx, d->Ny), (d->Nz)); + } + + // TODO -- Could do left and right halo exchange separately and allocate half as many buffers but this would + // add twice as many loops + + // Allocate temporary buffers for ghost region exchange + // TODO -- should allocate this once at beginning of run + double *sendToLeftBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *sendToRightBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *recvFromRightBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *recvFromLeftBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + + int numCellsSent; + + // x dimension + + // Cons + numCellsSent = d->Ncons * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + setXBoundary(cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + setXBoundary(prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + setXBoundary(aux, d->Naux); + } + + if (d->Ny > 1) { + // y dimension + + // Cons + numCellsSent = d->Ncons * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + setYBoundary(cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, 
env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + setYBoundary(prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + setYBoundary(aux, d->Naux); + } + } + + + if (d->Nz > 1) { + // y dimension + + // Cons + numCellsSent = d->Ncons * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + setZBoundary(cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + setZBoundary(prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + setZBoundary(aux, d->Naux); + } + } + + free(sendToLeftBuf); + free(sendToRightBuf); + free(recvFromRightBuf); + free(recvFromLeftBuf); + +} + + +// TODO -- these are shared by ParallelOutflow, so could be added to the ParallelBcs base class. 
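The TODO above points out that the face-copy helpers defined just below for ParallelFlow duplicate those in ParallelOutflow. A minimal sketch of the refactor it suggests, using stand-in names (GridStub, FaceCopyBcs, OutflowLike) that are not part of this patch: the shared outflow-style fill lives once in a base class and both boundary-condition types inherit it.

#include <vector>

// Sketch only: stand-in types illustrating how the duplicated helpers could be
// hoisted into the common base class, as the TODO suggests.
struct GridStub { int N; int Ng; };           // plays the role of Data

class FaceCopyBcs                             // plays the role of ParallelBcs
{
  public:
    explicit FaceCopyBcs(GridStub g) : g(g) { }

  protected:
    GridStub g;

    // Shared outflow-style fill: copy the nearest physical cell into each ghost cell.
    void setBoundary(std::vector<double> &u)
    {
      for (int i(0); i < g.Ng; i++) {
        u[i] = u[g.Ng];                             // left/front/bottom ghosts
        u[g.N - g.Ng + i] = u[g.N - g.Ng - 1];      // right/back/top ghosts
      }
    }
};

class OutflowLike : public FaceCopyBcs        // ParallelOutflow and ParallelFlow would both reuse it
{
  public:
    using FaceCopyBcs::FaceCopyBcs;
    void apply(std::vector<double> &cons) { setBoundary(cons); }
};

In the real classes the loops run over all variables and the external-face checks (env->isNeighbourExternal) stay as they are; only the duplicated bodies would move.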
+void ParallelFlow::setYBoundary(double *stateVector, int nVars){ + // Syntax + Data * d(this->data); + + // Front boundary + if (env->isNeighbourExternal(1, 0)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ng; j++) { + for (int k(0); k < d->Nz; k++) { + // Front + stateVector[ID(var, i, j, k)] = stateVector[ID(var, i, d->Ng, k)]; + } + } + } + } + } + + // Back boundary + if (env->isNeighbourExternal(1, 1)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ng; j++) { + for (int k(0); k < d->Nz; k++) { + // Back + stateVector[ID(var, i, d->Ny - d->Ng + j, k)] = stateVector[ID(var, i, d->Ny - d->Ng - 1, k)]; + } + } + } + } + } +} + +void ParallelFlow::setZBoundary(double *stateVector, int nVars){ + // Syntax + Data * d(this->data); + + // Bottom boundary + if (env->isNeighbourExternal(2, 0)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Ng; k++) { + // Bottom + stateVector[ID(var, i, j, k)] = stateVector[ID(var, i, j, d->Ng)]; + } + } + } + } + } + + // Top boundary + if (env->isNeighbourExternal(2, 1)){ + for (int var(0); var < nVars; var++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Ng; k++) { + // Top + stateVector[ID(var, i, j, d->Nz - d->Ng + k)] = stateVector[ID(var, i, j, d->Nz - d->Ng - 1)]; + } + } + } + } + } +} + + + +void ParallelFlow::apply(double * cons, double * prims, double * aux) +{ + // Syntax + Data * d(this->data); + + // Allocate one ghost region buffer array the size of the largest ghost region + int maxSendBufSize = std::max(std::max(d->Ncons, d->Nprims), d->Naux) * d->Ng; + if (d->Ny > 1) { + maxSendBufSize *= std::max(d->Nx, d->Ny); + } + if (d->Nz > 1) { + maxSendBufSize *= std::max(std::min(d->Nx, d->Ny), (d->Nz)); + } + + // TODO -- Could do left and right halo exchange separately and allocate half as many buffers but this would + // add twice as many loops + + // Allocate temporary buffers for ghost region exchange + // TODO -- should allocate this once at beginning of run + double *sendToLeftBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *sendToRightBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *recvFromRightBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + double *recvFromLeftBuf = (double *) malloc(maxSendBufSize*sizeof(double)); + + int numCellsSent; + + // x dimension + + // Cons + numCellsSent = d->Ncons * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Ng * d->Ny * d->Nz; + packXBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftXNeighbourRank, + env->rightXNeighbourRank, numCellsSent); + + 
unpackXBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + } + + if (d->Ny > 1) { + // y dimension + + // Cons + numCellsSent = d->Ncons * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + setYBoundary(cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + setYBoundary(prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Nx * d->Ng * d->Nz; + packYBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftYNeighbourRank, + env->rightYNeighbourRank, numCellsSent); + + unpackYBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + setYBoundary(aux, d->Naux); + } + } + + + if (d->Nz > 1) { + // y dimension + + // Cons + numCellsSent = d->Ncons * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, cons, d->Ncons); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, cons, d->Ncons); + setZBoundary(cons, d->Ncons); + + // Prims + if (prims) { + numCellsSent = d->Nprims * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, prims, d->Nprims); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, prims, d->Nprims); + setZBoundary(prims, d->Nprims); + } + + // Aux + if (aux) { + numCellsSent = d->Naux * d->Nx * d->Ny * d->Ng; + packZBuffer(sendToLeftBuf, sendToRightBuf, aux, d->Naux); + + swapGhostBuffers(sendToLeftBuf, sendToRightBuf, recvFromLeftBuf, recvFromRightBuf, env->leftZNeighbourRank, + env->rightZNeighbourRank, numCellsSent); + + unpackZBuffer(recvFromLeftBuf, recvFromRightBuf, aux, d->Naux); + setZBoundary(aux, d->Naux); + } + } + + free(sendToLeftBuf); + free(sendToRightBuf); + free(recvFromRightBuf); + free(recvFromLeftBuf); + +} + From de65862a9e8ed8a88100fbae47c57785053096f5 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 1 Sep 2020 13:05:57 +0100 Subject: [PATCH 16/56] attempt at building with mpicc, not yet working --- Project/GPU/Makefile | 36 ++++++++++++++----- Project/GPU/Src/main.cu | 13 +++---- ...ndaryConds.cu => parallelBoundaryConds.cc} | 0 .../Src/{parallelEnv.cu => parallelEnv.cc} | 0 ...arallelSaveData.cu => parallelSaveData.cc} | 0 5 files changed, 35 insertions(+), 14 deletions(-) rename Project/GPU/Src/{parallelBoundaryConds.cu => parallelBoundaryConds.cc} (100%) rename Project/GPU/Src/{parallelEnv.cu => parallelEnv.cc} (100%) rename Project/GPU/Src/{parallelSaveData.cu => parallelSaveData.cc} (100%) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index abde3e2c..d714390b 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -4,10 +4,10 @@ # Compiler # We will compile most object files with (NVCC), 
other than those object files that use MPI. If using MPI, set CC_CPU=mpicc. In that case, the object files which use MPI will be compiled with mpicc. For linking, CC_CPU will be used (this should be set to mpicc if using MPI) -CC_CPU = g++ +CC_CPU = mpic++ CC_GPU = nvcc -USE_MPI=0 +USE_MPI=1 # Module directory MODULE_DIR = ./Src @@ -25,7 +25,7 @@ RTFIND_INC_DIR = ./CminpackLibrary/Include RTFIND_SRC_DIR = ./CminpackLibrary/Src # C++ compiler flags -CXXFLAGS = -fopenmp -Wall +CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 # NVIDIA compiler flags NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_61 -Xcompiler -fopenmp -Xcompiler -Wall @@ -61,16 +61,18 @@ SERIAL_SRCS = serialSaveData.cu \ # Headers HDRS = ${SRCS:.cu=.h} cudaErrorCheck.h - ifeq ($(USE_MPI), 1) - SRCS += ${PARALLEL_SRCS} + ENV_SRCS = ${PARALLEL_SRCS} else - SRCS += ${SERIAL_SRCS} + ENV_SRCS = ${SERIAL_SRCS} endif # Objects OBJS = ${SRCS:.cu=.o} +# Serial or Parallel CPU files. These cannot contain device code +ENV_OBJS = ${ENV_SRCS:.cu=.o} + # Rootfinder objects RTFIND_OBJS = $(RTFIND_SRC_DIR)/dogleg.o \ $(RTFIND_SRC_DIR)/dpmpar.o \ @@ -106,7 +108,7 @@ run : $(RTFIND) $(EXEC) @./$(EXEC) clean : - rm -f $(EXEC) $(OBJS) *.gch + rm -f $(EXEC) $(OBJS) $(ENV_OBJS) *.gch ################# @@ -170,18 +172,36 @@ boundaryConds.o : $(MODULE_DIR)/boundaryConds.cu $(INC_DIR)/boundaryConds.h serialSaveData.o : $(MODULE_DIR)/serialSaveData.cu $(INC_DIR)/serialSaveData.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) + serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) fluxVectorSplitting.o : $(MODULE_DIR)/fluxVectorSplitting.cu $(INC_DIR)/fluxVectorSplitting.h $(INC_DIR)/weno.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -Xcompiler -fopenmp +#parallel sources -- these need to be compiled with the MPI library linked, which can be accomplished by compiling with mpic++ + +parallelSaveData.o : $(MODULE_DIR)/parallelSaveData.cc $(INC_DIR)/parallelSaveData.h + $(CC_CPU) $< $(CXXFLAGS) -I$(INC_DIR) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 + +parallelBoundaryConds.o : $(MODULE_DIR)/parallelBoundaryConds.cc $(INC_DIR)/parallelBoundaryConds.h + $(CC_CPU) $< $(CXXFLAGS) -I$(INC_DIR) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 + +parallelEnv.o : $(MODULE_DIR)/parallelEnv.cc $(INC_DIR)/parallelEnv.h + $(CC_CPU) $< $(CXXFLAGS) -I$(INC_DIR) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 + +#end parallel sources + + +cpu_link.o : $(ENV_OBJS) + $(CC_CPU) $^ -o $@ $(CXXFLAGS) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 + # link device code using CC_GPU (g++/mpi++ can't do this) gpu_link.o : $(RTFIND_OBJS) $(OBJS) $(CC_GPU) $^ -dlink -o $@ $(NVFLAGS) -lcudadevrt # Executable -main : gpu_link.o $(RTFIND_OBJS) $(OBJS) +main : gpu_link.o $(RTFIND_OBJS) $(OBJS) cpu_link.o $(CC_CPU) $^ -o $@ $(CXXFLAGS) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 buildRootfinder: diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 4be013f4..1898ecf4 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -5,9 +5,10 @@ #include "srmhd.h" #include "srrmhd.h" #include "boundaryConds.h" +#include "parallelBoundaryConds.h" #include "rkSplit.h" #include "SSP2.h" -#include "serialSaveData.h" +#include "parallelSaveData.h" #include "fluxVectorSplitting.h" #include "serialEnv.h" @@ -42,7 +43,7 @@ int main(int argc, char *argv[]) { double sigma(0); bool output(true); int safety(180); - int nxRanks(4); + int nxRanks(1); int nyRanks(1); int 
nzRanks(1); @@ -54,7 +55,7 @@ int main(int argc, char *argv[]) { } } - SerialEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); + ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, cfl, Ng, gamma, sigma); @@ -64,15 +65,15 @@ int main(int argc, char *argv[]) { FVS fluxMethod(&data, &model); + ParallelFlow bcs(&data, &env); + Simulation sim(&data, &env); KHInstabilitySingleFluid init(&data, 1); - Flow bcs(&data); - SSP2 timeInt(&data, &model, &bcs, &fluxMethod); - SerialSaveData save(&data, &env); + ParallelSaveData save(&data, &env); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); diff --git a/Project/GPU/Src/parallelBoundaryConds.cu b/Project/GPU/Src/parallelBoundaryConds.cc similarity index 100% rename from Project/GPU/Src/parallelBoundaryConds.cu rename to Project/GPU/Src/parallelBoundaryConds.cc diff --git a/Project/GPU/Src/parallelEnv.cu b/Project/GPU/Src/parallelEnv.cc similarity index 100% rename from Project/GPU/Src/parallelEnv.cu rename to Project/GPU/Src/parallelEnv.cc diff --git a/Project/GPU/Src/parallelSaveData.cu b/Project/GPU/Src/parallelSaveData.cc similarity index 100% rename from Project/GPU/Src/parallelSaveData.cu rename to Project/GPU/Src/parallelSaveData.cc From a62c51ea73a6d3efd0ddf9233a18ffaec483f76f Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 1 Sep 2020 13:29:02 +0100 Subject: [PATCH 17/56] changed build strategy -- build everything with nvcc and link MPI libraries. This is simpler than doing some compilation with mpicc and some with nvcc but does require the user knowing the location of their MPI build. --- Project/GPU/Makefile | 29 ++++++++----------- ...ndaryConds.cc => parallelBoundaryConds.cu} | 1 + .../Src/{parallelEnv.cc => parallelEnv.cu} | 0 ...arallelSaveData.cc => parallelSaveData.cu} | 0 4 files changed, 13 insertions(+), 17 deletions(-) rename Project/GPU/Src/{parallelBoundaryConds.cc => parallelBoundaryConds.cu} (99%) rename Project/GPU/Src/{parallelEnv.cc => parallelEnv.cu} (100%) rename Project/GPU/Src/{parallelSaveData.cc => parallelSaveData.cu} (100%) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index d714390b..6cfdfbbb 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -3,12 +3,12 @@ # Compiler -# We will compile most object files with (NVCC), other than those object files that use MPI. If using MPI, set CC_CPU=mpicc. In that case, the object files which use MPI will be compiled with mpicc. 
For linking, CC_CPU will be used (this should be set to mpicc if using MPI) -CC_CPU = mpic++ CC_GPU = nvcc USE_MPI=1 +MPI_LIBRARY = /local/software/openmpi/3.0.0/gcc-cuda8.0 + # Module directory MODULE_DIR = ./Src @@ -24,6 +24,8 @@ RTFIND_INC_DIR = ./CminpackLibrary/Include # Cminpack source directory RTFIND_SRC_DIR = ./CminpackLibrary/Src +MPI_FLAGS = -lmpi -L${MPI_LIBRARY_PATH}/lib -I${MPI_LIBRARY_PATH}/include + # C++ compiler flags CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 @@ -181,28 +183,21 @@ fluxVectorSplitting.o : $(MODULE_DIR)/fluxVectorSplitting.cu $(INC_DIR)/fluxVect #parallel sources -- these need to be compiled with the MPI library linked, which can be accomplished by compiling with mpic++ -parallelSaveData.o : $(MODULE_DIR)/parallelSaveData.cc $(INC_DIR)/parallelSaveData.h - $(CC_CPU) $< $(CXXFLAGS) -I$(INC_DIR) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 +parallelSaveData.o : $(MODULE_DIR)/parallelSaveData.cu $(INC_DIR)/parallelSaveData.h + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) -parallelBoundaryConds.o : $(MODULE_DIR)/parallelBoundaryConds.cc $(INC_DIR)/parallelBoundaryConds.h - $(CC_CPU) $< $(CXXFLAGS) -I$(INC_DIR) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 +parallelBoundaryConds.o : $(MODULE_DIR)/parallelBoundaryConds.cu $(INC_DIR)/parallelBoundaryConds.h + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) -parallelEnv.o : $(MODULE_DIR)/parallelEnv.cc $(INC_DIR)/parallelEnv.h - $(CC_CPU) $< $(CXXFLAGS) -I$(INC_DIR) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 +parallelEnv.o : $(MODULE_DIR)/parallelEnv.cu $(INC_DIR)/parallelEnv.h + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) #end parallel sources -cpu_link.o : $(ENV_OBJS) - $(CC_CPU) $^ -o $@ $(CXXFLAGS) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 - -# link device code using CC_GPU (g++/mpi++ can't do this) -gpu_link.o : $(RTFIND_OBJS) $(OBJS) - $(CC_GPU) $^ -dlink -o $@ $(NVFLAGS) -lcudadevrt - # Executable -main : gpu_link.o $(RTFIND_OBJS) $(OBJS) cpu_link.o - $(CC_CPU) $^ -o $@ $(CXXFLAGS) -lcudart -lcudadevrt -L/local/software/cuda/8.0/lib64 +main : $(RTFIND_OBJS) $(OBJS) $(ENV_OBJS) + $(CC_GPU) $^ -o $@ $(NVFLAGS) $(MPI_FLAGS) buildRootfinder: @cd $(RTFIND_DIR) && $(MAKE) objects diff --git a/Project/GPU/Src/parallelBoundaryConds.cc b/Project/GPU/Src/parallelBoundaryConds.cu similarity index 99% rename from Project/GPU/Src/parallelBoundaryConds.cc rename to Project/GPU/Src/parallelBoundaryConds.cu index 219d589b..45908767 100644 --- a/Project/GPU/Src/parallelBoundaryConds.cc +++ b/Project/GPU/Src/parallelBoundaryConds.cu @@ -7,6 +7,7 @@ #define ID_XBUFF(variable, gdx, jdx, kdx) ((variable)*(d->Ng)*(d->Ny)*(d->Nz) + (gdx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) #define ID_YBUFF(variable, idx, gdx, kdx) ((variable)*(d->Nx)*(d->Ng)*(d->Nz) + (idx)*(d->Ng)*(d->Nz) + (gdx)*(d->Nz) + (kdx)) #define ID_ZBUFF(variable, idx, jdx, gdx) ((variable)*(d->Nx)*(d->Ny)*(d->Ng) + (idx)*(d->Ny)*(d->Ng) + (jdx)*(d->Ng) + (gdx)) +#define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) void ParallelBcs::swapGhostBuffers(double *sendToLeftBuf, double *sendToRightBuf, double *recvFromLeftBuf, double *recvFromRightBuf, int leftNeighbour, int rightNeighbour, int numCellsSent){ diff --git a/Project/GPU/Src/parallelEnv.cc b/Project/GPU/Src/parallelEnv.cu similarity index 100% rename from Project/GPU/Src/parallelEnv.cc rename to Project/GPU/Src/parallelEnv.cu diff --git a/Project/GPU/Src/parallelSaveData.cc 
b/Project/GPU/Src/parallelSaveData.cu similarity index 100% rename from Project/GPU/Src/parallelSaveData.cc rename to Project/GPU/Src/parallelSaveData.cu From e9c5b2455c79bc493ddacb49d192688d05426ec8 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 1 Sep 2020 16:48:15 +0100 Subject: [PATCH 18/56] fix to bcs for parallel version --- Project/GPU/Include/boundaryConds.h | 82 +++++++++++++++++++++++++++-- Project/GPU/Makefile | 9 ++-- 2 files changed, 84 insertions(+), 7 deletions(-) diff --git a/Project/GPU/Include/boundaryConds.h b/Project/GPU/Include/boundaryConds.h index 8607d072..6db94089 100644 --- a/Project/GPU/Include/boundaryConds.h +++ b/Project/GPU/Include/boundaryConds.h @@ -21,8 +21,27 @@ class Bcs Constructor simply stores the pointer to the Data class. @param[in] *data pointer to the Data class + @param[in] *env pointer to the PlatformEnv class */ - Bcs(Data * data) : data(data) { } + Bcs(Data * data, PlatformEnv * env) : data(data) + { + data->bcsSet = 1; + } + + //TODO -- We may not want to allow creation of Bcs object without env in future + //! Constructor store data about simulation (needed for domain) + /*! + Constructor simply stores the pointer to the Data class. + + @param[in] *data pointer to the Data class + */ + + Bcs(Data * data) : data(data) + { + data->bcsSet = 1; + } + + virtual ~Bcs() { } //!< Destructor public: @@ -75,6 +94,8 @@ class Outflow : public Bcs */ Outflow(Data * data) : Bcs(data) { } + virtual ~Outflow() { } //!< Destructor + //! Application function /*! Applies the Outflow boundary conditions to the ghost cells. @@ -88,6 +109,40 @@ class Outflow : public Bcs }; +//! Out flow boundary conditions for the rotated 2D Brio-Wu +/*! + Using the conventional outflow BCs for the diagonal BW problem results in + shocks entering from along the main diagonal. This class deals with these + shocks. + Using this.apply behaves as if the BW problem has been rotated, as required. +*/ +class OutflowRotatedBW : public Bcs +{ +public: + //! Constructor + /*! + Calls constructor of base class to store the pointer to the Data class. + + @param[in] *data pointer to Data class + @sa Bcs::Bcs + */ + OutflowRotatedBW(Data * data) : Bcs(data) { } + + virtual ~OutflowRotatedBW() { } //!< Destructor + + //! Application function + /*! + Applies the Outflow boundary conditions to the ghost cells. + + @param[in, out] *cons pointer to the conservative (sized) vector + @param[in, out] *prims optional pointer to the primitive vector + @param[in, out] *aux optional pointer to the primitive vector + @sa Bcs::apply + */ + void apply(double * cons, double * prims = NULL, double * aux = NULL); +}; + + //! Periodic boundary conditions /*! Flows that exit across one domain boundary re-enter at the opposing @@ -122,6 +177,8 @@ class Periodic : public Bcs */ Periodic(Data * data) : Bcs(data) { } + virtual ~Periodic() { } //!< Destructor + //! Application function /*! Applies the Periodic boundary conditions to the ghost cells. @@ -135,16 +192,35 @@ class Periodic : public Bcs }; +//! Flow boundary conditions /*! - Boundary conditions for the Kelvin Helmholtz instability - x-direction is periodic and others are outflow + Boundary conditions used for the Kelvin Helmholtz instability. The + x-direction is periodic and y- and z-directions are outflow. */ class Flow : public Bcs { public: + //! Constructor + /*! + Calls constructor of base class to store the pointer to the Data class. 
+ + @param[in] *data pointer to Data class + @sa Bcs::Bcs + */ Flow(Data * data) : Bcs(data) { } + virtual ~Flow() { } //!< Destructor + + //! Application function + /*! + Applies the Flow boundary conditions to the ghost cells. + + @param[in, out] *cons pointer to the conservative (sized) vector + @param[in, out] *prims optional pointer to the primitive vector + @param[in, out] *aux optional pointer to the primitive vector + @sa Bcs::apply + */ void apply(double * cons, double * prims = NULL, double * aux = NULL); }; diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 6cfdfbbb..a88806e2 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -7,7 +7,7 @@ CC_GPU = nvcc USE_MPI=1 -MPI_LIBRARY = /local/software/openmpi/3.0.0/gcc-cuda8.0 +MPI_LIBRARY_PATH = /local/software/openmpi/3.0.0/gcc-cuda8.0 # Module directory MODULE_DIR = ./Src @@ -24,13 +24,14 @@ RTFIND_INC_DIR = ./CminpackLibrary/Include # Cminpack source directory RTFIND_SRC_DIR = ./CminpackLibrary/Src -MPI_FLAGS = -lmpi -L${MPI_LIBRARY_PATH}/lib -I${MPI_LIBRARY_PATH}/include +#MPI_FLAGS = -lmpi -L${MPI_LIBRARY_PATH}/lib -I${MPI_LIBRARY_PATH}/include +MPI_FLAGS = -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu -lmpichcxx -lmpich # C++ compiler flags CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 # NVIDIA compiler flags -NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_61 -Xcompiler -fopenmp -Xcompiler -Wall +NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 -Xcompiler -fopenmp -Xcompiler -Wall # Sources SRCS = main.cu \ @@ -139,7 +140,7 @@ simulation.o : $(MODULE_DIR)/simulation.cu $(INC_DIR)/simulation.h $(INC_DIR)/mo $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) main.o : $(MODULE_DIR)/main.cu $(INC_DIR)/simulation.h $(INC_DIR)/model.h $(INC_DIR)/simData.h $(INC_DIR)/initFunc.h - $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) + $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) $(MPI_FLAGS) weno.o : $(MODULE_DIR)/weno.cu $(INC_DIR)/weno.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) From ee4992a110a6d4141a8e861895e2d00884a364dc Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 1 Sep 2020 16:59:44 +0100 Subject: [PATCH 19/56] shortening time. 
Comparison to CPU version passes at this reduced end time --- Project/CPU/Src/main.cc | 2 +- Project/GPU/Src/main.cu | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index 3a76a067..8f76b365 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -27,7 +27,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.05); + double endTime(0.0005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 1898ecf4..acf10c94 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -37,14 +37,14 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.05); + double endTime(0.0005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); bool output(true); int safety(180); - int nxRanks(1); - int nyRanks(1); + int nxRanks(2); + int nyRanks(2); int nzRanks(1); char * ptr(0); From 4671878d100120d3d5a9c524ab9b08395caa4248 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Mon, 21 Sep 2020 11:01:22 +0100 Subject: [PATCH 20/56] tweaking srmhd.cu so that it matches the CPU version (both versions run on CPU currently) --- Project/CPU/Src/main.cc | 7 ++++--- Project/GPU/Src/main.cu | 10 +++++----- Project/GPU/Src/simulation.cu | 13 ++++++++----- Project/GPU/Src/srmhd.cu | 30 ++++++------------------------ 4 files changed, 23 insertions(+), 37 deletions(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index 8f76b365..ea21d219 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -6,6 +6,7 @@ #include "initFunc.h" #include "simData.h" #include "SSP2.h" +#include "RK2.h" #include "Euler.h" #include "weno.h" @@ -19,7 +20,7 @@ int main(int argc, char *argv[]) { // Set up domain int Ng(4); int nx(64); - int ny(16); + int ny(8); int nz(0); double xmin(-0.5); double xmax(0.5); @@ -44,7 +45,7 @@ int main(int argc, char *argv[]) { cfl, Ng, gamma, sigma); // Choose particulars of simulation - SRRMHD model(&data); + SRMHD model(&data); Weno3 weno(&data); @@ -56,7 +57,7 @@ int main(int argc, char *argv[]) { KHInstabilitySingleFluid init(&data, 1); - SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + RK2 timeInt(&data, &model, &bcs, &fluxMethod); SerialSaveData save(&data, &env, 0); diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index acf10c94..6ef7cd8b 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -29,7 +29,7 @@ int main(int argc, char *argv[]) { // Set up domain int Ng(4); int nx(64); - int ny(16); + int ny(8); int nz(0); double xmin(-0.5); double xmax(0.5); @@ -43,8 +43,8 @@ int main(int argc, char *argv[]) { double sigma(0); bool output(true); int safety(180); - int nxRanks(2); - int nyRanks(2); + int nxRanks(1); + int nyRanks(1); int nzRanks(1); char * ptr(0); @@ -61,7 +61,7 @@ int main(int argc, char *argv[]) { cfl, Ng, gamma, sigma); // Choose particulars of simulation - SRRMHD model(&data); + SRMHD model(&data); FVS fluxMethod(&data, &model); @@ -71,7 +71,7 @@ int main(int argc, char *argv[]) { KHInstabilitySingleFluid init(&data, 1); - SSP2 timeInt(&data, &model, &bcs, &fluxMethod); + RK2 timeInt(&data, &model, &bcs, &fluxMethod); ParallelSaveData save(&data, &env); diff --git a/Project/GPU/Src/simulation.cu b/Project/GPU/Src/simulation.cu index d692a0a0..46d36208 100644 --- a/Project/GPU/Src/simulation.cu +++ b/Project/GPU/Src/simulation.cu @@ -156,7 +156,7 @@ void 
Simulation::evolve(bool output, int safety) // Save initial data if (output && save) { - +/* this->save->saveVar("rho", 11); this->save->saveVar("vx", 11); this->save->saveVar("vy", 11); @@ -167,8 +167,9 @@ void Simulation::evolve(bool output, int safety) this->save->saveVar("Bz", 11); this->save->saveVar("Ex", 11); this->save->saveVar("Ey", 11); - this->save->saveVar("Ez", 11); } - + this->save->saveVar("Ez", 11); +*/ + } while (d->t < d->endTime) { this->updateTime(); @@ -176,7 +177,7 @@ void Simulation::evolve(bool output, int safety) // Save data for animation if (output && save && d->iters%d->frameSkip==0) { // Save initial data - +/* this->save->saveVar("rho", 11); this->save->saveVar("vx", 11); this->save->saveVar("vy", 11); @@ -188,6 +189,7 @@ void Simulation::evolve(bool output, int safety) this->save->saveVar("Ex", 11); this->save->saveVar("Ey", 11); this->save->saveVar("Ez", 11); + */ } if (safety>0 && d->iters%safety==0) { @@ -200,7 +202,7 @@ void Simulation::evolve(bool output, int safety) // Save final state if (output && save) { // Save initial data - +/* this->save->saveVar("rho", 11); this->save->saveVar("vx", 11); this->save->saveVar("vy", 11); @@ -212,6 +214,7 @@ void Simulation::evolve(bool output, int safety) this->save->saveVar("Ex", 11); this->save->saveVar("Ey", 11); this->save->saveVar("Ez", 11); + */ } if (env->rank == 0){ diff --git a/Project/GPU/Src/srmhd.cu b/Project/GPU/Src/srmhd.cu index a37e41e6..c0b39205 100644 --- a/Project/GPU/Src/srmhd.cu +++ b/Project/GPU/Src/srmhd.cu @@ -14,7 +14,6 @@ #include #include #include -#include // Macro for getting array index #define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) @@ -44,6 +43,7 @@ SRMHD::SRMHD(Data * data) : Model(data) // Solutions for C2P all cells cudaHostAlloc((void **)&solution, sizeof(double)*2*data->Nx*data->Ny*data->Nz, cudaHostAllocPortable); + //solution = (double *) malloc(sizeof(double)*2*data->Nx*data->Ny*data->Nz); smartGuesses = 0; @@ -91,11 +91,8 @@ void SRMHD::fluxVector(double *cons, double *prims, double *aux, double *f, cons // Generate flux vector // Fx: flux in x-direction if (dir == 0) { - #pragma omp parallel for for (int i=0; i < d->Nx; i++) { - #pragma omp parallel for for (int j=0; j < d->Ny; j++) { - #pragma omp parallel for for (int k=0; k < d->Nz; k++) { // D f[ID(0, i, j, k)] = cons[ID(0, i, j, k)] * prims[ID(1, i, j, k)]; @@ -137,11 +134,8 @@ void SRMHD::fluxVector(double *cons, double *prims, double *aux, double *f, cons // Fy: flux in y-direction else if (dir==1) { - #pragma omp parallel for for (int i=0; i < d->Nx; i++) { - #pragma omp parallel for for (int j=0; j < d->Ny; j++) { - #pragma omp parallel for for (int k=0; k < d->Nz; k++) { // D f[ID(0, i, j, k)] = cons[ID(0, i, j, k)] * prims[ID(2, i, j, k)]; @@ -183,11 +177,8 @@ void SRMHD::fluxVector(double *cons, double *prims, double *aux, double *f, cons // Fz: flux in z-direction else { - #pragma omp parallel for for (int i=0; i < d->Nx; i++) { - #pragma omp parallel for for (int j=0; j < d->Ny; j++) { - #pragma omp parallel for for (int k=0; k < d->Nz; k++) { // D f[ID(0, i, j, k)] = cons[ID(0, i, j, k)] * prims[ID(3, i, j, k)]; @@ -238,7 +229,6 @@ void SRMHD::fluxVector(double *cons, double *prims, double *aux, double *f, cons void SRMHD::sourceTermSingleCell(double *cons, double *prims, double *aux, double *source, int i, int j, int k) { - #pragma omp parallel for for (int var=0; var < this->data->Ncons; var++) { if (var == 8) { // phi @@ -258,13 
+248,9 @@ void SRMHD::sourceTermSingleCell(double *cons, double *prims, double *aux, doubl void SRMHD::sourceTerm(double *cons, double *prims, double *aux, double *source) { - #pragma omp parallel for for (int i=0; i < this->data->Nx; i++) { - #pragma omp parallel for for (int j=0; j < this->data->Ny; j++) { - #pragma omp parallel for for (int k=0; k < this->data->Nz; k++) { - #pragma omp parallel for for (int var=0; var < this->data->Ncons; var++) { if (var == 8) { // phi @@ -335,7 +321,7 @@ void SRMHD::getPrimitiveVarsSingleCell(double *cons, double *prims, double *aux, double sol[2]; // Guess and solution vector double res[2]; // Residual/fvec vector int info; // Rootfinder flag - const double tol = 1.49011612e-8; // Tolerance of rootfinder + const double tol = 1.4e-8; // Tolerance of rootfinder const int lwa = 19; // Length of work array = n * (3*n + 13) / 2 double wa[lwa]; // Work array @@ -422,9 +408,9 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) // Syntax Data * d(this->data); // Solutions - double * solution; - cudaHostAlloc((void **)&solution, sizeof(double)*2*d->Nx*d->Ny*d->Nz, - cudaHostAllocPortable); + //double * solution; + //cudaHostAlloc((void **)&solution, sizeof(double)*2*d->Nx*d->Ny*d->Nz, + //cudaHostAllocPortable); // Hybrd1 set-up Args args; // Additional arguments structure @@ -432,7 +418,7 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) double sol[2]; // Guess and solution vector double res[2]; // Residual/fvec vector int info; // Rootfinder flag - const double tol = 1.49011612e-8; // Tolerance of rootfinder + const double tol = 1.49011612e-7; // Tolerance of rootfinder const int lwa = 19; // Length of work array = n * (3*n + 13) / 2 double wa[lwa]; // Work array std::vector fails; // Vector of failed structs. Stores location of failed cons2prims cells. @@ -542,11 +528,8 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) } - #pragma omp parallel for for (int i=0; i < d->Nx; i++) { - #pragma omp parallel for for (int j=0; j < d->Ny; j++) { - #pragma omp parallel for for (int k=0; k < d->Nz; k++) { // W aux[ID(1, i, j, k)] = 1 / sqrt(1 - solution[ID(0, i, j, k)]); @@ -598,7 +581,6 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) } // End j-loop } // End i-loop - cudaFreeHost(solution); } From 849d28569904e5ff862fc9ab8908530dc5a78f3d Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 22 Sep 2020 10:54:33 +0100 Subject: [PATCH 21/56] debugging tweaks. 
Reverse later --- Project/CPU/Makefile | 2 +- Project/CPU/Src/RK2.cc | 27 ++++++++++++++++++--------- Project/CPU/Src/srmhd.cc | 24 ++++++++++++++++-------- Project/GPU/Makefile | 4 ++-- Project/GPU/Src/main.cu | 2 +- 5 files changed, 38 insertions(+), 21 deletions(-) diff --git a/Project/CPU/Makefile b/Project/CPU/Makefile index 84d91915..e1df13f5 100644 --- a/Project/CPU/Makefile +++ b/Project/CPU/Makefile @@ -25,7 +25,7 @@ RTFIND_INC_DIR = ./CminpackLibrary/Include RTFIND_SRC_DIR = ./CminpackLibrary/Src # C++ compiler flags -CXXFLAGS = -Wall -std=c++11 -g -DUSE_MPI=$(USE_MPI) -O3 $(OMP_FLAGS) -Wno-unknown-pragmas +CXXFLAGS = -Wall -std=c++11 -g -DUSE_MPI=$(USE_MPI) -O0 $(OMP_FLAGS) -Wno-unknown-pragmas # Sources SRCS = main.cc \ diff --git a/Project/CPU/Src/RK2.cc b/Project/CPU/Src/RK2.cc index 88633e98..4dd16a99 100644 --- a/Project/CPU/Src/RK2.cc +++ b/Project/CPU/Src/RK2.cc @@ -38,9 +38,12 @@ void RK2::predictorStep(double * cons, double * prims, double * aux, double dt) // Cons2prims conversion for p1 estimate stage requires old values to start // the rootfind - for (int i(d->is); i < d->ie; i++) { - for (int j(d->js); j < d->je; j++) { - for (int k(d->ks); k < d->ke; k++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + //for (int i(d->is); i < d->ie; i++) { + //for (int j(d->js); j < d->je; j++) { + //for (int k(d->ks); k < d->ke; k++) { for (int var(0); var < d->Naux; var++) { p1aux[ID(var, i, j, k)] = aux[ID(var, i, j, k)]; } @@ -56,9 +59,12 @@ void RK2::predictorStep(double * cons, double * prims, double * aux, double dt) // First stage approximation for (int var(0); var < d->Ncons; var++) { - for (int i(d->is); i < d->ie; i++) { - for (int j(d->js); j < d->je; j++) { - for (int k(d->ks); k < d->ke; k++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + //for (int i(d->is); i < d->ie; i++) { + //for (int j(d->js); j < d->je; j++) { + //for (int k(d->ks); k < d->ke; k++) { p1cons[ID(var, i, j, k)] = cons[ID(var, i, j, k)] - dt * args1[ID(var, i, j, k)]; } } @@ -79,9 +85,12 @@ void RK2::correctorStep(double * cons, double * prims, double * aux, double dt) // Construct solution for (int var(0); var < d->Ncons; var++) { - for (int i(d->is); i < d->ie; i++) { - for (int j(d->js); j < d->je; j++) { - for (int k(d->ks); k < d->ke; k++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + //for (int i(d->is); i < d->ie; i++) { + //for (int j(d->js); j < d->je; j++) { + //for (int k(d->ks); k < d->ke; k++) { cons[ID(var, i, j, k)] = 0.5 * (cons[ID(var, i, j, k)] + p1cons[ID(var, i, j, k)] - dt * args2[ID(var, i, j, k)]); } diff --git a/Project/CPU/Src/srmhd.cc b/Project/CPU/Src/srmhd.cc index b169da4b..ba2bfea4 100644 --- a/Project/CPU/Src/srmhd.cc +++ b/Project/CPU/Src/srmhd.cc @@ -404,9 +404,14 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) std::vector fails; // Vector of failed structs. Stores location of failed cons2prims cells. 
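// (A sketch of the index convention assumed by the debugging changes above and
//  below: d->Nx, d->Ny and d->Nz count every cell including the Ng ghost cells on
//  each side, while d->is..d->ie, d->js..d->je and d->ks..d->ke span only the
//  interior, so
//    for (int i(d->is); i < d->ie; i++) { ... }   // interior cells only
//    for (int i(0);     i < d->Nx; i++) { ... }   // every cell, ghosts included
//  Looping over 0..Nx therefore also runs the cons2prims sweep in the ghost
//  regions, presumably to make the CPU results directly comparable with the GPU path.)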
// Loop through domain solving and setting the prim and aux vars - for (int i(d->is); i < d->ie; i++) { - for (int j(d->js); j < d->je; j++) { - for (int k(d->ks); k < d->ke; k++) { + //for (int i(d->is); i < d->ie; i++) { + //for (int j(d->js); j < d->je; j++) { + //for (int k(d->ks); k < d->ke; k++) { + + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + // Update possible values // Bx, By, Bz prims[ID(5, i, j, k)] = cons[ID(5, i, j, k)]; @@ -461,7 +466,7 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) - +/* // ################################## Smart guessing ########################### // // Are there any failures? @@ -506,11 +511,14 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) // } } } +*/ - - for (int i(d->is); i < d->ie; i++) { - for (int j(d->js); j < d->je; j++) { - for (int k(d->ks); k < d->ke; k++) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + //for (int i(d->is); i < d->ie; i++) { + //for (int j(d->js); j < d->je; j++) { + //for (int k(d->ks); k < d->ke; k++) { // W aux[ID(1, i, j, k)] = 1 / sqrt(1 - solution[ID(0, i, j, k)]); // rho diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index a88806e2..43f055c9 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -28,10 +28,10 @@ RTFIND_SRC_DIR = ./CminpackLibrary/Src MPI_FLAGS = -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu -lmpichcxx -lmpich # C++ compiler flags -CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 +CXXFLAGS = -fopenmp -Wall -std=c++11 -O0 -lineinfo -g # NVIDIA compiler flags -NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 -Xcompiler -fopenmp -Xcompiler -Wall +NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O0 -arch=sm_52 -Xcompiler -fopenmp -Xcompiler -Wall -lineinfo -g -G # Sources SRCS = main.cu \ diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 6ef7cd8b..103875b7 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -28,7 +28,7 @@ int main(int argc, char *argv[]) { // Set up domain int Ng(4); - int nx(64); + int nx(16); int ny(8); int nz(0); double xmin(-0.5); From e247098415f89dc97afeb96593aa1cf7d1ef65fa Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 22 Sep 2020 10:54:52 +0100 Subject: [PATCH 22/56] debugging tweaks. Reverse later --- Project/CPU/Src/main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index ea21d219..dbf80ab2 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -19,7 +19,7 @@ int main(int argc, char *argv[]) { const double MU(1000); // Set up domain int Ng(4); - int nx(64); + int nx(16); int ny(8); int nz(0); double xmin(-0.5); From 68128450090a462025a40f3ad92ef9f6e67b5644 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 22 Sep 2020 10:55:44 +0100 Subject: [PATCH 23/56] moving SRMHD::getPrimitiveVars to GPU. 
In progress, needs debugging --- Project/GPU/Include/srmhd.h | 4 + Project/GPU/Src/simData.cu | 5 +- Project/GPU/Src/srmhd.cu | 189 +++++++++++++++++++++++++++++++++++- 3 files changed, 193 insertions(+), 5 deletions(-) diff --git a/Project/GPU/Include/srmhd.h b/Project/GPU/Include/srmhd.h index 4bf1fbb6..48f0ac7a 100644 --- a/Project/GPU/Include/srmhd.h +++ b/Project/GPU/Include/srmhd.h @@ -3,6 +3,7 @@ #include "model.h" #include "deviceArguments.h" +#include "C2PArgs.h" /* @@ -100,6 +101,9 @@ class SRMHD : public Model double * solution; //!< Pointer to array to hold solution of C2P for every cell. Size is 2*Nx*Ny*Nz + // Work array + C2PArgs * c2pArgs; + SRMHD(); //!< Default constructor diff --git a/Project/GPU/Src/simData.cu b/Project/GPU/Src/simData.cu index 84896524..0e829e03 100644 --- a/Project/GPU/Src/simData.cu +++ b/Project/GPU/Src/simData.cu @@ -73,7 +73,10 @@ Data::Data(int nx, int ny, int nz, cudaGetDeviceProperties(&prop, 0); cudaDeviceSetLimit(cudaLimitStackSize, 2048); // Needed for SRMHS and SSP2, hybrd called recursively meaning nvcc does not know the stack size at compile time. Manually set. // Determine the number of GPU streams - Nstreams = Ncells / (tpb * bpg) + 1; + + //Nstreams = Ncells / (tpb * bpg) + 1; + //! TODO -- for debugging. Remove + Nstreams = 1; if (false) { diff --git a/Project/GPU/Src/srmhd.cu b/Project/GPU/Src/srmhd.cu index c0b39205..57e5dc00 100644 --- a/Project/GPU/Src/srmhd.cu +++ b/Project/GPU/Src/srmhd.cu @@ -14,16 +14,21 @@ #include #include #include +#include "cudaErrorCheck.h" // Macro for getting array index #define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) +#define IDCons(var, idx, jdx, kdx) ( (var) + (idx)*(d->Ncons)*(d->Nz)*(d->Ny) + (jdx)*(d->Ncons)*(d->Nz) + (kdx)*(d->Ncons) ) +#define IDPrims(var, idx, jdx, kdx) ( (var) + (idx)*(d->Nprims)*(d->Nz)*(d->Ny) + (jdx)*(d->Nprims)*(d->Nz) + (kdx)*(d->Nprims) ) +#define IDAux(var, idx, jdx, kdx) ( (var) + (idx)*(d->Naux)*(d->Nz)*(d->Ny) + (jdx)*(d->Naux)*(d->Nz) + (kdx)*(d->Naux) ) __device__ int SRMHDresidualParallel(void *p, int n, const double *x, double *fvec, int iflag); int SRMHDresidual(void *p, int n, const double *x, double *fvec, int iflag); - +__global__ +static void getPrimitiveVarsParallel(double *cons, double *prims, double *aux, double *guess, int stream, double gamma, double sigma, int Ncons, int Nprims, int Naux, int origWidth, int streamWidth); SRMHD::SRMHD() : Model() { @@ -65,11 +70,14 @@ SRMHD::SRMHD(Data * data) : Model(data) this->data->auxLabels.push_back("bsq"); this->data->auxLabels.push_back("vsq"); this->data->auxLabels.push_back("BS"); this->data->auxLabels.push_back("Bsq"); this->data->auxLabels.push_back("Ssq"); + + c2pArgs = new C2PArgs(this->data); } SRMHD::~SRMHD() { cudaFreeHost(solution); + delete c2pArgs; } @@ -403,7 +411,8 @@ void SRMHD::getPrimitiveVarsSingleCell(double *cons, double *prims, double *aux, old values for the prims and aux vectors. Output is the current values of cons, prims and aux. */ -void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) +/* +void SRMHD::getPrimitiveVarsCPU(double *cons, double *prims, double *aux) { // Syntax Data * d(this->data); @@ -583,7 +592,7 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) } - +*/ @@ -700,7 +709,7 @@ void SRMHD::primsToAll(double *cons, double *prims, double *aux) //! 
Need a structure to pass to C2P hybrd rootfind to hold the current cons values typedef struct { - double guess[9]; + double guess[8]; double gamma; } getPrimVarsArgs; @@ -731,6 +740,178 @@ int SRMHDresidualParallel(void *p, int n, const double *x, double *fvec, int ifl return 0; } + +void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) +{ + // Syntax + Data * d(this->data); + + // First need to copy data to the device + // A single cell requires all cons variables and aux10 to start the guessing + // Rearrange data into host arrays ready for copying + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + for (int var(0); var < d->Ncons; var++) { + c2pArgs->cons_h[IDCons(var, i, j, k)] = cons[ID(var, i, j, k)]; + } + c2pArgs->guess_h[ID(0, i, j, k)] = aux[ID(10, i, j, k)]; + } + } + } + + // Data is in correct order, now stream data to the device + for (int i(0); i < c2pArgs->Nstreams; i++) { + // Which cell is at the left bound? + int lcell(i * c2pArgs->streamWidth); + // Which cell is at the right bound? + int rcell(lcell + c2pArgs->streamWidth); + if (rcell > d->Ncells) rcell = d->Ncells; + // Memory size to copy in + int width(rcell - lcell); + int inMemsize(width * sizeof(double)); + + // Send stream's data + gpuErrchk( cudaMemcpyAsync(c2pArgs->cons_d[i], c2pArgs->cons_h + lcell*d->Ncons, inMemsize*d->Ncons, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); + gpuErrchk( cudaMemcpyAsync(c2pArgs->guess_d[i], c2pArgs->guess_h + lcell, inMemsize, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); + + + // Call kernel and operate on data + getPrimitiveVarsParallel <<< c2pArgs->bpg, c2pArgs->tpb, + c2pArgs->tpb * c2pArgs->cellMem, c2pArgs->stream[i] >>> (c2pArgs->cons_d[i], + c2pArgs->prims_d[i], c2pArgs->aux_d[i], c2pArgs->guess_d[i], i, d->gamma, d->sigma, d->Ncons, + d->Nprims, d->Naux, c2pArgs->streamWidth, width); + + + // Copy all data back + gpuErrchk( cudaMemcpyAsync(c2pArgs->prims_h + lcell*d->Nprims, c2pArgs->prims_d[i], inMemsize*d->Nprims, cudaMemcpyDeviceToHost, c2pArgs->stream[i]) ); + gpuErrchk( cudaMemcpyAsync(c2pArgs->aux_h + lcell*d->Naux, c2pArgs->aux_d[i], inMemsize*d->Naux, cudaMemcpyDeviceToHost, c2pArgs->stream[i]) ); + } + gpuErrchk( cudaDeviceSynchronize() ); + + // Rearrange data back into arrays + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + + for (int var(0); var < d->Nprims; var++) { + prims[ID(var, i, j, k)] = c2pArgs->prims_h[IDPrims(var, i, j, k)]; + } + for (int var(0); var < d->Naux; var++) { + aux[ID(var, i, j, k)] = c2pArgs->aux_h[IDAux(var, i, j, k)]; + } + } + } + } +} + +// /*! +// This is the device version of the getPrimitiveVars that takes a streams data +// and computes the rest of the prims and aux vars. This is called when +// SRRMHD::getPrimitiveVars is required, i.e. all cells need to be found. 
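// (Orientation sketch, taken from C2PArgs and the host code above: the host splits
//  the d->Ncells cells into chunks of streamWidth = tpb * bpg cells; chunk i covers
//  cells [i*streamWidth, min((i+1)*streamWidth, Ncells)) and gets its own CUDA
//  stream for the copy in, this kernel launch, and the copy back. Inside the
//  kernel, lID is the cell's index within its chunk, and lID + stream*origWidth
//  would recover the global cell index.)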
+// */ +__global__ +static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, double *streamAux, double *guess, int stream, double gamma, double sigma, int Ncons, int Nprims, int Naux, int origWidth, int streamWidth) +{ + // First need thread indicies + const int tID(threadIdx.x); //!< thread index (in block) + const int lID(tID + blockIdx.x * blockDim.x); //!< local index (in stream) + // const int gID(lID + stream * origWidth); //!< global index (in domain) + // Allocate shared memory + extern __shared__ double sharedArray []; + double * cons = &sharedArray[tID * (Ncons + Nprims + Naux)]; + double * prims = &cons[Ncons]; + double * aux = &prims[Nprims]; + + // Hybrd1 set-up + double sol[2]; // Guess and solution vector + double res[2]; // Residual/fvec vector + int info; // Rootfinder flag + double wa[19]; // Work array + + if (lID < streamWidth) { + + + // Load conserved vector into shared memory, and the initial guess + for (int i(0); i < Ncons; i++) cons[i] = streamCons[lID * Ncons + i]; + + + + + // Update known values + // Bx, By, Bz + prims[5] = cons[5]; + prims[6] = cons[6]; + prims[7] = cons[8]; + + // BS + aux[10] = cons[5] * cons[1] + cons[6] * cons[2] + cons[7] * cons[3]; + // Bsq + aux[11] = cons[5] * cons[5] + cons[6] * cons[6] + cons[7] * cons[7]; + // Ssq + aux[12] = cons[1] * cons[1] + cons[2] * cons[2] + cons[3] * cons[3]; + + + + // Set args for rootfind + getPrimVarsArgs GPVAArgs = {cons[0], cons[1], cons[2], cons[3], cons[4], cons[6], cons[7], cons[8], gamma}; + + // Guesses of solution + sol[0] = prims[1] * prims[1] + prims[2] * prims[2] + prims[3] * prims[3]; + sol[1] = prims[0] * aux[0] / (1 - sol[0]); + + + // Solve residual = 0 + if ((info = __cminpack_func__(hybrd1) (SRMHDresidualParallel, &GPVAArgs, 2, sol, res, 1.49011612e-7, wa, 19))!=1) + { + printf("C2P single cell failed at lID %d, hybrd returns info=%d\n", lID, info); + } + if (lID == 0){ + printf("IN LANE %f\n", prims[5]); + printf("GPU GAMMA %f\n", gamma); + printf("sol %f %f res %f %f\n", sol[0], sol[1], res[0], res[1]); + } + // W + aux[1] = 1 / sqrt(1 - sol[0]); + // rho + prims[0] = cons[0] / aux[1]; + // h + aux[0] = sol[1] / (prims[0] * aux[1] * aux[1]); + // p + prims[4] = (aux[0] - 1) * prims[0] * + (gamma - 1) / gamma; + // e + aux[2] = prims[4] / (prims[0] * (gamma - 1)); + // vx, vy, vz + prims[1] = (cons[5] * aux[10] + cons[1] * sol[1]) / (sol[1] * (aux[11] + sol[1])); + prims[2] = (cons[6] * aux[10] + cons[2] * sol[1]) / (sol[1] * (aux[11] + sol[1])); + prims[3] = (cons[7] * aux[10] + cons[3] * sol[1]) / (sol[1] * (aux[11] + sol[1])); + // vsq + aux[9] = prims[1] * prims[1] + prims[2] * prims[2] + prims[3] * prims[3]; + // c + aux[3] = sqrt(aux[2] * gamma * (gamma - 1) / aux[0]); + // b0 + aux[4] = aux[1] * (cons[5] * prims[1] + cons[6] * prims[2] + cons[7] * prims[3]); + // bx, by, bz + aux[5] = cons[5] / aux[1] + aux[4] * prims[1]; + aux[6] = cons[6] / aux[1] + aux[4] * prims[2]; + aux[7] = cons[7] / aux[1] + aux[4] * prims[3]; + // bsq + aux[8] = (prims[5] * prims[5] + prims[6] * prims[6] + prims[7] * prims[7] + + aux[4] * aux[4]) / (aux[1] * aux[1]); + + + + } + + // Copy data back from shared memory into device arrays + for (int i(0); i < Nprims; i++) streamPrims[lID * Nprims + i] = prims[i]; + for (int i(0); i < Naux; i++) streamAux[lID * Naux + i] = aux[i]; + +} + + + __device__ void SRMHD_D::getPrimitiveVarsSingleCell(double *cons, double *prims, double *aux) { From dbdaf552639f9706a409fcc947bf27ceebc8af70 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 22 
Sep 2020 14:15:32 +0100 Subject: [PATCH 24/56] gpu version no longer failing to converge in getPrims but still gives wrong answer. Currently copying all prims and aux to gpu -- need to copy only those values that are required for the guess in future --- Project/CPU/Src/main.cc | 2 +- Project/CPU/Src/srmhd.cc | 8 ++++ Project/GPU/Makefile | 4 +- Project/GPU/Src/main.cu | 2 +- Project/GPU/Src/srmhd.cu | 98 ++++++++++++++++++++++++++++++---------- 5 files changed, 85 insertions(+), 29 deletions(-) diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index dbf80ab2..c412eaa2 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -28,7 +28,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.0005); + double endTime(0.00005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); diff --git a/Project/CPU/Src/srmhd.cc b/Project/CPU/Src/srmhd.cc index ba2bfea4..48e1060a 100644 --- a/Project/CPU/Src/srmhd.cc +++ b/Project/CPU/Src/srmhd.cc @@ -446,9 +446,17 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) sol[1] = prims[ID(0, i, j, k)] * aux[ID(0, i, j, k)] / (1 - sol[0]); + // Solve residual = 0 info = __cminpack_func__(hybrd1) (&SRMHDresidual, &args, n, sol, res, tol, wa, lwa); + if (i==4 && j==4 && k==0){ + printf("CPU, IN LANE (%d,%d,%d)\n", i, j, k); + printf("prims: %f %f %f\n", prims[ID(3, i, j, k)], prims[ID(4, i, j, k)], prims[ID(5, i, j, k)]); + printf("cons: %f %f %f\n", cons[ID(3, i, j, k)], cons[ID(4, i, j, k)], cons[ID(5, i, j, k)]); + printf("args: %f %f %f\n", aux[ID(10, i, j, k)], aux[ID(11, i, j, k)], aux[ID(12, i, j, k)]); + printf("sol %f %f res %f %f\n", sol[0], sol[1], res[0], res[1]); + } // If root find fails, add failed cell to the list if (info!=1) { Failed fail = {i, j, k}; diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 43f055c9..1801d2d2 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -28,10 +28,10 @@ RTFIND_SRC_DIR = ./CminpackLibrary/Src MPI_FLAGS = -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu -lmpichcxx -lmpich # C++ compiler flags -CXXFLAGS = -fopenmp -Wall -std=c++11 -O0 -lineinfo -g +CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 -lineinfo # NVIDIA compiler flags -NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O0 -arch=sm_52 -Xcompiler -fopenmp -Xcompiler -Wall -lineinfo -g -G +NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 -Xcompiler -fopenmp -Xcompiler -Wall -lineinfo # Sources SRCS = main.cu \ diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 103875b7..1c3e72ca 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -37,7 +37,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.0005); + double endTime(0.00005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); diff --git a/Project/GPU/Src/srmhd.cu b/Project/GPU/Src/srmhd.cu index 57e5dc00..3c8f0446 100644 --- a/Project/GPU/Src/srmhd.cu +++ b/Project/GPU/Src/srmhd.cu @@ -400,7 +400,7 @@ void SRMHD::getPrimitiveVarsSingleCell(double *cons, double *prims, double *aux, } - +#if 0 //! Solve for the primitive and auxiliary variables /*! Method outlined in Anton 2010, `Relativistic Magnetohydrodynamcis: @@ -411,15 +411,14 @@ void SRMHD::getPrimitiveVarsSingleCell(double *cons, double *prims, double *aux, old values for the prims and aux vectors. Output is the current values of cons, prims and aux. 
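  For reference, the system handed to hybrd1 (implemented in SRMHDresidualParallel
  further down this file) uses the two unknowns
    x[0] = vsq = v_i v^i,   x[1] = Z = rho * h * W^2,
  with W = 1 / sqrt(1 - vsq), rho = D / W and
  p = (h - 1) * rho * (gamma - 1) / gamma, and drives the two residuals
    f[0] = (Z + Bsq)^2 * vsq - (2*Z + Bsq) * BS^2 / Z^2 - Ssq
    f[1] = Z + Bsq - p - Bsq / (2*W^2) - BS^2 / (2*Z^2) - D - tau
  to zero, where BS = B_i S^i, Bsq = B_i B^i and Ssq = S_i S^i (aux[10], aux[11]
  and aux[12] respectively).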
*/ -/* -void SRMHD::getPrimitiveVarsCPU(double *cons, double *prims, double *aux) +void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) { // Syntax Data * d(this->data); // Solutions - //double * solution; - //cudaHostAlloc((void **)&solution, sizeof(double)*2*d->Nx*d->Ny*d->Nz, - //cudaHostAllocPortable); + double * solution; + cudaHostAlloc((void **)&solution, sizeof(double)*2*d->Nx*d->Ny*d->Nz, + cudaHostAllocPortable); // Hybrd1 set-up Args args; // Additional arguments structure @@ -590,10 +589,11 @@ void SRMHD::getPrimitiveVarsCPU(double *cons, double *prims, double *aux) } // End j-loop } // End i-loop + cudaFreeHost(solution); -} -*/ +} +#endif @@ -702,16 +702,25 @@ void SRMHD::primsToAll(double *cons, double *prims, double *aux) } // End i-loop } -#define Bsq (args->guess[5] * args->guess[5] + args->guess[6] * args->guess[6] + args->guess[7] + args->guess[7]) -#define Ssq (args->guess[1] * args->guess[1] + args->guess[2] * args->guess[2] + args->guess[3] + args->guess[3]) -#define BS (args->guess[5] * args->guess[1] + args->guess[6] * args->guess[2] + args->guess[7] + args->guess[3]) - //! Need a structure to pass to C2P hybrd rootfind to hold the current cons values +/* typedef struct { double guess[8]; double gamma; } getPrimVarsArgs; +*/ + +typedef struct +{ + double + D, //!< Relativistic energy for a single cell + g, //!< Adiabatic index, gamma + Bsq, //!< Squared magnitude of magnetic field for a single cell + Ssq, //!< Square magnitude of momentum for a single cell + BS, //!< Scalar product of magnetic field and momentum vector for a single cell + tau; //!< Kinetic energy for a single cell +} getPrimVarsArgs; __device__ int SRMHDresidualParallel(void *p, int n, const double *x, double *fvec, int iflag) @@ -724,23 +733,25 @@ int SRMHDresidualParallel(void *p, int n, const double *x, double *fvec, int ifl fvec[0] = fvec[1] = 1e6; return 0; } - - + double Bsq(args->Bsq); + double Ssq(args->Ssq); + double BS(args->BS); double W(1 / sqrt(1 - x[0])); - double rho(args->guess[0] / W); + double rho(args->D / W); double h(x[1] / (rho * W * W)); - double pr((h - 1) * rho * (args->gamma - 1) / args->gamma); + double pr((h - 1) * rho * (args->g - 1) / args->g); if (pr < 0 || rho < 0 || h < 0 || W < 1) { fvec[0] = fvec[1] = 1e6; return 0; } // Values should be OK fvec[0] = (x[1] + Bsq) * (x[1] + Bsq) * x[0] - (2 * x[1] + Bsq) * BS * BS / (x[1] * x[1]) - Ssq; - fvec[1] = x[1] + Bsq - pr - Bsq / (2 * W * W) - BS * BS / (2 * x[1] * x[1]) - args->guess[0] - args->guess[4]; + fvec[1] = x[1] + Bsq - pr - Bsq / (2 * W * W) - BS * BS / (2 * x[1] * x[1]) - args->D - args->tau; return 0; } +#if 1 void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) { // Syntax @@ -755,7 +766,26 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) for (int var(0); var < d->Ncons; var++) { c2pArgs->cons_h[IDCons(var, i, j, k)] = cons[ID(var, i, j, k)]; } - c2pArgs->guess_h[ID(0, i, j, k)] = aux[ID(10, i, j, k)]; + } + } + } + + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + for (int var(0); var < d->Nprims; var++) { + c2pArgs->prims_h[IDPrims(var, i, j, k)] = prims[ID(var, i, j, k)]; + } + } + } + } + + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + for (int var(0); var < d->Naux; var++) { + c2pArgs->aux_h[IDAux(var, i, j, k)] = aux[ID(var, i, j, k)]; + } } } } @@ -773,6 +803,8 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double 
*aux) // Send stream's data gpuErrchk( cudaMemcpyAsync(c2pArgs->cons_d[i], c2pArgs->cons_h + lcell*d->Ncons, inMemsize*d->Ncons, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); + gpuErrchk( cudaMemcpyAsync(c2pArgs->prims_d[i], c2pArgs->prims_h + lcell*d->Nprims, inMemsize*d->Nprims, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); + gpuErrchk( cudaMemcpyAsync(c2pArgs->aux_d[i], c2pArgs->aux_h + lcell*d->Naux, inMemsize*d->Naux, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); gpuErrchk( cudaMemcpyAsync(c2pArgs->guess_d[i], c2pArgs->guess_h + lcell, inMemsize, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); @@ -804,6 +836,7 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) } } } +#endif // /*! // This is the device version of the getPrimitiveVars that takes a streams data @@ -834,6 +867,8 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do // Load conserved vector into shared memory, and the initial guess for (int i(0); i < Ncons; i++) cons[i] = streamCons[lID * Ncons + i]; + for (int i(0); i < Nprims; i++) prims[i] = streamPrims[lID * Nprims + i]; + for (int i(0); i < Naux; i++) aux[i] = streamAux[lID * Naux + i]; @@ -851,10 +886,14 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do // Ssq aux[12] = cons[1] * cons[1] + cons[2] * cons[2] + cons[3] * cons[3]; - - // Set args for rootfind - getPrimVarsArgs GPVAArgs = {cons[0], cons[1], cons[2], cons[3], cons[4], cons[6], cons[7], cons[8], gamma}; + getPrimVarsArgs GPVAArgs; + GPVAArgs.D = cons[0]; + GPVAArgs.g = gamma; + GPVAArgs.BS = aux[10]; + GPVAArgs.Bsq = aux[11]; + GPVAArgs.Ssq = aux[12]; + GPVAArgs.tau = cons[4]; // Guesses of solution sol[0] = prims[1] * prims[1] + prims[2] * prims[2] + prims[3] * prims[3]; @@ -862,12 +901,15 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do // Solve residual = 0 - if ((info = __cminpack_func__(hybrd1) (SRMHDresidualParallel, &GPVAArgs, 2, sol, res, 1.49011612e-7, wa, 19))!=1) + if ((info = __cminpack_func__(hybrd1) (SRMHDresidualParallel, &GPVAArgs, 2, sol, res, 1.49011612e-7, wa, 19))!=1 && lID==68) { printf("C2P single cell failed at lID %d, hybrd returns info=%d\n", lID, info); } - if (lID == 0){ - printf("IN LANE %f\n", prims[5]); + if (lID == 68){ + printf("IN LANE %d\n", lID); + printf("prims: %f %f %f\n", prims[3], prims[4], prims[5]); + printf("cons: %f %f %f\n", cons[3], cons[4], cons[5]); + printf("args: %f %f %f\n", aux[10], aux[11], aux[12]); printf("GPU GAMMA %f\n", gamma); printf("sol %f %f res %f %f\n", sol[0], sol[1], res[0], res[1]); } @@ -937,7 +979,13 @@ void SRMHD_D::getPrimitiveVarsSingleCell(double *cons, double *prims, double *au // Set args for rootfind - getPrimVarsArgs GPVAArgs = {cons[0], cons[1], cons[2], cons[3], cons[4], cons[6], cons[7], cons[8], args->gamma}; + getPrimVarsArgs GPVAArgs; + GPVAArgs.D = cons[0]; + GPVAArgs.g = args->gamma; + GPVAArgs.BS = aux[10]; + GPVAArgs.Bsq = aux[11]; + GPVAArgs.Ssq = aux[12]; + GPVAArgs.tau = cons[4]; // Guesses of solution sol[0] = prims[1] * prims[1] + prims[2] * prims[2] + prims[3] * prims[3]; From ac37273a97a21e68b78db33ed155706d1293a245 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Wed, 23 Sep 2020 17:02:11 +0100 Subject: [PATCH 25/56] reversing changes made for debugging --- Project/CPU/Src/RK2.cc | 27 +++++++++------------------ Project/CPU/Src/main.cc | 6 +++--- Project/CPU/Src/srmhd.cc | 30 ++++++------------------------ Project/GPU/Src/main.cu | 6 +++--- Project/GPU/Src/srmhd.cu | 9 +-------- 5 files 
changed, 22 insertions(+), 56 deletions(-) diff --git a/Project/CPU/Src/RK2.cc b/Project/CPU/Src/RK2.cc index 4dd16a99..88633e98 100644 --- a/Project/CPU/Src/RK2.cc +++ b/Project/CPU/Src/RK2.cc @@ -38,12 +38,9 @@ void RK2::predictorStep(double * cons, double * prims, double * aux, double dt) // Cons2prims conversion for p1 estimate stage requires old values to start // the rootfind - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - //for (int i(d->is); i < d->ie; i++) { - //for (int j(d->js); j < d->je; j++) { - //for (int k(d->ks); k < d->ke; k++) { + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { for (int var(0); var < d->Naux; var++) { p1aux[ID(var, i, j, k)] = aux[ID(var, i, j, k)]; } @@ -59,12 +56,9 @@ void RK2::predictorStep(double * cons, double * prims, double * aux, double dt) // First stage approximation for (int var(0); var < d->Ncons; var++) { - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - //for (int i(d->is); i < d->ie; i++) { - //for (int j(d->js); j < d->je; j++) { - //for (int k(d->ks); k < d->ke; k++) { + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { p1cons[ID(var, i, j, k)] = cons[ID(var, i, j, k)] - dt * args1[ID(var, i, j, k)]; } } @@ -85,12 +79,9 @@ void RK2::correctorStep(double * cons, double * prims, double * aux, double dt) // Construct solution for (int var(0); var < d->Ncons; var++) { - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - //for (int i(d->is); i < d->ie; i++) { - //for (int j(d->js); j < d->je; j++) { - //for (int k(d->ks); k < d->ke; k++) { + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { cons[ID(var, i, j, k)] = 0.5 * (cons[ID(var, i, j, k)] + p1cons[ID(var, i, j, k)] - dt * args2[ID(var, i, j, k)]); } diff --git a/Project/CPU/Src/main.cc b/Project/CPU/Src/main.cc index c412eaa2..73c07e32 100644 --- a/Project/CPU/Src/main.cc +++ b/Project/CPU/Src/main.cc @@ -19,8 +19,8 @@ int main(int argc, char *argv[]) { const double MU(1000); // Set up domain int Ng(4); - int nx(16); - int ny(8); + int nx(64); + int ny(16); int nz(0); double xmin(-0.5); double xmax(0.5); @@ -28,7 +28,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.00005); + double endTime(0.0005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); diff --git a/Project/CPU/Src/srmhd.cc b/Project/CPU/Src/srmhd.cc index 48e1060a..53f9f1aa 100644 --- a/Project/CPU/Src/srmhd.cc +++ b/Project/CPU/Src/srmhd.cc @@ -404,14 +404,9 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) std::vector fails; // Vector of failed structs. Stores location of failed cons2prims cells. 
// Loop through domain solving and setting the prim and aux vars - //for (int i(d->is); i < d->ie; i++) { - //for (int j(d->js); j < d->je; j++) { - //for (int k(d->ks); k < d->ke; k++) { - - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { // Update possible values // Bx, By, Bz prims[ID(5, i, j, k)] = cons[ID(5, i, j, k)]; @@ -450,13 +445,6 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) // Solve residual = 0 info = __cminpack_func__(hybrd1) (&SRMHDresidual, &args, n, sol, res, tol, wa, lwa); - if (i==4 && j==4 && k==0){ - printf("CPU, IN LANE (%d,%d,%d)\n", i, j, k); - printf("prims: %f %f %f\n", prims[ID(3, i, j, k)], prims[ID(4, i, j, k)], prims[ID(5, i, j, k)]); - printf("cons: %f %f %f\n", cons[ID(3, i, j, k)], cons[ID(4, i, j, k)], cons[ID(5, i, j, k)]); - printf("args: %f %f %f\n", aux[ID(10, i, j, k)], aux[ID(11, i, j, k)], aux[ID(12, i, j, k)]); - printf("sol %f %f res %f %f\n", sol[0], sol[1], res[0], res[1]); - } // If root find fails, add failed cell to the list if (info!=1) { Failed fail = {i, j, k}; @@ -474,8 +462,6 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) -/* - // ################################## Smart guessing ########################### // // Are there any failures? if (fails.size() > 0) { @@ -519,14 +505,10 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) // } } } -*/ - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - //for (int i(d->is); i < d->ie; i++) { - //for (int j(d->js); j < d->je; j++) { - //for (int k(d->ks); k < d->ke; k++) { + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { // W aux[ID(1, i, j, k)] = 1 / sqrt(1 - solution[ID(0, i, j, k)]); // rho diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 1c3e72ca..e41e1f84 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -28,8 +28,8 @@ int main(int argc, char *argv[]) { // Set up domain int Ng(4); - int nx(16); - int ny(8); + int nx(64); + int ny(16); int nz(0); double xmin(-0.5); double xmax(0.5); @@ -37,7 +37,7 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.00005); + double endTime(0.0005); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); diff --git a/Project/GPU/Src/srmhd.cu b/Project/GPU/Src/srmhd.cu index 3c8f0446..17cd2683 100644 --- a/Project/GPU/Src/srmhd.cu +++ b/Project/GPU/Src/srmhd.cu @@ -401,6 +401,7 @@ void SRMHD::getPrimitiveVarsSingleCell(double *cons, double *prims, double *aux, } #if 0 +// CPU VERSION //! Solve for the primitive and auxiliary variables /*! 
Method outlined in Anton 2010, `Relativistic Magnetohydrodynamcis: @@ -905,14 +906,6 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do { printf("C2P single cell failed at lID %d, hybrd returns info=%d\n", lID, info); } - if (lID == 68){ - printf("IN LANE %d\n", lID); - printf("prims: %f %f %f\n", prims[3], prims[4], prims[5]); - printf("cons: %f %f %f\n", cons[3], cons[4], cons[5]); - printf("args: %f %f %f\n", aux[10], aux[11], aux[12]); - printf("GPU GAMMA %f\n", gamma); - printf("sol %f %f res %f %f\n", sol[0], sol[1], res[0], res[1]); - } // W aux[1] = 1 / sqrt(1 - sol[0]); // rho From 3ee82f2a642c6889caa364d840e98ea58fad9dac Mon Sep 17 00:00:00 2001 From: aniabrown Date: Wed, 23 Sep 2020 17:03:21 +0100 Subject: [PATCH 26/56] fixing typo that was causing bug in getPrimitiveVars --- Project/GPU/Src/main.cu | 4 ++-- Project/GPU/Src/srmhd.cu | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index e41e1f84..bb109cd3 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -43,8 +43,8 @@ int main(int argc, char *argv[]) { double sigma(0); bool output(true); int safety(180); - int nxRanks(1); - int nyRanks(1); + int nxRanks(2); + int nyRanks(2); int nzRanks(1); char * ptr(0); diff --git a/Project/GPU/Src/srmhd.cu b/Project/GPU/Src/srmhd.cu index 17cd2683..97c15d36 100644 --- a/Project/GPU/Src/srmhd.cu +++ b/Project/GPU/Src/srmhd.cu @@ -878,7 +878,7 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do // Bx, By, Bz prims[5] = cons[5]; prims[6] = cons[6]; - prims[7] = cons[8]; + prims[7] = cons[7]; // BS aux[10] = cons[5] * cons[1] + cons[6] * cons[2] + cons[7] * cons[3]; From adf419bf1c9fdd3595758daed8bbc7229bd6823e Mon Sep 17 00:00:00 2001 From: aniabrown Date: Mon, 28 Sep 2020 09:51:26 +0100 Subject: [PATCH 27/56] cleaning up makefile --- Project/GPU/Makefile | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 1801d2d2..a5100e7b 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -1,13 +1,18 @@ # Make file for the main function. 
Builds all modules # and links for main.cc -# Compiler - -CC_GPU = nvcc +# -------------- PARAMETERS FOR USERS TO EDIT -------------------- +# if USE_MPI=1, need to use parallel versions of objects, such as ParallelEnv, ParallelSaveData etc USE_MPI=1 -MPI_LIBRARY_PATH = /local/software/openmpi/3.0.0/gcc-cuda8.0 +# find location of MPI libraries to link on your local system using 'mpicc -show' +MPI_FLAGS = -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu -lmpichcxx -lmpich + +# -------------- END PARAMETERS FOR USERS TO EDIT -------------------- + +# Compiler +CC_GPU = nvcc # Module directory MODULE_DIR = ./Src @@ -25,7 +30,6 @@ RTFIND_INC_DIR = ./CminpackLibrary/Include RTFIND_SRC_DIR = ./CminpackLibrary/Src #MPI_FLAGS = -lmpi -L${MPI_LIBRARY_PATH}/lib -I${MPI_LIBRARY_PATH}/include -MPI_FLAGS = -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu -lmpichcxx -lmpich # C++ compiler flags CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 -lineinfo From 192749eac158d934ffccc0a1a597442c0b06dbb0 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Wed, 30 Sep 2020 12:01:30 +0100 Subject: [PATCH 28/56] updated getPrimitiveVars on SRMHD to only send prims and aux required for initial guess to GPU --- Project/GPU/Include/C2PArgs.h | 3 ++- Project/GPU/Src/C2PArgs.cu | 9 ++++++-- Project/GPU/Src/srmhd.cu | 41 +++++++++++++++-------------------- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/Project/GPU/Include/C2PArgs.h b/Project/GPU/Include/C2PArgs.h index 90bd6989..0a6c2aa9 100644 --- a/Project/GPU/Include/C2PArgs.h +++ b/Project/GPU/Include/C2PArgs.h @@ -21,7 +21,8 @@ class C2PArgs bpg, //!< Blocks per grid cellMem, //!< Memory required for one cell Nstreams, //!< Number of CUDA streams - streamWidth; //!< Number of cells in each stream + streamWidth, //!< Number of cells in each stream + nGuessSRMHD; //!< Number of elements required for the initial guess per cell for the SRMHD model double //@{ ** cons_d, diff --git a/Project/GPU/Src/C2PArgs.cu b/Project/GPU/Src/C2PArgs.cu index 8c4e0aed..9af701ea 100644 --- a/Project/GPU/Src/C2PArgs.cu +++ b/Project/GPU/Src/C2PArgs.cu @@ -11,6 +11,11 @@ C2PArgs::C2PArgs(Data * data) : data(data) // Determine the memory required for one cell cellMem = (d->Ncons + d->Nprims + d->Naux) * sizeof(double); + // Number of values sent to getPrimitiveValues for initial guess. We allocate enough room for SRMHD, which + // requires more values than SRRMHD. + //! 
TODO -- create separate object for SRRMHD, which only allocates the one value needed per cell + nGuessSRMHD = 5; + tpb = d->tpb; bpg = d->bpg; streamWidth = tpb * bpg; @@ -26,7 +31,7 @@ C2PArgs::C2PArgs(Data * data) : data(data) gpuErrchk( cudaHostAlloc((void **)&cons_h, d->Ncons * d->Ncells * sizeof(double), cudaHostAllocPortable) ); gpuErrchk( cudaHostAlloc((void **)&prims_h, d->Nprims * d->Ncells * sizeof(double), cudaHostAllocPortable) ); gpuErrchk( cudaHostAlloc((void **)&aux_h, d->Naux * d->Ncells * sizeof(double), cudaHostAllocPortable) ); - gpuErrchk( cudaHostAlloc((void **)&guess_h, d->Ncells * sizeof(double), cudaHostAllocPortable) ); + gpuErrchk( cudaHostAlloc((void **)&guess_h, d->Ncells * nGuessSRMHD * sizeof(double), cudaHostAllocPortable) ); @@ -34,7 +39,7 @@ C2PArgs::C2PArgs(Data * data) : data(data) gpuErrchk( cudaMalloc((void **)&cons_d[i], d->Ncons * streamWidth * sizeof(double)) ); gpuErrchk( cudaMalloc((void **)&prims_d[i], d->Nprims * streamWidth * sizeof(double)) ); gpuErrchk( cudaMalloc((void **)&aux_d[i], d->Naux * streamWidth * sizeof(double)) ); - gpuErrchk( cudaMalloc((void **)&guess_d[i], streamWidth * sizeof(double)) ); + gpuErrchk( cudaMalloc((void **)&guess_d[i], nGuessSRMHD * streamWidth * sizeof(double)) ); } // Create streams diff --git a/Project/GPU/Src/srmhd.cu b/Project/GPU/Src/srmhd.cu index 97c15d36..2e1aa649 100644 --- a/Project/GPU/Src/srmhd.cu +++ b/Project/GPU/Src/srmhd.cu @@ -21,6 +21,7 @@ #define IDCons(var, idx, jdx, kdx) ( (var) + (idx)*(d->Ncons)*(d->Nz)*(d->Ny) + (jdx)*(d->Ncons)*(d->Nz) + (kdx)*(d->Ncons) ) #define IDPrims(var, idx, jdx, kdx) ( (var) + (idx)*(d->Nprims)*(d->Nz)*(d->Ny) + (jdx)*(d->Nprims)*(d->Nz) + (kdx)*(d->Nprims) ) #define IDAux(var, idx, jdx, kdx) ( (var) + (idx)*(d->Naux)*(d->Nz)*(d->Ny) + (jdx)*(d->Naux)*(d->Nz) + (kdx)*(d->Naux) ) +#define IDGuess(guessId, Nguess, idx, jdx, kdx) ( (guessId) + (idx)*(Nguess)*(d->Nz)*(d->Ny) + (jdx)*(Nguess)*(d->Nz) + (kdx)*(Nguess) ) __device__ int SRMHDresidualParallel(void *p, int n, const double *x, double *fvec, int iflag); @@ -28,7 +29,7 @@ int SRMHDresidualParallel(void *p, int n, const double *x, double *fvec, int ifl int SRMHDresidual(void *p, int n, const double *x, double *fvec, int iflag); __global__ -static void getPrimitiveVarsParallel(double *cons, double *prims, double *aux, double *guess, int stream, double gamma, double sigma, int Ncons, int Nprims, int Naux, int origWidth, int streamWidth); +static void getPrimitiveVarsParallel(double *cons, double *prims, double *aux, double *guess, int stream, double gamma, double sigma, int Ncons, int Nprims, int Naux, int nGuess, int origWidth, int streamWidth); SRMHD::SRMHD() : Model() { @@ -771,22 +772,16 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) } } + // Copy 5 values required for the initial guess. 
They are stored in the following format: + // guess_h = {prims[0], prims[1], prims[2], prims[3], aux[0]} for (int i(0); i < d->Nx; i++) { for (int j(0); j < d->Ny; j++) { for (int k(0); k < d->Nz; k++) { - for (int var(0); var < d->Nprims; var++) { - c2pArgs->prims_h[IDPrims(var, i, j, k)] = prims[ID(var, i, j, k)]; - } - } - } - } - - for (int i(0); i < d->Nx; i++) { - for (int j(0); j < d->Ny; j++) { - for (int k(0); k < d->Nz; k++) { - for (int var(0); var < d->Naux; var++) { - c2pArgs->aux_h[IDAux(var, i, j, k)] = aux[ID(var, i, j, k)]; - } + c2pArgs->guess_h[IDGuess(0, c2pArgs->nGuessSRMHD, i, j, k)] = prims[ID(0, i, j, k)]; + c2pArgs->guess_h[IDGuess(1, c2pArgs->nGuessSRMHD, i, j, k)] = prims[ID(1, i, j, k)]; + c2pArgs->guess_h[IDGuess(2, c2pArgs->nGuessSRMHD, i, j, k)] = prims[ID(2, i, j, k)]; + c2pArgs->guess_h[IDGuess(3, c2pArgs->nGuessSRMHD, i, j, k)] = prims[ID(3, i, j, k)]; + c2pArgs->guess_h[IDGuess(4, c2pArgs->nGuessSRMHD, i, j, k)] = aux[ID(0, i, j, k)]; } } } @@ -804,16 +799,15 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) // Send stream's data gpuErrchk( cudaMemcpyAsync(c2pArgs->cons_d[i], c2pArgs->cons_h + lcell*d->Ncons, inMemsize*d->Ncons, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); - gpuErrchk( cudaMemcpyAsync(c2pArgs->prims_d[i], c2pArgs->prims_h + lcell*d->Nprims, inMemsize*d->Nprims, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); - gpuErrchk( cudaMemcpyAsync(c2pArgs->aux_d[i], c2pArgs->aux_h + lcell*d->Naux, inMemsize*d->Naux, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); - gpuErrchk( cudaMemcpyAsync(c2pArgs->guess_d[i], c2pArgs->guess_h + lcell, inMemsize, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); + gpuErrchk( cudaMemcpyAsync(c2pArgs->guess_d[i], c2pArgs->guess_h + lcell*c2pArgs->nGuessSRMHD, inMemsize*c2pArgs->nGuessSRMHD, cudaMemcpyHostToDevice, c2pArgs->stream[i]) ); // Call kernel and operate on data + //! TODO -- remove prims and aux -- all values that are needed are contained in guess getPrimitiveVarsParallel <<< c2pArgs->bpg, c2pArgs->tpb, c2pArgs->tpb * c2pArgs->cellMem, c2pArgs->stream[i] >>> (c2pArgs->cons_d[i], c2pArgs->prims_d[i], c2pArgs->aux_d[i], c2pArgs->guess_d[i], i, d->gamma, d->sigma, d->Ncons, - d->Nprims, d->Naux, c2pArgs->streamWidth, width); + d->Nprims, d->Naux, c2pArgs->nGuessSRMHD, c2pArgs->streamWidth, width); // Copy all data back @@ -845,7 +839,7 @@ void SRMHD::getPrimitiveVars(double *cons, double *prims, double *aux) // SRRMHD::getPrimitiveVars is required, i.e. all cells need to be found. // */ __global__ -static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, double *streamAux, double *guess, int stream, double gamma, double sigma, int Ncons, int Nprims, int Naux, int origWidth, int streamWidth) +static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, double *streamAux, double *streamGuess, int stream, double gamma, double sigma, int Ncons, int Nprims, int Naux, int Nguess, int origWidth, int streamWidth) { // First need thread indicies const int tID(threadIdx.x); //!< thread index (in block) @@ -856,6 +850,8 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do double * cons = &sharedArray[tID * (Ncons + Nprims + Naux)]; double * prims = &cons[Ncons]; double * aux = &prims[Nprims]; + //! 
TODO -- could probably put guess in registers rather than shared memory + double * guess = &aux[Nguess]; // Hybrd1 set-up double sol[2]; // Guess and solution vector @@ -868,8 +864,7 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do // Load conserved vector into shared memory, and the initial guess for (int i(0); i < Ncons; i++) cons[i] = streamCons[lID * Ncons + i]; - for (int i(0); i < Nprims; i++) prims[i] = streamPrims[lID * Nprims + i]; - for (int i(0); i < Naux; i++) aux[i] = streamAux[lID * Naux + i]; + for (int i(0); i < Nguess; i++) guess[i] = streamGuess[lID * Nguess + i]; @@ -897,8 +892,8 @@ static void getPrimitiveVarsParallel(double *streamCons, double *streamPrims, do GPVAArgs.tau = cons[4]; // Guesses of solution - sol[0] = prims[1] * prims[1] + prims[2] * prims[2] + prims[3] * prims[3]; - sol[1] = prims[0] * aux[0] / (1 - sol[0]); + sol[0] = guess[1] * guess[1] + guess[2] * guess[2] + guess[3] * guess[3]; + sol[1] = guess[0] * guess[4] / (1 - sol[0]); // Solve residual = 0 From d83af685bf8e933115bd6e174afcd50f4d7d894b Mon Sep 17 00:00:00 2001 From: aniabrown Date: Wed, 30 Sep 2020 13:34:17 +0100 Subject: [PATCH 29/56] adding back multiple streams to GPU version --- Project/GPU/Src/simData.cu | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Project/GPU/Src/simData.cu b/Project/GPU/Src/simData.cu index 0e829e03..40e79c13 100644 --- a/Project/GPU/Src/simData.cu +++ b/Project/GPU/Src/simData.cu @@ -74,9 +74,7 @@ Data::Data(int nx, int ny, int nz, cudaDeviceSetLimit(cudaLimitStackSize, 2048); // Needed for SRMHS and SSP2, hybrd called recursively meaning nvcc does not know the stack size at compile time. Manually set. // Determine the number of GPU streams - //Nstreams = Ncells / (tpb * bpg) + 1; - //! TODO -- for debugging. Remove - Nstreams = 1; + Nstreams = Ncells / (tpb * bpg) + 1; if (false) { From 01c58011c13c3d3b0080a371c5cac883359b96bd Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Wed, 28 Oct 2020 15:04:01 +0000 Subject: [PATCH 30/56] converted half of non-MPI GPU tests to use platformEnv API --- Tests/GPU/Makefile | 46 +++++++++++++-------- Tests/GPU/Src/test_boundaryConds.cu | 15 ++++--- Tests/GPU/Src/test_initFunc.cu | 34 +++++++++++----- Tests/GPU/Src/test_simulation.cu | 28 +++++++++---- Tests/GPU/Src/test_srmhd.cu | 46 ++++++++++++--------- Tests/GPU/Src/test_twoFluidEMHD.cu | 62 ++++++++++++++++++----------- 6 files changed, 149 insertions(+), 82 deletions(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 8f8bfd1a..9dbf306b 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -17,6 +17,10 @@ # Compiler CC = nvcc +MPICC = mpic++ + +#use `mpic++ -show` to find library and include flags +MPI_FLAGS = -I/local/software/mpich/3.2.1/gcc/include -L/local/software/mpich/3.2.1/gcc/lib -lmpi -lmpicxx # Points to the root of Google Test, relative to where this file is. # Remember to tweak this if you move this file. @@ -57,11 +61,14 @@ TESTS = test_simulation \ test_cminpack \ test_boundaryConds \ test_twoFluidEMHD \ - test_srrmhd \ - test_fvs \ - test_id \ - test_rk2 \ - test_imex +# test_srrmhd \ +# test_fvs \ +# test_id \ +# test_rk2 \ +# test_imex + + +PARALLEL_TESTS = test_parallel_srmhd \ # All Google Test headers. Usually you shouldn't change this # definition. 
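# A note on the MPI_FLAGS introduced above: `mpic++ -show` (or `mpicc -show`) prints
# the compile/link command the MPI wrapper would run. Its output is system specific;
# a hypothetical example for an MPICH install could look like
#   g++ -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu -lmpichcxx -lmpich
# and only the -I, -L and -l parts need copying into MPI_FLAGS, so that sources
# which include mpi.h can be compiled and linked without relying on the wrapper.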
@@ -155,7 +162,7 @@ test_rk2.o : $(TEST_DIR)/test_rk2.cu $(INC_DIR)/RK2.h \ $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_rk2.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_rk2 : test_rk2.o C2PArgs.o srrmhd.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o saveData.o $(RTFIND_OBJS) gtest_main.a +test_rk2 : test_rk2.o C2PArgs.o srrmhd.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ # Explicit RK split integrator @@ -174,12 +181,19 @@ test_fvs.o : $(TEST_DIR)/test_fvs.cu \ $(INC_DIR)/srmhd.h $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_fvs.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_fvs : srmhd.o twoFluidEMHD.o test_fvs.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o rkSplit.o initFunc.o simulation.o weno.o saveData.o $(RTFIND_OBJS) gtest_main.a +test_fvs : srmhd.o C2PArgs.o twoFluidEMHD.o test_fvs.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o rkSplit.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ # Save data (required for simulation.evolve) -saveData.o : $(MODULE_DIR)/saveData.cu $(INC_DIR)/saveData.h - @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/saveData.cu -I$(INC_DIR) +serialSaveData.o : $(MODULE_DIR)/serialSaveData.cu $(INC_DIR)/serialSaveData.h + @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/serialSaveData.cu -I$(INC_DIR) + +parallelSaveData.o : $(MODULE_DIR)/parallelSaveData.cu $(INC_DIR)/parallelSaveData.h + @$(MPICC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/parallelSaveData.cu -I$(INC_DIR) + +# Platform env +serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/serialEnv.cu -I$(INC_DIR) # Simulation simulation.o : $(MODULE_DIR)/simulation.cu $(INC_DIR)/simulation.h @@ -189,7 +203,7 @@ test_simulation.o : $(TEST_DIR)/test_simulation.cu \ $(INC_DIR)/simulation.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_simulation.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_simulation : simulation.o test_simulation.o simData.o fluxVectorSplitting.o srmhd.o boundaryConds.o initFunc.o RK2.o rkSplit.o saveData.o $(RTFIND_OBJS) weno.o gtest_main.a +test_simulation : simulation.o test_simulation.o simData.o fluxVectorSplitting.o srmhd.o C2PArgs.o boundaryConds.o initFunc.o RK2.o rkSplit.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) weno.o gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ @@ -201,7 +215,7 @@ test_srmhd.o : $(TEST_DIR)/test_srmhd.cu \ $(INC_DIR)/srmhd.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_srmhd.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_srmhd : srmhd.o test_srmhd.o simData.o fluxVectorSplitting.o initFunc.o simulation.o weno.o saveData.o $(RTFIND_OBJS) gtest_main.a +test_srmhd : srmhd.o C2PArgs.o test_srmhd.o simData.o boundaryConds.o fluxVectorSplitting.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ @@ -213,7 +227,7 @@ 
test_srrmhd.o : $(TEST_DIR)/test_srrmhd.cu \ $(INC_DIR)/srrmhd.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_srrmhd.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_srrmhd : srrmhd.o C2PArgs.o test_srrmhd.o simData.o fluxVectorSplitting.o initFunc.o simulation.o weno.o saveData.o $(RTFIND_OBJS) gtest_main.a +test_srrmhd : srrmhd.o C2PArgs.o test_srrmhd.o simData.o fluxVectorSplitting.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ # TwoFluidEMHD Model @@ -225,7 +239,7 @@ test_twoFluidEMHD.o : $(TEST_DIR)/test_twoFluidEMHD.cu \ $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_twoFluidEMHD.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_twoFluidEMHD : twoFluidEMHD.o test_twoFluidEMHD.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o rkSplit.o initFunc.o simulation.o weno.o saveData.o $(RTFIND_OBJS) gtest_main.a +test_twoFluidEMHD : twoFluidEMHD.o test_twoFluidEMHD.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o rkSplit.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ @@ -238,7 +252,7 @@ test_initFunc.o : $(TEST_DIR)/test_initFunc.cu $(INC_DIR)/initFunc.h \ $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_initFunc.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_initFunc : initFunc.o test_initFunc.o simData.o simulation.o twoFluidEMHD.o srmhd.o saveData.o $(RTFIND_OBJS) weno.o gtest_main.a +test_initFunc : initFunc.o test_initFunc.o boundaryConds.o simData.o simulation.o twoFluidEMHD.o srmhd.o C2PArgs.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) weno.o gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ @@ -258,7 +272,7 @@ test_boundaryConds.o : $(TEST_DIR)/test_boundaryConds.cu $(INC_DIR)/boundaryCond $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_boundaryConds.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_boundaryConds : test_boundaryConds.o boundaryConds.o simData.o srmhd.o simulation.o initFunc.o weno.o saveData.o $(RTFIND_OBJS) gtest_main.a +test_boundaryConds : test_boundaryConds.o boundaryConds.o simData.o srmhd.o C2PArgs.o simulation.o initFunc.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ # ID macro @@ -284,5 +298,5 @@ IMEX3Args.o : $(MODULE_DIR)/IMEX3Args.cu $(INC_DIR)/IMEX3Args.h test_imex.o : $(TEST_DIR)/test_imex.cu @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_imex.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_imex : IMEX2Args.o C2PArgs.o SSP2.o IMEX3Args.o SSP3.o simData.o srrmhd.o srmhd.o twoFluidEMHD.o initFunc.o boundaryConds.o saveData.o simulation.o test_imex.o $(RTFIND_OBJS) fluxVectorSplitting.o weno.o gtest_main.a +test_imex : IMEX2Args.o C2PArgs.o SSP2.o IMEX3Args.o SSP3.o simData.o srrmhd.o srmhd.o C2PArgs.o twoFluidEMHD.o initFunc.o boundaryConds.o serialSaveData.o serialEnv.o simulation.o test_imex.o $(RTFIND_OBJS) fluxVectorSplitting.o weno.o gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ diff --git a/Tests/GPU/Src/test_boundaryConds.cu b/Tests/GPU/Src/test_boundaryConds.cu index c4613d75..517f5200 100644 --- a/Tests/GPU/Src/test_boundaryConds.cu +++ 
b/Tests/GPU/Src/test_boundaryConds.cu @@ -4,14 +4,16 @@ #include "srmhd.h" #include "simulation.h" #include "initFunc.h" +#include "serialEnv.h" TEST(Periodic, periodicBoundaryConditions) { - Data d(10, 10, 10, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.4); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(10, 10, 10, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.4, &env); SRMHD model(&d); - Simulation sim(&d); - OTVortexSingleFluid init(&d); Periodic bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); // Set the values of the cons vars to something simple for (int var(0); var < d.Ncons; var++) { @@ -190,11 +192,12 @@ TEST(Periodic, periodicBoundaryConditions) TEST(Outflow, outflowBoundaryConditions) { - Data d(10, 10, 10, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.4); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(10, 10, 10, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.4, &env); SRMHD model(&d); - Simulation sim(&d); - OTVortexSingleFluid init(&d); Outflow bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); // Set the values of the cons vars to something simple for (int var(0); var < d.Ncons; var++) { diff --git a/Tests/GPU/Src/test_initFunc.cu b/Tests/GPU/Src/test_initFunc.cu index 8902a36a..798b9568 100644 --- a/Tests/GPU/Src/test_initFunc.cu +++ b/Tests/GPU/Src/test_initFunc.cu @@ -1,16 +1,20 @@ #include "gtest/gtest.h" #include "simData.h" #include "initFunc.h" +#include "boundaryConds.h" #include "simulation.h" #include "srmhd.h" #include "twoFluidEMHD.h" +#include "serialEnv.h" #include -TEST(InitialFunc, baseConstructor) +TEST(InitialFunc, BaseConstructor) { - Data data(100, 10, 10, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data data(100, 10, 10, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8, &env); SRMHD model(&data); - Simulation sim(&data); + Periodic bcs(&data); + Simulation sim(&data, &env); InitialFunc init(&data); EXPECT_EQ(data.prims[0], 0); @@ -40,9 +44,11 @@ TEST(InitialFunc, baseConstructor) TEST(InitialFunc, OTVortexSingleFluidFunc) { - Data data(100, 10, 0, 0, 1, 0, 1, -0.1, 0.1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data data(100, 10, 0, 0, 1, 0, 1, -0.1, 0.1, 0.8, &env); SRMHD model(&data); - Simulation sim(&data); + Periodic bcs(&data); + Simulation sim(&data, &env); OTVortexSingleFluid init(&data); EXPECT_NEAR(data.prims[data.id(0, 0, 0, 0)], 0.2210485321, 0.0000000001); @@ -62,24 +68,30 @@ TEST(InitialFunc, OTVortexSingleFluidFunc) TEST(InitialFunc, BrioWuTwoFluidFunc) { // Discontinuity in x direction - Data dx(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data dx(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8, &env); TwoFluidEMHD modelx(&dx); - Simulation simx(&dx); + Periodic bcsx(&dx); + Simulation simx(&dx, &env); BrioWuTwoFluid initx(&dx, 0); // Discontinuity in y direction - Data dy(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env2(0, NULL, 1, 1, 1); + Data dy(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8, &env2); TwoFluidEMHD modely(&dy); - Simulation simy(&dy); + Periodic bcsy(&dy); + Simulation simy(&dy, &env2); BrioWuTwoFluid inity(&dy, 1); // Discontinuity in z direction - Data dz(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env3(0, NULL, 1, 1, 1); + Data dz(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8, &env3); TwoFluidEMHD modelz(&dz); - Simulation simz(&dz); + Periodic bcsz(&dz); + Simulation simz(&dz, &env3); BrioWuTwoFluid initz(&dz, 2); for (int var(0); var < dx.Ncons; var++) { diff --git a/Tests/GPU/Src/test_simulation.cu b/Tests/GPU/Src/test_simulation.cu index 91e0fd1e..4426d146 100644 --- a/Tests/GPU/Src/test_simulation.cu +++ 
b/Tests/GPU/Src/test_simulation.cu @@ -1,5 +1,5 @@ #include "gtest/gtest.h" -#include "saveData.h" +#include "serialSaveData.h" #include "simData.h" #include "simulation.h" #include "initFunc.h" @@ -7,18 +7,28 @@ #include "boundaryConds.h" #include "rkSplit.h" #include "fluxVectorSplitting.h" +#include "serialEnv.h" #include #include -Data data(100, 10, 2, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8); TEST(Simulation, dataInitialisation) { + SerialEnv envNoModel(0, NULL, 1, 1, 1); + Data dataNoModel(100, 10, 2, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8, &envNoModel); + Periodic bcsNoModel(&dataNoModel); + EXPECT_THROW( Simulation sim(&dataNoModel, &envNoModel), std::runtime_error); - EXPECT_THROW( Simulation sim(&data), std::runtime_error); + SerialEnv envNoBcs(0, NULL, 1, 1, 1); + Data dataNoBcs(100, 10, 2, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8, &envNoBcs); + SRMHD modelNoBcs(&dataNoBcs); + EXPECT_THROW( Simulation sim(&dataNoBcs, &envNoBcs), std::runtime_error); + SerialEnv env(0, NULL, 1, 1, 1); + Data data(100, 10, 2, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8, &env); SRMHD model(&data); - Simulation sim(&data); + Periodic bcs(&data); + Simulation sim(&data, &env); // Check standard data EXPECT_EQ(sim.data->nx, 100); @@ -66,14 +76,15 @@ TEST(Simulation, dataInitialisation) //! Check that the fields dont change if the system if homogenous TEST(Simulation, equilibriumSimulation) { - Data data(30, 30, 10, 0, 1, 0, 1, 0, 1, 0.1); + SerialEnv env(0, NULL, 1, 1, 1); + Data data(30, 30, 10, 0, 1, 0, 1, 0, 1, 0.1, &env); SRMHD model(&data); FVS fluxMethod(&data, &model); - Simulation sim(&data); - OTVortexSingleFluid init(&data); Periodic bcs(&data); + Simulation sim(&data, &env); + OTVortexSingleFluid init(&data); RKSplit timeInt(&data, &model, &bcs, &fluxMethod); - SaveData save(&data); + SerialSaveData save(&data, &env); for (int i(0); i < data.Nx; i++) { for (int j(0); j < data.Ny; j++) { @@ -108,4 +119,5 @@ TEST(Simulation, equilibriumSimulation) } } } + } diff --git a/Tests/GPU/Src/test_srmhd.cu b/Tests/GPU/Src/test_srmhd.cu index 75c8eacc..2728b0d9 100644 --- a/Tests/GPU/Src/test_srmhd.cu +++ b/Tests/GPU/Src/test_srmhd.cu @@ -1,9 +1,11 @@ #include "gtest/gtest.h" #include "srmhd.h" +#include "boundaryConds.h" #include "simulation.h" #include "simData.h" #include "initFunc.h" #include "fluxVectorSplitting.h" +#include "serialEnv.h" #include #include #include @@ -12,7 +14,8 @@ TEST(SRMHD, Constructor) { - Data d(100, 10, 0, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(100, 10, 0, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8, &env); SRMHD model(&d); EXPECT_EQ(d.Ncons, 9); EXPECT_EQ(d.Nprims, 8); @@ -26,13 +29,14 @@ TEST(SRMHD, Constructor) TEST(SRMHD, FluxVectorSplittingStationary) { - + double tol(1.0e-15); // Set up - Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0, 0.5, 4, 5.0/3.0, 1000.0, 0.5); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0, &env, 0.5, 4, 5.0/3.0, 1000.0, 0.5); SRMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); - + Periodic bcs(&d); + Simulation sim(&d, &env); // Set state to stationary equilibrium state for (int i(0); i < d.Nx; i++) { for (int j(0); j < d.Ny; j++) { @@ -45,7 +49,6 @@ TEST(SRMHD, FluxVectorSplittingStationary) d.prims[d.id(5, i, j, k)] = 0.0; d.prims[d.id(6, i, j, k)] = 0.0; d.prims[d.id(7, i, j, k)] = 0.0; - d.prims[d.id(8, i, j, k)] = 0.0; } } } @@ -59,7 +62,7 @@ TEST(SRMHD, FluxVectorSplittingStationary) for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - 
EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, tol); } } } @@ -70,7 +73,7 @@ TEST(SRMHD, FluxVectorSplittingStationary) for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, tol); } } } @@ -81,7 +84,7 @@ TEST(SRMHD, FluxVectorSplittingStationary) for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, tol); } } } @@ -95,9 +98,11 @@ TEST(SRMHD, SourceTerm) { // Set up - Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0, 0.5, 4, 5.0/3.0, 1000.0, 0.5); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0, &env, 0.5, 4, 5.0/3.0, 1000.0, 0.5); SRMHD model(&d); - Simulation sim(&d); + Periodic bcs(&d); + Simulation sim(&d, &env); // Set cons to something for (int i(0); i < d.Nx; i++) { @@ -130,12 +135,16 @@ TEST(SRMHD, SourceTerm) TEST(SRMHD, Prims2Cons2Prims) { const double tol = 1.49011612e-8; // Tolerance of rootfinder - Data d(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0); - Data d2(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0); + SerialEnv env(0, NULL, 1, 1, 1); + SerialEnv env2(0, NULL, 1, 1, 1); + Data d(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0, &env); + Data d2(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0, &env2); SRMHD model(&d); SRMHD model2(&d2); - Simulation sim(&d); - Simulation sim2(&d2); + Periodic bcs(&d); + Periodic bcs2(&d2); + Simulation sim(&d, &env); + Simulation sim2(&d2, &env2); OTVortexSingleFluid init(&d); OTVortexSingleFluid init2(&d2); @@ -146,7 +155,6 @@ TEST(SRMHD, Prims2Cons2Prims) model2.getPrimitiveVars(d2.cons, d2.prims, d2.aux); - for (int var(0); var < d.Nprims; var++) { for (int i(0); i < d.Nx; i++) { for (int j(0); j < d.Ny; j++) { @@ -218,9 +226,11 @@ TEST(SRMHD, Prims2Cons2Prims) TEST(SRMHD, PrimsToAll) { // Set up - Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0, &env); SRMHD model(&d); - Simulation sim(&d); + Periodic bcs(&d); + Simulation sim(&d, &env); InitialFunc init(&d); // Set cons and aux vars to rubbish so we know they have changed, and diff --git a/Tests/GPU/Src/test_twoFluidEMHD.cu b/Tests/GPU/Src/test_twoFluidEMHD.cu index 1e162f5b..2ae22746 100644 --- a/Tests/GPU/Src/test_twoFluidEMHD.cu +++ b/Tests/GPU/Src/test_twoFluidEMHD.cu @@ -2,19 +2,21 @@ #include "twoFluidEMHD.h" #include "simulation.h" #include "simData.h" +#include "serialSaveData.h" #include "initFunc.h" #include "rkSplit.h" #include "fluxVectorSplitting.h" +#include "serialEnv.h" #include #include #include - -/* ######################### Test model constructor ########################*/ + /* ######################### Test model constructor ########################*/ TEST(TwoFluidEMHD, Constructor) { - Data d(100, 10, 0, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(100, 10, 0, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8, &env); TwoFluidEMHD model(&d); EXPECT_EQ(d.Ncons, 18); EXPECT_EQ(d.Nprims, 16); @@ -40,37 +42,40 @@ TEST(TwoFluidEMHD, Constructor) TEST(TwoFluidEMHD, FluxFunctionIsConsistentUponRotation) { // Discontinuity in x direction - Data dx(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data dx(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.8, &env); TwoFluidEMHD modelx(&dx); FVS fluxMethodx(&dx, &modelx); - Simulation simx(&dx); - 
BrioWuTwoFluid initx(&dx, 0, 0); Outflow bcsx(&dx); + Simulation simx(&dx, &env); + BrioWuTwoFluid initx(&dx, 0, 0); RKSplit timeIntx(&dx, &modelx, &bcsx, &fluxMethodx); - SaveData save(&dx); + SerialSaveData save(&dx, &env); simx.set(&initx, &modelx, &timeIntx, &bcsx, &fluxMethodx, &save); printf("Stepping x-discontinuity...\n"); simx.updateTime(); // Discontinuity in y direction - Data dy(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env2(0, NULL, 1, 1, 1); + Data dy(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.8, &env2); TwoFluidEMHD modely(&dy); FVS fluxMethody(&dy, &modely); - Simulation simy(&dy); - BrioWuTwoFluid inity(&dy, 1, 0); Outflow bcsy(&dy); + Simulation simy(&dy, &env2); + BrioWuTwoFluid inity(&dy, 1, 0); RKSplit timeInty(&dy, &modely, &bcsy, &fluxMethody); simy.set(&inity, &modely, &timeInty, &bcsy, &fluxMethody, &save); printf("Stepping y-discontinuity...\n"); simy.updateTime(); // Discontinuity in z direction - Data dz(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env3(0, NULL, 1, 1, 1); + Data dz(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.8, &env3); TwoFluidEMHD modelz(&dz); FVS fluxMethodz(&dz, &modelz); - Simulation simz(&dz); - BrioWuTwoFluid initz(&dz, 2, 0); Outflow bcsz(&dz); + Simulation simz(&dz, &env3); + BrioWuTwoFluid initz(&dz, 2, 0); RKSplit timeIntz(&dz, &modelz, &bcsz, &fluxMethodz); simz.set(&initz, &modelz, &timeIntz, &bcsz, &fluxMethodz, &save); printf("Stepping z-discontinuity...\n"); @@ -80,6 +85,7 @@ TEST(TwoFluidEMHD, FluxFunctionIsConsistentUponRotation) for (int i(dx.Ng); i < dx.Nx-dx.Ng; i++) { for (int j(dy.Ng); j < dy.Ny-dy.Ng; j++) { for (int k(dz.Ng); k < dz.Nz-dz.Ng; k++) { + // Swap x and y EXPECT_NEAR(dx.cons[dx.id(0, i, j, k)], dy.cons[dy.id(0, j, i, k)], 1e-15); EXPECT_NEAR(dx.cons[dx.id(1, i, j, k)], dy.cons[dy.id(2, j, i, k)], 1e-15); @@ -101,7 +107,6 @@ TEST(TwoFluidEMHD, FluxFunctionIsConsistentUponRotation) EXPECT_NEAR(dx.cons[dx.id(17, i, j, k)], dy.cons[dy.id(17, j, i, k)], 1e-15); - // Swap x and z EXPECT_NEAR(dx.cons[dx.id(0, i, j, k)], dz.cons[dz.id(0, k, j, i)], 1e-15); EXPECT_NEAR(dx.cons[dx.id(1, i, j, k)], dz.cons[dz.id(3, k, j, i)], 1e-15); @@ -122,6 +127,7 @@ TEST(TwoFluidEMHD, FluxFunctionIsConsistentUponRotation) EXPECT_NEAR(dx.cons[dx.id(16, i, j, k)], dz.cons[dz.id(16, k, j, i)], 1e-15); EXPECT_NEAR(dx.cons[dx.id(17, i, j, k)], dz.cons[dz.id(17, k, j, i)], 1e-15); + // Swap y and z EXPECT_NEAR(dy.cons[dy.id(0, i, j, k)], dz.cons[dz.id(0, i, k, j)], 1e-15); EXPECT_NEAR(dy.cons[dy.id(1, i, j, k)], dz.cons[dz.id(1, i, k, j)], 1e-15); @@ -150,14 +156,18 @@ TEST(TwoFluidEMHD, FluxFunctionIsConsistentUponRotation) TEST(TwoFluidEMHD, Prims2Cons2Prims) { const double tol = 1.49011612e-8; // Tolerance of rootfinder - Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8, &env); TwoFluidEMHD model(&d); - Simulation sim(&d); + Periodic bcs(&d); + Simulation sim(&d, &env); BrioWuTwoFluid init(&d, 0, 0); - Data d2(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env2(0, NULL, 1, 1, 1); + Data d2(10, 10, 10, 0, 1, 0, 1, 0, 1, 0.8, &env2); TwoFluidEMHD model2(&d2); - Simulation sim2(&d2); + Periodic bcs2(&d2); + Simulation sim2(&d2, &env2); BrioWuTwoFluid init2(&d2, 0, 0); model2.primsToAll(d2.cons, d2.prims, d2.aux); @@ -231,10 +241,12 @@ TEST(TwoFluidEMHD, FluxVectorSplittingStationary) { // Set up - Data d(6, 6, 6, 0, 1, 0, 1, 0, 1, 1.0, 0.5, 4, 5.0/3.0, 1000.0, 0.5); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(6, 6, 6, 0, 1, 0, 1, 0, 1, 1.0, &env, 0.5, 4, 5.0/3.0, 1000.0, 0.5); 
TwoFluidEMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); + Periodic bcs(&d); + Simulation sim(&d, &env); // Set state to stationary equilibrium state for (int i(0); i < d.Nx; i++) { @@ -252,6 +264,7 @@ TEST(TwoFluidEMHD, FluxVectorSplittingStationary) // System is stationary, there should be zero flux // x-direction + model.fluxVector(d.cons, d.prims, d.aux, d.f, 0); fluxMethod.fluxReconstruction(d.cons, d.prims, d.aux, d.f, d.fnet, 0); @@ -259,31 +272,34 @@ TEST(TwoFluidEMHD, FluxVectorSplittingStationary) for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, 1e-15); } } } } // y-direction + model.fluxVector(d.cons, d.prims, d.aux, d.f, 1); fluxMethod.fluxReconstruction(d.cons, d.prims, d.aux, d.f, d.fnet, 1); for (int i(d.Ng); i < d.Nx-d.Ng; i++) { for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, 1e-15); } } } } // z-direction + model.fluxVector(d.cons, d.prims, d.aux, d.f, 2); fluxMethod.fluxReconstruction(d.cons, d.prims, d.aux, d.f, d.fnet, 2); for (int i(d.Ng); i < d.Nx-d.Ng; i++) { for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, 1e-15); } } } } + } // End test From ccf97040b374c8a4c1a3026a3809d53f2d07b3d5 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 3 Nov 2020 12:06:04 +0000 Subject: [PATCH 31/56] finished converting serial GPU tests to use platformEnv object --- Tests/GPU/Makefile | 12 ++++----- Tests/GPU/Src/test_fvs.cu | 48 ++++++++++++++++++++---------------- Tests/GPU/Src/test_imex.cu | 29 ++++++++++++---------- Tests/GPU/Src/test_rk2.cu | 11 +++++---- Tests/GPU/Src/test_srrmhd.cu | 31 ++++++++++++++--------- 5 files changed, 75 insertions(+), 56 deletions(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 9dbf306b..14ad2d73 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -61,11 +61,11 @@ TESTS = test_simulation \ test_cminpack \ test_boundaryConds \ test_twoFluidEMHD \ -# test_srrmhd \ -# test_fvs \ -# test_id \ -# test_rk2 \ -# test_imex + test_srrmhd \ + test_fvs \ + test_id \ + test_rk2 \ + test_imex PARALLEL_TESTS = test_parallel_srmhd \ @@ -227,7 +227,7 @@ test_srrmhd.o : $(TEST_DIR)/test_srrmhd.cu \ $(INC_DIR)/srrmhd.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_srrmhd.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_srrmhd : srrmhd.o C2PArgs.o test_srrmhd.o simData.o fluxVectorSplitting.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a +test_srrmhd : srrmhd.o boundaryConds.o C2PArgs.o test_srrmhd.o simData.o fluxVectorSplitting.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ # TwoFluidEMHD Model diff --git a/Tests/GPU/Src/test_fvs.cu b/Tests/GPU/Src/test_fvs.cu index 7a219be0..5bf19139 100644 --- a/Tests/GPU/Src/test_fvs.cu +++ b/Tests/GPU/Src/test_fvs.cu @@ -3,9 +3,11 @@ #include "twoFluidEMHD.h" #include "simulation.h" #include "simData.h" +#include "serialSaveData.h" #include "initFunc.h" #include "rkSplit.h" #include 
"fluxVectorSplitting.h" +#include "serialEnv.h" #include #include #include @@ -16,14 +18,15 @@ TEST(FVS, SameFnetAsSerial) as the serial version. */ { - Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); - OTVortexSingleFluid init(&d); Outflow bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); RKSplit timeInt(&d, &model, &bcs, &fluxMethod); - SaveData save(&d); + SerialSaveData save(&d, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); fluxMethod.F(d.cons, d.prims, d.aux, d.f, d.fnet); @@ -51,28 +54,29 @@ TEST(FVS, SameFnetAsSerial) TEST(FVS, SameXReconstructionAsSerial) { - Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); - OTVortexSingleFluid init(&d); Outflow bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); RKSplit timeInt(&d, &model, &bcs, &fluxMethod); - SaveData save(&d); + SerialSaveData save(&d, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); - + model.fluxVector(d.cons, d.prims, d.aux, d.f, 0); fluxMethod.fluxReconstruction(d.cons, d.prims, d.aux, d.f, d.fnet, 0); for (int var(0); var < d.Ncons; var++) { - for (int i(0); i < d.Nx; i++) + for (int i(0); i < d.Nx-1; i++) { for (int j(0); j < d.Ny; j++) { for (int k(0); k < d.Nz; k++) { - d.cons[d.id(var, i, j, k)] = d.fnet[d.id(var, i, j, k)]; + d.cons[d.id(var, i, j, k)] = d.fnet[d.id(var, i+1, j, k)]/d.dx - d.fnet[d.id(var, i, j, k)]/d.dx; } } } @@ -88,14 +92,15 @@ TEST(FVS, SameXReconstructionAsSerial) TEST(FVS, SameYReconstructionAsSerial) { - Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); - OTVortexSingleFluid init(&d); Outflow bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); RKSplit timeInt(&d, &model, &bcs, &fluxMethod); - SaveData save(&d); + SerialSaveData save(&d, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); model.fluxVector(d.cons, d.prims, d.aux, d.f, 1); @@ -109,7 +114,7 @@ TEST(FVS, SameYReconstructionAsSerial) { for (int k(0); k < d.Nz; k++) { - d.cons[d.id(var, i, j, k)] = d.fnet[d.id(var, i, j, k)]; + d.cons[d.id(var, i, j, k)] = d.fnet[d.id(var, i, j+1, k)]/d.dy - d.fnet[d.id(var, i, j, k)]/d.dy; } } } @@ -125,14 +130,15 @@ TEST(FVS, SameYReconstructionAsSerial) TEST(FVS, SameZReconstructionAsSerial) { - Data d(20, 20, 20, 0, 1, 0, 1, 0, 1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(20, 20, 0, 0, 1, 0, 1, 0, 1, 0.8, &env); SRMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); - OTVortexSingleFluid init(&d); Outflow bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); RKSplit timeInt(&d, &model, &bcs, &fluxMethod); - SaveData save(&d); + SerialSaveData save(&d, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); model.fluxVector(d.cons, d.prims, d.aux, d.f, 2); @@ -146,7 +152,7 @@ TEST(FVS, SameZReconstructionAsSerial) { for (int k(0); k < d.Nz; k++) { - d.cons[d.id(var, i, j, k)] = d.fnet[d.id(var, i, j, k)]; + d.cons[d.id(var, i, j, k)] = d.fnet[d.id(var, i, j, k+1)]/d.dz - d.fnet[d.id(var, i, j, k)]/d.dz; } } } diff --git a/Tests/GPU/Src/test_imex.cu b/Tests/GPU/Src/test_imex.cu index 830da06e..5bb776a0 100644 --- 
a/Tests/GPU/Src/test_imex.cu +++ b/Tests/GPU/Src/test_imex.cu @@ -8,7 +8,7 @@ #include "SSP3.h" #include "saveData.h" #include "fluxVectorSplitting.h" -#include "saveData.h" +#include "serialSaveData.h" #include #include @@ -21,17 +21,18 @@ TEST(SSP2, IMEX2ConsistentWithSerialVersion) */ double sigma(0); - Data data(64, 16, 0, 0, 1, 0, 1, 0, 1, 0.05, + SerialEnv env(0, NULL, 1, 1, 1); + Data data(64, 16, 0, 0, 1, 0, 1, 0, 1, 0.05, &env, 0.5, 4, 4.0/3.0, sigma); // Choose particulars of simulation SRRMHD model(&data); FVS fluxMethod(&data, &model); - Simulation sim(&data); - BrioWuSingleFluid init(&data); Outflow bcs(&data); + Simulation sim(&data, &env); + BrioWuSingleFluid init(&data); SSP2 timeInt(&data, &model, &bcs, &fluxMethod); - SaveData save(&data); + SerialSaveData save(&data, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); sim.evolve(); @@ -54,17 +55,18 @@ TEST(SSP2FlowKHSingleFluid, IMEX2ConsistentWithSerialVersion) */ double sigma(0); - Data data(64, 16, 0, -0.5, 0.5, -1, 1, 0, 1, 0.05, + SerialEnv env(0, NULL, 1, 1, 1); + Data data(64, 16, 0, -0.5, 0.5, -1, 1, 0, 1, 0.05, &env, 0.5, 4, 4.0/3.0, sigma); // Choose particulars of simulation SRRMHD model(&data); FVS fluxMethod(&data, &model); - Simulation sim(&data); - KHInstabilitySingleFluid init(&data); Flow bcs(&data); + Simulation sim(&data, &env); + KHInstabilitySingleFluid init(&data); SSP2 timeInt(&data, &model, &bcs, &fluxMethod); - SaveData save(&data); + SerialSaveData save(&data, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); sim.evolve(); @@ -129,17 +131,18 @@ TEST(SSP3, IMEX3ConsistentWithSerialVersion) */ double sigma(0); - Data data(64, 16, 0, 0, 1, 0, 1, 0, 1, 0.05, + SerialEnv env(0, NULL, 1, 1, 1); + Data data(64, 16, 0, 0, 1, 0, 1, 0, 1, 0.05, &env, 0.5, 4, 4.0/3.0, sigma); // Choose particulars of simulation SRRMHD model(&data); FVS fluxMethod(&data, &model); - Simulation sim(&data); - BrioWuSingleFluid init(&data); Outflow bcs(&data); + Simulation sim(&data, &env); + BrioWuSingleFluid init(&data); SSP3 timeInt(&data, &model, &bcs, &fluxMethod); - SaveData save(&data); + SerialSaveData save(&data, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); sim.evolve(); diff --git a/Tests/GPU/Src/test_rk2.cu b/Tests/GPU/Src/test_rk2.cu index 183ff724..f00407fc 100644 --- a/Tests/GPU/Src/test_rk2.cu +++ b/Tests/GPU/Src/test_rk2.cu @@ -1,6 +1,7 @@ #include "gtest/gtest.h" #include "srrmhd.h" #include "simulation.h" +#include "serialSaveData.h" #include "simData.h" #include "initFunc.h" #include "RK2.h" @@ -15,15 +16,15 @@ TEST(RK2, RK2OutputConsistentWithSerial) The following was used to gather data to compare the parallel version with. 
No tests are run in the serial version of this test */ - - Data d(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.004); + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.004, &env); SRRMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); - OTVortexSingleFluid init(&d); Outflow bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); RK2 timeInt(&d, &model, &bcs, &fluxMethod); - SaveData save(&d); + SerialSaveData save(&d, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); sim.evolve(); diff --git a/Tests/GPU/Src/test_srrmhd.cu b/Tests/GPU/Src/test_srrmhd.cu index c155f1c6..b220004d 100644 --- a/Tests/GPU/Src/test_srrmhd.cu +++ b/Tests/GPU/Src/test_srrmhd.cu @@ -2,8 +2,10 @@ #include "srrmhd.h" #include "simulation.h" #include "simData.h" +#include "boundaryConds.h" #include "initFunc.h" #include "fluxVectorSplitting.h" +#include "serialEnv.h" #include #include #include @@ -13,7 +15,8 @@ TEST(SRRMHD, Constructor) { - Data d(100, 10, 0, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(100, 10, 0, 0, 1, -0.5, 0.5, -0.1, 0.1, 0.8, &env); SRRMHD model(&d); EXPECT_EQ(d.Ncons, 14); EXPECT_EQ(d.Nprims, 11); @@ -27,12 +30,14 @@ TEST(SRRMHD, Constructor) TEST(SRRMHD, FluxVectorSplittingStationary) { - + double tol(1.0e-15); // Set up - Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0, 0.5, 4, 5.0/3.0, 1000.0, 0.5); + SerialEnv env(0, NULL, 1, 1, 1); + Data d(10, 10, 10, 0, 1, 0, 1, 0, 1, 1.0, &env, 0.5, 4, 5.0/3.0, 1000.0, 0.5); SRRMHD model(&d); FVS fluxMethod(&d, &model); - Simulation sim(&d); + Periodic bcs(&d); + Simulation sim(&d, &env); // Set state to stationary equilibrium state for (int i(0); i < d.Nx; i++) { @@ -62,7 +67,7 @@ TEST(SRRMHD, FluxVectorSplittingStationary) for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, tol); } } } @@ -73,7 +78,7 @@ TEST(SRRMHD, FluxVectorSplittingStationary) for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, tol); } } } @@ -84,7 +89,7 @@ TEST(SRRMHD, FluxVectorSplittingStationary) for (int j(d.Ng); j < d.Ny-d.Ng; j++) { for (int k(d.Ng); k < d.Nz-d.Ng; k++) { for (int var(0); var < d.Ncons; var++) { - EXPECT_EQ(d.fnet[d.id(var, i, j, k)], 0.0); + EXPECT_NEAR(d.fnet[d.id(var, i, j, k)], 0.0, tol); } } } @@ -97,12 +102,16 @@ TEST(SRRMHD, FluxVectorSplittingStationary) TEST(SRRMHD, Prims2Cons2Prims) { const double tol = 1.49011612e-8; // Tolerance of rootfinder - Data d(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0); - Data d2(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0); + SerialEnv env(0, NULL, 1, 1, 1); + SerialEnv env2(0, NULL, 1, 1, 1); + Data d(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0, &env); + Data d2(10, 10, 0, 0, 1, 0, 1, 0, 1, 1.0, &env2); SRRMHD model(&d); SRRMHD model2(&d2); - Simulation sim(&d); - Simulation sim2(&d2); + Periodic bcs(&d); + Periodic bcs2(&d2); + Simulation sim(&d, &env); + Simulation sim2(&d2, &env2); OTVortexSingleFluid init(&d); OTVortexSingleFluid init2(&d2); From 7aecf2b510469baff7eda3dfb382141460f74a71 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 17 Nov 2020 12:20:28 +0000 Subject: [PATCH 32/56] adding tests to compare gpu with and without MPI --- Tests/GPU/Makefile | 37 ++++++++++++++++++++--- Tests/GPU/Src/compareParallelAndSerial.py | 6 ++-- 
makePaths.sh | 6 ++++ 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 14ad2d73..26eecb7c 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -17,7 +17,6 @@ # Compiler CC = nvcc -MPICC = mpic++ #use `mpic++ -show` to find library and include flags MPI_FLAGS = -I/local/software/mpich/3.2.1/gcc/include -L/local/software/mpich/3.2.1/gcc/lib -lmpi -lmpicxx @@ -68,7 +67,7 @@ TESTS = test_simulation \ test_imex -PARALLEL_TESTS = test_parallel_srmhd \ +PARALLEL_TESTS = test_parallel_rk2 \ # All Google Test headers. Usually you shouldn't change this # definition. @@ -80,9 +79,14 @@ RTFIND = buildRootfinder # House-keeping build targets. -test : $(RTFIND) $(TESTS) +test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) # Run all tests @$(foreach exe, $(TESTS), ./$(exe);) + # Run all parallel tests + $(foreach exe, $(PARALLEL_TESTS), mpirun -np 4 ./$(exe);) + # Run tests that compare outputs of TestData/GPU and TestData/MPIGPU + py.test -v Src/compareParallelAndSerial.py + all : $(RTFIND) $(TESTS) @@ -189,7 +193,7 @@ serialSaveData.o : $(MODULE_DIR)/serialSaveData.cu $(INC_DIR)/serialSaveData.h @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/serialSaveData.cu -I$(INC_DIR) parallelSaveData.o : $(MODULE_DIR)/parallelSaveData.cu $(INC_DIR)/parallelSaveData.h - @$(MPICC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/parallelSaveData.cu -I$(INC_DIR) + @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(MODULE_DIR)/parallelSaveData.cu -I$(INC_DIR) # Platform env serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h @@ -300,3 +304,28 @@ test_imex.o : $(TEST_DIR)/test_imex.cu test_imex : IMEX2Args.o C2PArgs.o SSP2.o IMEX3Args.o SSP3.o simData.o srrmhd.o srmhd.o C2PArgs.o twoFluidEMHD.o initFunc.o boundaryConds.o serialSaveData.o serialEnv.o simulation.o test_imex.o $(RTFIND_OBJS) fluxVectorSplitting.o weno.o gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ + +# main func used for testing parallel tests. 
Serial tests can use the default gtest_main +main.o : $(TEST_DIR)/main.cu $(INC_DIR)/parallelEnv.h + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(TEST_DIR)/main.cu -I$(INC_DIR) + +parallelEnv.o : $(MODULE_DIR)/parallelEnv.cu $(INC_DIR)/parallelEnv.h + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(MODULE_DIR)/parallelEnv.cu -I$(INC_DIR) + +parallelBoundaryConds.o : $(MODULE_DIR)/parallelBoundaryConds.cu $(INC_DIR)/parallelBoundaryConds.h + @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(MODULE_DIR)/parallelBoundaryConds.cu -I$(INC_DIR) + + +test_parallel_rk2.o : $(TEST_DIR)/test_parallel_rk2.cu $(INC_DIR)/RK2.h \ + $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(TEST_DIR)/test_parallel_rk2.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) + +test_parallel_rk2 : main.o C2PArgs.o test_parallel_rk2.o srmhd.o srrmhd.o fluxVectorSplitting.o parallelBoundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o parallelEnv.o parallelSaveData.o $(RTFIND_OBJS) gtest.a + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -lpthread $^ -o $@ + +test_parallel_rkSplit.o : $(TEST_DIR)/test_parallel_rkSplit.cu $(INC_DIR)/rkSplit.h \ + $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(TEST_DIR)/test_parallel_rkSplit.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) + +test_parallel_rkSplit : main.o test_parallel_rkSplit.o weno.o wenoUpwinds.o srmhd.o srrmhd.o fluxVectorSplitting.o parallelBoundaryConds.o simData.o rkSplit.o RK2.o initFunc.o simulation.o weno.o parallelEnv.o parallelSaveData.o $(RTFIND_OBJS) gtest.a + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -lpthread $^ -o $@ diff --git a/Tests/GPU/Src/compareParallelAndSerial.py b/Tests/GPU/Src/compareParallelAndSerial.py index 808c5340..1928d410 100644 --- a/Tests/GPU/Src/compareParallelAndSerial.py +++ b/Tests/GPU/Src/compareParallelAndSerial.py @@ -61,7 +61,7 @@ def getFiles(self): # For each file, determine the appendix and use interactivePlot to # gather the data - for i, serfile in enumerate(glob(fromSpyder+"../TestData/GPU/Conserved/*")): + for i, serfile in enumerate(glob(fromSpyder+"../TestData/MPIGPU/Conserved/*")): ext = serfile.find('.dat') app = serfile.find('Conserved/cons') + len('Conserved.cons') appendix = serfile[app:ext] @@ -69,8 +69,8 @@ def getFiles(self): print("Fetching {} data...".format(appendix)) with HidePrints(): - self.Serials.append(Plot(fromSpyder+"../TestData/Serial/", appendix)) - self.Parallels.append(Plot(fromSpyder+"../TestData/GPU/", appendix)) + self.Serials.append(Plot(fromSpyder+"../TestData/GPU/", appendix)) + self.Parallels.append(Plot(fromSpyder+"../TestData/MPIGPU/", appendix)) self.Ncons.append(self.Serials[i].c['Ncons']) self.Nprims.append(self.Serials[i].c['Nprims']) diff --git a/makePaths.sh b/makePaths.sh index 91508bc3..08d50ad7 100644 --- a/makePaths.sh +++ b/makePaths.sh @@ -48,6 +48,12 @@ mkdir Tests/TestData/GPU/Conserved mkdir Tests/TestData/GPU/Constants mkdir Tests/TestData/GPU/Primitive +mkdir Tests/TestData/MPIGPU +mkdir Tests/TestData/MPIGPU/Auxiliary +mkdir Tests/TestData/MPIGPU/Conserved +mkdir Tests/TestData/MPIGPU/Constants +mkdir Tests/TestData/MPIGPU/Primitive + mkdir Tests/TestData mkdir Tests/TestData/CPU mkdir Tests/TestData/CPU/Auxiliary From 709359ebdd454cea07da118aca87d54762ae4c32 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 17 Nov 2020 14:24:07 +0000 
Subject: [PATCH 33/56] added instructions for running gpu tests on iridis 5 --- Scrips/IridisEnv/requirements.txt | 5 ++++ Scrips/IridisEnv/tests_instructions.md | 33 ++++++++++++++++++++++++++ Scrips/IridisEnv/tests_job.sh | 22 +++++++++++++++++ 3 files changed, 60 insertions(+) create mode 100644 Scrips/IridisEnv/requirements.txt create mode 100644 Scrips/IridisEnv/tests_instructions.md create mode 100644 Scrips/IridisEnv/tests_job.sh diff --git a/Scrips/IridisEnv/requirements.txt b/Scrips/IridisEnv/requirements.txt new file mode 100644 index 00000000..af599da1 --- /dev/null +++ b/Scrips/IridisEnv/requirements.txt @@ -0,0 +1,5 @@ +numpy +matplotlib +scipy +pytest +h5py diff --git a/Scrips/IridisEnv/tests_instructions.md b/Scrips/IridisEnv/tests_instructions.md new file mode 100644 index 00000000..9c215425 --- /dev/null +++ b/Scrips/IridisEnv/tests_instructions.md @@ -0,0 +1,33 @@ +## Tests Instructions + +These are instructions to run GPU unit tests as a batch job on Iridis 5 + +## Setting up python env + +In the root METHOD folder, create a python venv using + +``` +module purge +module load gcc/6.4.0 +module load python/3.6.4 +module load hdf5/1.10.2/gcc/parallel +module load cuda/8.0 +python3 -m venv venv +source venv/bin/activate +``` + +Then install python modules using + +``` +python -m pip install -r Scripts/IridisEnv/requirements.txt +``` + +## Runing unit tests as a batch job + +From `Tests/GPU` run `sbatch ../../Scripts/IridisEnv/tests_job.sh` + +This will run all GPU tests + + + + diff --git a/Scrips/IridisEnv/tests_job.sh b/Scrips/IridisEnv/tests_job.sh new file mode 100644 index 00000000..165c1917 --- /dev/null +++ b/Scrips/IridisEnv/tests_job.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +#SBATCH --ntasks-per-node=2 # Tasks per node +#SBATCH --nodes=1 # Number of nodes requested +#SBATCH --partition=gtx1080 +#SBATCH --time=00:10:00 + +module purge +#module load gcc/6.4.0 +module load python/3.6.4 +module load hdf5/1.10.2/gcc/parallel +module load cuda/8.0 + +module list + +source ../../venv/bin/activate + +make clean +make test + + + From bed02a70f415501286dd7158458aa67a7ca0b028 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Mon, 23 Nov 2020 13:49:48 +0000 Subject: [PATCH 34/56] splitting tests into non-python and python to make setting up module env on Iridis easier --- Scrips/IridisEnv/tests_job.sh | 7 ++++++- Tests/GPU/Makefile | 9 ++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Scrips/IridisEnv/tests_job.sh b/Scrips/IridisEnv/tests_job.sh index 165c1917..2e701818 100644 --- a/Scrips/IridisEnv/tests_job.sh +++ b/Scrips/IridisEnv/tests_job.sh @@ -16,7 +16,12 @@ module list source ../../venv/bin/activate make clean -make test +make gpu_test + +# required for GLIBCXX_3.4.21 module to be available for python +module load gcc/6.4.0 + +make compare_mpi_test diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 26eecb7c..4427cf5e 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -79,14 +79,17 @@ RTFIND = buildRootfinder # House-keeping build targets. 
-test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) +compare_mpi_test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) gpu_test + # Run tests that compare outputs of TestData/GPU and TestData/MPIGPU + py.test -v Src/compareParallelAndSerial.py + +gpu_test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) # Run all tests @$(foreach exe, $(TESTS), ./$(exe);) # Run all parallel tests $(foreach exe, $(PARALLEL_TESTS), mpirun -np 4 ./$(exe);) - # Run tests that compare outputs of TestData/GPU and TestData/MPIGPU - py.test -v Src/compareParallelAndSerial.py +test : gpu_test compare_mpi_test all : $(RTFIND) $(TESTS) From 7a0cfb3f8ce2c8b81516704d3807bae5b380f336 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Mon, 30 Nov 2020 12:25:48 +0000 Subject: [PATCH 35/56] updating BrioWuSingleFluid for MPI; adding parallel tests --- Project/GPU/Src/initFunc.cu | 87 ++++++-- Tests/GPU/Makefile | 2 +- Tests/GPU/Src/compareParallelAndSerial.py | 257 +++++----------------- Tests/GPU/Src/test_rk2.cu | 164 +++++++++++++- 4 files changed, 282 insertions(+), 228 deletions(-) diff --git a/Project/GPU/Src/initFunc.cu b/Project/GPU/Src/initFunc.cu index c9a99601..733ccdd3 100644 --- a/Project/GPU/Src/initFunc.cu +++ b/Project/GPU/Src/initFunc.cu @@ -351,9 +351,6 @@ BrioWuSingleFluid::BrioWuSingleFluid(Data * data, int dir) : InitialFunc(data) if (d->nx%2 || d->ny%2 || d->nz%2) throw std::invalid_argument("Please ensure even number of cells in each direction for Brio Wu initial data.\n"); - int endX(d->Nx - 1); - int endY(d->Ny - 1); - int endZ(d->Nz - 1); int facX(1); int facY(1); int facZ(1); @@ -382,25 +379,79 @@ BrioWuSingleFluid::BrioWuSingleFluid(Data * data, int dir) : InitialFunc(data) lBx = 0.5; rBx = -0.5; } - - for (int i(0); i < d->Nx/facX; i++) { - for (int j(0); j < d->Ny/facY; j++) { - for (int k(0); k < d->Nz/facZ; k++) { + double xLower((d->xmax - d->xmin)/facX + d->xmin); + double yLower((d->ymax - d->ymin)/facY + d->ymin); + double zLower((d->zmax - d->zmin)/facZ + d->zmin); + double xUpper(d->xmax - (d->xmax - d->xmin)/facX); + double yUpper(d->ymax - (d->ymax - d->ymin)/facY); + double zUpper(d->zmax - (d->zmax - d->zmin)/facZ); + + if (d->dims==3){ + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + for (int k(0); k < d->Nz; k++) { + // Left side + if ((d->x[i] < xLower) && (d->y[j] < yLower) && (d->z[k] < zLower)){ + d->prims[ID(0, i, j, k)] = 1; + d->prims[ID(4, i, j, k)] = 1; + d->prims[ID(5, i, j, k)] = lBx; + d->prims[ID(6, i, j, k)] = lBy; + d->prims[ID(7, i, j, k)] = lBz; + } + + // Right side + if ((d->x[i] > xUpper) && (d->y[j] > yUpper) && (d->z[k] > zUpper)){ + d->prims[ID(0, i, j, k)] = 0.125; + d->prims[ID(4, i, j, k)] = 0.1; + d->prims[ID(5, i, j, k)] = rBx; + d->prims[ID(6, i, j, k)] = rBy; + d->prims[ID(7, i, j, k)] = rBz; + } + } + } + } + } else if (d->dims==2) { + for (int i(0); i < d->Nx; i++) { + for (int j(0); j < d->Ny; j++) { + // Left side + if ((d->x[i] < xLower) && (d->y[j] < yLower)){ + d->prims[ID(0, i, j, 0)] = 1; + d->prims[ID(4, i, j, 0)] = 1; + d->prims[ID(5, i, j, 0)] = lBx; + d->prims[ID(6, i, j, 0)] = lBy; + d->prims[ID(7, i, j, 0)] = lBz; + } + + // Right side + if ((d->x[i] > xUpper) && (d->y[j] > yUpper)){ + d->prims[ID(0, i, j, 0)] = 0.125; + d->prims[ID(4, i, j, 0)] = 0.1; + d->prims[ID(5, i, j, 0)] = rBx; + d->prims[ID(6, i, j, 0)] = rBy; + d->prims[ID(7, i, j, 0)] = rBz; + } + } + } + } else { + for (int i(0); i < d->Nx; i++) { // Left side - d->prims[ID(0, i, j, k)] = 1; - d->prims[ID(4, i, j, k)] = 1; - d->prims[ID(5, i, j, k)] = lBx; - d->prims[ID(6, i, j, 
k)] = lBy; - d->prims[ID(7, i, j, k)] = lBz; + if (d->x[i] < xLower){ + d->prims[ID(0, i, 0, 0)] = 1; + d->prims[ID(4, i, 0, 0)] = 1; + d->prims[ID(5, i, 0, 0)] = lBx; + d->prims[ID(6, i, 0, 0)] = lBy; + d->prims[ID(7, i, 0, 0)] = lBz; + } // Right side - d->prims[ID(0, endX - i, endY - j, endZ - k)] = 0.125; - d->prims[ID(4, endX - i, endY - j, endZ - k)] = 0.1; - d->prims[ID(5, endX - i, endY - j, endZ - k)] = rBx; - d->prims[ID(6, endX - i, endY - j, endZ - k)] = rBy; - d->prims[ID(7, endX - i, endY - j, endZ - k)] = rBz; + if (d->x[i] > xUpper){ + d->prims[ID(0, i, 0, 0)] = 0.125; + d->prims[ID(4, i, 0, 0)] = 0.1; + d->prims[ID(5, i, 0, 0)] = rBx; + d->prims[ID(6, i, 0, 0)] = rBy; + d->prims[ID(7, i, 0, 0)] = rBz; + } } - } } } diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 4427cf5e..303dd2a4 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -169,7 +169,7 @@ test_rk2.o : $(TEST_DIR)/test_rk2.cu $(INC_DIR)/RK2.h \ $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_rk2.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) -test_rk2 : test_rk2.o C2PArgs.o srrmhd.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a +test_rk2 : test_rk2.o C2PArgs.o srmhd.o srrmhd.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ # Explicit RK split integrator diff --git a/Tests/GPU/Src/compareParallelAndSerial.py b/Tests/GPU/Src/compareParallelAndSerial.py index 1928d410..017cf8b8 100644 --- a/Tests/GPU/Src/compareParallelAndSerial.py +++ b/Tests/GPU/Src/compareParallelAndSerial.py @@ -85,224 +85,71 @@ def getFiles(self): self.ybounds.append((0, self.ny[-1])) self.zbounds.append((0, self.nz[-1])) - - # Instantiate the compare class so we have the data Compare = CompareParallelAndSerial() # Test functions -# IMEX3 -def test_ConsEquivalentForSSP3(): - Obj = Compare.Appendicies.index('SSP3') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - print(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]))) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - -def test_PrimsEquivalentForSSP3(): - Obj = Compare.Appendicies.index('SSP3') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Nprims[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) - -def test_AuxEquivalentForSSP3(): - Obj = Compare.Appendicies.index('SSP3') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Naux[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k 
in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - -# IMEX2 -def test_ConsEquivalentForSSP2(): - Obj = Compare.Appendicies.index('SSP2') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - -def test_PrimsEquivalentForSSP2(): - Obj = Compare.Appendicies.index('SSP2') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Nprims[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) - -def test_AuxEquivalentForSSP2(): - Obj = Compare.Appendicies.index('SSP2') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Naux[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - -def test_ConsEquivalentForSSP2FlowKHSingleFluid(): - Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - -def test_PrimsEquivalentForSSP2FlowKHSingleFluid(): - Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Nprims[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) - -def test_AuxEquivalentForSSP2FlowKHSingleFluid(): - Obj = Compare.Appendicies.index('SSP2FlowKHSingleFluid') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Naux[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - 
assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - - -# RK2 -def test_ConsEquivalentForRK2(): - Obj = Compare.Appendicies.index('RK2') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): +def _compareStateVarArrays(serialArray, parallelArray, Obj, nVars): + for Nv in range(nVars): for i in range(*Compare.xbounds[Obj]): for j in range(*Compare.ybounds[Obj]): for k in range(*Compare.zbounds[Obj]): try: - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) + assert(abs((serialArray[Nv, i, j, k] - parallelArray[Nv, i, j, k]) < TOL)) except AssertionError: print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) + assert(abs((serialArray[Nv, i, j, k] - parallelArray[Nv, i, j, k]) < TOL)) + + +# RK2 + +## BrioWuSingleFluid -def test_PrimsEquivalentForRK2(): - Obj = Compare.Appendicies.index('RK2') +def test_ConsEquivalentForRK2SrmhdOutflowBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdOutflowBrioWuSF') Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Nprims[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.prims[Nv, i, j, k] - Parallel.prims[Nv, i, j, k]) < TOL)) + _compareStateVarArrays(Serial.cons, Parallel.cons, Obj, Compare.Ncons[Obj]) -def test_AuxEquivalentForRK2(): - Obj = Compare.Appendicies.index('RK2') +def test_PrimsEquivalentForRK2SrmhdOutflowBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdOutflowBrioWuSF') Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Naux[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.aux[Nv, i, j, k] - Parallel.aux[Nv, i, j, k]) < TOL)) - -# FVS -def test_FnetEquivalentForFVS(): - Obj = Compare.Appendicies.index('FVSFnet') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k)) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - -def test_FxEquivalentForFVS(): - Obj = Compare.Appendicies.index('FVSFx') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, 
{}, {}, {})".format(Nv, i, j, k) + " with diff of {}".format(Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k])) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - -def test_FyEquivalentForFVS(): - Obj = Compare.Appendicies.index('FVSFy') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k) + " with diff of {}".format(Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k])) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - -def test_FzEquivalentForFVS(): - Obj = Compare.Appendicies.index('FVSFz') - Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] - for Nv in range(Compare.Ncons[Obj]): - for i in range(*Compare.xbounds[Obj]): - for j in range(*Compare.ybounds[Obj]): - for k in range(*Compare.zbounds[Obj]): - try: - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) - except AssertionError: - print("Error for (Nv, i, j, k) = ({}, {}, {}, {})".format(Nv, i, j, k) + " with diff of {}".format(Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k])) - assert(abs((Serial.cons[Nv, i, j, k] - Parallel.cons[Nv, i, j, k]) < TOL)) + _compareStateVarArrays(Serial.prims, Parallel.prims, Obj, Compare.Nprims[Obj]) + +def test_AuxEquivalentForRK2SrmhdOutflowBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdOutflowBrioWuSF') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + _compareStateVarArrays(Serial.aux, Parallel.aux, Obj, Compare.Naux[Obj]) + +def test_ConsEquivalentForRK2SrmhdPeriodicBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdPeriodicBrioWuSF') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + _compareStateVarArrays(Serial.cons, Parallel.cons, Obj, Compare.Ncons[Obj]) + +def test_PrimsEquivalentForRK2SrmhdPeriodicBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdPeriodicBrioWuSF') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + _compareStateVarArrays(Serial.prims, Parallel.prims, Obj, Compare.Nprims[Obj]) + +def test_AuxEquivalentForRK2SrmhdPeriodicBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdPeriodicBrioWuSF') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + _compareStateVarArrays(Serial.aux, Parallel.aux, Obj, Compare.Naux[Obj]) + +def test_ConsEquivalentForRK2SrmhdFlowBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdFlowBrioWuSF') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + _compareStateVarArrays(Serial.cons, Parallel.cons, Obj, Compare.Ncons[Obj]) + +def test_PrimsEquivalentForRK2SrmhdFlowBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdFlowBrioWuSF') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + _compareStateVarArrays(Serial.prims, Parallel.prims, Obj, Compare.Nprims[Obj]) + +def test_AuxEquivalentForRK2SrmhdFlowBrioWuSF(): + Obj = Compare.Appendicies.index('RK2SrmhdFlowBrioWuSF') + Serial, Parallel = Compare.Serials[Obj], Compare.Parallels[Obj] + _compareStateVarArrays(Serial.aux, Parallel.aux, Obj, Compare.Naux[Obj]) + + diff --git a/Tests/GPU/Src/test_rk2.cu b/Tests/GPU/Src/test_rk2.cu index f00407fc..1c9059f4 100644 --- a/Tests/GPU/Src/test_rk2.cu +++ b/Tests/GPU/Src/test_rk2.cu @@ 
-1,5 +1,6 @@ #include "gtest/gtest.h" #include "srrmhd.h" +#include "srmhd.h" #include "simulation.h" #include "serialSaveData.h" #include "simData.h" @@ -9,16 +10,67 @@ #include -TEST(RK2, RK2OutputConsistentWithSerial) +/* + Assumptions: + RKRandomInstabilitySingleFluid is tested in 2D only + BrioWuSingleFluid is tested in 1D only +*/ + + +// RKOTVSingleFluidPeriodic +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicOTVSF) { /* The following was used to gather data to compare the parallel version with. No tests are run in the serial version of this test */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + SerialEnv env(0, NULL, 1, 1, 1, 1); - Data d(30, 30, 30, 0, 1, 0, 1, 0, 1, 0.004, &env); - SRRMHD model(&d); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Periodic bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + SerialSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "RK2SrmhdPeriodicOTVSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +// RKOTVSingleFluidOutflow +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowOTVSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); FVS fluxMethod(&d, &model); Outflow bcs(&d); Simulation sim(&d, &env); @@ -27,17 +79,121 @@ TEST(RK2, RK2OutputConsistentWithSerial) SerialSaveData save(&d, &env); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "RK2SrmhdOutflowOTVSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} + + + + +// BrioWuSingleFluid + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Outflow bcs(&d); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + SerialSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "RK2SrmhdOutflowBrioWuSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. 
No tests are run in the serial version of this test + */ + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Periodic bcs(&d); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + SerialSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); // Save data in test directory strcpy(save.dir, "../TestData/GPU"); - strcpy(save.app, "RK2"); + strcpy(save.app, "RK2SrmhdPeriodicBrioWuSF"); + save.saveCons(); save.savePrims(); save.saveAux(); save.saveConsts(); +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdFlowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Flow bcs(&d); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + SerialSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + sim.evolve(); + // Save data in test directory + strcpy(save.dir, "../TestData/GPU"); + strcpy(save.app, "RK2SrmhdFlowBrioWuSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); } + + + From 5dab77294c9ed78880f882a894f728bd069028da Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Mon, 30 Nov 2020 12:26:53 +0000 Subject: [PATCH 36/56] forgetting to add files to git --- Tests/GPU/Src/main.cu | 12 ++ Tests/GPU/Src/test_parallel_rk2.cu | 238 +++++++++++++++++++++++++++++ 2 files changed, 250 insertions(+) create mode 100644 Tests/GPU/Src/main.cu create mode 100644 Tests/GPU/Src/test_parallel_rk2.cu diff --git a/Tests/GPU/Src/main.cu b/Tests/GPU/Src/main.cu new file mode 100644 index 00000000..c6ff0a86 --- /dev/null +++ b/Tests/GPU/Src/main.cu @@ -0,0 +1,12 @@ +#include "gtest/gtest.h" +#include "parallelEnv.h" + +int main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + + // Create env here to ensure MPI initialisation is handled. Will need to create this object again inside each test + // -- mpi init will only be called the first time + ParallelEnv env(0, NULL, 1, 1, 1); + return RUN_ALL_TESTS(); +} diff --git a/Tests/GPU/Src/test_parallel_rk2.cu b/Tests/GPU/Src/test_parallel_rk2.cu new file mode 100644 index 00000000..8e866a83 --- /dev/null +++ b/Tests/GPU/Src/test_parallel_rk2.cu @@ -0,0 +1,238 @@ +#include "gtest/gtest.h" +#include "srrmhd.h" +#include "srmhd.h" +#include "simulation.h" +#include "simData.h" +#include "parallelSaveData.h" +#include "parallelBoundaryConds.h" +#include "initFunc.h" +#include "RK2.h" +#include "fluxVectorSplitting.h" +#include "parallelEnv.h" +#include + +/* + Assumptions: + RKRandomInstabilitySingleFluid is tested in 2D only + BrioWuSingleFluid is tested in 1D only +*/ + + +#if 1 +// RKOTVSingleFluidPeriodic +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicOTVSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. 
No tests are run in the serial version of this test + */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelPeriodic bcs(&d, &env); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPU"); + strcpy(save.app, "RK2SrmhdPeriodicOTVSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +// RKOTVSingleFluidOutflow +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowOTVSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelOutflow bcs(&d, &env); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPU"); + strcpy(save.app, "RK2SrmhdOutflowOTVSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} + + + + +// BrioWuSingleFluid + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelOutflow bcs(&d, &env); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPU"); + strcpy(save.app, "RK2SrmhdOutflowBrioWuSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelPeriodic bcs(&d, &env); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPU"); + strcpy(save.app, "RK2SrmhdPeriodicBrioWuSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdFlowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. 
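These parallel variants all construct `ParallelEnv env(0, NULL, 2, 2, 1, 1)`, a 2x2 rank grid over the 40x40 test domain, and write to `TestData/MPIGPU` for the serial-versus-parallel comparisons. A small, hedged sketch of the per-rank sizes implied by that setup follows; it assumes the physical cells divide evenly across ranks and that `Nx` counts `2*Ng` ghost cells on top of the local physical cells, the convention used by the HDF5 writers later in this patch series. The function name is illustrative only.

```
def local_grid(nx_global, ny_global, ranks_x, ranks_y, ng):
    """Per-rank cell counts for an even 2-D domain decomposition (sketch)."""
    nx_local = nx_global // ranks_x        # physical cells owned by one rank
    ny_local = ny_global // ranks_y
    Nx_local = nx_local + 2 * ng           # including ghost cells, as in Data::Nx
    Ny_local = ny_local + 2 * ng
    return nx_local, ny_local, Nx_local, Ny_local

# The tests above: 40x40 cells on a 2x2 rank grid with Ng = 4 ghost cells
print(local_grid(40, 40, 2, 2, 4))         # -> (20, 20, 28, 28)
```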
No tests are run in the serial version of this test + */ + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelFlow bcs(&d, &env); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPU"); + strcpy(save.app, "RK2SrmhdFlowBrioWuSF"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#endif + + + +#if 0 + +// Tests which do not currently pass + +TEST(RK2OutputConsistentWithSerial, RK2SrrmhdOutflowOTVortexSingleFluid) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(30, 30, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelOutflow bcs(&d, &env); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveData save(&d, &env); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPU"); + strcpy(save.app, "RK2SrrmhdOutflowOTVortexSingleFluid"); + + save.saveCons(); + save.savePrims(); + save.saveAux(); + save.saveConsts(); +} +#endif From a3c115c8764e9760747ce4c6689bc55b56d669b4 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Wed, 6 Jan 2021 15:13:35 +0000 Subject: [PATCH 37/56] putting cpu and gpu iridis instructions in the same place --- Project/GPU/Include/serialEnv.h | 1 + Project/GPU/Src/main.cu | 12 ++++------- Scrips/IridisEnv/tests_instructions.md | 19 ++++++++++++++--- Scrips/IridisEnv/tests_job_cpu.sh | 21 +++++++++++++++++++ .../{tests_job.sh => tests_job_gpu.sh} | 0 5 files changed, 42 insertions(+), 11 deletions(-) create mode 100644 Scrips/IridisEnv/tests_job_cpu.sh rename Scrips/IridisEnv/{tests_job.sh => tests_job_gpu.sh} (100%) diff --git a/Project/GPU/Include/serialEnv.h b/Project/GPU/Include/serialEnv.h index f1cae491..7ad548c7 100644 --- a/Project/GPU/Include/serialEnv.h +++ b/Project/GPU/Include/serialEnv.h @@ -19,6 +19,7 @@ class SerialEnv : public PlatformEnv { public: + // TODO -- no reason for this constructor to take nxRanks etc //! Constructor -- Initialize global MPI communicator SerialEnv(int *argcP, char **argvP[], int nxRanks, int nyRanks, int nzRanks, int testing=0); diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index bb109cd3..4acfedb0 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -5,10 +5,9 @@ #include "srmhd.h" #include "srrmhd.h" #include "boundaryConds.h" -#include "parallelBoundaryConds.h" #include "rkSplit.h" #include "SSP2.h" -#include "parallelSaveData.h" +#include "serialSaveData.h" #include "fluxVectorSplitting.h" #include "serialEnv.h" @@ -43,9 +42,6 @@ int main(int argc, char *argv[]) { double sigma(0); bool output(true); int safety(180); - int nxRanks(2); - int nyRanks(2); - int nzRanks(1); char * ptr(0); //! 
Overwrite any variables that have been passed in as main() arguments @@ -55,7 +51,7 @@ int main(int argc, char *argv[]) { } } - ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); + SerialEnv env(&argc, &argv, 1, 1, 1); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, cfl, Ng, gamma, sigma); @@ -65,7 +61,7 @@ int main(int argc, char *argv[]) { FVS fluxMethod(&data, &model); - ParallelFlow bcs(&data, &env); + Flow bcs(&data, &env); Simulation sim(&data, &env); @@ -73,7 +69,7 @@ int main(int argc, char *argv[]) { RK2 timeInt(&data, &model, &bcs, &fluxMethod); - ParallelSaveData save(&data, &env); + SerialSaveData save(&data, &env); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); diff --git a/Scrips/IridisEnv/tests_instructions.md b/Scrips/IridisEnv/tests_instructions.md index 9c215425..3569ca88 100644 --- a/Scrips/IridisEnv/tests_instructions.md +++ b/Scrips/IridisEnv/tests_instructions.md @@ -1,6 +1,6 @@ ## Tests Instructions -These are instructions to run GPU unit tests as a batch job on Iridis 5 +These are instructions to run GPU or CPU unit tests as a batch job on Iridis 5 ## Setting up python env @@ -11,7 +11,13 @@ module purge module load gcc/6.4.0 module load python/3.6.4 module load hdf5/1.10.2/gcc/parallel -module load cuda/8.0 +``` + +Optionally also type `module load cuda/8.0` if using gpu, + +Finish creating and activating the python venv with: + +``` python3 -m venv venv source venv/bin/activate ``` @@ -24,10 +30,17 @@ python -m pip install -r Scripts/IridisEnv/requirements.txt ## Runing unit tests as a batch job -From `Tests/GPU` run `sbatch ../../Scripts/IridisEnv/tests_job.sh` +For GPU: + +From `Tests/GPU` run `sbatch ../../Scripts/IridisEnv/tests_job_gpu.sh` This will run all GPU tests +For CPU: + +From `Tests/CPU` run `sbatch ../../Scripts/IridisEnv/tests_job_cpu.sh` + + diff --git a/Scrips/IridisEnv/tests_job_cpu.sh b/Scrips/IridisEnv/tests_job_cpu.sh new file mode 100644 index 00000000..583b9043 --- /dev/null +++ b/Scrips/IridisEnv/tests_job_cpu.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +#SBATCH --ntasks-per-node=4 # Tasks per node +#SBATCH --nodes=1 # Number of nodes requested +#SBATCH --time=00:10:00 # walltime + +module purge +module load gcc/6.4.0 +module load python/3.6.4 +module load hdf5/1.10.2/gcc/parallel +#module load hdf5/1.10.2/gcc/serial + +module list + +source ../../venv/bin/activate + +export PYTHONPATH=$PYTHONPATH:../../Scripts:/home/amb1u19/METHOD_branches/METHOD_dev_hdf5/Scripts + +gcc --version +make clean +make test diff --git a/Scrips/IridisEnv/tests_job.sh b/Scrips/IridisEnv/tests_job_gpu.sh similarity index 100% rename from Scrips/IridisEnv/tests_job.sh rename to Scrips/IridisEnv/tests_job_gpu.sh From 9fd16ee7f0c7f46fccc4b08dfc1314a080ba29b7 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 8 Jan 2021 15:38:58 +0000 Subject: [PATCH 38/56] small fix to main file --- Project/GPU/Makefile | 2 +- Project/GPU/Src/main.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index a5100e7b..b3e0c821 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -4,7 +4,7 @@ # -------------- PARAMETERS FOR USERS TO EDIT -------------------- # if USE_MPI=1, need to use parallel versions of objects, such as ParallelEnv, ParallelSaveData etc -USE_MPI=1 +USE_MPI=0 # find location of MPI libraries to link on your local system using 'mpicc -show' MPI_FLAGS = -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu 
-lmpichcxx -lmpich diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 4acfedb0..7b2ffe50 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -61,7 +61,7 @@ int main(int argc, char *argv[]) { FVS fluxMethod(&data, &model); - Flow bcs(&data, &env); + Flow bcs(&data); Simulation sim(&data, &env); From e7b5062d5bc682724eb1784c80deaa4601b54a69 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 8 Jan 2021 16:15:02 +0000 Subject: [PATCH 39/56] adding initial files for hdf5 and checkpoint restart, copying from cpu version --- Project/GPU/Include/checkpointArgs.h | 68 ++++ Project/GPU/Include/parallelCheckpointArgs.h | 37 ++ Project/GPU/Include/parallelSaveDataHDF5.h | 120 +++++++ Project/GPU/Include/serialCheckpointArgs.h | 37 ++ Project/GPU/Include/serialSaveDataHDF5.h | 122 +++++++ Project/GPU/Src/checkpointArgs.cu | 67 ++++ Project/GPU/Src/parallelCheckpointArgs.cu | 70 ++++ Project/GPU/Src/parallelSaveDataHDF5.cu | 348 +++++++++++++++++++ Project/GPU/Src/serialCheckpointArgs.cu | 67 ++++ Project/GPU/Src/serialSaveDataHDF5.cu | 306 ++++++++++++++++ 10 files changed, 1242 insertions(+) create mode 100644 Project/GPU/Include/checkpointArgs.h create mode 100644 Project/GPU/Include/parallelCheckpointArgs.h create mode 100644 Project/GPU/Include/parallelSaveDataHDF5.h create mode 100644 Project/GPU/Include/serialCheckpointArgs.h create mode 100644 Project/GPU/Include/serialSaveDataHDF5.h create mode 100644 Project/GPU/Src/checkpointArgs.cu create mode 100644 Project/GPU/Src/parallelCheckpointArgs.cu create mode 100644 Project/GPU/Src/parallelSaveDataHDF5.cu create mode 100644 Project/GPU/Src/serialCheckpointArgs.cu create mode 100644 Project/GPU/Src/serialSaveDataHDF5.cu diff --git a/Project/GPU/Include/checkpointArgs.h b/Project/GPU/Include/checkpointArgs.h new file mode 100644 index 00000000..a1cb5478 --- /dev/null +++ b/Project/GPU/Include/checkpointArgs.h @@ -0,0 +1,68 @@ +#ifndef CHECKPOINTARGS_H +#define CHECKPOINTARGS_H + +#include +#include +#include "platformEnv.h" + + +//! Wrapper around Data object for populating Data from a checkpoint restart file +/*! + @par + Class contains all the data of the simulation relevant to any of the other + modules. Containing it in this way prevents issues of cyclic includes, also + results in Simulation as more of an interface than a class that needs to be + known to lower objects---good practice.
+ +*/ +class CheckpointArgs +{ + public: + +int + //@{ + nx, ny, nz; //!< Number of physical cells in specified direction + //@} + double + //@{ + xmin, xmax, + ymin, ymax, //!< Positional limits of domain in specified direction + zmin, zmax, + //@} + endTime, //!< End time of simulation + cfl; //!< Courant factor + int Ng; //!< Number of ghost cells + double + gamma, //!< Adiabatic index + sigma; //!< Resistivity + int + //@{ + Ncons, Nprims, Naux; //!< Number of specified variables + //@} + double + cp; //!< Constant divergence cleaning term + double + gam; //!< Exponent in the functional conductivity + double + t, //!< Current time + dt; //!< Width of current timestep + int + //@{ + Nx, Ny, Nz; //!< Total number of compute cells in domain in the specified direction + //@} + + + //! Constructor + /*! + @par + Allocates the memory required for the state arrays and sets the simulation + constants to the given values. Does not set initial state, thats done by + the initialFunc object. + @param name name of checkpoint file to use for restart, including path and extension + @param env environment object containing platform details eg MPI ranks + */ + CheckpointArgs() {}; + +}; + +#endif diff --git a/Project/GPU/Include/parallelCheckpointArgs.h b/Project/GPU/Include/parallelCheckpointArgs.h new file mode 100644 index 00000000..d2a2f09e --- /dev/null +++ b/Project/GPU/Include/parallelCheckpointArgs.h @@ -0,0 +1,37 @@ +#ifndef PARALLELCHECKPOINTARGS_H +#define PARALLELCHECKPOINTARGS_H + +#include +#include +#include "parallelEnv.h" + + +//! Wrapper around Data object for populating Data from a checkpoint restart file +/*! + @par + Class contains all the data of the simulation relevant to any of the other + modules. Containing it in this way prevents issues of cyclic includes, also + results in Simulation as more of an interface than a class that needs to be + known to lower objects---good practice.
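`CheckpointArgs` (and the serial/parallel subclasses that follow) is filled from attributes of a checkpoint HDF5 file: `cfl`, `gamma`, `sigma`, `cp` and `t` at the file root, and the grid quantities (`nx` through `endTime`) as attributes of a `Domain` group, exactly the names read by the constructors later in this patch. Below is a hedged h5py sketch, outside the build, for checking a restart file for those attributes before handing it to the restart machinery; the file name in the usage comment is only an example.

```
import h5py

ROOT_ATTRS = ["cfl", "gamma", "sigma", "cp", "t"]
DOMAIN_ATTRS = ["nx", "ny", "nz", "Nx", "Ny", "Nz", "Ng",
                "xmin", "ymin", "zmin", "xmax", "ymax", "zmax", "endTime"]

def missing_restart_attributes(path):
    """List any attributes a CheckpointArgs-style reader would fail to find."""
    with h5py.File(path, "r") as f:
        missing = [name for name in ROOT_ATTRS if name not in f.attrs]
        if "Domain" in f:
            domain = f["Domain"].attrs
            missing += ["Domain/" + name for name in DOMAIN_ATTRS
                        if name not in domain]
        else:
            missing.append("Domain group")
    return missing

# Example usage (the file name is hypothetical):
# print(missing_restart_attributes("data.checkpoint.3.000000.hdf5"))
```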
+ +*/ +class ParallelCheckpointArgs : public CheckpointArgs +{ + public: + + //! Constructor + /*! + @par + Allocates the memory required for the state arrays and sets the simulation + constants to the given values. Does not set initial state, thats done by + the initialFunc object. + @param name name of checkpoint file to use for restart, including path and extension + @param env environment object containing platform details eg MPI ranks + */ + ParallelCheckpointArgs( + const char* name, + ParallelEnv *env); + +}; + +#endif diff --git a/Project/GPU/Include/parallelSaveDataHDF5.h b/Project/GPU/Include/parallelSaveDataHDF5.h new file mode 100644 index 00000000..52c2f5b0 --- /dev/null +++ b/Project/GPU/Include/parallelSaveDataHDF5.h @@ -0,0 +1,120 @@ +#ifndef PARALLELSAVEDATAHDF5_H +#define PARALLELSAVEDATAHDF5_H + +#include +#include +#include +#include +#include +#include +#include "hdf5.h" +#include "simData.h" +#include "saveData.h" +#include "parallelEnv.h" + +using namespace std; + +//! Class used to save simulation data to HDF5 using a single process +/*! + @par + Class is initialized with the data that is to be saved. Saves the simulation + data in the Data directory, located within the Project folder. All data is + saved automatically, including all constant data (xmin, ymax, endTime etc) and + and the values of all prims, aux and cons variables. +*/ +class ParallelSaveDataHDF5 : public SaveData +{ + +public: + ParallelEnv * env; //!< Pointer to PlatformEnv class containing platform specific info such as MPI details + string filename; //!< Filename for the HDF5 file. Defaults to 'data.hdf5'. + hid_t file = 0; //!< HDF5 file to write to. + int file_iteration = 0; //!< The simulation iteration this file was opened for. + + //! The level of detail to output to file + enum OutputDetail { + OUTPUT_ALL, //!< All conserved, primitive, auxiliary and user-defined data + OUTPUT_REDUCED, //!< Skip auxiliary data + OUTPUT_MINIMAL //!< Only conserved and primitive data + } detail; + + //! Saves the conserved vector state + void saveCons() override; + + //! Saves the primitive vector state + void savePrims() override; + + //! Saves the auxiliary vector state + void saveAux() override; + + //! Saves the domain coordinates + void saveDomain() override; + + //! Saves the constant data + void saveConsts() override; + + //! Constructor + /*! + @param[in] *data pointer to the Data class + @param[in] *env pointer to the Parallel Environment containing information on bounds etc. + @param[in] filename String describing the file to create. Can ignore + */ + ParallelSaveDataHDF5( + Data * data, ParallelEnv * env, string filename="data", OutputDetail detail=OUTPUT_ALL + ) : SaveData(data, 0), env(env), filename(filename), detail(detail) { + // Remove any pre-existing checkpoint file + std::remove((filename+".checkpoint.hdf5").c_str()); + } + + virtual ~ParallelSaveDataHDF5() { } //!< Destructor + + //! Saves all cons, prims, aux and constant data + /*! + @par + This calls the other member functions to save their respective + simulation data. + + @param[in] timeSeries flags whether the saved data is final or transient + */ + void saveAll(bool timeSeries=false) override; + + //! Saves user specified variable + /*! + @par + Function saves the data for the variable specified by the string `var` + + @param[in] variable Defines the variable the user wants to save. 
Should match a variable label + @param[in] num number of user-specified variables to save in total (required for consistent numbering of files) + */ + void saveVar(string variable, int num=1) override; + + //! Opens a new HDF5 file + /*! + * @par + * Function opens a new HDF5 file with a specified filename, and closes any current one. + * + * @param[in] name Filename to create + */ + void openFile(const char *name); + + //! Tries to open a checkpoint file + /*! + * @par + * If there is not already a checkkpoint file open for this iteration, opens a new one + */ + void openCheckpointFile(); + + //! Writes a new dataset + /*! + * @par + * Saves a new dataset double to file + * + * @param group Root location to save to + * @param name Name of the new dataset + * @param var Which variable to save within the data array + * @param data Pointer to the data array (cons, prims, aux etc.) + */ + void writeDataSetDouble(const hid_t *group, const char *name, const int *var, const double *data); +}; + +#endif diff --git a/Project/GPU/Include/serialCheckpointArgs.h b/Project/GPU/Include/serialCheckpointArgs.h new file mode 100644 index 00000000..8b072fbd --- /dev/null +++ b/Project/GPU/Include/serialCheckpointArgs.h @@ -0,0 +1,37 @@ +#ifndef SERIALCHECKPOINTARGS_H +#define SERIALCHECKPOINTARGS_H + +#include +#include +#include "platformEnv.h" + + +//! Wrapper around Data object for populating Data from a checkpoint restart file +/*! + @par + Class contains all the data of the simulation relevant to any of the other + modules. Containing it in this way prevents issues of cyclic includes, also + results in Simulation as more of an interface than a class that needs to be + known to lower objects---good practice.
+ +*/ +class SerialCheckpointArgs : public CheckpointArgs +{ + public: + + //! Constructor + /*! + @par + Allocates the memory required for the state arrays and sets the simulation + constants to the given values. Does not set initial state, thats done by + the initialFunc object. + @param name name of checkpoint file to use for restart, including path and extension + @param env environment object containing platform details eg MPI ranks + */ + SerialCheckpointArgs( + const char* name, + PlatformEnv *env); + +}; + +#endif diff --git a/Project/GPU/Include/serialSaveDataHDF5.h b/Project/GPU/Include/serialSaveDataHDF5.h new file mode 100644 index 00000000..16ab4139 --- /dev/null +++ b/Project/GPU/Include/serialSaveDataHDF5.h @@ -0,0 +1,122 @@ +#ifndef SERIALSAVEDATAHDF5_H +#define SERIALSAVEDATAHDF5_H + +#include +#include +#include +#include +#include +#include +#include "hdf5.h" +#include "simData.h" +#include "saveData.h" +#include "serialEnv.h" + +using namespace std; + +#include "hdf5.h" +//! Class used to save simulation data to HDF5 using a single process +/*! + @par + Class is initialized with the data that is to be saved. Saves the simulation + data in the Data directory, located within the Project folder. All data is + saved automatically, including all constant data (xmin, ymax, endTime etc) and + and the values of all prims, aux and cons variables. +*/ +class SerialSaveDataHDF5 : public SaveData +{ + + public: + + SerialEnv * env; //!< Pointer to PlatformEnv class containing platform specific info such as MPI details + string filename; //!< Filename for the HDF5 file. Defaults to 'data.hdf5'. + hid_t file = 0; //!< HDF5 file to write to. + int file_iteration = 0; //!< The simulation iteration this file was opened for. + + //! The level of detail to output to file + enum OutputDetail { + OUTPUT_ALL, //!< All conserved, primitive, auxiliary and user-defined data + OUTPUT_REDUCED, //!< Skip auxiliary data + OUTPUT_MINIMAL //!< Only conserved and primitive data + } detail; + + //! Saves the conserved vector state + void saveCons() override; + + //! Saves the primitive vector state + void savePrims() override; + + //! Saves the auxiliary vector state + void saveAux() override; + + //! Saves the domain coordinates + void saveDomain() override; + + //! Saves the constant data + void saveConsts() override; + + //! Constructor + /*! + @param *data pointer to the Data class + @param *env pointer to the Serial Environment containing information on bounds etc. + @param filename String describing the file to create. Can ignore + */ + SerialSaveDataHDF5( + Data * data, SerialEnv * env, string filename="data", OutputDetail detail=OUTPUT_ALL + ) : SaveData(data, 0), env(env), filename(filename), detail(detail) { + // Remove any pre-existing checkpoint file + std::remove((filename+".checkpoint.hdf5").c_str()); + } + + virtual ~SerialSaveDataHDF5() { } //!< Destructor + + //! Saves all cons, prims, aux and constant data + /*! + @par + This calls the other member functions to save their respective + simulation data. + + @param[in] timeSeries flags whether the saved data is final or transient + */ + void saveAll(bool timeSeries=false) override; + + //! Saves user specified variable + /*! + @par + Function saves the data for the variable specified by the string `var` + + @param[in] variable Defines the variable the user wants to save. 
Should match a variable label + @param[in] num number of user-specified variables to save in total (required for consistent numbering of files) + */ + void saveVar(string variable, int num=1) override; + + //! Opens a new HDF5 file + /*! + * @par + * Function opens a new HDF5 file with a specified filename, and closes any current one. + * + * @param[in] name Filename to create + */ + void openFile(const char *name); + + //! Tries to open a checkpoint file + /*! + * @par + * If there is not already a checkkpoint file open for this iteration, opens a new one + */ + void openCheckpointFile(); + + //! Writes a new dataset + /*! + * @par + * Saves a new dataset double to file + * + * @param group Root location to save to + * @param name Name of the new dataset + * @param var Which variable to save within the data array + * @param data Pointer to the data array (cons, prims, aux etc.) + */ + void writeDataSetDouble(const hid_t *group, const char *name, const int *var, const double *data); +}; + +#endif diff --git a/Project/GPU/Src/checkpointArgs.cu b/Project/GPU/Src/checkpointArgs.cu new file mode 100644 index 00000000..c6d2cffb --- /dev/null +++ b/Project/GPU/Src/checkpointArgs.cu @@ -0,0 +1,67 @@ +#include "simData.h" +#include "checkpointArgs.h" +#include "platformEnv.h" +#include +#include +#include "hdf5.h" +#include "hdf5_hl.h" +#include + + +CheckpointArgs::CheckpointArgs(const char* name, PlatformEnv *env) +{ + herr_t error=0, tmpError=-1; + hid_t file = H5Fopen(name, H5F_ACC_RDONLY, H5P_DEFAULT); + + if (file<0) throw std::runtime_error("Could not open checkpoint restart file. Does it exist? CheckpointArgs requires path to file and extension"); + + // Read global file attributes + tmpError = H5LTget_attribute_double(file, ".", "cfl", &(cfl)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "gamma", &(gamma)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "sigma", &(sigma)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "cp", &(cp)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "t", &(t)); + if (tmpError < 0) error = tmpError; + if (error<0) throw std::runtime_error("Checkpoint restart file is missing some global attributes"); + + // Remaining required attributes are stored in the Domain group + hid_t group = H5Gopen(file, "Domain", H5P_DEFAULT); + tmpError = H5LTget_attribute_int(group, ".", "nx", &(nx)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "ny", &(ny)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "nz", &(nz)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Nx", &(Nx)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Ny", &(Ny)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Nz", &(Nz)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Ng", &(Ng)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "xmin", &(xmin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "ymin", &(ymin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "zmin", &(zmin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "xmax", &(xmax)); + if (tmpError < 0) 
error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "ymax", &(ymax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "zmax", &(zmax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "endTime", &(endTime)); + if (tmpError < 0) error = tmpError; + if (error<0) throw std::runtime_error("Checkpoint restart file is missing some domain attributes"); + + H5Gclose(group); + H5Fclose(file); +} + + diff --git a/Project/GPU/Src/parallelCheckpointArgs.cu b/Project/GPU/Src/parallelCheckpointArgs.cu new file mode 100644 index 00000000..20b6d485 --- /dev/null +++ b/Project/GPU/Src/parallelCheckpointArgs.cu @@ -0,0 +1,70 @@ +#include "simData.h" +#include "parallelCheckpointArgs.h" +#include "parallelEnv.h" +#include +#include +#include "hdf5.h" +#include "hdf5_hl.h" +#include + + +ParallelCheckpointArgs::ParallelCheckpointArgs(const char* name, ParallelEnv *env) : CheckpointArgs() +{ + herr_t error=0, tmpError=-1; + + hid_t file_access_property_list = H5Pcreate(H5P_FILE_ACCESS); + H5Pset_fapl_mpio(file_access_property_list, env->mpiCartesianComm, env->mpiInfo); + hid_t file = H5Fopen(name, H5F_ACC_RDONLY, file_access_property_list); + if (file<0) throw std::runtime_error("Could not open checkpoint restart file. Does it exist? CheckpointArgs requires path to file and extension"); + + // Read global file attributes + tmpError = H5LTget_attribute_double(file, ".", "cfl", &(cfl)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "gamma", &(gamma)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "sigma", &(sigma)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "cp", &(cp)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "t", &(t)); + if (tmpError < 0) error = tmpError; + if (error<0) throw std::runtime_error("Checkpoint restart file is missing some global attributes"); + + // Remaining required attributes are stored in the Domain group + hid_t group = H5Gopen(file, "Domain", H5P_DEFAULT); + tmpError = H5LTget_attribute_int(group, ".", "nx", &(nx)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "ny", &(ny)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "nz", &(nz)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Nx", &(Nx)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Ny", &(Ny)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Nz", &(Nz)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Ng", &(Ng)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "xmin", &(xmin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "ymin", &(ymin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "zmin", &(zmin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "xmax", &(xmax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "ymax", &(ymax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "zmax", &(zmax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "endTime", 
&(endTime)); + if (tmpError < 0) error = tmpError; + if (error<0) throw std::runtime_error("Checkpoint restart file is missing some domain attributes"); + + H5Gclose(group); + H5Fclose(file); + H5Pclose(file_access_property_list); +} + + diff --git a/Project/GPU/Src/parallelSaveDataHDF5.cu b/Project/GPU/Src/parallelSaveDataHDF5.cu new file mode 100644 index 00000000..712465d4 --- /dev/null +++ b/Project/GPU/Src/parallelSaveDataHDF5.cu @@ -0,0 +1,348 @@ +#include "parallelSaveDataHDF5.h" +#include +#include +#include +#include "hdf5.h" +#include "hdf5_hl.h" + +using namespace std; + + +/*! + * /brief Opens a HDF5 file + * + * This bundles up closing any existing open checkpoint file, removing the old file with the same name, + * then recording the iteration this file was opened on (for reusing checkpoint files later in the same + * cycle). + * + * TODO: If there is an existing file, if it has the same dimensions, we should overwrite it and not remove it. + * + * @param name Name of the file to open + */ +void ParallelSaveDataHDF5::openFile(const char *name) { + if(this->file) H5Fclose(this->file); + + std::remove(name); + + hid_t file_access_property_list = H5Pcreate(H5P_FILE_ACCESS); + H5Pset_fapl_mpio(file_access_property_list, env->mpiCartesianComm, env->mpiInfo); + + this->file = H5Fcreate( + name, H5F_ACC_TRUNC, H5P_DEFAULT, + file_access_property_list + ); + this->file_iteration = this->d->iters; + H5Pclose(file_access_property_list); +} + + +/*! + * /brief Opens a HDF5 checkpoint file + * + * Checkpoint files are used to either store all data for restarting a run, + * or to store individual outputs in user-defined mode, or both. + * Writing out individual variables happens before the final checkpoint write. + * So therefore, when we want to write out a final file, there may or may not be an existing + * checkpoint file for this cycle full of user-defined outputs. + */ +void ParallelSaveDataHDF5::openCheckpointFile() { + if(this->file) { + // If there's currently a checkpoint file, was it opened this cycle? + if (this->file_iteration != this->d->iters) { + // If not, close the open file, delete the file with the name we want to write to on disk, + // then open a new one + string filename_full = this->filename+".checkpoint."+to_string(this->d->t)+".hdf5"; + this->openFile(filename_full.c_str()); + hid_t user_def = H5Gcreate(this->file, "UserDef", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Gclose(user_def); + } else { + // Then the checkpoint file was opened this cycle, and we can write to it + } + + } else { + // If there's no existing checkpoint file, we need to create a new one. + string filename_full = this->filename+".checkpoint."+to_string(this->d->t)+".hdf5"; + this->openFile(filename_full.c_str()); + hid_t user_def = H5Gcreate(this->file, "UserDef", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Gclose(user_def); + } +} + + +/*! + * /brief Writes an HDF5 dataset to file + * + * Prepares the buffer for writing to file, and writes a dataset. + * + * @param group The group within the file (or the file itself for root datasets) + * @param name The name the dataset should have + * @param var Data is stored in 4-d arrays for each class of data (conserved/primitive/auxiliary), + * with the 1st dimension being the variable. This argument indicates which variable is being output. + * @param data The pointer to the data array. 
+ */ +void ParallelSaveDataHDF5::writeDataSetDouble(const hid_t *group, const char *name, const int *var, + const double *data) { + hsize_t lengths_local[d->dims]; + hsize_t lengths_total[d->dims]; + hsize_t offsets[d->dims]; + + // So now, we set the total data-space size, and the offset the local data-space has from it. + // The local data dimensions Nx/Ny/Nz include ghost cells, whilst the total one does not. + lengths_total[0] = d->nx; + lengths_local[0] = (d->Nx - 2 * d->Ng); + offsets[0] = (d->Nx - 2 * d->Ng) * env->xRankId; + unsigned long buffer_size = lengths_local[0]; // The length of the buffer + + if(d->dims > 1) { + lengths_total[1] = d->ny; + lengths_local[1] = (d->Ny - 2 * d->Ng); + offsets[1] = (d->Ny - 2 * d->Ng) * env->yRankId; + buffer_size *= lengths_local[1]; + } + if(d->dims > 2) { + lengths_total[2] = d->nz; + lengths_local[2] = (d->Nz - 2 * d->Ng); + offsets[2] = (d->Nz - 2 * d->Ng) * env->zRankId; + buffer_size = lengths_local[2]; + } + + // We also need to create a buffer to write to, that excludes the ghost cells. + // So we calculate the size it needs to be, excluding ghost cells. + double buffer[buffer_size]; + int buffer_position(0); + + // Consider the efficiency of this! std::copy would probably be better but maybe the compiler + // will vectorise this. I prefer the consistency of a single set of loops over having 1 per dimension. + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { + buffer[buffer_position++] = data[ID(*var, i, j, k)]; + } + } + } + + // Define the total dataspace for this dataset, and create the dataset + hid_t dataspace_total = H5Screate_simple(d->dims, lengths_total, nullptr); + hid_t dataset = H5Dcreate( + *group, name, H5T_NATIVE_DOUBLE, dataspace_total, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT + ); + + // Define the dataspace that describes the fraction of the total dataspace + // accessed by this process. + hid_t dataspace_local = H5Screate_simple(d->dims, lengths_local, nullptr); + + // Create an access property list that tells the write to use MPI + hid_t dataset_access_property_list = H5Pcreate(H5P_DATASET_XFER); + H5Pset_dxpl_mpio(dataset_access_property_list, H5FD_MPIO_COLLECTIVE); + + // Select the 'hyperslab', i.e. the subset of the total dataspace to write to + // This bit is per process + H5Sselect_hyperslab( + dataspace_total, H5S_SELECT_SET, offsets, nullptr, lengths_local, nullptr + ); + + // Write this processes' buffer contents to the hyperslab + H5Dwrite( + dataset, H5T_NATIVE_DOUBLE, + dataspace_local, dataspace_total, + dataset_access_property_list, buffer + ); + + // Close everything to avoid memory leaks + H5Pclose(dataset_access_property_list); + H5Sclose(dataspace_total); + H5Sclose(dataspace_local); + H5Dclose(dataset); +} + + +/*! + * /brief Saves all data to file + * + * Saves all the data to file. This is modified by the level of detail on this + * (this->detail), and whether or not it is a checkpoint file. + * + * @param timeSeries If this is a checkpoint or not + */ +void ParallelSaveDataHDF5::saveAll(bool timeSeries) +{ + if(timeSeries) { + // If we're doing a timeseries/checkpoint output, things may be complicated + // as saveVars may have written some of the variables to file already! 
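`writeDataSetDouble` above gives each rank one hyperslab of the global `nx` x `ny` x `nz` dataset: the local block spans `Nx - 2*Ng` interior cells (likewise in y and z), it is offset by that length times the rank's coordinate along each axis, and the ghost-free write buffer has to hold the product of the active local lengths. A hedged sketch of that arithmetic, with illustrative names and not part of the build, is below.

```
def slab(n_global, n_local_with_ghosts, ng, rank_coord):
    """Hyperslab length and offset along one axis for one rank (sketch)."""
    length = n_local_with_ghosts - 2 * ng    # interior cells on this rank
    offset = length * rank_coord             # cells owned by lower-ranked neighbours
    assert offset + length <= n_global
    return length, offset

def buffer_cells(local_lengths):
    """Cells in the ghost-free write buffer: the product over active dimensions."""
    size = 1
    for n in local_lengths:
        size *= n
    return size

# The 2x2 test grid above: 40 global cells, Nx = 28 per rank, Ng = 4
print(slab(40, 28, 4, 1))        # -> (20, 20)
print(buffer_cells([20, 20]))    # -> 400
```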
+ string filename_full = this->filename+".checkpoint."+to_string(d->t)+".hdf5"; + if(!env->rank) { + std::cout << "Saving checkpoint '" << filename_full << "' (iteration " + to_string(d->iters) + ")\n"; + } + this->openCheckpointFile(); + + } else { + string filename_full = this->filename+".hdf5"; + if(!env->rank) { + std::cout << "Saving final output '" << filename_full << "'\n"; + } + this->openFile(filename_full.c_str()); + } + + this->saveConsts(); + this->saveDomain(); + this->savePrims(); + if(this->detail != OUTPUT_MINIMAL) this->saveCons(); + if(this->detail == OUTPUT_ALL) this->saveAux(); + + // If this isn't a timeseries, then this is the final save and the file should be closed. + if(!timeSeries)H5Fclose(this->file); +} + + +/*! + * /brief Saves conserved variables + */ +void ParallelSaveDataHDF5::saveCons() +{ + hid_t group = H5Gcreate(this->file, "Conserved", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "Ncons", &d->Ncons, 1); + // For each one of the conserved variables, write it to disk + string varOrder; + for(int var(0); var < d->Ncons; var++) { + this->writeDataSetDouble(&group, d->consLabels[var].c_str(), &var, d->cons); + varOrder += d->consLabels[var] + ','; + } + H5LTset_attribute_string(group, ".", "varOrder", varOrder.c_str()); + H5Gclose(group); +} + + +/*! + * /brief Saves primitive variables + */ +void ParallelSaveDataHDF5::savePrims() +{ + hid_t group = H5Gcreate(this->file, "Primitive", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "Nprims", &d->Nprims, 1); + + string varOrder; + for(int var(0); var < d->Nprims; var++) { + this->writeDataSetDouble(&group, d->primsLabels[var].c_str(), &var, d->prims); + varOrder += d->primsLabels[var] + ','; + } + H5LTset_attribute_string(group, ".", "varOrder", varOrder.c_str()); + H5Gclose(group); +} + + +/*! + * /brief Save auxiliary variables + */ +void ParallelSaveDataHDF5::saveAux() +{ + hid_t group = H5Gcreate(this->file, "Auxiliary", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "Naux", &d->Naux, 1); + + string varOrder; + for(int var(0); var < d->Naux; var++) { + this->writeDataSetDouble(&group, d->auxLabels[var].c_str(), &var, d->aux); + varOrder += d->auxLabels[var] + ','; + } + H5LTset_attribute_string(group, ".", "varOrder", varOrder.c_str()); + H5Gclose(group); +} + + +/*! 
+ * /brief Save domain information + */ +void ParallelSaveDataHDF5::saveDomain() +{ + hid_t group = H5Gcreate(this->file, "Domain", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "nx", &d->nx, 1); + H5LTset_attribute_int(group, ".", "ny", &d->ny, 1); + H5LTset_attribute_int(group, ".", "nz", &d->nz, 1); + H5LTset_attribute_int(group, ".", "Nx", &d->Nx, 1); + H5LTset_attribute_int(group, ".", "Ny", &d->Ny, 1); + H5LTset_attribute_int(group, ".", "Nz", &d->Nz, 1); + H5LTset_attribute_int(group, ".", "Ng", &d->Ng, 1); + H5LTset_attribute_double(group, ".", "xmin", &d->xmin, 1); + H5LTset_attribute_double(group, ".", "ymin", &d->ymin, 1); + H5LTset_attribute_double(group, ".", "zmin", &d->zmin, 1); + H5LTset_attribute_double(group, ".", "xmax", &d->xmax, 1); + H5LTset_attribute_double(group, ".", "ymax", &d->ymax, 1); + H5LTset_attribute_double(group, ".", "zmax", &d->zmax, 1); + H5LTset_attribute_double(group, ".", "dx", &d->dx, 1); + H5LTset_attribute_double(group, ".", "dy", &d->dy, 1); + H5LTset_attribute_double(group, ".", "dz", &d->dz, 1); + H5LTset_attribute_double(group, ".", "endTime", &d->endTime, 1); + H5LTset_attribute_double(group, ".", "dt", &d->dt, 1); + + // Unlike serial, we do not write out the domain- gathering across threads is a pain and it's all defined in xmin, xmax & dx. + H5Gclose(group); +} + + +/*! + * /brief Save constants + */ +void ParallelSaveDataHDF5::saveConsts() +{ + H5LTset_attribute_double(this->file, ".", "cfl", &d->cfl, 1); + H5LTset_attribute_double(this->file, ".", "gamma", &d->gamma, 1); + H5LTset_attribute_double(this->file, ".", "sigma", &d->sigma, 1); + H5LTset_attribute_double(this->file, ".", "cp", &d->cp, 1); + H5LTset_attribute_double(this->file, ".", "t", &d->t, 1); +} + + +/*! + * /brief Save a single variable to a checkpoint file + * + * Saves variables for debug or animation purposes. + * Finds what data index and array the variable name corresponds to, + * then opens a checkpoint file and saves to it. 
+ * + * @param variable The name of the variable + * @param num The number of variables to save; not used in HDF5 version + */ +void ParallelSaveDataHDF5::saveVar(string variable, int num) +{ + int found_var(-1); // Variable number + double *data; // Pointer to the data array containing the variable + + // Determine which variable the user wants saved + for (int var(0); var < d->Ncons; var++) { + if (strcmp(d->consLabels[var].c_str(), variable.c_str()) == 0) { + found_var=var; + data = d->cons; + break; + } + } + + if (found_var < 0) { + for (int var(0); var < d->Nprims; var++) { + if (strcmp(d->primsLabels[var].c_str(), variable.c_str()) == 0) { + found_var=var; + data = d->prims; + break; + } + } + } + + if (found_var < 0) { + for (int var(0); var < d->Naux; var++) { + if (strcmp(d->auxLabels[var].c_str(), variable.c_str()) == 0) { + found_var=var; + data = d->aux; + break; + } + } + } + + if (found_var < 0) { + printf("Error: Could not find user specified variable '%s'\n", variable.c_str()); + exit(1); + } + + this->openCheckpointFile(); + hid_t user_def = H5Gopen1(this->file, "UserDef"); + writeDataSetDouble(&user_def, variable.c_str(), &found_var, data); + H5Gclose(user_def); +} diff --git a/Project/GPU/Src/serialCheckpointArgs.cu b/Project/GPU/Src/serialCheckpointArgs.cu new file mode 100644 index 00000000..c69ffd93 --- /dev/null +++ b/Project/GPU/Src/serialCheckpointArgs.cu @@ -0,0 +1,67 @@ +#include "simData.h" +#include "serialCheckpointArgs.h" +#include "platformEnv.h" +#include +#include +#include "hdf5.h" +#include "hdf5_hl.h" +#include + + +SerialCheckpointArgs::SerialCheckpointArgs(const char* name, PlatformEnv *env) : CheckpointArgs +{ + herr_t error=0, tmpError=-1; + hid_t file = H5Fopen(name, H5F_ACC_RDONLY, H5P_DEFAULT); + + if (file<0) throw std::runtime_error("Could not open checkpoint restart file. Does it exist? 
CheckpointArgs requires path to file and extension"); + + // Read global file attributes + tmpError = H5LTget_attribute_double(file, ".", "cfl", &(cfl)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "gamma", &(gamma)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "sigma", &(sigma)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "cp", &(cp)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(file, ".", "t", &(t)); + if (tmpError < 0) error = tmpError; + if (error<0) throw std::runtime_error("Checkpoint restart file is missing some global attributes"); + + // Remaining required attributes are stored in the Domain group + hid_t group = H5Gopen(file, "Domain", H5P_DEFAULT); + tmpError = H5LTget_attribute_int(group, ".", "nx", &(nx)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "ny", &(ny)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "nz", &(nz)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Nx", &(Nx)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Ny", &(Ny)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Nz", &(Nz)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_int(group, ".", "Ng", &(Ng)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "xmin", &(xmin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "ymin", &(ymin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "zmin", &(zmin)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "xmax", &(xmax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "ymax", &(ymax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "zmax", &(zmax)); + if (tmpError < 0) error = tmpError; + tmpError = H5LTget_attribute_double(group, ".", "endTime", &(endTime)); + if (tmpError < 0) error = tmpError; + if (error<0) throw std::runtime_error("Checkpoint restart file is missing some domain attributes"); + + H5Gclose(group); + H5Fclose(file); +} + + diff --git a/Project/GPU/Src/serialSaveDataHDF5.cu b/Project/GPU/Src/serialSaveDataHDF5.cu new file mode 100644 index 00000000..7b5463d4 --- /dev/null +++ b/Project/GPU/Src/serialSaveDataHDF5.cu @@ -0,0 +1,306 @@ +#include "serialSaveDataHDF5.h" +#include +#include +#include +#include "hdf5.h" +#include "hdf5_hl.h" + +using namespace std; + + +/*! + * /brief Opens a HDF5 file + * + * This bundles up closing any existing open checkpoint file, removing the old file with the same name, + * then recording the iteration this file was opened on (for reusing checkpoint files later in the same + * cycle). + * + * TODO: If there is an existing file, if it has the same dimensions, we should overwrite it and not remove it. + * + * @param name Name of the file to open + */ +void SerialSaveDataHDF5::openFile(const char *name) { + if(this->file) H5Fclose(this->file); + + std::remove(name); + this->file = H5Fcreate(name, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + this->file_iteration = this->d->iters; +} + + +/*! 
+ * /brief Opens a HDF5 checkpoint file + * + * Checkpoint files are used to either store all data for restarting a run, + * or to store individual outputs in user-defined mode, or both. + * Writing out individual variables happens before the final checkpoint write. + * So therefore, when we want to write out a final file, there may or may not be an existing + * checkpoint file for this cycle full of user-defined outputs. + */ +void SerialSaveDataHDF5::openCheckpointFile() { + if(this->file) { + // If there's currently a checkpoint file, was it opened this cycle? + if (this->file_iteration != this->d->iters) { + // If not, close the open file, delete the file with the name we want to write to on disk, + // then open a new one + string filename_full = this->filename+".checkpoint."+to_string(this->d->t)+".hdf5"; + this->openFile(filename_full.c_str()); + hid_t user_def = H5Gcreate(this->file, "UserDef", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Gclose(user_def); + } else { + // Then the checkpoint file was opened this cycle, and we can write to it + } + + } else { + // If there's no existing checkpoint file, we need to create a new one. + string filename_full = this->filename+".checkpoint."+to_string(this->d->t)+".hdf5"; + this->openFile(filename_full.c_str()); + hid_t user_def = H5Gcreate(this->file, "UserDef", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Gclose(user_def); + } +} + + +/*! + * /brief Writes an HDF5 dataset to file + * + * Prepares the buffer for writing to file, and writes a dataset. + * + * @param group The group within the file (or the file itself for root datasets) + * @param name The name the dataset should have + * @param var Data is stored in 4-d arrays for each class of data (conserved/primitive/auxiliary), + * with the 1st dimension being the variable. This argument indicates which variable is being output. + * @param data The pointer to the data array. + */ +void SerialSaveDataHDF5::writeDataSetDouble(const hid_t *group, const char *name, const int *var, + const double *data) { + + // So now, we set the data-space size. We also need to create a buffer to write to, that excludes the ghost cells. + // So we calculate the size it needs to be, excluding ghost cells. + hsize_t lengths[d->dims]; + + lengths[0] = d->ie - d->is; + unsigned long buffer_size = lengths[0]; // The length of the buffer + + if(d->dims > 1) { + lengths[1] = d->je - d->js; + buffer_size *= lengths[1]; + } + if(d->dims > 2) { + lengths[2] = d->ke - d->ks; + buffer_size = lengths[2]; + } + + // Now create the buffer to store the data in + double buffer[buffer_size]; + int buffer_position(0); + + // Consider the efficiency of this! std::copy would probably be better but maybe the compiler + // will vectorise this. I prefer the consistency of a single set of loops over having 1 per dimension. + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { + buffer[buffer_position++] = data[ID(*var, i, j, k)]; + } + } + } + H5LTmake_dataset_double(*group, name, d->dims, lengths, buffer); +} + + +/*! + * /brief Saves all data to file + * + * Saves all the data to file. This is modified by the level of detail on this + * (this->detail), and whether or not it is a checkpoint file. 
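`openCheckpointFile`, in both the serial and parallel writers, names its files `<filename>.checkpoint.<t>.hdf5`, where `<t>` comes from `std::to_string(d->t)` and therefore carries six decimal places, while the final output is plain `<filename>.hdf5`. A small sketch reproducing that naming, so post-processing scripts can locate the files, is below; it assumes the default `filename="data"` and is illustrative only.

```
def output_names(base="data", t=None):
    """Mirror the SaveDataHDF5 naming: final output file and per-time checkpoint file."""
    final = "{}.hdf5".format(base)
    # std::to_string(double) formats like printf("%f"), i.e. six decimal places
    checkpoint = None if t is None else "{}.checkpoint.{:.6f}.hdf5".format(base, t)
    return final, checkpoint

print(output_names("data", 3.0))   # -> ('data.hdf5', 'data.checkpoint.3.000000.hdf5')
```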
+ * + * @param timeSeries If this is a checkpoint or not + */ +void SerialSaveDataHDF5::saveAll(bool timeSeries) +{ + if(timeSeries) { + // If we're doing a timeseries/checkpoint output, things may be complicated + // as saveVars may have written some of the variables to file already! + string filename_full = this->filename+".checkpoint."+to_string(d->t)+".hdf5"; + std::cout << "Saving checkpoint '" << filename_full << "' (iteration "+to_string(d->iters)+")\n"; + this->openCheckpointFile(); + + } else { + string filename_full = this->filename+".hdf5"; + std::cout << "Saving final output '" << filename_full << "'\n"; + this->openFile(filename_full.c_str()); + } + + this->saveConsts(); + this->saveDomain(); + this->savePrims(); + if(this->detail != OUTPUT_MINIMAL) this->saveCons(); + if(this->detail == OUTPUT_ALL) this->saveAux(); + + // If this isn't a timeseries, then this is the final save and the file should be closed. + if(!timeSeries)H5Fclose(this->file); +} + + +/*! + * /brief Saves conserved variables + */ +void SerialSaveDataHDF5::saveCons() +{ + hid_t group = H5Gcreate(this->file, "Conserved", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "Ncons", &d->Ncons, 1); + + // For each one of the conserved variables, write it to disk + string varOrder; + for(int var(0); var < d->Ncons; var++) { + this->writeDataSetDouble(&group, d->consLabels[var].c_str(), &var, d->cons); + varOrder += d->consLabels[var] + ','; + } + H5LTset_attribute_string(group, ".", "varOrder", varOrder.c_str()); + H5Gclose(group); +} + + +/*! + * /brief Saves primitive variables + */ +void SerialSaveDataHDF5::savePrims() +{ + hid_t group = H5Gcreate(this->file, "Primitive", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "Nprims", &d->Nprims, 1); + + string varOrder; + for(int var(0); var < d->Nprims; var++) { + this->writeDataSetDouble(&group, d->primsLabels[var].c_str(), &var, d->prims); + varOrder += d->primsLabels[var] + ','; + } + H5LTset_attribute_string(group, ".", "varOrder", varOrder.c_str()); + H5Gclose(group); +} + + +/*! + * /brief Save auxiliary variables + */ +void SerialSaveDataHDF5::saveAux() +{ + hid_t group = H5Gcreate(this->file, "Auxiliary", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "Naux", &d->Naux, 1); + + string varOrder; + for(int var(0); var < d->Naux; var++) { + this->writeDataSetDouble(&group, d->auxLabels[var].c_str(), &var, d->aux); + varOrder += d->auxLabels[var] + ','; + } + H5LTset_attribute_string(group, ".", "varOrder", varOrder.c_str()); + H5Gclose(group); +} + + +/*! 
+ * /brief Save domain information + */ +void SerialSaveDataHDF5::saveDomain() +{ + hid_t group = H5Gcreate(this->file, "Domain", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5LTset_attribute_int(group, ".", "nx", &d->nx, 1); + H5LTset_attribute_int(group, ".", "ny", &d->ny, 1); + H5LTset_attribute_int(group, ".", "nz", &d->nz, 1); + H5LTset_attribute_int(group, ".", "Nx", &d->Nx, 1); + H5LTset_attribute_int(group, ".", "Ny", &d->Ny, 1); + H5LTset_attribute_int(group, ".", "Nz", &d->Nz, 1); + H5LTset_attribute_int(group, ".", "Ng", &d->Ng, 1); + H5LTset_attribute_double(group, ".", "xmin", &d->xmin, 1); + H5LTset_attribute_double(group, ".", "ymin", &d->ymin, 1); + H5LTset_attribute_double(group, ".", "zmin", &d->zmin, 1); + H5LTset_attribute_double(group, ".", "xmax", &d->xmax, 1); + H5LTset_attribute_double(group, ".", "ymax", &d->ymax, 1); + H5LTset_attribute_double(group, ".", "zmax", &d->zmax, 1); + H5LTset_attribute_double(group, ".", "dx", &d->dx, 1); + H5LTset_attribute_double(group, ".", "dy", &d->dy, 1); + H5LTset_attribute_double(group, ".", "dz", &d->dz, 1); + H5LTset_attribute_double(group, ".", "endTime", &d->endTime, 1); + H5LTset_attribute_double(group, ".", "dt", &d->dt, 1); + + hsize_t length(d->nx); + H5LTmake_dataset_double(group, "x", 1, &length, &d->x[d->Ng]); + + if (d->ny) { + length = d->ny; + H5LTmake_dataset_double(group, "y", 1, &length, &d->y[d->Ng]); + } + if (d->nz) { + length = d->nz; + H5LTmake_dataset_double(group, "z", 1, &length, &d->z[d->Ng]); + } + H5Gclose(group); +} + + +/*! + * /brief Save constants + */ +void SerialSaveDataHDF5::saveConsts() +{ + H5LTset_attribute_double(this->file, ".", "cfl", &d->cfl, 1); + H5LTset_attribute_double(this->file, ".", "gamma", &d->gamma, 1); + H5LTset_attribute_double(this->file, ".", "sigma", &d->sigma, 1); + H5LTset_attribute_double(this->file, ".", "cp", &d->cp, 1); + H5LTset_attribute_double(this->file, ".", "t", &d->t, 1); +} + + +/*! + * /brief Save a single variable to a checkpoint file + * + * Saves variables for debug or animation purposes. + * Finds what data index and array the variable name corresponds to, + * then opens a checkpoint file and saves to it. 
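The domain and constants writers above lean on the HDF5 "lite" (H5LT) convenience layer: one call per attribute, one call per dataset. A self-contained sketch of those two calls, with a placeholder file name and made-up values, looks like this:

// Sketch only: a root attribute and a 1-D dataset inside a group, via H5LT.
// Build with the HDF5 high-level library linked in, e.g. through h5c++ (flags vary by install).
#include "hdf5.h"
#include "hdf5_hl.h"

int main() {
  hid_t file = H5Fcreate("demo.hdf5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);

  double cfl = 0.1;                                       // root-level attribute, as in saveConsts()
  H5LTset_attribute_double(file, ".", "cfl", &cfl, 1);

  hid_t group = H5Gcreate(file, "Domain", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  double x[4] = {0.0, 0.25, 0.5, 0.75};                   // a small axis dataset, as in saveDomain()
  hsize_t length = 4;
  H5LTmake_dataset_double(group, "x", 1, &length, x);     // rank-1 dataset named "x"

  H5Gclose(group);
  H5Fclose(file);
  return 0;
}
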
+ * + * @param variable The name of the variable + * @param num The number of variables to save; not used in HDF5 version + */ +void SerialSaveDataHDF5::saveVar(string variable, int num) +{ + int found_var(-1); // Variable number + double *data; // Pointer to the data array containing the variable + + // Determine which variable the user wants saved + for (int var(0); var < d->Ncons; var++) { + if (strcmp(d->consLabels[var].c_str(), variable.c_str()) == 0) { + found_var=var; + data = d->cons; + break; + } + } + + if (found_var < 0) { + for (int var(0); var < d->Nprims; var++) { + if (strcmp(d->primsLabels[var].c_str(), variable.c_str()) == 0) { + found_var=var; + data = d->prims; + break; + } + } + } + + if (found_var < 0) { + for (int var(0); var < d->Naux; var++) { + if (strcmp(d->auxLabels[var].c_str(), variable.c_str()) == 0) { + found_var=var; + data = d->aux; + break; + } + } + } + + if (found_var < 0) { + printf("Error: Could not find user specified variable '%s'\n", variable.c_str()); + exit(1); + } + + this->openCheckpointFile(); + hid_t user_def = H5Gopen1(this->file, "UserDef"); + writeDataSetDouble(&user_def, variable.c_str(), &found_var, data); + H5Gclose(user_def); +} From 490764b72d2c5cca6962692402a948b86925facb Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Mon, 11 Jan 2021 15:22:38 +0000 Subject: [PATCH 40/56] continuing to modify gpu version for hdf5 and checkpoint restart --- Project/GPU/Include/initFunc.h | 3 +- Project/GPU/Include/initFuncFromCheckpoint.h | 21 +++ Project/GPU/Include/parallelBoundaryConds.h | 10 +- Project/GPU/Include/parallelEnv.h | 1 + .../Include/parallelInitFuncFromCheckpoint.h | 22 +++ Project/GPU/Include/saveData.h | 2 +- Project/GPU/Include/simData.h | 33 ++++- Project/GPU/Makefile | 46 +++++- Project/GPU/Src/initFuncFromCheckpoint.cu | 96 +++++++++++++ Project/GPU/Src/main.cu | 12 +- Project/GPU/Src/parallelEnv.cu | 28 ++-- .../GPU/Src/parallelInitFuncFromCheckpoint.cu | 134 ++++++++++++++++++ Project/GPU/Src/parallelSaveDataHDF5.cu | 1 + Project/GPU/Src/serialSaveDataHDF5.cu | 1 + Project/GPU/Src/simData.cu | 38 ++++- Project/GPU/Src/simulation.cu | 3 +- Project/GPU/Src/srrmhd.cu | 2 +- 17 files changed, 420 insertions(+), 33 deletions(-) create mode 100644 Project/GPU/Include/initFuncFromCheckpoint.h create mode 100644 Project/GPU/Include/parallelInitFuncFromCheckpoint.h create mode 100644 Project/GPU/Src/initFuncFromCheckpoint.cu create mode 100644 Project/GPU/Src/parallelInitFuncFromCheckpoint.cu diff --git a/Project/GPU/Include/initFunc.h b/Project/GPU/Include/initFunc.h index e36b3909..a6899572 100644 --- a/Project/GPU/Include/initFunc.h +++ b/Project/GPU/Include/initFunc.h @@ -13,10 +13,11 @@ class InitialFunc { private: - Data * data; //!< Pointer to Data class containing global simulation data public: + Data * data; //!< Pointer to Data class containing global simulation data + //! Constructor /*! 
Stores a pointer to the Data class for reference in its methods and diff --git a/Project/GPU/Include/initFuncFromCheckpoint.h b/Project/GPU/Include/initFuncFromCheckpoint.h new file mode 100644 index 00000000..4c93ff7a --- /dev/null +++ b/Project/GPU/Include/initFuncFromCheckpoint.h @@ -0,0 +1,21 @@ +#ifndef INITFUNCFROMCHECKPOINT_H +#define INITFUNCFROMCHCKPOINT_H + +#include "simData.h" +#include "initFunc.h" +#include "hdf5.h" +#include "hdf5_hl.h" + +class CheckpointRestart : public InitialFunc +{ + public: + CheckpointRestart(Data * data, const char* name); + + virtual ~CheckpointRestart() { } //!< Destructor + + virtual void readDataSetDouble(const hid_t *group, const char *name, const int *var, double *varData); +}; + + + +#endif diff --git a/Project/GPU/Include/parallelBoundaryConds.h b/Project/GPU/Include/parallelBoundaryConds.h index 85978654..636192a8 100644 --- a/Project/GPU/Include/parallelBoundaryConds.h +++ b/Project/GPU/Include/parallelBoundaryConds.h @@ -30,9 +30,9 @@ class ParallelBcs : public Bcs @param[in] *env pointer to ParallelEnv class @sa Bcs::Bcs */ - ParallelBcs(Data *data, ParallelEnv *env, int xPeriodic=1, int yPeriodic=1, int zPeriodic=1) : Bcs(data), env(env) + ParallelBcs(Data *data, ParallelEnv *env) : Bcs(data), env(env) { - env->setParallelDecomposition(xPeriodic, yPeriodic, zPeriodic); + } virtual ~ParallelBcs() { } //!< Destructor @@ -165,7 +165,7 @@ class ParallelOutflow : public ParallelBcs @param[in] *data pointer to Data class @sa ParallelBcs::ParallelBcs */ - ParallelOutflow(Data * data, ParallelEnv *env) : ParallelBcs(data, env, xPeriodic=0, yPeriodic=0, zPeriodic=0) { } + ParallelOutflow(Data * data, ParallelEnv *env) : ParallelBcs(data, env) { } virtual ~ParallelOutflow() { } //!< Destructor @@ -243,7 +243,7 @@ class ParallelPeriodic : public ParallelBcs @param[in] *env pointer to ParallelEnv class @sa ParallelBcs::ParallelBcs */ - ParallelPeriodic(Data * data, ParallelEnv * env) : ParallelBcs(data, env, xPeriodic=1, yPeriodic=1, zPeriodic=1) { } + ParallelPeriodic(Data * data, ParallelEnv * env) : ParallelBcs(data, env) { } virtual ~ParallelPeriodic() { } //!< Destructor @@ -276,7 +276,7 @@ class ParallelFlow : public ParallelBcs @param[in] *data pointer to Data class @sa ParallelBcs::ParallelBcs */ - ParallelFlow(Data * data, ParallelEnv *env) : ParallelBcs(data, env, xPeriodic=1, yPeriodic=0, zPeriodic=0) { } + ParallelFlow(Data * data, ParallelEnv *env) : ParallelBcs(data, env) { } virtual ~ParallelFlow() { } //!< Destructor diff --git a/Project/GPU/Include/parallelEnv.h b/Project/GPU/Include/parallelEnv.h index 7c67181e..0642fe46 100644 --- a/Project/GPU/Include/parallelEnv.h +++ b/Project/GPU/Include/parallelEnv.h @@ -20,6 +20,7 @@ class ParallelEnv : public PlatformEnv { public: MPI_Comm mpiCartesianComm; //!< Cartesian MPI communicator that maps processes to the simulation grid + MPI_Info mpiInfo; //!< MPI information channel //! 
Constructor -- Initialize global MPI communicator ParallelEnv(int *argcP, char **argvP[], int nxRanks, int nyRanks, int nzRanks, int testing=0); diff --git a/Project/GPU/Include/parallelInitFuncFromCheckpoint.h b/Project/GPU/Include/parallelInitFuncFromCheckpoint.h new file mode 100644 index 00000000..a4832095 --- /dev/null +++ b/Project/GPU/Include/parallelInitFuncFromCheckpoint.h @@ -0,0 +1,22 @@ +#ifndef PARALLELINITFUNCFROMCHECKPOINT_H +#define PARALLELINITFUNCFROMCHCKPOINT_H + +#include "simData.h" +#include "initFunc.h" +#include "hdf5.h" +#include "hdf5_hl.h" +#include "parallelEnv.h" + +class ParallelCheckpointRestart : public InitialFunc +{ + public: + ParallelCheckpointRestart(Data * data, const char* name, ParallelEnv *env); + + virtual ~ParallelCheckpointRestart() { } //!< Destructor + + virtual void readDataSetDouble(const hid_t *group, const char *name, const int *var, double *varData, ParallelEnv *env); +}; + + + +#endif diff --git a/Project/GPU/Include/saveData.h b/Project/GPU/Include/saveData.h index 95734cc2..20d97fcd 100644 --- a/Project/GPU/Include/saveData.h +++ b/Project/GPU/Include/saveData.h @@ -58,7 +58,7 @@ class SaveData in the Project folder. @param *data pointer to the Data class - @param test integar flagging if we are in the 'Examples' directory or not, + @param test integer flagging if we are in the 'Examples' directory or not, Only used for running the given examples, can ignore otherwise. */ SaveData(Data * data, int test=0) : d(data), Nouts(0), Ncount(0), test(test) diff --git a/Project/GPU/Include/simData.h b/Project/GPU/Include/simData.h index 1111ab3b..05936292 100644 --- a/Project/GPU/Include/simData.h +++ b/Project/GPU/Include/simData.h @@ -4,6 +4,7 @@ #include #include #include "platformEnv.h" +#include "checkpointArgs.h" /*! @@ -104,7 +105,7 @@ class Data //@{ alphaX, alphaY, alphaZ,//!< Max wave speed in specified direction. As we are evolving EM fields, this is always the speed of light. //@} - t, //!< Current time + t=-1, //!< Current time dt, //!< Width of current timestep //@{ dx, dy, dz; //!< Witdth of specified spatial step @@ -122,6 +123,10 @@ class Data //@} int dims, //!< Number of dimensions of simulation + //@{ + is, js, ks, + ie, je, ke, //!< Cell IDs for interior grid points + //@} GPUcount; //!< Number of NVIDIA devices detected cudaDeviceProp prop; //!< Properties of NVIDIA device (assuming all are same) @@ -144,6 +149,18 @@ class Data return var * this->Nx * this->Ny * this->Nz + i * this->Ny * this->Nz + j * this->Nz + k; } + //! Initialiser + /*! + @par + Allocates the memory required for the state arrays and sets the simulation + constants to the given values. Does not set initial state, thats done by + the initialFunc object. Called automatically from constructors after setting object vars. + This is separated from the constructor to avoid duplicated code between the two available + constructors for Data. + */ + void initData(PlatformEnv *env); + + //! Constructor /*! @par @@ -179,6 +196,20 @@ class Data double mu1=-1.0e4, double mu2=1.0e4, int frameskip=10); + //! Constructor + /*! + @par + Allocates the memory required for the state arrays and sets the simulation + constants to the given values. Does not set initial state, thats done by + the initialFunc object. 
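The comment above explains why the allocation logic moves into a separate initData(): both the explicit-argument constructor and the new CheckpointArgs constructor need the same setup. A cut-down sketch of that pattern, with illustrative names only, is:

// Sketch only: two constructors sharing one initialiser, as described above.
#include <cstdio>

struct Args { int nx; double endTime; };        // stand-in for the checkpoint-args object

class Grid {
 public:
  Grid(int nx, double endTime) : nx(nx), endTime(endTime) { init(); }
  Grid(const Args &args) : nx(args.nx), endTime(args.endTime) { init(); }
 private:
  int nx;
  double endTime;
  void init() {                                 // common setup/allocation lives here once
    std::printf("grid: nx=%d endTime=%g\n", nx, endTime);
  }
};

int main() {
  Grid fromArguments(64, 3.0);
  Grid fromCheckpoint(Args{128, 0.01});
  return 0;
}
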
+ @param args simulation arguments such as cfl, sigma etc, as read from checkpoint restart file + @param mu1 charge mass ratio of species 1 + @param mu2 charge mass ratio of species 2 + */ + Data(CheckpointArgs args, PlatformEnv *env, double mu1=-1.0e4, double mu2=1.0e4, + int frameskip=10, int reportItersPeriod=1, int functionalSigma=false, double gam=12); + + }; #endif diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index b3e0c821..981d7fc9 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -4,10 +4,23 @@ # -------------- PARAMETERS FOR USERS TO EDIT -------------------- # if USE_MPI=1, need to use parallel versions of objects, such as ParallelEnv, ParallelSaveData etc -USE_MPI=0 +USE_MPI=1 +USE_HDF=1 -# find location of MPI libraries to link on your local system using 'mpicc -show' -MPI_FLAGS = -I/usr/include/mpich -L/usr/lib/x86_64-linux-gnu -lmpichcxx -lmpich +# Compiler used for all compilation objects. This should be the version of the hdf5 +# compiler available on your system that links the correct mpi libraries if required. Should +# be one of h5pcc, h5pcc.openmpi or h5pcc.mpich if using MPI. Should be h5cc otherwise. +#CC = h5pcc +CC = mpic++ + +# Compiler used by hdf5 for c++. Shouldn't need to change this +#export HDF5_CXX := mpic++ +#export HDF5_CLINKER := mpic++ + +HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl + +# this should no longer be needed but leaving them in just in case +MPI_FLAGS = # -------------- END PARAMETERS FOR USERS TO EDIT -------------------- @@ -37,6 +50,11 @@ CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 -lineinfo # NVIDIA compiler flags NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 -Xcompiler -fopenmp -Xcompiler -Wall -lineinfo + +ifeq ($(USE_MPI), 1) + NVFLAGS += -ccbin ${CC} +endif + # Sources SRCS = main.cu \ simulation.cu \ @@ -58,12 +76,21 @@ SRCS = main.cu \ srrmhd.cu \ C2PArgs.cu + +SERIAL_SRCS = serialSaveData.cu \ + serialEnv.cu + +ifeq ($(USE_HDF), 1) + SERIAL_SRCS += serialSaveDataHDF5.cu +endif + PARALLEL_SRCS = parallelSaveData.cu \ parallelEnv.cu \ parallelBoundaryConds.cu -SERIAL_SRCS = serialSaveData.cu \ - serialEnv.cu +ifeq ($(USE_HDF), 1) + PARALLEL_SRCS += parallelSaveDataHDF5.cu +endif # Headers HDRS = ${SRCS:.cu=.h} cudaErrorCheck.h @@ -144,7 +171,7 @@ simulation.o : $(MODULE_DIR)/simulation.cu $(INC_DIR)/simulation.h $(INC_DIR)/mo $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) main.o : $(MODULE_DIR)/main.cu $(INC_DIR)/simulation.h $(INC_DIR)/model.h $(INC_DIR)/simData.h $(INC_DIR)/initFunc.h - $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) $(MPI_FLAGS) + $(CC_GPU) $< -c $(HDF5_FLAGS) $(NVFLAGS) -I$(INC_DIR) -I$(RTFIND_INC_DIR) $(MPI_FLAGS) weno.o : $(MODULE_DIR)/weno.cu $(INC_DIR)/weno.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) @@ -179,6 +206,8 @@ boundaryConds.o : $(MODULE_DIR)/boundaryConds.cu $(INC_DIR)/boundaryConds.h serialSaveData.o : $(MODULE_DIR)/serialSaveData.cu $(INC_DIR)/serialSaveData.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) +serialSaveDataHDF5.o : $(MODULE_DIR)/serialSaveDataHDF5.cu $(INC_DIR)/serialSaveDataHDF5.h + $(CC_GPU) $< -c $(HDF5_FLAGS) $(NVFLAGS) -I$(INC_DIR) serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) @@ -191,6 +220,9 @@ fluxVectorSplitting.o : $(MODULE_DIR)/fluxVectorSplitting.cu $(INC_DIR)/fluxVect parallelSaveData.o 
: $(MODULE_DIR)/parallelSaveData.cu $(INC_DIR)/parallelSaveData.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) +parallelSaveDataHDF5.o : $(MODULE_DIR)/parallelSaveDataHDF5.cu $(INC_DIR)/parallelSaveDataHDF5.h + $(CC_GPU) $< -c $(HDF5_FLAGS) $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) + parallelBoundaryConds.o : $(MODULE_DIR)/parallelBoundaryConds.cu $(INC_DIR)/parallelBoundaryConds.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) @@ -202,7 +234,7 @@ parallelEnv.o : $(MODULE_DIR)/parallelEnv.cu $(INC_DIR)/parallelEnv.h # Executable main : $(RTFIND_OBJS) $(OBJS) $(ENV_OBJS) - $(CC_GPU) $^ -o $@ $(NVFLAGS) $(MPI_FLAGS) + $(CC_GPU) $^ -o $@ $(HDF5_FLAGS) $(NVFLAGS) $(MPI_FLAGS) buildRootfinder: @cd $(RTFIND_DIR) && $(MAKE) objects diff --git a/Project/GPU/Src/initFuncFromCheckpoint.cu b/Project/GPU/Src/initFuncFromCheckpoint.cu new file mode 100644 index 00000000..d714b1aa --- /dev/null +++ b/Project/GPU/Src/initFuncFromCheckpoint.cu @@ -0,0 +1,96 @@ +#include "initFuncFromCheckpoint.h" +#include +#include +#include +#include +#include +#include +#include "hdf5.h" +#include "hdf5_hl.h" + +/*! + * /brief Writes an HDF5 dataset to file + * + * Prepares the buffer for writing to file, and writes a dataset. + * + * @param group The group within the file (or the file itself for root datasets) + * @param name The name the dataset should have + * @param var Data is stored in 4-d arrays for each class of data (conserved/primitive/auxiliary), + * with the 1st dimension being the variable. This argument indicates which variable is being output. + * @param data The pointer to the data array. + */ +void CheckpointRestart::readDataSetDouble(const hid_t *group, const char *name, const int *var, + double *varData) { + // Syntax + Data * d(data); + + // So now, we set the data-space size. We also need to create a buffer to write to, that excludes the ghost cells. + // So we calculate the size it needs to be, excluding ghost cells. + hsize_t lengths[d->dims]; + + lengths[0] = d->ie - d->is; + unsigned long buffer_size = lengths[0]; // The length of the buffer + + if(d->dims > 1) { + lengths[1] = d->je - d->js; + buffer_size *= lengths[1]; + } + if(d->dims > 2) { + lengths[2] = d->ke - d->ks; + buffer_size = lengths[2]; + } + + // Now create the buffer to store the data in + double buffer[buffer_size]; + + H5LTread_dataset_double(*group, name, buffer); + + int buffer_position(0); + + // Consider the efficiency of this! std::copy would probably be better but maybe the compiler + // will vectorise this. I prefer the consistency of a single set of loops over having 1 per dimension. + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { + varData[ID(*var, i, j, k)] = buffer[buffer_position++]; + } + } + } +} + +CheckpointRestart::CheckpointRestart(Data * data, const char *name) : InitialFunc(data) +{ + // Syntax + Data * d(data); + + herr_t error=0; + hid_t file = H5Fopen(name, H5F_ACC_RDONLY, H5P_DEFAULT); + + if (file<0) throw std::runtime_error("Could not open checkpoint restart file. Does it exist? 
CheckpointArgs requires path to file and extension"); + + // Read number of vars and check against the number set by the model we are using + // We we check both cons and prims first, so if there is an error we know before we've wasted time + // reading in any data + int NconsFile=0, NprimsFile=0; + hid_t groupCons = H5Gopen(file, "Conserved", H5P_DEFAULT); + error = H5LTget_attribute_int(groupCons, ".", "Ncons", &(NconsFile)); + if (error<0 || NconsFile < d->Ncons) throw std::runtime_error("Too few conserved vars recorded in checkpoint restart file for this model"); + + hid_t groupPrims = H5Gopen(file, "Primitive", H5P_DEFAULT); + error = H5LTget_attribute_int(groupPrims, ".", "Nprims", &(NprimsFile)); + if (error<0 || NconsFile < d->Nprims) throw std::runtime_error("Too few primitive vars recorded in checkpoint restart file for this model"); + + // Read all cons vars + for(int var(0); var < d->Ncons; var++) { + readDataSetDouble(&groupCons, d->consLabels[var].c_str(), &var, d->cons); + } + H5Gclose(groupCons); + + // Read all prims vars + for(int var(0); var < d->Nprims; var++) { + readDataSetDouble(&groupPrims, d->primsLabels[var].c_str(), &var, d->prims); + } + H5Gclose(groupPrims); + + H5Fclose(file); +} diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 7b2ffe50..9ef7f103 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -5,9 +5,10 @@ #include "srmhd.h" #include "srrmhd.h" #include "boundaryConds.h" +#include "parallelBoundaryConds.h" #include "rkSplit.h" #include "SSP2.h" -#include "serialSaveData.h" +#include "parallelSaveDataHDF5.h" #include "fluxVectorSplitting.h" #include "serialEnv.h" @@ -42,6 +43,9 @@ int main(int argc, char *argv[]) { double sigma(0); bool output(true); int safety(180); + int nxRanks(2); + int nyRanks(2); + int nzRanks(1); char * ptr(0); //! Overwrite any variables that have been passed in as main() arguments @@ -51,7 +55,7 @@ int main(int argc, char *argv[]) { } } - SerialEnv env(&argc, &argv, 1, 1, 1); + ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, cfl, Ng, gamma, sigma); @@ -61,7 +65,7 @@ int main(int argc, char *argv[]) { FVS fluxMethod(&data, &model); - Flow bcs(&data); + ParallelFlow bcs(&data, &env); Simulation sim(&data, &env); @@ -69,7 +73,7 @@ int main(int argc, char *argv[]) { RK2 timeInt(&data, &model, &bcs, &fluxMethod); - SerialSaveData save(&data, &env); + ParallelSaveDataHDF5 save(&data, &env, "data_parallel", ParallelSaveDataHDF5::OUTPUT_ALL); // Now objects have been created, set up the simulation sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); diff --git a/Project/GPU/Src/parallelEnv.cu b/Project/GPU/Src/parallelEnv.cu index 0c8141a8..694e81ea 100644 --- a/Project/GPU/Src/parallelEnv.cu +++ b/Project/GPU/Src/parallelEnv.cu @@ -12,20 +12,30 @@ ParallelEnv::ParallelEnv(int *argcP, char **argvP[], int nxRanks, int nyRanks, int nzRanks, int testing) : PlatformEnv(testing) { + int initialized; MPI_Initialized(&initialized); - if (!initialized && !testing) MPI_Init(argcP, argvP); - - MPI_Comm_size(MPI_COMM_WORLD, &nProc); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + // TODO -- is testing required? 
Won't initialize if already initialised anyway + if (!initialized && !testing) MPI_Init(argcP, argvP); + + MPI_Comm_size(MPI_COMM_WORLD, &nProc); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Info_create(&this->mpiInfo); - if (rank==0){ + if (rank==0){ printf("Running in multi-process mode with %d processes\n", nProc); } - - this->nxRanks = nxRanks; - this->nyRanks = nyRanks; - this->nzRanks = nzRanks; + + this->nxRanks = nxRanks; + this->nyRanks = nyRanks; + this->nzRanks = nzRanks; + + // NOTE: We always set the parallel decomposition to be periodic in all dimensions here, rather than determining + // periodicity based on the Bcs object. This is very slightly less efficient for eg Flow bcs, as external processes will + // exchange a small amount of data which is not used, but makes the order in which bcs are created relative to + // PlatformEnv and Data much less strict. This is necessary as parallel checkpoint restart requires the cartesian + // mpi communicator set below to exist before being able to create Data, but bcs require Data to be created first. + setParallelDecomposition(1,1,1); } ParallelEnv::~ParallelEnv() diff --git a/Project/GPU/Src/parallelInitFuncFromCheckpoint.cu b/Project/GPU/Src/parallelInitFuncFromCheckpoint.cu new file mode 100644 index 00000000..17122142 --- /dev/null +++ b/Project/GPU/Src/parallelInitFuncFromCheckpoint.cu @@ -0,0 +1,134 @@ +#include "parallelInitFuncFromCheckpoint.h" +#include +#include +#include +#include +#include +#include +#include "hdf5.h" +#include "hdf5_hl.h" + +/*! + * /brief Writes an HDF5 dataset to file + * + * Prepares the buffer for writing to file, and writes a dataset. + * + * @param group The group within the file (or the file itself for root datasets) + * @param name The name the dataset should have + * @param var Data is stored in 4-d arrays for each class of data (conserved/primitive/auxiliary), + * with the 1st dimension being the variable. This argument indicates which variable is being output. + * @param data The pointer to the data array. + */ +void ParallelCheckpointRestart::readDataSetDouble(const hid_t *group, const char *name, const int *var, + double *varData, ParallelEnv* env) { + // Syntax + Data * d(data); + + // So now, we set the total data-space size, and the offset the local data-space has from it. + // The local data dimensions Nx/Ny/Nz include ghost cells, whilst the total ones (nx/ny/nz) do not. + // The data-spaces to be read should not include ghost cells + hsize_t lengths_local[d->dims]; + hsize_t lengths_total[d->dims]; + hsize_t offsets[d->dims]; + + lengths_total[0] = d->nx; + lengths_local[0] = (d->Nx - 2 * d->Ng); + offsets[0] = (d->Nx - 2 * d->Ng) * env->xRankId; + unsigned long buffer_size = lengths_local[0]; // The length of the buffer + + if(d->dims > 1) { + lengths_total[1] = d->ny; + lengths_local[1] = (d->Ny - 2 * d->Ng); + offsets[1] = (d->Ny - 2 * d->Ng) * env->yRankId; + buffer_size *= lengths_local[1]; + } + if(d->dims > 2) { + lengths_total[2] = d->nz; + lengths_local[2] = (d->Nz - 2 * d->Ng); + offsets[2] = (d->Nz - 2 * d->Ng) * env->zRankId; + buffer_size = lengths_local[2]; + } + + // Now create the buffer to store the data in + double buffer[buffer_size]; + + + // Define the total dataspace for this dataset, and create the dataset + hid_t dataspace_total = H5Screate_simple(d->dims, lengths_total, nullptr); + hid_t dataset = H5Dopen( + *group, name, H5P_DEFAULT + ); + + // Define the dataspace that describes the fraction of the total dataspace + // accessed by this process. 
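Before the hyperslab selection that follows, each process works out which slice of the global, ghost-free dataset it owns: the local interior extent along an axis, offset by that extent times the process's rank index along the same axis. A tiny sketch of that arithmetic with hypothetical sizes:

// Sketch only: per-rank extent and offset into the global dataset (made-up sizes).
#include <cstdio>

int main() {
  int nx = 256, Ng = 4, nxRanks = 4;        // global interior cells, ghost cells, ranks along x
  int Nx = nx / nxRanks + 2 * Ng;           // local array size including ghost cells

  for (int xRankId = 0; xRankId < nxRanks; xRankId++) {
    unsigned long local  = Nx - 2 * Ng;     // interior cells this rank owns
    unsigned long offset = local * xRankId; // where they start in the file's dataspace
    std::printf("rank %d reads cells [%lu, %lu)\n", xRankId, offset, offset + local);
  }
  return 0;
}

Each rank then selects exactly this slab of the file dataspace with H5Sselect_hyperslab before the collective H5Dread, which is what the code below does.
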
+ hid_t dataspace_local = H5Screate_simple(d->dims, lengths_local, nullptr); + + // Create an access property list that tells the write to use MPI + hid_t dataset_access_property_list = H5Pcreate(H5P_DATASET_XFER); + H5Pset_dxpl_mpio(dataset_access_property_list, H5FD_MPIO_COLLECTIVE); + + // Select the 'hyperslab', i.e. the subset of the total dataspace to write to + // This bit is per process + H5Sselect_hyperslab( + dataspace_total, H5S_SELECT_SET, offsets, nullptr, lengths_local, nullptr + ); + + // Read this processes hyperslab into the buffer + H5Dread( + dataset, H5T_NATIVE_DOUBLE, + dataspace_local, dataspace_total, + dataset_access_property_list, buffer + ); + + int buffer_position(0); + + // Consider the efficiency of this! std::copy would probably be better but maybe the compiler + // will vectorise this. I prefer the consistency of a single set of loops over having 1 per dimension. + for (int i(d->is); i < d->ie; i++) { + for (int j(d->js); j < d->je; j++) { + for (int k(d->ks); k < d->ke; k++) { + varData[ID(*var, i, j, k)] = buffer[buffer_position++]; + } + } + } +} + +ParallelCheckpointRestart::ParallelCheckpointRestart(Data * data, const char *name, ParallelEnv *env) : InitialFunc(data) +{ + // Syntax + Data * d(data); + + herr_t error=0; + + hid_t file_access_property_list = H5Pcreate(H5P_FILE_ACCESS); + H5Pset_fapl_mpio(file_access_property_list, env->mpiCartesianComm, env->mpiInfo); + hid_t file = H5Fopen(name, H5F_ACC_RDONLY, file_access_property_list); + + if (file<0) throw std::runtime_error("Could not open checkpoint restart file. Does it exist? CheckpointArgs requires path to file and extension"); + + // Read number of vars and check against the number set by the model we are using + // We we check both cons and prims first, so if there is an error we know before we've wasted time + // reading in any data + int NconsFile=0, NprimsFile=0; + hid_t groupCons = H5Gopen(file, "Conserved", H5P_DEFAULT); + error = H5LTget_attribute_int(groupCons, ".", "Ncons", &(NconsFile)); + if (error<0 || NconsFile < d->Ncons) throw std::runtime_error("Too few conserved vars recorded in checkpoint restart file for this model"); + + hid_t groupPrims = H5Gopen(file, "Primitive", H5P_DEFAULT); + error = H5LTget_attribute_int(groupPrims, ".", "Nprims", &(NprimsFile)); + if (error<0 || NconsFile < d->Nprims) throw std::runtime_error("Too few primitive vars recorded in checkpoint restart file for this model"); + + // Read all cons vars + for(int var(0); var < d->Ncons; var++) { + readDataSetDouble(&groupCons, d->consLabels[var].c_str(), &var, d->cons, env); + } + H5Gclose(groupCons); + + // Read all prims vars + for(int var(0); var < d->Nprims; var++) { + readDataSetDouble(&groupPrims, d->primsLabels[var].c_str(), &var, d->prims, env); + } + H5Gclose(groupPrims); + + H5Fclose(file); +} diff --git a/Project/GPU/Src/parallelSaveDataHDF5.cu b/Project/GPU/Src/parallelSaveDataHDF5.cu index 712465d4..c045e432 100644 --- a/Project/GPU/Src/parallelSaveDataHDF5.cu +++ b/Project/GPU/Src/parallelSaveDataHDF5.cu @@ -7,6 +7,7 @@ using namespace std; +#define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) /*! 
* /brief Opens a HDF5 file diff --git a/Project/GPU/Src/serialSaveDataHDF5.cu b/Project/GPU/Src/serialSaveDataHDF5.cu index 7b5463d4..45853d50 100644 --- a/Project/GPU/Src/serialSaveDataHDF5.cu +++ b/Project/GPU/Src/serialSaveDataHDF5.cu @@ -7,6 +7,7 @@ using namespace std; +#define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) /*! * /brief Opens a HDF5 file diff --git a/Project/GPU/Src/simData.cu b/Project/GPU/Src/simData.cu index 40e79c13..58bfc73f 100644 --- a/Project/GPU/Src/simData.cu +++ b/Project/GPU/Src/simData.cu @@ -25,8 +25,31 @@ Data::Data(int nx, int ny, int nz, Ncons(0), Nprims(0), Naux(0), cp(cp), mu1(mu1), mu2(mu2), - frameSkip(frameSkip) + frameSkip(frameSkip), t(0) { + initData(env); +} + +Data::Data(CheckpointArgs args, PlatformEnv *env, double mu1, double mu2, + int frameSkip) + : + nx(args.nx), ny(args.ny), nz(args.nz), + xmin(args.xmin), xmax(args.xmax), + ymin(args.ymin), ymax(args.ymax), + zmin(args.zmin), zmax(args.zmax), + endTime(args.endTime), cfl(args.cfl), Ng(args.Ng), + gamma(args.gamma), sigma(args.sigma), + memSet(0), bcsSet(0), + Ncons(0), Nprims(0), Naux(0), + cp(args.cp), + mu1(mu1), mu2(mu2), + frameSkip(frameSkip), + t(args.t) +{ + initData(env); +} + +void Data::initData(PlatformEnv *env){ // TODO -- handle nx not dividing perfectly into nxRanks // Set Nx to be nx per MPI process + ghost cells @@ -51,6 +74,17 @@ Data::Data(int nx, int ny, int nz, dims = 1; } + // Set some variables that define the interior cells + is = Ng; ie = Nx-Ng; // i-start, i-end + js = Ng; je = Ny-Ng; // j-start, j-end + ks = Ng; ke = Nz-Ng; // k-start, k-end + if (dims<3) { + ks = 0; ke = 1; + } + if (dims<2) { + js = 0; je = 1; + } + // Total number of cells Ncells = Nx * Ny * Nz; @@ -95,5 +129,5 @@ Data::Data(int nx, int ny, int nz, } // cudaDeviceSetCacheConfig(cudaFuncCachePreferShared); - } + diff --git a/Project/GPU/Src/simulation.cu b/Project/GPU/Src/simulation.cu index 46d36208..ebbbe636 100644 --- a/Project/GPU/Src/simulation.cu +++ b/Project/GPU/Src/simulation.cu @@ -56,7 +56,6 @@ Simulation::Simulation(Data * data, PlatformEnv *env) : data(data), env(env) d->dy = (d->ymax - d->ymin) / d->ny; d->dz = (d->zmax - d->zmin) / d->nz; d->iters = 0; - d->t = 0; d->alphaX = 1.0; d->alphaY = 1.0; d->alphaZ = 1.0; @@ -193,7 +192,7 @@ void Simulation::evolve(bool output, int safety) } if (safety>0 && d->iters%safety==0) { - this->save->saveAll(); + this->save->saveAll(true); if (env->rank==0) printf("Data saved...\n"); } diff --git a/Project/GPU/Src/srrmhd.cu b/Project/GPU/Src/srrmhd.cu index 43027d0d..5c703831 100644 --- a/Project/GPU/Src/srrmhd.cu +++ b/Project/GPU/Src/srrmhd.cu @@ -72,7 +72,7 @@ SRRMHD::SRRMHD(Data * data) : Model(data) this->data->consLabels.push_back("D"); this->data->consLabels.push_back("Sx"); - this->data->consLabels.push_back("Sy"); this->data->consLabels.push_back("Sx"); + this->data->consLabels.push_back("Sy"); this->data->consLabels.push_back("Sz"); this->data->consLabels.push_back("tau"); this->data->consLabels.push_back("Bx"); this->data->consLabels.push_back("By"); this->data->consLabels.push_back("Bz"); this->data->consLabels.push_back("Ex"); this->data->consLabels.push_back("Ey"); From 478fa0610c1cb5d1585d4e31c58a2836db4ee89e Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Mon, 11 Jan 2021 15:40:02 +0000 Subject: [PATCH 41/56] updating project makefile for checkpoint restart --- Project/GPU/Include/simData.h | 2 +- Project/GPU/Makefile | 20 +++++++++++++++++-- 
Project/GPU/Src/initFuncFromCheckpoint.cu | 2 ++ Project/GPU/Src/main.cu | 13 ++++++++++-- .../GPU/Src/parallelInitFuncFromCheckpoint.cu | 2 ++ 5 files changed, 34 insertions(+), 5 deletions(-) diff --git a/Project/GPU/Include/simData.h b/Project/GPU/Include/simData.h index 05936292..24665c62 100644 --- a/Project/GPU/Include/simData.h +++ b/Project/GPU/Include/simData.h @@ -207,7 +207,7 @@ class Data @param mu2 charge mass ratio of species 2 */ Data(CheckpointArgs args, PlatformEnv *env, double mu1=-1.0e4, double mu2=1.0e4, - int frameskip=10, int reportItersPeriod=1, int functionalSigma=false, double gam=12); + int frameskip=10); }; diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 981d7fc9..c7b284a5 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -81,7 +81,9 @@ SERIAL_SRCS = serialSaveData.cu \ serialEnv.cu ifeq ($(USE_HDF), 1) - SERIAL_SRCS += serialSaveDataHDF5.cu + SERIAL_SRCS += serialSaveDataHDF5.cu \ + initFuncFromCheckpoint.cu \ + checkpointArgs.cu endif PARALLEL_SRCS = parallelSaveData.cu \ @@ -89,7 +91,9 @@ PARALLEL_SRCS = parallelSaveData.cu \ parallelBoundaryConds.cu ifeq ($(USE_HDF), 1) - PARALLEL_SRCS += parallelSaveDataHDF5.cu + PARALLEL_SRCS += parallelSaveDataHDF5.cu \ + parallelInitFuncFromCheckpoint.cu \ + parallelCheckpointArgs.cu endif # Headers @@ -152,9 +156,18 @@ clean : simData.o : $(MODULE_DIR)/simData.cu $(INC_DIR)/simData.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) +serialCheckpointArgs.o : $(MODULE_DIR)/serialCheckpointArgs.cu $(INC_DIR)/serialCheckpointArgs.h + $(CC_GPU) $< -c $(HDF5_FLAGS) $(NVFLAGS) -I$(INC_DIR) + +parallelCheckpointArgs.o : $(MODULE_DIR)/parallelCheckpointArgs.cu $(INC_DIR)/parallelCheckpointArgs.h + $(CC_GPU) $< -c $(HDF5_FLAGS) $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) + initFunc.o : $(MODULE_DIR)/initFunc.cu $(INC_DIR)/initFunc.h $(INC_DIR)/simData.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) +initFuncFromCheckpoint.o : $(MODULE_DIR)/initFuncFromCheckpoint.cu $(INC_DIR)/initFuncFromCheckpoint.h $(INC_DIR)/simData.h + $(CC_GPU) $< -c $(HDF5_FLAGS) $(NVFLAGS) -I$(INC_DIR) + C2PArgs.o : $(MODULE_DIR)/C2PArgs.cu $(INC_DIR)/C2PArgs.h $(INC_DIR)/simData.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) @@ -229,6 +242,9 @@ parallelBoundaryConds.o : $(MODULE_DIR)/parallelBoundaryConds.cu $(INC_DIR)/para parallelEnv.o : $(MODULE_DIR)/parallelEnv.cu $(INC_DIR)/parallelEnv.h $(CC_GPU) $< -c $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) +parallelInitFuncFromCheckpoint.o : $(MODULE_DIR)/parallelInitFuncFromCheckpoint.cu $(INC_DIR)/parallelInitFuncFromCheckpoint.h $(INC_DIR)/simData.h + $(CC_GPU) $< -c $(HDF5_FLAGS) $(NVFLAGS) -I$(INC_DIR) $(MPI_FLAGS) + #end parallel sources diff --git a/Project/GPU/Src/initFuncFromCheckpoint.cu b/Project/GPU/Src/initFuncFromCheckpoint.cu index d714b1aa..11dfb5ca 100644 --- a/Project/GPU/Src/initFuncFromCheckpoint.cu +++ b/Project/GPU/Src/initFuncFromCheckpoint.cu @@ -8,6 +8,8 @@ #include "hdf5.h" #include "hdf5_hl.h" +#define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) + /*! 
* /brief Writes an HDF5 dataset to file * diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 9ef7f103..75bce683 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -1,7 +1,9 @@ // Serial main #include "simData.h" +#include "parallelCheckpointArgs.h" #include "simulation.h" #include "initFunc.h" +#include "initFuncFromCheckpoint.h" #include "srmhd.h" #include "srrmhd.h" #include "boundaryConds.h" @@ -57,8 +59,15 @@ int main(int argc, char *argv[]) { ParallelEnv env(&argc, &argv, nxRanks, nyRanks, nzRanks); - Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, - cfl, Ng, gamma, sigma); + const char* filename = "data_t0.checkpoint.hdf5"; + + ParallelCheckpointArgs checkpointArgs(filename, &env); + checkpointArgs.endTime=3.0; + + Data data(checkpointArgs, &env); + + //Data data(nx, ny, nz, xmin, xmax, ymin, ymax, zmin, zmax, endTime, &env, + //cfl, Ng, gamma, sigma); // Choose particulars of simulation SRMHD model(&data); diff --git a/Project/GPU/Src/parallelInitFuncFromCheckpoint.cu b/Project/GPU/Src/parallelInitFuncFromCheckpoint.cu index 17122142..f9533624 100644 --- a/Project/GPU/Src/parallelInitFuncFromCheckpoint.cu +++ b/Project/GPU/Src/parallelInitFuncFromCheckpoint.cu @@ -8,6 +8,8 @@ #include "hdf5.h" #include "hdf5_hl.h" +#define ID(variable, idx, jdx, kdx) ((variable)*(d->Nx)*(d->Ny)*(d->Nz) + (idx)*(d->Ny)*(d->Nz) + (jdx)*(d->Nz) + (kdx)) + /*! * /brief Writes an HDF5 dataset to file * From 919f4830ac4a02afb5e32cfd86ce12b7c0a66b65 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 12 Jan 2021 13:26:22 +0000 Subject: [PATCH 42/56] small fix to project makefile with checkpoint restart --- Project/GPU/Makefile | 2 +- Project/GPU/Src/main.cu | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index c7b284a5..3d160bb4 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -83,7 +83,7 @@ SERIAL_SRCS = serialSaveData.cu \ ifeq ($(USE_HDF), 1) SERIAL_SRCS += serialSaveDataHDF5.cu \ initFuncFromCheckpoint.cu \ - checkpointArgs.cu + serialCheckpointArgs.cu endif PARALLEL_SRCS = parallelSaveData.cu \ diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu index 75bce683..7158bcba 100644 --- a/Project/GPU/Src/main.cu +++ b/Project/GPU/Src/main.cu @@ -39,7 +39,8 @@ int main(int argc, char *argv[]) { double ymax(1.0); double zmin(-1.5); double zmax(1.5); - double endTime(0.0005); + //double endTime(0.0005); + double endTime(0.01); double cfl(0.1); double gamma(4.0/3.0); double sigma(0); @@ -62,7 +63,7 @@ int main(int argc, char *argv[]) { const char* filename = "data_t0.checkpoint.hdf5"; ParallelCheckpointArgs checkpointArgs(filename, &env); - checkpointArgs.endTime=3.0; + checkpointArgs.endTime=0.1; Data data(checkpointArgs, &env); From de16673a04a834a3acd244082809436c8a3ba6ed Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Thu, 14 Jan 2021 09:49:24 +0000 Subject: [PATCH 43/56] adding example checkpoint file; cleaning up makefile --- Project/GPU/Makefile | 11 +++-------- Project/GPU/Src/main.cu | 2 +- Project/GPU/data_t0.checkpoint.hdf5 | Bin 0 -> 266408 bytes 3 files changed, 4 insertions(+), 9 deletions(-) create mode 100644 Project/GPU/data_t0.checkpoint.hdf5 diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 3d160bb4..f72611e3 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -7,16 +7,11 @@ USE_MPI=1 USE_HDF=1 -# Compiler used for all compilation objects. 
This should be the version of the hdf5
-# compiler available on your system that links the correct mpi libraries if required. Should
-# be one of h5pcc, h5pcc.openmpi or h5pcc.mpich if using MPI. Should be h5cc otherwise.
-#CC = h5pcc
 CC = mpic++
-# Compiler used by hdf5 for c++. Shouldn't need to change this
-#export HDF5_CXX := mpic++
-#export HDF5_CLINKER := mpic++
-
+# HDF5 libraries must be linked explicitly like this rather than using the hdf5 compiler h5pcc.
+# h5pcc should wrap mpicc with the hdf5 libraries included, but appears to interact badly with nvcc
+# The library paths below are found using h5pcc -show
 HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl
 # this should no longer be needed but leaving them in just in case
diff --git a/Project/GPU/Src/main.cu b/Project/GPU/Src/main.cu
index 7158bcba..60e4f941 100644
--- a/Project/GPU/Src/main.cu
+++ b/Project/GPU/Src/main.cu
@@ -63,7 +63,7 @@ int main(int argc, char *argv[]) {
   const char* filename = "data_t0.checkpoint.hdf5";
   ParallelCheckpointArgs checkpointArgs(filename, &env);
-  checkpointArgs.endTime=0.1;
+  checkpointArgs.endTime=endTime;
   Data data(checkpointArgs, &env);
diff --git a/Project/GPU/data_t0.checkpoint.hdf5 b/Project/GPU/data_t0.checkpoint.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..c3c39aeb5902e4b9491ff662222cf322ce0cab1e
GIT binary patch
literal 266408
[266408 bytes of base85-encoded binary checkpoint data omitted]
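For reference, the restart side of this workflow opens the checkpoint read-only and pulls the global attributes back out with the matching H5LT getters, as CheckpointArgs does above; a minimal serial sketch (placeholder file name, no MPI file-access property list) is:

// Sketch only: reading restart attributes back from a checkpoint file.
#include <cstdio>
#include "hdf5.h"
#include "hdf5_hl.h"

int main() {
  hid_t file = H5Fopen("demo.hdf5", H5F_ACC_RDONLY, H5P_DEFAULT);
  if (file < 0) { std::fprintf(stderr, "could not open restart file\n"); return 1; }

  double cfl = 0.0, t = 0.0;
  herr_t err = 0;
  if (H5LTget_attribute_double(file, ".", "cfl", &cfl) < 0) err = -1;  // written by saveConsts()
  if (H5LTget_attribute_double(file, ".", "t",   &t)   < 0) err = -1;

  H5Fclose(file);
  if (err < 0) { std::fprintf(stderr, "missing attributes\n"); return 1; }
  std::printf("restarting from t=%g with cfl=%g\n", t, cfl);
  return 0;
}
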
z`F(ufW8U0!{9)xM7RvCUmgin;T=#$Dx??w=0@d;8pgLY>_0_qv{O=BW?EXgnmpd$Y z&wNv$I-Xppj@McJ<2|zc!(Vve=Z*aFy?stbXFtS^f?Oy#M$n_59&8F244%^?ay~Cl{*Y(Lr@Qx^}!Z z%RhRBf!j3lPuk=7=a#>IGOCXcymno$TszW-_Yd#rf)Zoz*WoB+Fm_u-7i#sGfiQ*asH8 z`wx>*9WTyT$Lp;Akma-dJ<9LS)5u?Vre9ajyLvLJ4SRL6_+)$uy3zwh=ef1{Ci{-}}v#Ae@px>r3Ps^i7^>UeZe z9WU0uaBY_V^En?{sF8o#Wiy|1-sO`~9q-?*zxVPi|MNcglp6UzS!Ca*ht~6wAz$ zUvz#bsxLY}71bA=AB*aX&d){lMb8gK_0C1kNzP4B9WP!#)bZ$`I$o@w-sex}a_4mC zcBqaQ|NQEBbWk1do2H-M{dZruPuw?99WU;mIvyQV$BXqvpP!=oqU%RdebM!^sJ`g_ zQB+@a|17F6IzJTE7oDGq>Wj{gMfK)lbF#S^s^i7$hdLe|RL6_;Mb8gK^+nH5MfF9` zk45#<>-^rOG-Iht<1ZR$`&R>&Zu@Hox^&G7mA0!=saL@Gx?5@9HfisEHF zA6KV-liNvalOAeEb-e5cnox%7c%9XcnYH0Gp}f_ObFS3DZol8}<}If{bv(IH9j~+cRTjUeUYI$me>cdnV`FTC#BvvsNG|Lvmz-|gLXGOFXrh3a^n z)xR}7%Rlc|*Z!fAU%KJA54Z0&8P)ORLUp{(>X$ex%OAM^xAyB=&p+jUeUYI$me>eOt2pD>pcH?(X&c*GHAQeXr+aRL7GG)$uy3f8>BH|J?y+ z|Fe<*@|?>YJM*lQQ5{b%RLASAeuLj<`Q5e~`S6VO{9)H^z1FR>O-6M*xlkRiv-+c+ z%JR=y_0PYUshd&2it?NP_se>qdjsl6tn zI-Xppj@McJcNfX>_c(9w@s0e`o}b~eGkQ-(bv(IH9j~+ck5^0z(m{+34mgf*@@@ptunsE#KWs^if?bv!z#jzWi+QMfFAZkD~gb`)5&o(fOgMzUcf^R9|#{ zEUGU$KNr;(JwFuH7d<}})fYWK7S$I$KmV`mi}tUmzUci^RA2P@DXK5JeiYRgT|bNJ zi|!vq^+os3qWYrqLs5Ov`KhSB==@kzUvz#hsxNweD5@`dek!UjdVVabcP@5Lc5a61 zc+SmG9ghyGl*=clN?==xDqUv&K}sxP{K z6xA2qKa1*%&JRWPMdzoY`l9n=QGL<*xv0MA`Jt%3==rIrzUcX}sDApM-?Lpf{X2Ca zbs%*hbs%*hbs%*hbs%*hbs%*hbs%*hbs%*hbs%*hbs%*hbs%*hbs%*hbs%*hbs%*h zbs%*hb>M%q1Bc9c<+?A8Il7!M^&#B{={ZR2Kw1aVI*`_Zv<{?oAgu#w9Z2gyS_jfP zkk)~;4y1J;tpjNtNb5ja2huu_)`7GRq;(*z18E&d>p)ru(mL?JyAJf&Y{^$YUVq8T znph=wmtQ@uP;7igKK0xO-u`N?pF-l}=`0YMB|8U95vb`5r<%TYOD-VBu;K3v6`QyH`bcesc zIvyP#zH;Qk3*X4{XPWi>AN{kQ|J|3D-Sm!)maJU!_~ZAyIz!*e)!iQZ*2sGP@~3y4 zuZ~B@hp!yD@S$~=YP%1vn0@SHpVjlPIH=c`_iVak<)7Wp_{m2-`c_U@e&t^K*YkhY zdzlV@e|0=MK78fKg+IPN%dhoc`iw8?`Je8%@x&opELpj%&%1lH&epfm|DzXwGOC{c z$weLKtK-q};VVZjTw?j9+kXF#e|p7nlj`|TZTaO+>ukMbWz4Z_em3hIeJg{HzNqIx z_56p|TeidBUmcH*4_`TQ;n1tI{PB;!_Ny-S`@YMf>z#Mt_Dfcl7+ah15A*h|tn}gd zM-Hy%-}F((`RaIdeE7!IVy4Ca7x&5U5t2-@OdAR3sON{K>w=&b; zZyR@5J%8qL{X6{q)$!=~@RcJM?tXoizj&`(?(SaC|LcXXJvMRJl9l(KTl1EA7Vlem z{ier998u4IcZ&fX=Bwk;@!=~+E?jQ;W!rxL^e|AVVL&R55yJEW%&pFa)xbYtLOK;=EC!~I-vgNu=CQ3 zKC)Wh%57&H@!LlJ36FN1uZ~B@hp!yDP#uqs4_`U!Nkix@jG?u3Bg1LUlYkK78fKh4uyegnh$4VqdY( z$c5^7bbR>AkqhmM_DTDuebl~cpOp*M@#y&Ql_M9L3(N`T26KeD!ki%&s^ih|;VVZj zG#8nZ%uVJfbCo$uE>y>(10k>*Nsrd+6wN5_Y+9J$b3Y)&>eo1@Lu z=4`o89gmI=UpaE2bAfY$bAxk)bA@w;T&Rvm$A_;RxzM@DImx-nIm)@pIZG~7$D`xJ zSB_lhT7%~tv%W)hJUTvn z<>)(fU${@)H|`_%mHSLCRL7&^!&i=6=(+Hmcy2sLo-5CpT&Rvm$A_;RxzM^`ov?0L zN31K>8M#m$kB$#tIdY+O(K>0}w2oR=t+R5WIvyP#zH;P3`+|MKzF{A+uh?hgLUlYk zK78fKh4w}JqAkqgZQ<^*$tIl^3F&X5b$@#y&Ql_M9Li_A&p zCUcaz%A6$^s^ih|;VVZjG#8o^&5h`E>y>(Akqg!F==kuJBNwXU z(edFcM=sPCebP65)K`6$3)S)H`0$k@7rHOpC+-{fk^9PhCKsyX(edFcM=tbScuqVw zo+Hnd=S(hC$D`xJSB_k0U9e7AH>@Mp73+*#sE$X+hp!yD(7I@yv~F5Qt*h2qxlkRC zjt^fsa-n^}K4IUmkJwl2GjgFi9vvUPa^yn$qJ7f7X&<$(+GpiLbv!ygeC5c6<^pqq zxxpM^t}tiFh3a^8eE7Akqf8S`TPF?LT(QL literal 0 HcmV?d00001 From d89ce8bd55f09a8697b3a5c13318190823b78c3d Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 19 Jan 2021 09:56:31 +0000 Subject: [PATCH 44/56] updating tests makefile to use mpicc rather than hard code mpi library path --- Tests/GPU/Makefile | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 303dd2a4..83419fef 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -17,9 +17,15 @@ # Compiler CC = nvcc +MPI_CC = mpic++ -#use `mpic++ -show` to find library and include flags -MPI_FLAGS = -I/local/software/mpich/3.2.1/gcc/include 
-L/local/software/mpich/3.2.1/gcc/lib -lmpi -lmpicxx +# HDF5 libraries must be linked explicitly like this rather than using the hdf5 compiler h5pcc. +# h5pcc should wrap mpicc with the hdf5 libraries included, but appears to interact badly with nvcc +# The library paths below are found using h5pcc -show +HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl + +# this should no longer be needed but leaving them in just in case +MPI_FLAGS = # Points to the root of Google Test, relative to where this file is. # Remember to tweak this if you move this file. @@ -43,12 +49,13 @@ STDFLAGS = -std=c++11 CXXFLAGS = -D_MWAITXINTRIN_H_INCLUDED \ -D_FORCE_INLINES \ -D__STRICT_ANSI__ \ - -Wno-deprecated-gpu-targets \ + -Wno-deprecated-gpu-targets NVFLAGS = -std=c++11 \ -rdc=true \ -fmad=false \ - -Wno-deprecated-gpu-targets + -Wno-deprecated-gpu-targets \ + -ccbin ${MPI_CC} @@ -67,7 +74,9 @@ TESTS = test_simulation \ test_imex -PARALLEL_TESTS = test_parallel_rk2 \ +PARALLEL_TESTS = test_parallel_rk2 + +HDF5_TESTS = test_hdf5_rk2 # All Google Test headers. Usually you shouldn't change this # definition. @@ -98,7 +107,7 @@ buildRootfinder: clean : - rm -f $(TESTS) gtest.a gtest_main.a *.o + rm -f $(TESTS) $(PARALLEL_TESTS) gtest.a gtest_main.a *.o # Builds gtest.a and gtest_main.a. From 8e623a34740571dea17bcc5fcf210615e01bdc83 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 19 Jan 2021 11:43:58 +0000 Subject: [PATCH 45/56] adding serial hdf5 to gpu tests --- Tests/GPU/Makefile | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 83419fef..9ea7fdff 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -49,7 +49,7 @@ STDFLAGS = -std=c++11 CXXFLAGS = -D_MWAITXINTRIN_H_INCLUDED \ -D_FORCE_INLINES \ -D__STRICT_ANSI__ \ - -Wno-deprecated-gpu-targets + -Wno-deprecated-gpu-targets \ NVFLAGS = -std=c++11 \ -rdc=true \ @@ -181,6 +181,13 @@ test_rk2.o : $(TEST_DIR)/test_rk2.cu $(INC_DIR)/RK2.h \ test_rk2 : test_rk2.o C2PArgs.o srmhd.o srrmhd.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o serialSaveData.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -lpthread $^ -o $@ +test_hdf5_rk2.o : $(TEST_DIR)/test_hdf5_rk2.cu $(INC_DIR)/RK2.h \ + $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) + @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_rk2.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) + +test_hdf5_rk2 : test_hdf5_rk2.o C2PArgs.o srmhd.o srrmhd.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o serialSaveDataHDF5.o serialEnv.o $(RTFIND_OBJS) gtest_main.a + @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) -lpthread $^ -o $@ + # Explicit RK split integrator rkSplit.o : $(MODULE_DIR)/rkSplit.cu $(INC_DIR)/rkSplit.h $(INC_DIR)/RK2.h @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/rkSplit.cu -I$(INC_DIR) @@ -204,6 +211,9 @@ test_fvs : srmhd.o C2PArgs.o twoFluidEMHD.o test_fvs.o fluxVectorSplitting.o bou serialSaveData.o : $(MODULE_DIR)/serialSaveData.cu $(INC_DIR)/serialSaveData.h @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/serialSaveData.cu -I$(INC_DIR) +serialSaveDataHDF5.o : $(MODULE_DIR)/serialSaveDataHDF5.cu $(INC_DIR)/serialSaveDataHDF5.h + @$(CC) $(CPPFLAGS) $(CXXFLAGS) 
$(HDF5_FLAGS) $(NVFLAGS) -c $(MODULE_DIR)/serialSaveDataHDF5.cu -I$(INC_DIR) + parallelSaveData.o : $(MODULE_DIR)/parallelSaveData.cu $(INC_DIR)/parallelSaveData.h @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(MODULE_DIR)/parallelSaveData.cu -I$(INC_DIR) From a19f45a8d283b2f8f3997e9dae7dd93c3089aaa8 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Tue, 19 Jan 2021 11:44:26 +0000 Subject: [PATCH 46/56] adding serial hdf5 to gpu tests --- Tests/GPU/Src/test_hdf5_rk2.cu | 195 +++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 Tests/GPU/Src/test_hdf5_rk2.cu diff --git a/Tests/GPU/Src/test_hdf5_rk2.cu b/Tests/GPU/Src/test_hdf5_rk2.cu new file mode 100644 index 00000000..d46684af --- /dev/null +++ b/Tests/GPU/Src/test_hdf5_rk2.cu @@ -0,0 +1,195 @@ +#include "gtest/gtest.h" +#include "srrmhd.h" +#include "srmhd.h" +#include "simulation.h" +#include "serialSaveDataHDF5.h" +#include "simData.h" +#include "initFunc.h" +#include "RK2.h" +#include "fluxVectorSplitting.h" +#include + + +/* + Assumptions: + RKRandomInstabilitySingleFluid is tested in 2D only + BrioWuSingleFluid is tested in 1D only +*/ + + +// RKOTVSingleFluidPeriodic +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicOTVSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Periodic bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdPeriodicOTVSF", SerialSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + // Save data in test directory + // This currently needs to be set in the save() function above as well + strcpy(save.dir, "../TestData/GPUHDF5"); + strcpy(save.app, "RK2SrmhdPeriodicOTVSF"); + + save.saveAll(); +} +// RKOTVSingleFluidOutflow +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowOTVSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Outflow bcs(&d); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdOutflowOTVSF", SerialSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + // Save data in test directory + // This currently needs to be set in the save() function above as well + strcpy(save.dir, "../TestData/GPUHDF5"); + strcpy(save.app, "RK2SrmhdOutflowOTVSF"); + + save.saveAll(); + +} + + + + +// BrioWuSingleFluid + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. 
No tests are run in the serial version of this test + */ + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Outflow bcs(&d); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + + SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdOutflowBrioWuSF", SerialSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + // Save data in test directory + // This currently needs to be set in the save() function above as well + strcpy(save.dir, "../TestData/GPUHDF5"); + strcpy(save.app, "RK2SrmhdOutflowBrioWuSF"); + + save.saveAll(); + +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Periodic bcs(&d); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + + SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdPeriodicBrioWuSF", SerialSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + // Save data in test directory + // This currently needs to be set in the save() function above as well + strcpy(save.dir, "../TestData/GPUHDF5"); + strcpy(save.app, "RK2SrmhdPeriodicBrioWuSF"); + + save.saveAll(); +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdFlowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + SerialEnv env(0, NULL, 1, 1, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + Flow bcs(&d); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + + SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdFlowBrioWuSF", SerialSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + // Save data in test directory + // This currently needs to be set in the save() function above as well + strcpy(save.dir, "../TestData/GPUHDF5"); + strcpy(save.app, "RK2SrmhdFlowBrioWuSF"); + + save.saveAll(); +} + + + From ea5548eec66965549a24ff9239a01f77abddea12 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 19 Jan 2021 14:57:12 +0000 Subject: [PATCH 47/56] updating test makefile to run hdf5 on ubuntu --- Tests/GPU/Makefile | 21 +++++++++++++-------- Tests/GPU/Src/main.cu | 2 +- makePaths.sh | 8 ++++++++ 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 9ea7fdff..4a7830ee 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -17,19 +17,24 @@ # Compiler CC = nvcc -MPI_CC = mpic++ +MPI_CC = mpicxx.mpich +GPU_COMPUTE_CAPABILITY = 61 + +##DOCKER_ENV = --allow-run-as-root +DOCKER_ENV = # HDF5 libraries must be linked explicitly like this rather than using the hdf5 compiler h5pcc. 
# h5pcc should wrap mpicc with the hdf5 libraries included, but appears to interact badly with nvcc # The library paths below are found using h5pcc -show -HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl +#HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl +HDF5_FLAGS = -I/usr/include/hdf5/mpich -L/usr/lib/x86_64-linux-gnu/hdf5/mpich /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5_hl.a /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5.a -lsz -lz -lm # this should no longer be needed but leaving them in just in case MPI_FLAGS = # Points to the root of Google Test, relative to where this file is. # Remember to tweak this if you move this file. -GTEST_DIR = ../../../GoogleTest +GTEST_DIR = ../../GoogleTest # Where to find user code. MODULE_DIR = ./../../Project/GPU/Src @@ -55,7 +60,8 @@ NVFLAGS = -std=c++11 \ -rdc=true \ -fmad=false \ -Wno-deprecated-gpu-targets \ - -ccbin ${MPI_CC} + -ccbin ${MPI_CC} \ + -arch=sm_${GPU_COMPUTE_CAPABILITY} @@ -70,13 +76,12 @@ TESTS = test_simulation \ test_srrmhd \ test_fvs \ test_id \ - test_rk2 \ + test_hdf5_rk2 \ test_imex PARALLEL_TESTS = test_parallel_rk2 -HDF5_TESTS = test_hdf5_rk2 # All Google Test headers. Usually you shouldn't change this # definition. @@ -96,7 +101,7 @@ gpu_test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) # Run all tests @$(foreach exe, $(TESTS), ./$(exe);) # Run all parallel tests - $(foreach exe, $(PARALLEL_TESTS), mpirun -np 4 ./$(exe);) + $(foreach exe, $(PARALLEL_TESTS), mpirun.mpich -np 4 ${DOCKER_ENV} ./$(exe);) test : gpu_test compare_mpi_test @@ -183,7 +188,7 @@ test_rk2 : test_rk2.o C2PArgs.o srmhd.o srrmhd.o fluxVectorSplitting.o boundaryC test_hdf5_rk2.o : $(TEST_DIR)/test_hdf5_rk2.cu $(INC_DIR)/RK2.h \ $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) - @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_rk2.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) + @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) -c $(TEST_DIR)/test_hdf5_rk2.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) test_hdf5_rk2 : test_hdf5_rk2.o C2PArgs.o srmhd.o srrmhd.o fluxVectorSplitting.o boundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o serialSaveDataHDF5.o serialEnv.o $(RTFIND_OBJS) gtest_main.a @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) -lpthread $^ -o $@ diff --git a/Tests/GPU/Src/main.cu b/Tests/GPU/Src/main.cu index c6ff0a86..fb01812d 100644 --- a/Tests/GPU/Src/main.cu +++ b/Tests/GPU/Src/main.cu @@ -7,6 +7,6 @@ int main(int argc, char** argv) // Create env here to ensure MPI initialisation is handled. 
Will need to create this object again inside each test // -- mpi init will only be called the first time - ParallelEnv env(0, NULL, 1, 1, 1); + ParallelEnv env(0, NULL, 2, 2, 1); return RUN_ALL_TESTS(); } diff --git a/makePaths.sh b/makePaths.sh index d2125bf5..b58b2b4b 100644 --- a/makePaths.sh +++ b/makePaths.sh @@ -33,12 +33,16 @@ mkdir Tests/TestData/GPU/Conserved mkdir Tests/TestData/GPU/Constants mkdir Tests/TestData/GPU/Primitive +mkdir Tests/TestData/GPUHDF5 + mkdir Tests/TestData/MPIGPU mkdir Tests/TestData/MPIGPU/Auxiliary mkdir Tests/TestData/MPIGPU/Conserved mkdir Tests/TestData/MPIGPU/Constants mkdir Tests/TestData/MPIGPU/Primitive +mkdir Tests/TestData/MPIGPUHDF5 + mkdir Tests/TestData mkdir Tests/TestData/CPU mkdir Tests/TestData/CPU/Auxiliary @@ -46,6 +50,8 @@ mkdir Tests/TestData/CPU/Conserved mkdir Tests/TestData/CPU/Constants mkdir Tests/TestData/CPU/Primitive +mkdir Tests/TestData/CPUHDF5 + mkdir Tests/TestData/SerialHDF5 mkdir Tests/TestData/SerialTextToHDF5 mkdir Tests/TestData/CPUHDF5 @@ -57,6 +63,8 @@ mkdir Tests/TestData/Serial/Conserved mkdir Tests/TestData/Serial/Constants mkdir Tests/TestData/Serial/Primitive +mkdir Tests/TestData/SerialHDF5 + mkdir Examples/Data mkdir Examples/Data/Final mkdir Examples/Data/Final/Auxiliary From d8af18d5f9aae8bcb21e623faa322243179cf29f Mon Sep 17 00:00:00 2001 From: aniabrown Date: Tue, 19 Jan 2021 15:37:54 +0000 Subject: [PATCH 48/56] use hdf5 comparison script in gpu tests --- Tests/GPU/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 4a7830ee..33be5794 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -95,7 +95,7 @@ RTFIND = buildRootfinder compare_mpi_test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) gpu_test # Run tests that compare outputs of TestData/GPU and TestData/MPIGPU - py.test -v Src/compareParallelAndSerial.py + py.test -v Src/compareParallelAndSerialHDF5.py gpu_test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) # Run all tests From d87d993b280bf6f80c1808c93025fd3b9e575146 Mon Sep 17 00:00:00 2001 From: aniabrown Date: Wed, 20 Jan 2021 15:05:02 +0000 Subject: [PATCH 49/56] adding parallel HDF5 to gpu tests --- Scripts/compareHDF5.py | 10 +- Tests/CPU/Src/compareParallelHDF5.py | 3 + Tests/CPU/Src/compareSerialHDF5.py | 3 + Tests/GPU/Makefile | 12 +- Tests/GPU/Src/compareParallelAndSerialHDF5.py | 37 ++++ Tests/GPU/Src/test_hdf5_parallel_rk2.cu | 186 ++++++++++++++++++ Tests/GPU/Src/test_hdf5_rk2.cu | 12 +- 7 files changed, 255 insertions(+), 8 deletions(-) create mode 100644 Tests/GPU/Src/compareParallelAndSerialHDF5.py create mode 100644 Tests/GPU/Src/test_hdf5_parallel_rk2.cu diff --git a/Scripts/compareHDF5.py b/Scripts/compareHDF5.py index dc5e981c..51171ed9 100644 --- a/Scripts/compareHDF5.py +++ b/Scripts/compareHDF5.py @@ -96,7 +96,15 @@ def compare(file1, file2): for attribute_name, a_attribute in a_group.attrs.items(): if attribute_name not in whitelist_attributes: b_attribute = b_group.attrs[attribute_name] - if not np.allclose(a_attribute, b_attribute): + if a_attribute.dtype.char == 'S': + if not a_attribute == b_attribute: + warnings_found = True + print( + "Warning: "+group_name+" attribute '"+attribute_name+"' values differ!\n" + " - "+file1+": "+a_attribute+"\n" + " - "+file2+": "+b_attribute + ) + elif not np.allclose(a_attribute, b_attribute): warnings_found = True print( "Warning: "+group_name+" attribute '"+attribute_name+"' values differ!\n" diff --git a/Tests/CPU/Src/compareParallelHDF5.py 
b/Tests/CPU/Src/compareParallelHDF5.py index 5e302821..6ce3ae4b 100644 --- a/Tests/CPU/Src/compareParallelHDF5.py +++ b/Tests/CPU/Src/compareParallelHDF5.py @@ -21,6 +21,9 @@ def test_compareParallelHDF5(): directory2: Path = Path("../TestData/CPUHDF5/") print("Running tests...") + # Double check that the previous steps have actually generated the files we expect + assert(len(list(directory2.glob("*")))>0) + assert(len(list(directory1.glob("*")))>0) # For each file, determine the appendix and use the CompareHDF5 script for serfile in directory2.glob("*"): diff --git a/Tests/CPU/Src/compareSerialHDF5.py b/Tests/CPU/Src/compareSerialHDF5.py index 0c34d59e..ffd0fb11 100644 --- a/Tests/CPU/Src/compareSerialHDF5.py +++ b/Tests/CPU/Src/compareSerialHDF5.py @@ -21,6 +21,9 @@ def test_compareSerialHDF5(): directory2: Path = Path("../TestData/SerialHDF5/") print("Running tests...") + # Double check that the previous steps have actually generated the files we expect + assert(len(list(directory2.glob("*")))>0) + assert(len(list(directory1.glob("*")))>0) # For each file, determine the appendix and use the CompareHDF5 script for serfile in directory2.glob("*"): diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index 33be5794..e5f79c0d 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -80,7 +80,7 @@ TESTS = test_simulation \ test_imex -PARALLEL_TESTS = test_parallel_rk2 +PARALLEL_TESTS = test_hdf5_parallel_rk2 # All Google Test headers. Usually you shouldn't change this @@ -222,6 +222,9 @@ serialSaveDataHDF5.o : $(MODULE_DIR)/serialSaveDataHDF5.cu $(INC_DIR)/serialSave parallelSaveData.o : $(MODULE_DIR)/parallelSaveData.cu $(INC_DIR)/parallelSaveData.h @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(MODULE_DIR)/parallelSaveData.cu -I$(INC_DIR) +parallelSaveDataHDF5.o : $(MODULE_DIR)/parallelSaveDataHDF5.cu $(INC_DIR)/parallelSaveDataHDF5.h + @$(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(MODULE_DIR)/parallelSaveDataHDF5.cu -I$(INC_DIR) + # Platform env serialEnv.o : $(MODULE_DIR)/serialEnv.cu $(INC_DIR)/serialEnv.h $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) -c $(MODULE_DIR)/serialEnv.cu -I$(INC_DIR) @@ -350,6 +353,13 @@ test_parallel_rk2.o : $(TEST_DIR)/test_parallel_rk2.cu $(INC_DIR)/RK2.h \ test_parallel_rk2 : main.o C2PArgs.o test_parallel_rk2.o srmhd.o srrmhd.o fluxVectorSplitting.o parallelBoundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o parallelEnv.o parallelSaveData.o $(RTFIND_OBJS) gtest.a $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -lpthread $^ -o $@ +test_hdf5_parallel_rk2.o : $(TEST_DIR)/test_hdf5_parallel_rk2.cu $(INC_DIR)/RK2.h \ + $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(TEST_DIR)/test_hdf5_parallel_rk2.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) + +test_hdf5_parallel_rk2 : main.o C2PArgs.o test_hdf5_parallel_rk2.o srmhd.o srrmhd.o fluxVectorSplitting.o parallelBoundaryConds.o simData.o RK2.o initFunc.o simulation.o weno.o parallelEnv.o parallelSaveDataHDF5.o $(RTFIND_OBJS) gtest.a + $(CC) $(CPPFLAGS) $(CXXFLAGS) $(HDF5_FLAGS) $(NVFLAGS) $(MPI_FLAGS) -lpthread $^ -o $@ + test_parallel_rkSplit.o : $(TEST_DIR)/test_parallel_rkSplit.cu $(INC_DIR)/rkSplit.h \ $(INC_DIR)/twoFluidEMHD.h $(INC_DIR)/simulation.h $(INC_DIR)/simData.h $(GTEST_HEADERS) $(CC) $(CPPFLAGS) $(CXXFLAGS) $(NVFLAGS) $(MPI_FLAGS) -c $(TEST_DIR)/test_parallel_rkSplit.cu -I$(INC_DIR) -I$(RTFIND_INC_DIR) diff --git 
a/Tests/GPU/Src/compareParallelAndSerialHDF5.py b/Tests/GPU/Src/compareParallelAndSerialHDF5.py new file mode 100644 index 00000000..cd70ee82 --- /dev/null +++ b/Tests/GPU/Src/compareParallelAndSerialHDF5.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mon 26 Oct 2020 + +@author: ania + +Tests the precision of the serial plaintext and HDF5 version of METHOD to within +some tolerance. To execute these tests, `make test` from the Tests/CPU directory +""" + +import sys +from glob import glob +from pathlib import Path + +from compareHDF5 import compare + + +def test_compareParallelAndSerialHDF5(): + directory1: Path = Path("../TestData/GPUHDF5/") + directory2: Path = Path("../TestData/MPIGPUHDF5/") + + print("Running tests...") + + # Double check that the previous steps have actually generated the files we expect + assert(len(list(directory2.glob("*")))>0) + assert(len(list(directory1.glob("*")))>0) + + # For each file, determine the appendix and use the CompareHDF5 script + for serfile in directory2.glob("*"): + appendix = serfile.stem + # TODO -- is this still necessary? + appendix = appendix.strip('aux') + file1 = directory1 / (appendix + ".hdf5") + file2 = directory2 / (appendix + ".hdf5") + print(file1, file2) + assert(compare(str(file1), str(file2))) diff --git a/Tests/GPU/Src/test_hdf5_parallel_rk2.cu b/Tests/GPU/Src/test_hdf5_parallel_rk2.cu new file mode 100644 index 00000000..385bf228 --- /dev/null +++ b/Tests/GPU/Src/test_hdf5_parallel_rk2.cu @@ -0,0 +1,186 @@ +#include "gtest/gtest.h" +#include "srrmhd.h" +#include "srmhd.h" +#include "simulation.h" +#include "simData.h" +#include "parallelSaveDataHDF5.h" +#include "parallelBoundaryConds.h" +#include "initFunc.h" +#include "RK2.h" +#include "fluxVectorSplitting.h" +#include "parallelEnv.h" +#include + +/* + Assumptions: + RKRandomInstabilitySingleFluid is tested in 2D only + BrioWuSingleFluid is tested in 1D only +*/ + + +#if 1 +// RKOTVSingleFluidPeriodic +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicOTVSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelPeriodic bcs(&d, &env); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveDataHDF5 save(&d, &env, "../TestData/MPIGPUHDF5/RK2SrmhdPeriodicOTVSF", ParallelSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPUHDF5"); + strcpy(save.app, "RK2SrmhdPeriodicOTVSF"); + + save.saveAll(); +} +// RKOTVSingleFluidOutflow +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowOTVSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. 
No tests are run in the serial version of this test + */ + + double cfl(0.6); + int Ng(4); + double gamma(2.0); + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env, cfl, Ng, gamma); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelOutflow bcs(&d, &env); + Simulation sim(&d, &env); + OTVortexSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveDataHDF5 save(&d, &env, "../TestData/MPIGPUHDF5/RK2SrmhdOutflowOTVSF", ParallelSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + // sim.evolve(); + sim.updateTime(); + // sim.updateTime(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPUHDF5"); + strcpy(save.app, "RK2SrmhdOutflowOTVSF"); + + save.saveAll(); +} + + + + +// BrioWuSingleFluid + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelOutflow bcs(&d, &env); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveDataHDF5 save(&d, &env, "../TestData/MPIGPUHDF5/RK2SrmhdOutflowBrioWuSF", ParallelSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPUHDF5"); + strcpy(save.app, "RK2SrmhdOutflowBrioWuSF"); + + save.saveAll(); +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. No tests are run in the serial version of this test + */ + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelPeriodic bcs(&d, &env); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveDataHDF5 save(&d, &env, "../TestData/MPIGPUHDF5/RK2SrmhdPeriodicBrioWuSF", ParallelSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPUHDF5"); + strcpy(save.app, "RK2SrmhdPeriodicBrioWuSF"); + + save.saveAll(); +} + +TEST(RK2OutputConsistentWithSerial, RK2SrmhdFlowBrioWuSF) +{ + + /* + The following was used to gather data to compare the parallel + version with. 
No tests are run in the serial version of this test + */ + + ParallelEnv env(0, NULL, 2, 2, 1, 1); + Data d(40, 40, 0, 0, 1, 0, 1, 0, 1, 0.004, &env); + SRMHD model(&d); + FVS fluxMethod(&d, &model); + ParallelFlow bcs(&d, &env); + Simulation sim(&d, &env); + BrioWuSingleFluid init(&d); + RK2 timeInt(&d, &model, &bcs, &fluxMethod); + ParallelSaveDataHDF5 save(&d, &env, "../TestData/MPIGPUHDF5/RK2SrmhdFlowBrioWuSF", ParallelSaveDataHDF5::OUTPUT_ALL); + sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); + + sim.evolve(); + + + // Save data in test directory + strcpy(save.dir, "../TestData/MPIGPUHDF5"); + strcpy(save.app, "RK2SrmhdFlowBrioWuSF"); + + save.saveAll(); +} +#endif + + diff --git a/Tests/GPU/Src/test_hdf5_rk2.cu b/Tests/GPU/Src/test_hdf5_rk2.cu index d46684af..f5c4cf67 100644 --- a/Tests/GPU/Src/test_hdf5_rk2.cu +++ b/Tests/GPU/Src/test_hdf5_rk2.cu @@ -114,8 +114,8 @@ TEST(RK2OutputConsistentWithSerial, RK2SrmhdOutflowBrioWuSF) SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdOutflowBrioWuSF", SerialSaveDataHDF5::OUTPUT_ALL); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); - // sim.evolve(); - sim.updateTime(); + sim.evolve(); + //sim.updateTime(); // sim.updateTime(); // Save data in test directory @@ -147,8 +147,8 @@ TEST(RK2OutputConsistentWithSerial, RK2SrmhdPeriodicBrioWuSF) SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdPeriodicBrioWuSF", SerialSaveDataHDF5::OUTPUT_ALL); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); - // sim.evolve(); - sim.updateTime(); + sim.evolve(); + //sim.updateTime(); // sim.updateTime(); // Save data in test directory @@ -179,8 +179,8 @@ TEST(RK2OutputConsistentWithSerial, RK2SrmhdFlowBrioWuSF) SerialSaveDataHDF5 save(&d, &env, "../TestData/GPUHDF5/RK2SrmhdFlowBrioWuSF", SerialSaveDataHDF5::OUTPUT_ALL); sim.set(&init, &model, &timeInt, &bcs, &fluxMethod, &save); - // sim.evolve(); - sim.updateTime(); + sim.evolve(); + //sim.updateTime(); // sim.updateTime(); // Save data in test directory From 6e6f5c7ee970ec8f44e004bbe4f823b22c2aa064 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 22 Jan 2021 11:56:41 +0000 Subject: [PATCH 50/56] fixing a typo which lead to the latest cpu and gpu instructions for iridis being stored in the wrong folder --- Scrips/IridisEnv/tests_instructions.md | 46 ------------------- Scripts/IridisEnv/tests_instructions.md | 20 ++++++-- Scripts/IridisEnv/tests_job.sh | 22 --------- .../IridisEnv/tests_job_cpu.sh | 0 .../IridisEnv/tests_job_gpu.sh | 0 5 files changed, 17 insertions(+), 71 deletions(-) delete mode 100644 Scrips/IridisEnv/tests_instructions.md delete mode 100644 Scripts/IridisEnv/tests_job.sh rename {Scrips => Scripts}/IridisEnv/tests_job_cpu.sh (100%) rename {Scrips => Scripts}/IridisEnv/tests_job_gpu.sh (100%) diff --git a/Scrips/IridisEnv/tests_instructions.md b/Scrips/IridisEnv/tests_instructions.md deleted file mode 100644 index 3569ca88..00000000 --- a/Scrips/IridisEnv/tests_instructions.md +++ /dev/null @@ -1,46 +0,0 @@ -## Tests Instructions - -These are instructions to run GPU or CPU unit tests as a batch job on Iridis 5 - -## Setting up python env - -In the root METHOD folder, create a python venv using - -``` -module purge -module load gcc/6.4.0 -module load python/3.6.4 -module load hdf5/1.10.2/gcc/parallel -``` - -Optionally also type `module load cuda/8.0` if using gpu, - -Finish creating and activating the python venv with: - -``` -python3 -m venv venv -source venv/bin/activate -``` - -Then install python modules using - -``` 
-python -m pip install -r Scripts/IridisEnv/requirements.txt -``` - -## Runing unit tests as a batch job - -For GPU: - -From `Tests/GPU` run `sbatch ../../Scripts/IridisEnv/tests_job_gpu.sh` - -This will run all GPU tests - -For CPU: - -From `Tests/CPU` run `sbatch ../../Scripts/IridisEnv/tests_job_cpu.sh` - - - - - diff --git a/Scripts/IridisEnv/tests_instructions.md b/Scripts/IridisEnv/tests_instructions.md index 7b00163d..3569ca88 100644 --- a/Scripts/IridisEnv/tests_instructions.md +++ b/Scripts/IridisEnv/tests_instructions.md @@ -1,6 +1,6 @@ ## Tests Instructions -These are instructions to run CPU unit tests as a batch job on Iridis 5 +These are instructions to run GPU or CPU unit tests as a batch job on Iridis 5 ## Setting up python env @@ -11,6 +11,13 @@ module purge module load gcc/6.4.0 module load python/3.6.4 module load hdf5/1.10.2/gcc/parallel +``` + +Optionally also type `module load cuda/8.0` if using gpu, + +Finish creating and activating the python venv with: + +``` python3 -m venv venv source venv/bin/activate ``` @@ -23,9 +30,16 @@ python -m pip install -r Scripts/IridisEnv/requirements.txt ## Runing unit tests as a batch job -From `Tests/CPU` run `sbatch ../../Scripts/IridisEnv/tests_job.sh` +For GPU: + +From `Tests/GPU` run `sbatch ../../Scripts/IridisEnv/tests_job_gpu.sh` + +This will run all GPU tests + +For CPU: + +From `Tests/CPU` run `sbatch ../../Scripts/IridisEnv/tests_job_cpu.sh` -This will run all CPU tests including tests of the hdf5 serial and parallel writers diff --git a/Scripts/IridisEnv/tests_job.sh b/Scripts/IridisEnv/tests_job.sh deleted file mode 100644 index 8dcaa48e..00000000 --- a/Scripts/IridisEnv/tests_job.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -#SBATCH --ntasks-per-node=4 # Tasks per node -#SBATCH --nodes=1 # Number of nodes requested -#SBATCH --time=00:10:00 # walltime - -module purge -module load gcc/6.4.0 -module load python/3.6.4 -module load hdf5/1.10.2/gcc/parallel -#module load hdf5/1.10.2/gcc/serial - -module list - -source ../../venv/bin/activate - -export PYTHONPATH=$PYTHONPATH:../../Scripts:/home/amb1u19/METHOD_branches/METHOD_dev_hdf5/Scripts - -gcc --version -make clean -make test - diff --git a/Scrips/IridisEnv/tests_job_cpu.sh b/Scripts/IridisEnv/tests_job_cpu.sh similarity index 100% rename from Scrips/IridisEnv/tests_job_cpu.sh rename to Scripts/IridisEnv/tests_job_cpu.sh diff --git a/Scrips/IridisEnv/tests_job_gpu.sh b/Scripts/IridisEnv/tests_job_gpu.sh similarity index 100% rename from Scrips/IridisEnv/tests_job_gpu.sh rename to Scripts/IridisEnv/tests_job_gpu.sh From 3a03cbf36ef0eb891ee38108256175b94725a072 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 22 Jan 2021 11:56:41 +0000 Subject: [PATCH 51/56] fixing a typo which lead to the latest cpu and gpu instructions for iridis being stored in the wrong folder --- Scrips/IridisEnv/tests_instructions.md | 46 ------------------- Scripts/IridisEnv/tests_instructions.md | 20 ++++++-- Scripts/IridisEnv/tests_job.sh | 22 --------- .../IridisEnv/tests_job_cpu.sh | 0 .../IridisEnv/tests_job_gpu.sh | 0 5 files changed, 17 insertions(+), 71 deletions(-) delete mode 100644 Scrips/IridisEnv/tests_instructions.md delete mode 100644 Scripts/IridisEnv/tests_job.sh rename {Scrips => Scripts}/IridisEnv/tests_job_cpu.sh (100%) rename {Scrips => Scripts}/IridisEnv/tests_job_gpu.sh (100%) diff --git a/Scrips/IridisEnv/tests_instructions.md b/Scrips/IridisEnv/tests_instructions.md deleted file mode 100644 index 3569ca88..00000000 --- a/Scrips/IridisEnv/tests_instructions.md +++ 
/dev/null @@ -1,46 +0,0 @@ -## Tests Instructions - -These are instructions to run GPU or CPU unit tests as a batch job on Iridis 5 - -## Setting up python env - -In the root METHOD folder, create a python venv using - -``` -module purge -module load gcc/6.4.0 -module load python/3.6.4 -module load hdf5/1.10.2/gcc/parallel -``` - -Optionally also type `module load cuda/8.0` if using gpu, - -Finish creating and activating the python venv with: - -``` -python3 -m venv venv -source venv/bin/activate -``` - -Then install python modules using - -``` -python -m pip install -r Scripts/IridisEnv/requirements.txt -``` - -## Runing unit tests as a batch job - -For GPU: - -From `Tests/GPU` run `sbatch ../../Scripts/IridisEnv/tests_job_gpu.sh` - -This will run all GPU tests - -For CPU: - -From `Tests/CPU` run `sbatch ../../Scripts/IridisEnv/tests_job_cpu.sh` - - - - - diff --git a/Scripts/IridisEnv/tests_instructions.md b/Scripts/IridisEnv/tests_instructions.md index 7b00163d..3569ca88 100644 --- a/Scripts/IridisEnv/tests_instructions.md +++ b/Scripts/IridisEnv/tests_instructions.md @@ -1,6 +1,6 @@ ## Tests Instructions -These are instructions to run CPU unit tests as a batch job on Iridis 5 +These are instructions to run GPU or CPU unit tests as a batch job on Iridis 5 ## Setting up python env @@ -11,6 +11,13 @@ module purge module load gcc/6.4.0 module load python/3.6.4 module load hdf5/1.10.2/gcc/parallel +``` + +Optionally also type `module load cuda/8.0` if using gpu, + +Finish creating and activating the python venv with: + +``` python3 -m venv venv source venv/bin/activate ``` @@ -23,9 +30,16 @@ python -m pip install -r Scripts/IridisEnv/requirements.txt ## Runing unit tests as a batch job -From `Tests/CPU` run `sbatch ../../Scripts/IridisEnv/tests_job.sh` +For GPU: + +From `Tests/GPU` run `sbatch ../../Scripts/IridisEnv/tests_job_gpu.sh` + +This will run all GPU tests + +For CPU: + +From `Tests/CPU` run `sbatch ../../Scripts/IridisEnv/tests_job_cpu.sh` -This will run all CPU tests including tests of the hdf5 serial and parallel writers diff --git a/Scripts/IridisEnv/tests_job.sh b/Scripts/IridisEnv/tests_job.sh deleted file mode 100644 index 8dcaa48e..00000000 --- a/Scripts/IridisEnv/tests_job.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -#SBATCH --ntasks-per-node=4 # Tasks per node -#SBATCH --nodes=1 # Number of nodes requested -#SBATCH --time=00:10:00 # walltime - -module purge -module load gcc/6.4.0 -module load python/3.6.4 -module load hdf5/1.10.2/gcc/parallel -#module load hdf5/1.10.2/gcc/serial - -module list - -source ../../venv/bin/activate - -export PYTHONPATH=$PYTHONPATH:../../Scripts:/home/amb1u19/METHOD_branches/METHOD_dev_hdf5/Scripts - -gcc --version -make clean -make test - diff --git a/Scrips/IridisEnv/tests_job_cpu.sh b/Scripts/IridisEnv/tests_job_cpu.sh similarity index 100% rename from Scrips/IridisEnv/tests_job_cpu.sh rename to Scripts/IridisEnv/tests_job_cpu.sh diff --git a/Scrips/IridisEnv/tests_job_gpu.sh b/Scripts/IridisEnv/tests_job_gpu.sh similarity index 100% rename from Scrips/IridisEnv/tests_job_gpu.sh rename to Scripts/IridisEnv/tests_job_gpu.sh From 59a08096639f2d5bbefd196d283949051ee68321 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 22 Jan 2021 14:50:06 +0000 Subject: [PATCH 52/56] standardising gpu project and test makefile --- Project/GPU/Makefile | 39 +++++++++++++++++++++++++++++++++------ Tests/GPU/Makefile | 36 +++++++++++++++++++++++------------- 2 files changed, 56 insertions(+), 19 deletions(-) diff --git a/Project/GPU/Makefile 
b/Project/GPU/Makefile index f72611e3..91d92563 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -7,21 +7,39 @@ USE_MPI=1 USE_HDF=1 -CC = mpic++ +# The compute capability of the GPU +GPU_COMPUTE_CAPABILITY = 52 +# --- IF USE_MPI --- +# The c++ capable mpi compiler. In systems with multiple versions of MPI, the particular version may need to be specified with eg +# mpicxx.mpich +MPI_CC = mpic++ + +# --- IF USE_HDF --- # HDF5 libraries must be linked explicitly like this rather than using the hdf5 compiler h5pcc. # h5pcc should wrap mpicc with the hdf5 libraries included, but appears to interact badly with nvcc # The library paths below are found using h5pcc -show HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl +# Ubuntu 18.04 mpich example +#HDF5_FLAGS = -I/usr/include/hdf5/mpich -L/usr/lib/x86_64-linux-gnu/hdf5/mpich /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5_hl.a /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5.a -lsz -lz -lm -# this should no longer be needed but leaving them in just in case -MPI_FLAGS = +# Points to the root of Google Test, relative to where this file is. +# Remember to tweak this if you move this file. +GTEST_DIR = ../../../GoogleTest -# -------------- END PARAMETERS FOR USERS TO EDIT -------------------- +# -------------- END PARAMETERS USERS ARE LIKELY TO NEED TO EDIT -------------------- # Compiler CC_GPU = nvcc +# this should no longer be needed but leaving them in just in case +MPI_FLAGS = + +ifneq ($(USE_HDF), 1) + HDF5_FLAGS = +endif + + # Module directory MODULE_DIR = ./Src @@ -43,11 +61,20 @@ RTFIND_SRC_DIR = ./CminpackLibrary/Src CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 -lineinfo # NVIDIA compiler flags -NVFLAGS = -std=c++11 -Wno-deprecated-gpu-targets -rdc=true -fmad=false -O3 -arch=sm_52 -Xcompiler -fopenmp -Xcompiler -Wall -lineinfo +NVFLAGS = -std=c++11 \ + -rdc=true \ + -fmad=false \ + -O3 \ + -Wno-deprecated-gpu-targets \ + -ccbin ${MPI_CC} \ + -arch=sm_${GPU_COMPUTE_CAPABILITY} \ + -Xcompiler -Wall \ + -Xcompiler -fopenmp \ + -lineinfo ifeq ($(USE_MPI), 1) - NVFLAGS += -ccbin ${CC} + NVFLAGS += -ccbin ${MPI_CC} endif # Sources diff --git a/Tests/GPU/Makefile b/Tests/GPU/Makefile index e5f79c0d..a2dd60e9 100644 --- a/Tests/GPU/Makefile +++ b/Tests/GPU/Makefile @@ -15,26 +15,36 @@ # project, except GTEST_HEADERS, which you can use in your own targets # but shouldn't modify. -# Compiler -CC = nvcc -MPI_CC = mpicxx.mpich -GPU_COMPUTE_CAPABILITY = 61 -##DOCKER_ENV = --allow-run-as-root -DOCKER_ENV = +# -------------- PARAMETERS FOR USERS TO EDIT -------------------- + +# The c++ capable mpi compiler. In systems with multiple versions of MPI, the particular version may need to be specified with eg +# mpicxx.mpich +MPI_CC = mpic++ +# The script used to launch mpi programs. In systems with multiple versions of MPI, the particular version may need to be +# specified with eg mpirun.mpich +MPIEXEC = mpirun +# The compute capability of the GPU +GPU_COMPUTE_CAPABILITY = 52 # HDF5 libraries must be linked explicitly like this rather than using the hdf5 compiler h5pcc. 
# h5pcc should wrap mpicc with the hdf5 libraries included, but appears to interact badly with nvcc # The library paths below are found using h5pcc -show -#HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl -HDF5_FLAGS = -I/usr/include/hdf5/mpich -L/usr/lib/x86_64-linux-gnu/hdf5/mpich /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5_hl.a /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5.a -lsz -lz -lm - -# this should no longer be needed but leaving them in just in case -MPI_FLAGS = +HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/gcc/parallel/lib -L/local/software/szip/2.1.1/lib -lsz -lz -ldl -lm -I/local/software/hdf5/1.10.2/gcc/parallel/include -lhdf5 -lhdf5_hl +# Ubuntu 18.04 mpich example +#HDF5_FLAGS = -I/usr/include/hdf5/mpich -L/usr/lib/x86_64-linux-gnu/hdf5/mpich /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5_hl.a /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5.a -lsz -lz -lm # Points to the root of Google Test, relative to where this file is. # Remember to tweak this if you move this file. -GTEST_DIR = ../../GoogleTest +GTEST_DIR = ../../../GoogleTest + +# -------------- END PARAMETERS USERS ARE LIKELY TO NEED TO EDIT -------------------- + +# Compiler +CC = nvcc + +# this should no longer be needed but leaving them in just in case +MPI_FLAGS = # Where to find user code. MODULE_DIR = ./../../Project/GPU/Src @@ -101,7 +111,7 @@ gpu_test : $(RTFIND) $(TESTS) $(PARALLEL_TESTS) # Run all tests @$(foreach exe, $(TESTS), ./$(exe);) # Run all parallel tests - $(foreach exe, $(PARALLEL_TESTS), mpirun.mpich -np 4 ${DOCKER_ENV} ./$(exe);) + $(foreach exe, $(PARALLEL_TESTS), ${MPIEXEC} -np 4 ./$(exe);) test : gpu_test compare_mpi_test From 0920b8cc0d4b1c572a84717fdc45cc7d35a29c63 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 22 Jan 2021 15:13:54 +0000 Subject: [PATCH 53/56] add Scripts to gpu test python path --- Scrips/IridisEnv/requirements.txt | 5 ----- Scripts/IridisEnv/tests_job_gpu.sh | 2 ++ 2 files changed, 2 insertions(+), 5 deletions(-) delete mode 100644 Scrips/IridisEnv/requirements.txt diff --git a/Scrips/IridisEnv/requirements.txt b/Scrips/IridisEnv/requirements.txt deleted file mode 100644 index af599da1..00000000 --- a/Scrips/IridisEnv/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -numpy -matplotlib -scipy -pytest -h5py diff --git a/Scripts/IridisEnv/tests_job_gpu.sh b/Scripts/IridisEnv/tests_job_gpu.sh index 2e701818..534f3a04 100644 --- a/Scripts/IridisEnv/tests_job_gpu.sh +++ b/Scripts/IridisEnv/tests_job_gpu.sh @@ -15,6 +15,8 @@ module list source ../../venv/bin/activate +export PYTHONPATH=$PYTHONPATH:../../Scripts:/home/amb1u19/METHOD_branches/METHOD_dev_hdf5/Scripts + make clean make gpu_test From 8827b78eca084d1bf52bbc91527904d7622c25c0 Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 29 Jan 2021 15:43:26 +0000 Subject: [PATCH 54/56] remove unecessary line from makefile --- Project/GPU/Makefile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 91d92563..8d6aa1d5 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -23,10 +23,6 @@ HDF5_FLAGS = -I/local/software/szip/2.1.1/include -L/local/software/hdf5/1.10.2/ # Ubuntu 18.04 mpich example #HDF5_FLAGS = -I/usr/include/hdf5/mpich -L/usr/lib/x86_64-linux-gnu/hdf5/mpich /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5_hl.a /usr/lib/x86_64-linux-gnu/hdf5/mpich/libhdf5.a 
-lsz -lz -lm -# Points to the root of Google Test, relative to where this file is. -# Remember to tweak this if you move this file. -GTEST_DIR = ../../../GoogleTest - # -------------- END PARAMETERS USERS ARE LIKELY TO NEED TO EDIT -------------------- # Compiler From ce306cd5c1b9355f08fc30f258fd413ae99d7e8e Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Thu, 18 Feb 2021 15:08:03 +0000 Subject: [PATCH 55/56] fixing small error in Project makefile --- Project/GPU/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Project/GPU/Makefile b/Project/GPU/Makefile index 8d6aa1d5..1aa1817e 100644 --- a/Project/GPU/Makefile +++ b/Project/GPU/Makefile @@ -62,7 +62,6 @@ NVFLAGS = -std=c++11 \ -fmad=false \ -O3 \ -Wno-deprecated-gpu-targets \ - -ccbin ${MPI_CC} \ -arch=sm_${GPU_COMPUTE_CAPABILITY} \ -Xcompiler -Wall \ -Xcompiler -fopenmp \ From 5f5fa1a3fd2bb28a4c85eea5394f425e23df35bb Mon Sep 17 00:00:00 2001 From: "ania.brown" Date: Fri, 26 Mar 2021 14:00:22 +0000 Subject: [PATCH 56/56] removing hard coded path from iridis scripts --- Scripts/IridisEnv/tests_job_cpu.sh | 14 ++++++++++++-- Scripts/IridisEnv/tests_job_gpu.sh | 13 ++++++++++++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/Scripts/IridisEnv/tests_job_cpu.sh b/Scripts/IridisEnv/tests_job_cpu.sh index 583b9043..34ed55f3 100644 --- a/Scripts/IridisEnv/tests_job_cpu.sh +++ b/Scripts/IridisEnv/tests_job_cpu.sh @@ -1,5 +1,8 @@ #!/bin/bash +# This script submits a Southampton Iridis5 batch job for the cpu tests +# in Tests/CPU + #SBATCH --ntasks-per-node=4 # Tasks per node #SBATCH --nodes=1 # Number of nodes requested #SBATCH --time=00:10:00 # walltime @@ -11,10 +14,17 @@ module load hdf5/1.10.2/gcc/parallel #module load hdf5/1.10.2/gcc/serial module list - source ../../venv/bin/activate -export PYTHONPATH=$PYTHONPATH:../../Scripts:/home/amb1u19/METHOD_branches/METHOD_dev_hdf5/Scripts +# -------------- PARAMETERS USERS NEED TO EDIT ------------------- + +# Enter absolute path to METHOD/Scripts directory here +SCRIPT_DIR=/absolute/path/to/method/root/Scripts + +# ----------------------------------------------------------------- + +# Let python find the scripts for comparing hdf5 files +export PYTHONPATH=$PYTHONPATH:$SCRIPT_DIR gcc --version make clean diff --git a/Scripts/IridisEnv/tests_job_gpu.sh b/Scripts/IridisEnv/tests_job_gpu.sh index 534f3a04..f6b84fcf 100644 --- a/Scripts/IridisEnv/tests_job_gpu.sh +++ b/Scripts/IridisEnv/tests_job_gpu.sh @@ -1,5 +1,8 @@ #!/bin/bash +# This script submits a Southampton Iridis5 batch job for the gpu tests +# in Tests/GPU + #SBATCH --ntasks-per-node=2 # Tasks per node #SBATCH --nodes=1 # Number of nodes requested #SBATCH --partition=gtx1080 @@ -15,7 +18,15 @@ module list source ../../venv/bin/activate -export PYTHONPATH=$PYTHONPATH:../../Scripts:/home/amb1u19/METHOD_branches/METHOD_dev_hdf5/Scripts +# -------------- PARAMETERS USERS NEED TO EDIT ------------------- + +# Enter absolute path to METHOD/Scripts directory here +SCRIPT_DIR=/absolute/path/to/method/root/Scripts + +# ----------------------------------------------------------------- + +# Let python find the scripts for comparing hdf5 files +export PYTHONPATH=$PYTHONPATH:$SCRIPT_DIR make clean make gpu_test
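
Note on the `HDF5_FLAGS` values used throughout the Makefile patches above: the include and library paths are site-specific (an Iridis 5 module tree and an Ubuntu 18.04 mpich install are the two examples shown). As the Makefile comments state, the flags are taken from the HDF5 compiler wrapper rather than compiling with it, because `h5pcc` interacts badly with `nvcc`. A minimal sketch of that workflow is below; the paths in the example output are illustrative only and are not part of the patch series.

```bash
# Print the compile/link line h5pcc would pass to mpicc, without compiling anything.
# The -I, -L and -l entries are then copied by hand into HDF5_FLAGS in
# Project/GPU/Makefile and Tests/GPU/Makefile.
h5pcc -show

# Illustrative output on an Ubuntu-style mpich install (paths will differ per system):
#   mpicc -I/usr/include/hdf5/mpich -L/usr/lib/x86_64-linux-gnu/hdf5/mpich \
#         -lhdf5_hl -lhdf5 -lsz -lz -ldl -lm
```

The same caveat applies to `GPU_COMPUTE_CAPABILITY`: it must match the target card (52 and 61 both appear in the patches above), since it is passed straight through to `nvcc` as `-arch=sm_${GPU_COMPUTE_CAPABILITY}`.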