From 1625353f39903de64e8290eafcd3ab1aaea19082 Mon Sep 17 00:00:00 2001 From: Henri Menke Date: Mon, 7 Apr 2014 17:20:13 +0200 Subject: [PATCH 01/34] pdb-parser inserted into TCL interface --- src/Makefile.am | 1 + src/tcl/electrokinetics_tcl.cpp | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/src/Makefile.am b/src/Makefile.am index f225a884bdf..daf6dbe4691 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -45,6 +45,7 @@ libEspresso_la_SOURCES = \ cuda_init.hpp \ debug.cpp debug.hpp \ domain_decomposition.cpp domain_decomposition.hpp \ + electrokinetics_pdb_parse.cpp electrokinetics_pdb_parse.hpp \ energy.cpp energy.hpp \ errorhandling.cpp errorhandling.hpp \ fft.cpp fft.hpp \ diff --git a/src/tcl/electrokinetics_tcl.cpp b/src/tcl/electrokinetics_tcl.cpp index 4786330e241..581a6736432 100644 --- a/src/tcl/electrokinetics_tcl.cpp +++ b/src/tcl/electrokinetics_tcl.cpp @@ -5,6 +5,11 @@ #include "lb-boundaries.hpp" #include "lb-boundaries_tcl.hpp" #include "initialize.hpp" +#include "electrokinetics_pdb_parse.hpp" + +#ifdef ELECTROKINETICS +extern int ek_initialized; +#endif int tclcommand_electrokinetics(ClientData data, Tcl_Interp *interp, int argc, char **argv) { #ifndef ELECTROKINETICS @@ -84,8 +89,36 @@ int tclcommand_electrokinetics(ClientData data, Tcl_Interp *interp, int argc, ch Tcl_AppendResult(interp, " [print density vtk #string]\n", (char *)NULL); Tcl_AppendResult(interp, " [print flux vtk #string]\n", (char *)NULL); Tcl_AppendResult(interp, "electrokinetics #int node #int #int #int print density\n", (char *)NULL); + Tcl_AppendResult(interp, "electrokinetics pdb-parse #string #string\n", (char *)NULL); return TCL_ERROR; } + else if(ARG0_IS_S("pdb-parse")) + { +#ifndef EK_BOUNDARIES + Tcl_AppendResult(interp, "Feature EK_BOUNDARIES required", (char *) NULL); + return (TCL_ERROR); +#else + argc--; + argv++; + + if (argc != 2) + { + Tcl_AppendResult(interp, "You need to specify two filenames.\n", (char *) NULL); + Tcl_AppendResult(interp, "electrokinetics pdb-parse #string1 #string2\n", (char *)NULL); + Tcl_AppendResult(interp, "#string1 is the pdb-filename, #string2 is the itp-filename.\n", (char *)NULL); + } + if (!ek_initialized) + { + Tcl_AppendResult(interp, "Please initialize EK before you attempt parsing.", (char *) NULL); + return (TCL_ERROR); + } + if(pdb_parse(argv[0], argv[1]) != 0) + { + Tcl_AppendResult(interp, "Could not parse pdb- or itp-file. Please, check your format and filenames.", (char *) NULL); + return (TCL_ERROR); + } +#endif + } else if(ARG0_IS_S("boundary")) { #ifndef EK_BOUNDARIES From 5c82cc8e1275165a718f19bd2ddaf5805cbaf126 Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Wed, 9 Apr 2014 16:35:25 +0200 Subject: [PATCH 02/34] Added missing file. --- src/HarmonicPotential.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 src/HarmonicPotential.h diff --git a/src/HarmonicPotential.h b/src/HarmonicPotential.h new file mode 100644 index 00000000000..05dd156ac72 --- /dev/null +++ b/src/HarmonicPotential.h @@ -0,0 +1,38 @@ +#include "OneParticleForce.h" + +template +class HarmonicPotential : public OneParticleForce +{ + public: + void init(double _x, double _k) { + x0 = _x; + k = _k; + }; + + + void run(void); + void computeForces(); + void writeForces(); + + private: + double x0; + double k; +}; + +template +void HarmonicPotential::writeForces() { +} + +template +void HarmonicPotential::computeForces() { + typename Adapter::RType::const_iterator it; + for(it = this->a.getR().begin(); it != this->a.getR().end(); ++it) { + + } +} + +template +void HarmonicPotential::run(void) { + this->computeForces(); + this->writeForces(); +} From 9cd192095cd87886f502c1f209a1d4d37f329b84 Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Wed, 9 Apr 2014 17:06:56 +0200 Subject: [PATCH 03/34] Added access to gpu forces to SystemInterface and implementation. --- src/EspressoSystemInterface.hpp | 7 ++++++ src/HarmonicPotential.h | 38 --------------------------------- src/SystemInterface.hpp | 4 ++++ 3 files changed, 11 insertions(+), 38 deletions(-) delete mode 100644 src/HarmonicPotential.h diff --git a/src/EspressoSystemInterface.hpp b/src/EspressoSystemInterface.hpp index 06261edcbb7..c1dc83e218b 100644 --- a/src/EspressoSystemInterface.hpp +++ b/src/EspressoSystemInterface.hpp @@ -45,6 +45,7 @@ class EspressoSystemInterface : public SystemInterface { bool hasQ() { return true; }; #endif +#ifdef CUDA float *rGpuBegin() { return m_r_gpu_begin; }; float *rGpuEnd() { return m_r_gpu_end; }; bool hasRGpu() { return true; }; @@ -81,6 +82,12 @@ class EspressoSystemInterface : public SystemInterface { return true; } + float *fGpuBegin() { return (float *)gpu_get_particle_force_pointer(); }; + float *fGpuEnd() { return (float *)(gpu_get_particle_force_pointer()) + 3*m_gpu_npart; }; + bool hasFGpu() { return true; }; + +#endif + protected: void gatherParticles(); void split_particle_struct(); diff --git a/src/HarmonicPotential.h b/src/HarmonicPotential.h deleted file mode 100644 index 05dd156ac72..00000000000 --- a/src/HarmonicPotential.h +++ /dev/null @@ -1,38 +0,0 @@ -#include "OneParticleForce.h" - -template -class HarmonicPotential : public OneParticleForce -{ - public: - void init(double _x, double _k) { - x0 = _x; - k = _k; - }; - - - void run(void); - void computeForces(); - void writeForces(); - - private: - double x0; - double k; -}; - -template -void HarmonicPotential::writeForces() { -} - -template -void HarmonicPotential::computeForces() { - typename Adapter::RType::const_iterator it; - for(it = this->a.getR().begin(); it != this->a.getR().end(); ++it) { - - } -} - -template -void HarmonicPotential::run(void) { - this->computeForces(); - this->writeForces(); -} diff --git a/src/SystemInterface.hpp b/src/SystemInterface.hpp index 17b1bc51823..1cd4b55ea57 100644 --- a/src/SystemInterface.hpp +++ b/src/SystemInterface.hpp @@ -109,6 +109,10 @@ class SystemInterface { virtual bool hasQGpu() { return false; }; virtual bool requestQGpu() { m_needsQGpu = hasQGpu(); return m_needsQGpu; } + virtual float *fGpuBegin() { return 0; }; + virtual float *fGpuEnd() { return 0; }; + virtual bool hasFGpu() { return false; }; + virtual const_real_iterator &qBegin() { return null_scalar; }; virtual const const_real_iterator &qEnd() { return null_scalar; }; virtual bool hasQ() { return false; }; From 1060d42a2b045e6a90930be914b3a275bac6a90a Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Wed, 9 Apr 2014 22:13:49 +0200 Subject: [PATCH 04/34] Make Interface enable particle comm if needed. --- src/EspressoSystemInterface.cpp | 1 + src/EspressoSystemInterface.hpp | 25 +++++++++++++++++++++++++ src/EspressoSystemInterface_cuda.cu | 1 + src/Makefile.am | 3 ++- src/SystemInterface.hpp | 5 ++++- src/features.def | 3 +++ src/forces.cpp | 6 ++++++ src/tcl/Makefile.am | 3 ++- src/tcl/initialize_interpreter.cpp | 4 ++++ src/tcl/p3m_tcl.cpp | 1 - 10 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/EspressoSystemInterface.cpp b/src/EspressoSystemInterface.cpp index a01f614e3e3..e22ca247b7c 100644 --- a/src/EspressoSystemInterface.cpp +++ b/src/EspressoSystemInterface.cpp @@ -54,6 +54,7 @@ void EspressoSystemInterface::gatherParticles() { { if(gpu_get_global_particle_vars_pointer_host()->communication_enabled) { copy_part_data_to_gpu(); + m_gpu_npart = gpu_get_global_particle_vars_pointer_host()->number_of_particles; if(m_splitParticleStructGpu && (this_node == 0)) split_particle_struct(); } diff --git a/src/EspressoSystemInterface.hpp b/src/EspressoSystemInterface.hpp index c1dc83e218b..21a5d8338b6 100644 --- a/src/EspressoSystemInterface.hpp +++ b/src/EspressoSystemInterface.hpp @@ -53,6 +53,8 @@ class EspressoSystemInterface : public SystemInterface { m_needsRGpu = hasRGpu(); m_splitParticleStructGpu |= m_needsRGpu; m_gpu |= m_needsRGpu; + if(m_gpu) + enableParticleCommunication(); return m_needsRGpu; }; @@ -63,6 +65,8 @@ class EspressoSystemInterface : public SystemInterface { m_needsVGpu = hasVGpu(); m_splitParticleStructGpu |= m_needsVGpu; m_gpu |= m_needsVGpu; + if(m_gpu) + enableParticleCommunication(); return m_needsVGpu; }; @@ -73,24 +77,45 @@ class EspressoSystemInterface : public SystemInterface { m_needsQGpu = hasQGpu(); m_splitParticleStructGpu |= m_needsQGpu; m_gpu |= m_needsQGpu; + if(m_gpu) + enableParticleCommunication(); return m_needsQGpu; }; bool requestParticleStructGpu() { m_needsParticleStructGpu = true; m_gpu |= m_needsParticleStructGpu; + if(m_gpu) + enableParticleCommunication(); return true; } float *fGpuBegin() { return (float *)gpu_get_particle_force_pointer(); }; float *fGpuEnd() { return (float *)(gpu_get_particle_force_pointer()) + 3*m_gpu_npart; }; bool hasFGpu() { return true; }; + bool requestFGpu() { + m_needsFGpu = hasFGpu(); + m_gpu |= m_needsFGpu; + if(m_gpu) + enableParticleCommunication(); + return m_needsFGpu; + }; + + unsigned int npart_gpu() { + return m_gpu_npart; + }; #endif protected: void gatherParticles(); void split_particle_struct(); + void enableParticleCommunication() { + if(!gpu_get_global_particle_vars_pointer_host()->communication_enabled) { + gpu_init_particle_comm(); + cuda_bcast_global_part_params(); + } + }; Vector3Container R; #ifdef ELECTROSTATICS diff --git a/src/EspressoSystemInterface_cuda.cu b/src/EspressoSystemInterface_cuda.cu index 33e42199890..5ce1aee85b7 100644 --- a/src/EspressoSystemInterface_cuda.cu +++ b/src/EspressoSystemInterface_cuda.cu @@ -1,3 +1,4 @@ + #include "EspressoSystemInterface.hpp" #include "cuda_interface.hpp" #include "cuda_utils.hpp" diff --git a/src/Makefile.am b/src/Makefile.am index f225a884bdf..6896838df61 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -105,7 +105,8 @@ libEspresso_la_SOURCES = \ ghmc.cpp ghmc.hpp \ Vector.hpp \ SystemInterface.hpp \ - EspressoSystemInterface.hpp EspressoSystemInterface.cpp + EspressoSystemInterface.hpp EspressoSystemInterface.cpp \ + HarmonicForce.hpp HarmonicForce.cu # nonbonded potentials and forces libEspresso_la_SOURCES += \ diff --git a/src/SystemInterface.hpp b/src/SystemInterface.hpp index 1cd4b55ea57..bf124e098d6 100644 --- a/src/SystemInterface.hpp +++ b/src/SystemInterface.hpp @@ -112,6 +112,7 @@ class SystemInterface { virtual float *fGpuBegin() { return 0; }; virtual float *fGpuEnd() { return 0; }; virtual bool hasFGpu() { return false; }; + virtual bool requestFGpu() { m_needsFGpu = hasFGpu(); return m_needsFGpu; } virtual const_real_iterator &qBegin() { return null_scalar; }; virtual const const_real_iterator &qEnd() { return null_scalar; }; @@ -119,12 +120,14 @@ class SystemInterface { virtual bool requestQ() { m_needsQ = hasQ(); return m_needsQ; } virtual unsigned int npart() = 0; + virtual unsigned int npart_gpu() = 0; virtual Vector3 box() = 0; virtual bool needsR() { return m_needsR; }; virtual bool needsRGpu() { return m_needsRGpu;}; virtual bool needsQGpu() { return m_needsQGpu;}; virtual bool needsQ() { return m_needsQ;}; + virtual bool needsFGpu() { return m_needsFGpu; }; protected: bool m_needsR; @@ -133,7 +136,7 @@ class SystemInterface { bool m_needsRGpu; bool m_needsVGpu; bool m_needsQGpu; - + bool m_needsFGpu; }; #endif diff --git a/src/features.def b/src/features.def index ea38a1b30a7..1967622eae5 100644 --- a/src/features.def +++ b/src/features.def @@ -28,6 +28,8 @@ NPT GHMC CATALYTIC_REACTIONS +HARMONICFORCE requires CUDA + /* Rotation */ ROTATION ROTATIONAL_INERTIA implies ROTATION @@ -134,6 +136,7 @@ OLD_DIHEDRAL notest /* turn off nonbonded interactions within molecules */ NO_INTRA_NB notest + /* Debugging */ ADDITIONAL_CHECKS ASYNC_BARRIER diff --git a/src/forces.cpp b/src/forces.cpp index 88a51954227..e7e1f07badd 100644 --- a/src/forces.cpp +++ b/src/forces.cpp @@ -54,6 +54,7 @@ #include "iccp3m.hpp" #include "p3m_gpu.hpp" #include "cuda_interface.hpp" +#include "HarmonicForce.hpp" #include "EspressoSystemInterface.hpp" @@ -75,6 +76,11 @@ void force_calc() espressoSystemInterface.update(); +#ifdef HARMONICFORCE + if(harmonicForce) + harmonicForce->calc(espressoSystemInterface); +#endif + #ifdef LB_GPU #ifdef SHANCHEN if (lattice_switch & LATTICE_LB_GPU && this_node == 0) lattice_boltzmann_calc_shanchen_gpu(); diff --git a/src/tcl/Makefile.am b/src/tcl/Makefile.am index a0a5c273713..8f7f9d6ab1e 100644 --- a/src/tcl/Makefile.am +++ b/src/tcl/Makefile.am @@ -90,7 +90,8 @@ libEspressoTcl_la_SOURCES += \ soft_sphere_tcl.cpp soft_sphere_tcl.hpp \ steppot_tcl.cpp steppot_tcl.hpp \ tab_tcl.cpp tab_tcl.hpp \ - tunable_slip_tcl.cpp tunable_slip_tcl.hpp + tunable_slip_tcl.cpp tunable_slip_tcl.hpp \ + harmonic_force_tcl.cpp harmonic_force_tcl.hpp # bonded potentials and forces libEspressoTcl_la_SOURCES += \ diff --git a/src/tcl/initialize_interpreter.cpp b/src/tcl/initialize_interpreter.cpp index 8cdfbf55e66..32f265af331 100644 --- a/src/tcl/initialize_interpreter.cpp +++ b/src/tcl/initialize_interpreter.cpp @@ -55,6 +55,7 @@ #include "ghmc_tcl.hpp" #include "tuning.hpp" #include "electrokinetics_tcl.hpp" +#include "harmonic_force_tcl.hpp" #ifdef TK @@ -213,6 +214,9 @@ static void register_tcl_commands(Tcl_Interp* interp) { REGISTER_COMMAND("galilei_transform", tclcommand_galilei_transform); REGISTER_COMMAND("time_integration", tclcommand_time_integration); REGISTER_COMMAND("electrokinetics", tclcommand_electrokinetics); +#ifdef HARMONICFORCE + REGISTER_COMMAND("harmonic_force", tclcommand_harmonic_force); +#endif } static void register_global_variables(Tcl_Interp *interp) diff --git a/src/tcl/p3m_tcl.cpp b/src/tcl/p3m_tcl.cpp index e06e553a313..b781542357c 100644 --- a/src/tcl/p3m_tcl.cpp +++ b/src/tcl/p3m_tcl.cpp @@ -134,7 +134,6 @@ int tclcommand_inter_coulomb_parse_p3m(Tcl_Interp * interp, int argc, char ** ar } if (ARG0_IS_S("gpu")) { - puts("Setting coulomb method to COULOMB_P3M_GPU"); coulomb.method = COULOMB_P3M_GPU; argc--; From 4b20ec8b4fc84ae7fdb4fce96121df8563679734 Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Wed, 9 Apr 2014 22:16:50 +0200 Subject: [PATCH 05/34] Added simple test force to illustrate interface usage. --- src/HarmonicForce.cu | 44 +++++++++++++++++++++++++++ src/HarmonicForce.hpp | 37 +++++++++++++++++++++++ src/tcl/harmonic_force_tcl.cpp | 55 ++++++++++++++++++++++++++++++++++ src/tcl/harmonic_force_tcl.hpp | 33 ++++++++++++++++++++ 4 files changed, 169 insertions(+) create mode 100644 src/HarmonicForce.cu create mode 100644 src/HarmonicForce.hpp create mode 100644 src/tcl/harmonic_force_tcl.cpp create mode 100644 src/tcl/harmonic_force_tcl.hpp diff --git a/src/HarmonicForce.cu b/src/HarmonicForce.cu new file mode 100644 index 00000000000..54411557271 --- /dev/null +++ b/src/HarmonicForce.cu @@ -0,0 +1,44 @@ +#include "HarmonicForce.hpp" + +#include + +#ifdef HARMONICFORCE + +HarmonicForce *harmonicForce = 0; + +__global__ void HarmonicForce_kernel(float x, float y, float z, float k, + int n, float *pos, float *f) { + + int id = blockIdx.x * blockDim.x + threadIdx.x; + + if(id <= n) + return; + + f[3*id + 0] = k*(x - pos[3*id + 0]); + f[3*id + 1] = k*(y - pos[3*id + 1]); + f[3*id + 2] = k*(z - pos[3*id + 2]); +} + + +void HarmonicForce_kernel_wrapper(float x, float y, float z, float k, int n, float *pos, float *f) { + dim3 grid(1,1,1); + dim3 block(1,1,1); + + printf("n %d\n", n); + + if(n <= 512) { + puts("n <= 512"); + grid.x = 1; + block.x = n; + } else { + grid.x = n/512 + 1; + block.x = 512; + } + + printf("Calling HarmonicForce_kernel<<<(%d %d %d),(%d %d %d)>>>\n", + grid.x, grid.y, grid.z, block.x, block.y, block.z); + + HarmonicForce_kernel<<>>(x, y, z, k, n, pos, f); +} + +#endif diff --git a/src/HarmonicForce.hpp b/src/HarmonicForce.hpp new file mode 100644 index 00000000000..c7e563d43e4 --- /dev/null +++ b/src/HarmonicForce.hpp @@ -0,0 +1,37 @@ +#include "config.hpp" + +#ifdef HARMONICFORCE + +#include "SystemInterface.hpp" +#include + +void HarmonicForce_kernel_wrapper(float x, float y, float z, float k, + int n, float *pos, float *f); + +class HarmonicForce { +public: + HarmonicForce(float x1, float x2, float x3, float _k, SystemInterface &s) { + x = x1; + y = x2; + z = x3; + k = _k; + + if(!s.requestFGpu()) + std::cerr << "HarmonicForce needs access to forces on GPU!" << std::endl; + + if(!s.requestRGpu()) + std::cerr << "HarmonicForce needs access to positions on GPU!" << std::endl; + + }; + void calc(SystemInterface &s) { + HarmonicForce_kernel_wrapper(x,y,z,k,s.npart_gpu(), + s.rGpuBegin(), s.fGpuBegin()); + }; +protected: + float x,y,z; + float k; +}; + +extern HarmonicForce *harmonicForce; + +#endif diff --git a/src/tcl/harmonic_force_tcl.cpp b/src/tcl/harmonic_force_tcl.cpp new file mode 100644 index 00000000000..b341fdc6420 --- /dev/null +++ b/src/tcl/harmonic_force_tcl.cpp @@ -0,0 +1,55 @@ +/* + Copyright (C) 2010,2011,2012,2013 The ESPResSo project + Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 + Max-Planck-Institute for Polymer Research, Theory Group + + This file is part of ESPResSo. + + ESPResSo is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + ESPResSo is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "harmonic_force_tcl.hpp" + +#ifdef HARMONICFORCE + +#include "HarmonicForce.hpp" +#include "EspressoSystemInterface.hpp" + +int tclcommand_harmonic_force(ClientData data, Tcl_Interp *interp, int argc, char **argv) { + DoubleList dl; + + init_doublelist(&dl); + + if(!ARG1_IS_DOUBLELIST(dl)) { + puts("Expected double list"); + return TCL_ERROR; + } + + if(dl.n != 4) { + puts("Wrong # of args"); + for(int i = 0; i < dl.n; i++) + printf("%d %e\n", i, dl.e[i]); + + return TCL_ERROR; + } + + printf("x %e %e %e, k %e\n", dl.e[0], dl.e[1],dl.e[2],dl.e[3]); + + harmonicForce = new HarmonicForce(dl.e[0], dl.e[1],dl.e[2],dl.e[3], espressoSystemInterface); + + return TCL_OK; +} + + +#endif diff --git a/src/tcl/harmonic_force_tcl.hpp b/src/tcl/harmonic_force_tcl.hpp new file mode 100644 index 00000000000..4015a8525f5 --- /dev/null +++ b/src/tcl/harmonic_force_tcl.hpp @@ -0,0 +1,33 @@ +/* + Copyright (C) 2010,2011,2012,2013 The ESPResSo project + Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 + Max-Planck-Institute for Polymer Research, Theory Group + + This file is part of ESPResSo. + + ESPResSo is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + ESPResSo is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef HARMONIC_FORCE_TCL_H +#define HARMONIC_FORCE_TCL_H + +#include "parser.hpp" + +#ifdef HARMONICFORCE + +int tclcommand_harmonic_force(ClientData data, Tcl_Interp *interp, int argc, char **argv); + +#endif + +#endif From 0d3a911cc80ad1fe4ea0737eb35538d5574e51f2 Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Tue, 15 Apr 2014 10:35:46 +0200 Subject: [PATCH 06/34] Removed stray debug messages. --- src/HarmonicForce.cu | 8 +++++--- src/tcl/harmonic_force_tcl.cpp | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/HarmonicForce.cu b/src/HarmonicForce.cu index 54411557271..2db93211037 100644 --- a/src/HarmonicForce.cu +++ b/src/HarmonicForce.cu @@ -1,5 +1,6 @@ #include "HarmonicForce.hpp" +#include "cuda_utils.hpp" #include #ifdef HARMONICFORCE @@ -24,7 +25,8 @@ void HarmonicForce_kernel_wrapper(float x, float y, float z, float k, int n, flo dim3 grid(1,1,1); dim3 block(1,1,1); - printf("n %d\n", n); + if(n == 0) + return; if(n <= 512) { puts("n <= 512"); @@ -35,8 +37,8 @@ void HarmonicForce_kernel_wrapper(float x, float y, float z, float k, int n, flo block.x = 512; } - printf("Calling HarmonicForce_kernel<<<(%d %d %d),(%d %d %d)>>>\n", - grid.x, grid.y, grid.z, block.x, block.y, block.z); + // printf("Calling HarmonicForce_kernel<<<(%d %d %d),(%d %d %d)>>>\n", + // grid.x, grid.y, grid.z, block.x, block.y, block.z); HarmonicForce_kernel<<>>(x, y, z, k, n, pos, f); } diff --git a/src/tcl/harmonic_force_tcl.cpp b/src/tcl/harmonic_force_tcl.cpp index b341fdc6420..6f351185cde 100644 --- a/src/tcl/harmonic_force_tcl.cpp +++ b/src/tcl/harmonic_force_tcl.cpp @@ -44,7 +44,7 @@ int tclcommand_harmonic_force(ClientData data, Tcl_Interp *interp, int argc, cha return TCL_ERROR; } - printf("x %e %e %e, k %e\n", dl.e[0], dl.e[1],dl.e[2],dl.e[3]); + // printf("x %e %e %e, k %e\n", dl.e[0], dl.e[1],dl.e[2],dl.e[3]); harmonicForce = new HarmonicForce(dl.e[0], dl.e[1],dl.e[2],dl.e[3], espressoSystemInterface); From 8d1a568310aca498b5b4cbe5ec10ee3ea0b0d77c Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Tue, 15 Apr 2014 10:37:51 +0200 Subject: [PATCH 07/34] HarmonicForce: Added Cuda wrappers. --- src/HarmonicForce.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/HarmonicForce.cu b/src/HarmonicForce.cu index 2db93211037..fd86fad4580 100644 --- a/src/HarmonicForce.cu +++ b/src/HarmonicForce.cu @@ -40,7 +40,7 @@ void HarmonicForce_kernel_wrapper(float x, float y, float z, float k, int n, flo // printf("Calling HarmonicForce_kernel<<<(%d %d %d),(%d %d %d)>>>\n", // grid.x, grid.y, grid.z, block.x, block.y, block.z); - HarmonicForce_kernel<<>>(x, y, z, k, n, pos, f); +KERNELCALL(HarmonicForce_kernel,grid,block,(x, y, z, k, n, pos, f)) } #endif From 24acd6ac3d7c5336b9fa718e6d3e2a964bce3142 Mon Sep 17 00:00:00 2001 From: Axel Arnold Date: Tue, 15 Apr 2014 11:01:40 +0200 Subject: [PATCH 08/34] Fixed time_step check in correlator --- src/statistics_correlation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/statistics_correlation.cpp b/src/statistics_correlation.cpp index e1d0105f728..a1064811448 100644 --- a/src/statistics_correlation.cpp +++ b/src/statistics_correlation.cpp @@ -157,7 +157,7 @@ int double_correlation_init(double_correlation* self, double dt, unsigned int ta return 15; } // check if dt is a multiple of the md timestep - if ( abs(dt/time_step - round(dt/time_step)>1e-6 ) ) + if ( abs(dt/time_step - round(dt/time_step)) >1e-6 ) return 16; self->dt = dt; self->update_frequency = (int) floor(dt/time_step); From dae0b8197a7538c15063b1ecd6acb50396a19197 Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Tue, 15 Apr 2014 11:13:19 +0200 Subject: [PATCH 09/34] Fix build errror when LB_BOUDARIES not set. --- src/statistics_fluid.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/statistics_fluid.cpp b/src/statistics_fluid.cpp index 8985ebd143e..a4d80ff8a0e 100644 --- a/src/statistics_fluid.cpp +++ b/src/statistics_fluid.cpp @@ -97,19 +97,22 @@ void lb_calc_fluid_temp(double *result) { int number_of_non_boundary_nodes = 0; for (x=1; x<=lblattice.grid[0]; x++) { - for (y=1; y<=lblattice.grid[1]; y++) { - for (z=1; z<=lblattice.grid[2]; z++) { + for (y=1; y<=lblattice.grid[1]; y++) { + for (z=1; z<=lblattice.grid[2]; z++) { - index = get_linear_index(x,y,z,lblattice.halo_grid); + index = get_linear_index(x,y,z,lblattice.halo_grid); - if ( !lbfields[index].boundary ) - { +#ifdef LB_BOUNDARIES + if ( !lbfields[index].boundary ) +#endif + { lb_calc_local_fields(index, &rho, j, NULL); - temp += scalar(j,j); - number_of_non_boundary_nodes++; + temp += scalar(j,j); + number_of_non_boundary_nodes++; + } + } } - - }}} + } temp *= 1./(3.*lbpar.rho[0]*number_of_non_boundary_nodes*lbpar.tau*lbpar.tau*lblattice.agrid)/n_nodes; From 7252c7008e2e24eb436051a02c3e7d34bc8ed174 Mon Sep 17 00:00:00 2001 From: Axel Arnold Date: Tue, 15 Apr 2014 12:01:16 +0200 Subject: [PATCH 10/34] refactored on_integrate_start --- src/communication.cpp | 6 +- src/energy.cpp | 2 +- src/initialize.cpp | 142 ++++++--------------------------------- src/integrate.cpp | 67 +++++++++++++++++- src/integrate.hpp | 10 ++- src/interaction_data.cpp | 2 +- src/interaction_data.hpp | 2 +- src/lb.cpp | 47 +++++++++---- src/lbgpu.cpp | 26 +++++++ src/lbgpu.hpp | 1 + src/pressure.cpp | 2 +- src/python/integrate.pxd | 2 +- src/python/integrate.pyx | 2 +- src/reaction.cpp | 20 ++++++ src/reaction.hpp | 2 + 15 files changed, 187 insertions(+), 146 deletions(-) diff --git a/src/communication.cpp b/src/communication.cpp index 69b4c259532..b63b0285d1a 100644 --- a/src/communication.cpp +++ b/src/communication.cpp @@ -1176,14 +1176,14 @@ int mpi_integrate(int n_steps) { if (!correlations_autoupdate) { mpi_call(mpi_integrate_slave, -1, n_steps); - integrate_vv(n_steps); + integrate_vv(n_steps, 0); COMM_TRACE(fprintf(stderr, "%d: integration task %d done.\n", \ this_node, n_steps)); return check_runtime_errors(); } else { for (int i=0; i=lbpar.agrid/2.0) { - errtext = runtime_error(128); - ERROR_SPRINTF(errtext, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} "); - } - } -#endif #ifdef LB_GPU if(this_node == 0){ - if(lattice_switch & LATTICE_LB_GPU) { - if (lbpar_gpu.agrid < 0.0) { - errtext = runtime_error(128); - ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set} "); - } - if (lbpar_gpu.tau < 0.0) { - errtext = runtime_error(128); - ERROR_SPRINTF(errtext,"{099 Lattice Boltzmann time step not set} "); - } - for(int i=0;i=lbpar.agrid/2.0) { + errtext = runtime_error(128); + ERROR_SPRINTF(errtext, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} "); + ret = 1; + } if (cell_structure.type != CELL_STRUCTURE_DOMDEC) { - errtxt = runtime_error(128); - ERROR_SPRINTF(errtxt, "{103 LB requires domain-decomposition cellsystem} "); + errtext = runtime_error(128); + ERROR_SPRINTF(errtext, "{103 LB requires domain-decomposition cellsystem} "); ret = -1; } else if (dd.use_vList && skin>=lbpar.agrid/2.0) { - errtxt = runtime_error(128); - ERROR_SPRINTF(errtxt, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} "); + errtext = runtime_error(128); + ERROR_SPRINTF(errtext, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} "); ret = -1; } - if (thermo_switch & ~THERMO_LB) { - errtxt = runtime_error(128); - ERROR_SPRINTF(errtxt, "{122 LB must not be used with other thermostats} "); + errtext = runtime_error(128); + ERROR_SPRINTF(errtext, "{122 LB must not be used with other thermostats} "); ret = 1; } - - return ret; - + } + return ret; } /***********************************************************************/ diff --git a/src/lbgpu.cpp b/src/lbgpu.cpp index 314871a69c2..ce3e1605c7a 100644 --- a/src/lbgpu.cpp +++ b/src/lbgpu.cpp @@ -380,4 +380,30 @@ int lb_lbnode_set_extforce_GPU(int ind[3], double f[3]) return ES_OK; } +void lb_GPU_sanity_checks() +{ + if(this_node == 0){ + if(lattice_switch & LATTICE_LB_GPU) { + if (lbpar_gpu.agrid < 0.0) { + errtext = runtime_error(128); + ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set} "); + } + if (lbpar_gpu.tau < 0.0) { + errtext = runtime_error(128); + ERROR_SPRINTF(errtext,"{099 Lattice Boltzmann time step not set} "); + } + for(int i=0;i Date: Tue, 15 Apr 2014 14:19:00 +0200 Subject: [PATCH 11/34] Cuda communication cleanup., small fix to HarmonicForce. --- src/EspressoSystemInterface.cpp | 3 +- src/EspressoSystemInterface.hpp | 6 ++++ src/EspressoSystemInterface_cuda.cu | 51 ++++++++++++++++------------- src/HarmonicForce.cu | 17 ++++------ src/cuda_common_cuda.cu | 3 +- src/cuda_interface.cpp | 1 - src/forces.cpp | 4 +-- src/initialize.cpp | 6 ++-- 8 files changed, 50 insertions(+), 41 deletions(-) diff --git a/src/EspressoSystemInterface.cpp b/src/EspressoSystemInterface.cpp index e22ca247b7c..7ea6d1160df 100644 --- a/src/EspressoSystemInterface.cpp +++ b/src/EspressoSystemInterface.cpp @@ -53,8 +53,9 @@ void EspressoSystemInterface::gatherParticles() { if (m_gpu) { if(gpu_get_global_particle_vars_pointer_host()->communication_enabled) { + ESIF_TRACE(puts("Calling copy_part_data_to_gpu()")); copy_part_data_to_gpu(); - m_gpu_npart = gpu_get_global_particle_vars_pointer_host()->number_of_particles; + reallocDeviceMemory(gpu_get_global_particle_vars_pointer_host()->number_of_particles); if(m_splitParticleStructGpu && (this_node == 0)) split_particle_struct(); } diff --git a/src/EspressoSystemInterface.hpp b/src/EspressoSystemInterface.hpp index 21a5d8338b6..d1767940d48 100644 --- a/src/EspressoSystemInterface.hpp +++ b/src/EspressoSystemInterface.hpp @@ -1,6 +1,8 @@ #ifndef ESPRESSOSYSTEMINTERFACE_H #define ESPRESSOSYSTEMINTERFACE_H +#define ESIF_TRACE(A) + #include "SystemInterface.hpp" #include "particle_data.hpp" #include "cuda_interface.hpp" @@ -112,10 +114,14 @@ class EspressoSystemInterface : public SystemInterface { void split_particle_struct(); void enableParticleCommunication() { if(!gpu_get_global_particle_vars_pointer_host()->communication_enabled) { + ESIF_TRACE(puts("gpu communication not enabled;")); + ESIF_TRACE(puts("enableParticleCommunication")); gpu_init_particle_comm(); cuda_bcast_global_part_params(); + reallocDeviceMemory(gpu_get_global_particle_vars_pointer_host()->number_of_particles); } }; + void reallocDeviceMemory(int n); Vector3Container R; #ifdef ELECTROSTATICS diff --git a/src/EspressoSystemInterface_cuda.cu b/src/EspressoSystemInterface_cuda.cu index 5ce1aee85b7..e005686111c 100644 --- a/src/EspressoSystemInterface_cuda.cu +++ b/src/EspressoSystemInterface_cuda.cu @@ -58,34 +58,39 @@ __global__ void split_kernel_v(CUDA_particle_data *particles, float *v, int n) { v[idx + 2] = p.v[2]; } +void EspressoSystemInterface::reallocDeviceMemory(int n) { + + if(m_needsRGpu && ((n != m_gpu_npart) || (m_r_gpu_begin == 0))) { + if(m_r_gpu_begin != 0) + cuda_safe_mem(cudaFree(m_r_gpu_begin)); + cuda_safe_mem(cudaMalloc(&m_r_gpu_begin, 3*n*sizeof(float))); + m_r_gpu_end = m_r_gpu_begin + 3*n; + } + + if(m_needsVGpu && ((n != m_gpu_npart) || (m_v_gpu_begin == 0))) { + if(m_v_gpu_begin != 0) + cuda_safe_mem(cudaFree(m_v_gpu_begin)); + cuda_safe_mem(cudaMalloc(&m_v_gpu_begin, 3*n*sizeof(float))); + m_v_gpu_end = m_v_gpu_begin + 3*n; + } + + if(m_needsQGpu && ((n != m_gpu_npart) || (m_q_gpu_begin == 0))) { + if(m_q_gpu_begin != 0) + cuda_safe_mem(cudaFree(m_q_gpu_begin)); + cuda_safe_mem(cudaMalloc(&m_q_gpu_begin, 3*n*sizeof(float))); + m_q_gpu_end = m_q_gpu_begin + 3*n; + } + + m_gpu_npart = n; +} + void EspressoSystemInterface::split_particle_struct() { int n = gpu_get_global_particle_vars_pointer_host()->number_of_particles; if(n == 0) return; - - if( (n != m_gpu_npart) ) { - if(m_needsRGpu) { - if(m_r_gpu_begin != 0) - cuda_safe_mem(cudaFree(m_r_gpu_begin)); - cuda_safe_mem(cudaMalloc(&m_r_gpu_begin, 3*n*sizeof(float))); - m_r_gpu_end = m_r_gpu_begin + 3*n; - } - if(m_needsVGpu) { - if(m_v_gpu_begin != 0) - cuda_safe_mem(cudaFree(m_v_gpu_begin)); - cuda_safe_mem(cudaMalloc(&m_v_gpu_begin, 3*n*sizeof(float))); - m_v_gpu_end = m_v_gpu_begin + 3*n; - } - if(m_needsQGpu) { - if(m_q_gpu_begin != 0) - cuda_safe_mem(cudaFree(m_q_gpu_begin)); - cuda_safe_mem(cudaMalloc(&m_q_gpu_begin, n*sizeof(float))); - m_q_gpu_end = m_q_gpu_begin + n; - } - } - m_gpu_npart = n; - + ESIF_TRACE(printf("n = %d, m_gpu_npart = %d\n", n, m_gpu_npart)); + dim3 grid(n/512+1,1,1); dim3 block(512,1,1); diff --git a/src/HarmonicForce.cu b/src/HarmonicForce.cu index fd86fad4580..0aaf0cfe313 100644 --- a/src/HarmonicForce.cu +++ b/src/HarmonicForce.cu @@ -12,24 +12,23 @@ __global__ void HarmonicForce_kernel(float x, float y, float z, float k, int id = blockIdx.x * blockDim.x + threadIdx.x; - if(id <= n) + if(id >= n) return; - f[3*id + 0] = k*(x - pos[3*id + 0]); - f[3*id + 1] = k*(y - pos[3*id + 1]); - f[3*id + 2] = k*(z - pos[3*id + 2]); + f[3*id + 0] += k*(x - pos[3*id + 0]); + f[3*id + 1] += k*(y - pos[3*id + 1]); + f[3*id + 2] += k*(z - pos[3*id + 2]); } void HarmonicForce_kernel_wrapper(float x, float y, float z, float k, int n, float *pos, float *f) { dim3 grid(1,1,1); dim3 block(1,1,1); - + if(n == 0) return; if(n <= 512) { - puts("n <= 512"); grid.x = 1; block.x = n; } else { @@ -37,10 +36,6 @@ void HarmonicForce_kernel_wrapper(float x, float y, float z, float k, int n, flo block.x = 512; } - // printf("Calling HarmonicForce_kernel<<<(%d %d %d),(%d %d %d)>>>\n", - // grid.x, grid.y, grid.z, block.x, block.y, block.z); - -KERNELCALL(HarmonicForce_kernel,grid,block,(x, y, z, k, n, pos, f)) + KERNELCALL(HarmonicForce_kernel,grid,block,(x, y, z, k, n, pos, f)) } - #endif diff --git a/src/cuda_common_cuda.cu b/src/cuda_common_cuda.cu index eaf53ee1376..534635ccb15 100644 --- a/src/cuda_common_cuda.cu +++ b/src/cuda_common_cuda.cu @@ -180,6 +180,7 @@ void gpu_change_number_of_part_to_comm() { #endif cuda_safe_mem(cudaMalloc((void**)&particle_forces_device, global_part_vars_host.number_of_particles * sizeof(CUDA_particle_force))); + cuda_safe_mem(cudaMalloc((void**)&particle_data_device, global_part_vars_host.number_of_particles * sizeof(CUDA_particle_data))); cuda_safe_mem(cudaMalloc((void**)&particle_seeds_device, global_part_vars_host.number_of_particles * sizeof(CUDA_particle_seed))); #ifdef SHANCHEN @@ -249,8 +250,8 @@ CUDA_fluid_composition* gpu_get_fluid_composition_pointer() { } void copy_part_data_to_gpu() { + COMM_TRACE(printf("global_part_vars_host.communication_enabled = %d && global_part_vars_host.number_of_particles = %d\n", global_part_vars_host.communication_enabled, global_part_vars_host.number_of_particles)); if ( global_part_vars_host.communication_enabled == 1 && global_part_vars_host.number_of_particles ) { - cuda_mpi_get_particles(particle_data_host); /** get espresso md particle values*/ diff --git a/src/cuda_interface.cpp b/src/cuda_interface.cpp index 63e68c9fbba..5dc50cae06f 100644 --- a/src/cuda_interface.cpp +++ b/src/cuda_interface.cpp @@ -38,7 +38,6 @@ void cuda_mpi_get_particles(CUDA_particle_data *particle_data_host) sizes = (int*) malloc(sizeof(int)*n_nodes); n_part = cells_get_n_particles(); - /* first collect number of particles on each node */ MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, comm_cart); diff --git a/src/forces.cpp b/src/forces.cpp index e7e1f07badd..fddba00f0f0 100644 --- a/src/forces.cpp +++ b/src/forces.cpp @@ -88,7 +88,7 @@ void force_calc() // transfer_momentum_gpu check makes sure the LB fluid doesn't get updated on integrate 0 // this_node==0 makes sure it is the master node where the gpu exists - if (lattice_switch & LATTICE_LB_GPU && transfer_momentum_gpu && this_node==0 ) lb_calc_particle_lattice_ia_gpu(); + if (lattice_switch & LATTICE_LB_GPU && transfer_momentum_gpu && (this_node == 0) ) lb_calc_particle_lattice_ia_gpu(); #endif // LB_GPU #ifdef ELECTROSTATICS @@ -152,7 +152,7 @@ void force_calc() meta_perform(); #endif -#if defined(LB_GPU) || (defined(ELECTROSTATICS) && defined(CUDA)) +#ifdef CUDA copy_forces_from_GPU(); #endif diff --git a/src/initialize.cpp b/src/initialize.cpp index 4e02e9012bf..74455e72524 100644 --- a/src/initialize.cpp +++ b/src/initialize.cpp @@ -74,7 +74,7 @@ static int reinit_magnetostatics = 0; static int lb_reinit_particles_gpu = 1; #endif -#if defined(LB_GPU) || defined(ELECTROSTATICS) +#ifdef CUDA static int reinit_particle_comm_gpu = 1; #endif @@ -265,13 +265,15 @@ void on_integration_start() } } #endif -#if defined(LB_GPU) || (defined (ELECTROSTATICS) && defined (CUDA)) + //#if defined(LB_GPU) || (defined (ELECTROSTATICS) && defined (CUDA)) +#ifdef CUDA if (reinit_particle_comm_gpu){ gpu_change_number_of_part_to_comm(); reinit_particle_comm_gpu = 0; } MPI_Bcast(gpu_get_global_particle_vars_pointer_host(), sizeof(CUDA_global_part_vars), MPI_BYTE, 0, comm_cart); #endif + //#endif #ifdef METADYNAMICS From 80fdfdf086102dc4737ac2a036004057795c9460 Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Tue, 15 Apr 2014 14:59:15 +0200 Subject: [PATCH 12/34] HarmonicForce: Doc and NEWS. --- NEWS | 2 ++ doc/ug/part.tex | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/NEWS b/NEWS index e1f4fac373e..c9d0cbff02d 100644 --- a/NEWS +++ b/NEWS @@ -29,6 +29,8 @@ New user-visible features * New command time_integration to get the runtime of the integration loop. +* New harmoic force that runs on the GPU. + User-visible changes -------------------- * Comfixed now works with periodic boundary conditions. diff --git a/doc/ug/part.tex b/doc/ug/part.tex index ce3aebdde7d..b225f0cc84c 100644 --- a/doc/ug/part.tex +++ b/doc/ug/part.tex @@ -865,6 +865,20 @@ \subsection{Getting the currently defined constraints} Prints out all constraint information. If \var{num} is specified only this constraint is displayed, otherwise all constraints will be printed. +\subsection{\texttt{harmonic_force}: Creating a harmonic trap.} + \newescommand{harmonic_trap} + \begin{essyntax} + harmonic_force \{ \var{x} \var{y} \var{z} \} \var{k} + \begin{features} + \required{HARMONICFORCE} + \required{CUDA} + \end{features} + \end{essyntax} + + Calculates a spring force for all particles, where the equilibrium position + of the spring is at \var{x y z} and it's force constant is \var{k}. A more + flexible trap can be constructed with constraints, but this one runs on the GPU. + \section{Virtual sites} \label{sec:virtual} \index{virtual sites|mainindex} From 104da462599798e41da6f7c31648bc85624e7ab0 Mon Sep 17 00:00:00 2001 From: Henri Menke Date: Tue, 15 Apr 2014 16:20:15 +0200 Subject: [PATCH 13/34] Electrokinetics pdb-parser --- src/electrokinetics_pdb_parse.cpp | 429 ++++++++++++++++++++++++++++++ src/electrokinetics_pdb_parse.hpp | 17 ++ src/lb-boundaries.cpp | 30 ++- src/lbgpu_cuda.cu | 3 +- src/tcl/electrokinetics_tcl.cpp | 3 +- 5 files changed, 474 insertions(+), 8 deletions(-) create mode 100644 src/electrokinetics_pdb_parse.cpp create mode 100644 src/electrokinetics_pdb_parse.hpp diff --git a/src/electrokinetics_pdb_parse.cpp b/src/electrokinetics_pdb_parse.cpp new file mode 100644 index 00000000000..3984127c6c8 --- /dev/null +++ b/src/electrokinetics_pdb_parse.cpp @@ -0,0 +1,429 @@ +/* vim: set ts=8 sts=2 sw=2 et: */ +#include +#include +#include +#include +#include "electrokinetics_pdb_parse.hpp" + +//#define DEBUG + +/* Replacements for bool variables */ +const int pdb_SUCCESS = 0; +const int pdb_ERROR = 1; + +float* pdb_charge_lattice = NULL; +int* pdb_boundary_lattice = NULL; + +typedef struct { + int i; // index + int m; // model index + float x,y,z; +} pdb_ATOM; + +typedef struct { + int i; + char type[2]; + float charge; +} itp_atoms; + +typedef struct { + char type[2]; + float sigma,epsilon; +} itp_atomtypes; + +typedef struct { + unsigned int pdb_n_particles; + pdb_ATOM* pdb_array_ATOM; + unsigned int itp_n_particles; + itp_atoms* itp_array_atoms; + unsigned int itp_n_parameters; + itp_atomtypes* itp_array_atomtypes; +} particle_data; + +typedef struct { + float max_x; + float max_y; + float max_z; + float min_x; + float min_y; + float min_z; + float center[3]; +} bounding_box; + +/* BEGIN CODE */ + +void galloc(void** ptr, size_t size) { + if (!*ptr) { + if (size > 0) { + *ptr = (void*) malloc(size); + } + else { + printf("You cannot malloc to size 0\n"); + } + } + else { + if (size > 0) { + *ptr = (void*) realloc(*ptr, size); + } + else { + free(*ptr); + } + } +} + +unsigned int pdb_rhoindex_cartesian2linear(unsigned int x, unsigned int y, unsigned int z) { + return z * ek_parameters.dim_y * ek_parameters.dim_x + y * ek_parameters.dim_x + x; +} + +int print_charge_field(char* filename) { + FILE* fp; + if ((fp = fopen(filename,"w")) == NULL) return pdb_ERROR; + + if( fp == NULL ) { + return 1; + } + + fprintf( fp, "\ +# vtk DataFile Version 2.0\n\ +charge_density\n\ +ASCII\n\ +\n\ +DATASET STRUCTURED_POINTS\n\ +DIMENSIONS %u %u %u\n\ +ORIGIN %f %f %f\n\ +SPACING %f %f %f\n\ +\n\ +POINT_DATA %u\n\ +SCALARS charge_density float 1\n\ +LOOKUP_TABLE default\n", + ek_parameters.dim_x, ek_parameters.dim_y, ek_parameters.dim_z, + ek_parameters.agrid*0.5f, ek_parameters.agrid*0.5f, ek_parameters.agrid*0.5f, + ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid, + ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z); + + for( unsigned int i = 0; i < (ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z); i++ ) { + fprintf( fp, "%e ", pdb_charge_lattice[i] ); + } + + fclose( fp ); + return pdb_SUCCESS; +} + +int print_boundary_lattice(char* filename) { + FILE* fp; + if ((fp = fopen(filename,"w")) == NULL) return pdb_ERROR; + + if( fp == NULL ) { + return 1; + } + + fprintf( fp, "\ +# vtk DataFile Version 2.0\n\ +boundary_flag\n\ +ASCII\n\ +\n\ +DATASET STRUCTURED_POINTS\n\ +DIMENSIONS %u %u %u\n\ +ORIGIN %f %f %f\n\ +SPACING %f %f %f\n\ +\n\ +POINT_DATA %u\n\ +SCALARS boundary_flag float 1\n\ +LOOKUP_TABLE default\n", + ek_parameters.dim_x, ek_parameters.dim_y, ek_parameters.dim_z, + ek_parameters.agrid*0.5f, ek_parameters.agrid*0.5f, ek_parameters.agrid*0.5f, + ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid, + ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z); + + for( unsigned int i = 0; i < (ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z); i++ ) { + fprintf( fp, "%d ", pdb_boundary_lattice[i] ); + } + + fclose( fp ); + return pdb_SUCCESS; +} + +int pdb_parse_files(char* pdb_filename, char* itp_filename, particle_data* atom_data) { + /* + * This routine parses the pdb- and itp-file to extract + * the relevant parameters. These are stored in arrays. + */ + + // Parse pdb-file + int model = 0; + char pdb_line[256]; + FILE* pdb_file; + if ((pdb_file = fopen(pdb_filename,"r")) == NULL) return pdb_ERROR; +#ifdef DEBUG + printf("### Reading pdb-file \"%s\" ###\n",pdb_filename); +#endif + while (fgets(pdb_line, sizeof(pdb_line), pdb_file)) { + if (strncmp(pdb_line,"MODEL",5) == 0) { + // read the MODEL identifier + sscanf(pdb_line,"MODEL %d",&model); +#ifdef DEBUG + printf("MODEL m=%d\n", model); +#endif + } + if ( strncmp(pdb_line,"ATOM",4) == 0) { + // read all ATOMs + galloc( (void**) &atom_data->pdb_array_ATOM , (atom_data->pdb_n_particles+1)*sizeof(pdb_ATOM) ); + pdb_ATOM* a = &atom_data->pdb_array_ATOM[atom_data->pdb_n_particles]; + // See http://deposit.rcsb.org/adit/docs/pdb_atom_format.html#ATOM for the meaning of the format string + sscanf(pdb_line,"ATOM %5d %*4s%*c%*3s%*c%*4d%*c %8f %8f %8f %*6f %*6f %*4s%*2s%*2s",&a->i,&a->x,&a->y,&a->z); + a->x /= 10.0; + a->y /= 10.0; + a->z /= 10.0; +#ifdef DEBUG + // Print all local variables + printf("ATOM i=%d x=%f y=%f z=%f\n",a->i,a->x,a->y,a->z); +#endif + atom_data->pdb_n_particles++; + } + } + fclose(pdb_file); + + // Parse itp-file + char itp_line[256]; + FILE* itp_file; + if ((itp_file = fopen(itp_filename,"r")) == NULL) return pdb_ERROR; +#ifdef DEBUG + printf("### Reading itp-file \"%s\" ###\n",itp_filename); +#endif + while (fgets(itp_line, sizeof(itp_line), itp_file)) { + // get section + char section[256]; + sscanf(itp_line,"[ %s ]",section); + // only read non-comment, non-section and non-empty lines + // TODO: Handling of lines consiting whitespace only (e.g. '\t' and ' ') + if (itp_line[0] != '[' && itp_line[0] != ';' && itp_line[0] != '\r' && itp_line[0] != '\n') { + if (strcmp(section,"atoms") == 0) { + // section [ atoms ] + galloc( (void**) &atom_data->itp_array_atoms , (atom_data->itp_n_particles+1)*sizeof(pdb_ATOM) ); + itp_atoms* a = &atom_data->itp_array_atoms[atom_data->itp_n_particles]; + // FIXME: no source :( Reverse engineered from the itp-file + sscanf(itp_line," %d %2s %*d %*s %*s %*d %f %*f ; %*s %*f",&a->i,a->type,&a->charge); +#ifdef DEBUG + // Print all local variables + printf("[ atoms ] i=%d type=%s charge=%f\n",a->i,a->type,a->charge); +#endif + atom_data->itp_n_particles++; + } + if (strcmp(section,"atomtypes") == 0) { + // section [ atomtypes ] + galloc( (void**) &atom_data->itp_array_atomtypes , (atom_data->itp_n_parameters+1)*sizeof(pdb_ATOM) ); + itp_atomtypes* a = &atom_data->itp_array_atomtypes[atom_data->itp_n_parameters]; + // FIXME: no source :( Reverse engineered from the itp-file + sscanf(itp_line," %2s %*s %*f %*f %*c %f %f ; %*f %*f",a->type,&a->sigma,&a->epsilon); +#ifdef DEBUG + // Print all local variables + printf("[ atomtypes ] name=%s sigma=%f epsilon=%f\n",a->type,a->sigma,a->epsilon); +#endif + atom_data->itp_n_parameters++; + } + } + } + fclose(itp_file); + + if (atom_data->pdb_n_particles != atom_data->itp_n_particles) return pdb_ERROR; + return pdb_SUCCESS; +} + +int calculate_bounding_box(bounding_box* bbox, particle_data* atom_data) { + // prototype for joining the arrays + if (atom_data->pdb_n_particles-1 == 0) return pdb_ERROR; + pdb_ATOM* a = &atom_data->pdb_array_ATOM[0]; + bbox->max_x = a->x; + bbox->max_y = a->y; + bbox->max_z = a->z; + bbox->min_x = a->x; + bbox->min_y = a->y; + bbox->min_z = a->z; + + for (unsigned int i = 1; i <= atom_data->pdb_n_particles-1; i++) { + a = &atom_data->pdb_array_ATOM[i]; + if (bbox->max_x < a->x) bbox->max_x = a->x; + if (bbox->max_y < a->y) bbox->max_y = a->y; + if (bbox->max_z < a->z) bbox->max_z = a->z; + if (bbox->min_x > a->x) bbox->min_x = a->x; + if (bbox->min_y > a->y) bbox->min_y = a->y; + if (bbox->min_z > a->z) bbox->min_z = a->z; + } + + bbox->center[0] = ( bbox->max_x + bbox->min_x )/2; + bbox->center[1] = ( bbox->max_y + bbox->min_y )/2; + bbox->center[2] = ( bbox->max_z + bbox->min_z )/2; + + return pdb_SUCCESS; +} + +int populate_lattice(particle_data* atom_data, int indices_only) { + /* + * This routine will populate the lattice using the + * values read from the pdb and itp files. + * WARNING: It contains much logic and interpolation stuff! + */ +#ifdef DEBUG + printf("pdb_n_particles=%u, itp_n_particles=%u, itp_n_parameters=%u\n",atom_data->pdb_n_particles,atom_data->itp_n_particles,atom_data->itp_n_parameters); +#endif + // TODO: Check if bounding box fits into simbox + bounding_box bbox; + calculate_bounding_box(&bbox, atom_data); + + // calculate the shift of the bounding box + float shift[3]; + shift[0] = ek_parameters.agrid / 2.0 * ek_parameters.dim_x - bbox.center[0]; + shift[1] = ek_parameters.agrid / 2.0 * ek_parameters.dim_y - bbox.center[1]; + shift[2] = ek_parameters.agrid / 2.0 * ek_parameters.dim_z - bbox.center[2]; + +#ifdef DEBUG + printf("bbox.max_x=%f, bbox.max_y=%f, bbox.max_z=%f, bbox.min_x=%f, bbox.min_y=%f, bbox.min_z=%f, bbox->center=[%f; %f; %f]\n", bbox.max_x, bbox.max_y, bbox.max_z, bbox.min_x, bbox.min_y, bbox.min_z, bbox.center[0], bbox.center[1], bbox.center[2]); + printf("agrid=%f, dim_x=%d, dim_y=%d, dim_z=%d\n",ek_parameters.agrid, ek_parameters.dim_x, ek_parameters.dim_y, ek_parameters.dim_z); + printf("shift=[%f; %f; %f]\n",shift[0], shift[1], shift[2]); +#endif + + // joining the array + int lowernode[3]; + float cellpos[3]; + float gridpos; + float a_x_shifted, a_y_shifted, a_z_shifted; + + for (unsigned int i = 0; i <= atom_data->pdb_n_particles-1; i++) { + pdb_ATOM* a = &atom_data->pdb_array_ATOM[i]; + itp_atoms* b; + itp_atomtypes* c; + for (unsigned int j = 0; j <= atom_data->itp_n_particles-1; j++) { + b = &atom_data->itp_array_atoms[j]; + if (a->i == b->i) { + for (unsigned int k = 0; k <= atom_data->itp_n_parameters-1; k++) { + c = &atom_data->itp_array_atomtypes[k]; + if (strcmp(b->type,c->type) == 0) { +#ifdef DEBUG + printf("i=%d x=%f y=%f z=%f type=%s charge=%f sigma=%f epsilon=%f\n",a->i,a->x,a->y,a->z,b->type,b->charge,c->sigma,c->epsilon); +#endif + + // Interpolate the charge to the lattice + gridpos = (a->x + shift[0]) / ek_parameters.agrid - 0.5f; + lowernode[0] = (int) floorf( gridpos ); + cellpos[0] = gridpos - lowernode[0]; + + gridpos = (a->y + shift[1]) / ek_parameters.agrid - 0.5f; + lowernode[1] = (int) floorf( gridpos ); + cellpos[1] = gridpos - lowernode[1]; + + gridpos = (a->z + shift[2]) / ek_parameters.agrid - 0.5f; + lowernode[2] = (int) floorf( gridpos ); + cellpos[2] = gridpos - lowernode[2]; + + lowernode[0] = (lowernode[0] + ek_parameters.dim_x) % ek_parameters.dim_x; + lowernode[1] = (lowernode[1] + ek_parameters.dim_y) % ek_parameters.dim_y; + lowernode[2] = (lowernode[2] + ek_parameters.dim_z) % ek_parameters.dim_z; + + if ( indices_only == 0 ) { + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],lowernode[1],lowernode[2] )] + = b->charge * ( 1 - cellpos[0] ) * ( 1 - cellpos[1] ) * ( 1 - cellpos[2] ); + + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,lowernode[1],lowernode[2] )] + = b->charge * cellpos[0] * ( 1 - cellpos[1] ) * ( 1 - cellpos[2] ); + + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],( lowernode[1] + 1 ) % ek_parameters.dim_y,lowernode[2] )] + = b->charge * ( 1 - cellpos[0] ) * cellpos[1] * ( 1 - cellpos[2] ); + + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],lowernode[1],( lowernode[2] + 1 ) % ek_parameters.dim_z )] + = b->charge * ( 1 - cellpos[0] ) * ( 1 - cellpos[1] ) * cellpos[2]; + + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,( lowernode[1] + 1 ) % ek_parameters.dim_y,lowernode[2] )] + = b->charge * cellpos[0] * cellpos[1] * ( 1 - cellpos[2] ); + + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,lowernode[1],( lowernode[2] + 1 ) % ek_parameters.dim_z )] + = b->charge * cellpos[0] * ( 1 - cellpos[1] ) * cellpos[2]; + + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],( lowernode[1] + 1 ) % ek_parameters.dim_y,( lowernode[2] + 1 ) % ek_parameters.dim_z )] + = b->charge * ( 1 - cellpos[0] ) * cellpos[1] * cellpos[2]; + + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,( lowernode[1] + 1 ) % ek_parameters.dim_y,( lowernode[2] + 1 ) % ek_parameters.dim_z )] + = b->charge * cellpos[0] * cellpos[1] * cellpos[2]; + } + else if ( indices_only == 1 ) { + printf("Only indices!\n"); + } + // Interpolate lennard-jones parameters to boundary + float r = pow(2,1./6.)*c->sigma; + + a_x_shifted = (a->x + shift[0]) / ek_parameters.agrid - 0.5f; + a_y_shifted = (a->y + shift[1]) / ek_parameters.agrid - 0.5f; + a_z_shifted = (a->z + shift[2]) / ek_parameters.agrid - 0.5f; + + for (float z = a->z - r; z <= a->z + r + ek_parameters.agrid; z += ek_parameters.agrid) { + for (float y = a->y - r; y <= a->y + r + ek_parameters.agrid; y += ek_parameters.agrid) { + for (float x = a->x - r; x <= a->x + r + ek_parameters.agrid; x += ek_parameters.agrid) { + gridpos = (x + shift[0]) / ek_parameters.agrid - 0.5f; + lowernode[0] = (int) floorf( gridpos ); + + gridpos = (y + shift[1]) / ek_parameters.agrid - 0.5f; + lowernode[1] = (int) floorf( gridpos ); + + gridpos = (z + shift[2]) / ek_parameters.agrid - 0.5f; + lowernode[2] = (int) floorf( gridpos ); + + lowernode[0] = (lowernode[0] + ek_parameters.dim_x) % ek_parameters.dim_x; + lowernode[1] = (lowernode[1] + ek_parameters.dim_y) % ek_parameters.dim_y; + lowernode[2] = (lowernode[2] + ek_parameters.dim_z) % ek_parameters.dim_z; +#ifdef DEBUG + printf("shifted: %f %f %f\n", a_x_shifted, a_y_shifted, a_z_shifted); + printf("lowernode: %d %d %d\n", lowernode[0], lowernode[1], lowernode[2]); + printf("distance: %f %f %f\n", lowernode[0] - a_x_shifted, lowernode[1] - a_y_shifted, lowernode[2] - a_z_shifted); + printf("distance: %f <= %f\n\n", pow(lowernode[0] - a_x_shifted,2) + pow(lowernode[1] - a_y_shifted,2) + pow(lowernode[2] - a_z_shifted,2), pow(r/ek_parameters.agrid,2)); +#endif + if ( pow(lowernode[0] - a_x_shifted,2) + pow(lowernode[1] - a_y_shifted,2) + pow(lowernode[2] - a_z_shifted,2) <= pow(r/ek_parameters.agrid,2) ) { + if ( indices_only == 0 ) { + pdb_boundary_lattice[ek_parameters.dim_y*ek_parameters.dim_x*lowernode[2] + ek_parameters.dim_x*lowernode[1] + lowernode[0]] = 1; + } + else if ( indices_only == 1 ) { + printf("Only indices!\n"); + } + } + } + } + } + + break; + } + } + } + } + } + + return pdb_SUCCESS; +} + +int pdb_parse(char* pdb_filename, char* itp_filename) { + /* + * This is the main parsing routine, which is visible to the outside + * through the header electrokinetics_pdb_parse.h. It doesn't contain any logic and just + * deploys the input to the subroutines. + */ + + /* BEGIN DEPLOY */ + int indices_only = 0; + galloc( (void**) &pdb_charge_lattice, ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z * sizeof(float)); + galloc( (void**) &pdb_boundary_lattice, ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z * sizeof(int)); + for ( unsigned int i = 0; i < ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z; i++ ) { + pdb_charge_lattice[i] = 0.0; + pdb_boundary_lattice[i] = 0; + } + + particle_data atom_data; + atom_data.pdb_n_particles = 0; + atom_data.pdb_array_ATOM = NULL; + atom_data.itp_n_particles = 0; + atom_data.itp_array_atoms = NULL; + atom_data.itp_n_parameters = 0; + atom_data.itp_array_atomtypes = NULL; + + pdb_parse_files(pdb_filename, itp_filename, &atom_data); + + populate_lattice(&atom_data, indices_only); + + return pdb_SUCCESS; +} diff --git a/src/electrokinetics_pdb_parse.hpp b/src/electrokinetics_pdb_parse.hpp new file mode 100644 index 00000000000..e18f48a9ac2 --- /dev/null +++ b/src/electrokinetics_pdb_parse.hpp @@ -0,0 +1,17 @@ +/* vim: set ts=8 sts=2 sw=2 et: */ +#ifndef _PDB_PARSER_H +#define _PDB_PARSER_H + +#include "electrokinetics.hpp" + +extern float* pdb_charge_lattice; +extern int* pdb_boundary_lattice; + +/* Returns 0/1 if reading the files was successful/unsuccessful */ +int pdb_parse(char* pdb_filename, char* itp_filename); + +int print_charge_field(char* filename); + +int print_boundary_lattice(char* filename); + +#endif diff --git a/src/lb-boundaries.cpp b/src/lb-boundaries.cpp index 4e7c5675b30..6224b2fd606 100644 --- a/src/lb-boundaries.cpp +++ b/src/lb-boundaries.cpp @@ -31,6 +31,7 @@ #include "lbgpu.hpp" #include "lb.hpp" #include "electrokinetics.hpp" +#include "electrokinetics_pdb_parse.hpp" #include "interaction_data.hpp" #include "communication.hpp" @@ -120,6 +121,9 @@ void lb_init_boundaries() { break; } } + if (pdb_charge_lattice) { + charged_boundaries = 1; + } for(n = 0; n < int(ek_parameters.number_of_species); n++) if(ek_parameters.valency[n] != 0.0) { @@ -137,7 +141,7 @@ void lb_init_boundaries() { for(z=0; z= 0 && n_lb_boundaries > 0) { + if(pdb_boundary_lattice && + pdb_boundary_lattice[ek_parameters.dim_y*ek_parameters.dim_x*z + ek_parameters.dim_x*y + x]) { + dist = -1; + boundary_number = n_lb_boundaries; // Make sure that boundary_number is not used by a constraint + } + + if (dist <= 0 && boundary_number >= 0 && (n_lb_boundaries > 0 || pdb_boundary_lattice)) { size_of_index = (number_of_boundnodes+1)*sizeof(int); host_boundary_node_list = (int *) realloc(host_boundary_node_list, size_of_index); host_boundary_index_list = (int *) realloc(host_boundary_index_list, size_of_index); host_boundary_node_list[number_of_boundnodes] = x + lbpar_gpu.dim_x*y + lbpar_gpu.dim_x*lbpar_gpu.dim_y*z; host_boundary_index_list[number_of_boundnodes] = boundary_number + 1; number_of_boundnodes++; - // printf("boundindex %i: \n", number_of_boundnodes); + //printf("boundindex %i: \n", number_of_boundnodes); } #ifdef EK_BOUNDARIES if (ek_initialized) { if(wallcharge_species != -1) { + if(pdb_charge_lattice && + pdb_charge_lattice[ek_parameters.dim_y*ek_parameters.dim_x*z + ek_parameters.dim_x*y + x] != 0.0f) { + node_charged = 1; + node_wallcharge += pdb_charge_lattice[ek_parameters.dim_y*ek_parameters.dim_x*z + ek_parameters.dim_x*y + x]; + } if(node_charged) host_wallcharge_species_density[ek_parameters.dim_y*ek_parameters.dim_x*z + ek_parameters.dim_x*y + x] = node_wallcharge / ek_parameters.valency[wallcharge_species]; else if(dist <= 0) @@ -232,7 +246,7 @@ void lb_init_boundaries() { } /**call of cuda fkt*/ - float* boundary_velocity = (float *) malloc(3*n_lb_boundaries*sizeof(float)); + float* boundary_velocity = (float *) malloc(3*(n_lb_boundaries+1)*sizeof(float)); for (n=0; n #include "electrokinetics.hpp" +#include "electrokinetics_pdb_parse.hpp" #include "lbgpu.hpp" #include "cuda_interface.hpp" #include "cuda_utils.hpp" @@ -2987,7 +2988,7 @@ void lb_init_boundaries_GPU(int host_n_lb_boundaries, int number_of_boundnodes, KERNELCALL(reset_boundaries, dim_grid, threads_per_block, (nodes_a, nodes_b)); - if (n_lb_boundaries == 0) + if (n_lb_boundaries == 0 && !pdb_boundary_lattice) { cudaThreadSynchronize(); return; diff --git a/src/tcl/electrokinetics_tcl.cpp b/src/tcl/electrokinetics_tcl.cpp index 581a6736432..729d561927d 100644 --- a/src/tcl/electrokinetics_tcl.cpp +++ b/src/tcl/electrokinetics_tcl.cpp @@ -46,7 +46,7 @@ int tclcommand_electrokinetics(ClientData data, Tcl_Interp *interp, int argc, ch if(argc < 2) { - Tcl_AppendResult(interp, "Usage of \"electrokinetics\":", (char *)NULL); + Tcl_AppendResult(interp, "Usage of \"electrokinetics\":\n\n", (char *)NULL); Tcl_AppendResult(interp, "electrokinetics [agrid #float] [lb_density #float] [viscosity #float] [friction #float]\n", (char *)NULL); Tcl_AppendResult(interp, " [bulk_viscosity #float] [gamma_even #float] [gamma_odd #float] [T #float] [bjerrum_length #float]\n", (char *)NULL); Tcl_AppendResult(interp, "electrokinetics print vtk #string]\n", (char *)NULL); @@ -117,6 +117,7 @@ int tclcommand_electrokinetics(ClientData data, Tcl_Interp *interp, int argc, ch Tcl_AppendResult(interp, "Could not parse pdb- or itp-file. Please, check your format and filenames.", (char *) NULL); return (TCL_ERROR); } + lb_init_boundaries(); #endif } else if(ARG0_IS_S("boundary")) From 3675cee3178968781bc4da26916e9cc37b33d514 Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Tue, 15 Apr 2014 16:24:44 +0200 Subject: [PATCH 14/34] Fixed typo in ug. --- doc/ug/part.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/ug/part.tex b/doc/ug/part.tex index b225f0cc84c..7765874c1e4 100644 --- a/doc/ug/part.tex +++ b/doc/ug/part.tex @@ -865,8 +865,8 @@ \subsection{Getting the currently defined constraints} Prints out all constraint information. If \var{num} is specified only this constraint is displayed, otherwise all constraints will be printed. -\subsection{\texttt{harmonic_force}: Creating a harmonic trap.} - \newescommand{harmonic_trap} +\subsection{\texttt{harmonic_force}: Creating a harmonic trap} + \newescommand{harmonic-force} \begin{essyntax} harmonic_force \{ \var{x} \var{y} \var{z} \} \var{k} \begin{features} From 70287e6778671103bfcb31b6c3d88cf1b0805ae8 Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Tue, 15 Apr 2014 16:43:09 +0200 Subject: [PATCH 15/34] No more compiler warnings from SystemInterface. --- src/EspressoSystemInterface.cpp | 2 +- src/EspressoSystemInterface.hpp | 2 +- src/SystemInterface.hpp | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/EspressoSystemInterface.cpp b/src/EspressoSystemInterface.cpp index 7ea6d1160df..32b47e7a32f 100644 --- a/src/EspressoSystemInterface.cpp +++ b/src/EspressoSystemInterface.cpp @@ -9,7 +9,7 @@ /********************************************************************************************/ template -const value_type EspressoSystemInterface::const_iterator::operator*() const { +value_type EspressoSystemInterface::const_iterator::operator*() const { return (*m_const_iterator); } diff --git a/src/EspressoSystemInterface.hpp b/src/EspressoSystemInterface.hpp index d1767940d48..79ec8848461 100644 --- a/src/EspressoSystemInterface.hpp +++ b/src/EspressoSystemInterface.hpp @@ -22,7 +22,7 @@ class EspressoSystemInterface : public SystemInterface { template class const_iterator : public SystemInterface::const_iterator { public: - const value_type operator*() const; + value_type operator*() const; SystemInterface::const_iterator &operator=(const SystemInterface::const_iterator &rhs); EspressoSystemInterface::const_iterator &operator=(typename std::vector::const_iterator rhs); bool operator==(SystemInterface::const_iterator const &rhs) const; diff --git a/src/SystemInterface.hpp b/src/SystemInterface.hpp index bf124e098d6..35a0c26dc97 100644 --- a/src/SystemInterface.hpp +++ b/src/SystemInterface.hpp @@ -18,7 +18,7 @@ class SystemInterface { template class const_iterator { public: - virtual const value_type operator*() const = 0; + virtual value_type operator*() const = 0; virtual const_iterator &operator=(const const_iterator &rhs) = 0; virtual bool operator==(const_iterator const &rhs) const = 0; virtual bool operator!=(const_iterator const &rhs) const = 0; @@ -27,7 +27,7 @@ class SystemInterface { class null_vec_iterator : public SystemInterface::const_iterator { public: - const Vector3 operator*() const { return Vector3(); }; + Vector3 operator*() const { return Vector3(); }; const_iterator &operator=(const const_iterator &rhs) { return *this; }; bool operator==(const_iterator const &rhs) const { return true; }; bool operator!=(const_iterator const &rhs) const { return false; }; @@ -38,7 +38,7 @@ class SystemInterface { class null_real_iterator : public SystemInterface::const_iterator { public: - const Real operator*() const { return Real(); }; + Real operator*() const { return Real(); }; const_iterator &operator=(const const_iterator &rhs) { return *this; }; bool operator==(const_iterator const &rhs) const { return true; }; bool operator!=(const_iterator const &rhs) const { return false; }; From f3461cd033927bdc8d300a9fb4ed279482904d69 Mon Sep 17 00:00:00 2001 From: Georg Rempfer Date: Tue, 15 Apr 2014 21:06:20 +0200 Subject: [PATCH 16/34] More robust string parsing. --- src/electrokinetics_pdb_parse.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/electrokinetics_pdb_parse.cpp b/src/electrokinetics_pdb_parse.cpp index 3984127c6c8..51e35a3f96c 100644 --- a/src/electrokinetics_pdb_parse.cpp +++ b/src/electrokinetics_pdb_parse.cpp @@ -4,6 +4,9 @@ #include #include #include "electrokinetics_pdb_parse.hpp" +#include +#include +#include //#define DEBUG @@ -170,7 +173,16 @@ int pdb_parse_files(char* pdb_filename, char* itp_filename, particle_data* atom_ galloc( (void**) &atom_data->pdb_array_ATOM , (atom_data->pdb_n_particles+1)*sizeof(pdb_ATOM) ); pdb_ATOM* a = &atom_data->pdb_array_ATOM[atom_data->pdb_n_particles]; // See http://deposit.rcsb.org/adit/docs/pdb_atom_format.html#ATOM for the meaning of the format string - sscanf(pdb_line,"ATOM %5d %*4s%*c%*3s%*c%*4d%*c %8f %8f %8f %*6f %*6f %*4s%*2s%*2s",&a->i,&a->x,&a->y,&a->z); +// # sscanf(pdb_line,"ATOM %6d %*4s%*c%*4s%*c%*4d%*c %8f %8f %8f %*6f %*6f %*4s%*2s%*2s",&a->i,&a->x,&a->y,&a->z); + std::istringstream str(pdb_line); + + std::string tmp; + + str.ignore(246,' '); + str >> a->i; + str >> tmp >> tmp >> tmp >> tmp; + str >> a->x >> a->y >> a->z; + a->x /= 10.0; a->y /= 10.0; a->z /= 10.0; From d32869c021702a82a85c663cd907448e6ab7c0d2 Mon Sep 17 00:00:00 2001 From: Henri Menke Date: Wed, 16 Apr 2014 10:23:56 +0200 Subject: [PATCH 17/34] Removed indices_only from pdb-parser and wrote documentation for the User Guide --- doc/ug/electrokinetics.tex | 15 ++++++++++ src/electrokinetics_pdb_parse.cpp | 49 ++++++++++++------------------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/doc/ug/electrokinetics.tex b/doc/ug/electrokinetics.tex index 1401bc9b89d..c4c05289c1b 100644 --- a/doc/ug/electrokinetics.tex +++ b/doc/ug/electrokinetics.tex @@ -473,6 +473,21 @@ \subsection{\label{ssec:ek-reac-init}Initialization and Geometry Definition} a reaction, as described above. To successfully specify a region all the relevant arguments that go with the shape constraints must be provided. +\subsubsection{\label{sssec:ek-pdb-parse}Parsing PDB Files} + +\begin{essyntax} + \require{1 or 2 or 3}{electrokinetics} + \require{2}{pdb-parse} + \var{pdb\_filename} + \var{itp\_filename} + \begin{features} + \required[1]{ELECTROKINETICS} + \required[2]{EK_BOUNDARIES} + \required[3]{EK_REACTIONS} + \end{features} +\end{essyntax} +The \lit{electrokinetics pdb-parse} feature allows the user to parse simple PDB files, a file format introduced by the protein database to encode molecular structures. Together with a topology file (here \var{itp\_filename}) the structure gets interpolated to the \lit{electrokinetics} grid. For the input you will need to prepare a PDB file with a \lit{gromacs} force field to generate the topology file. Normally the PDB file extension is \lit{.pdb}, the topology file extension is \lit{.itp}. Obviously the PDB file is placed instead of \var{pdb\_filename} and the topology file instead of \var{itp\_filename}. \todo{At the moment this fails badly, if you try to parse incorrectly formatted files. This will be fixed in the future.} + \subsection{\label{ssec:ek-reac-output}Reaction-Specific Output} \begin{essyntax} diff --git a/src/electrokinetics_pdb_parse.cpp b/src/electrokinetics_pdb_parse.cpp index 3984127c6c8..9b7277a19ca 100644 --- a/src/electrokinetics_pdb_parse.cpp +++ b/src/electrokinetics_pdb_parse.cpp @@ -257,7 +257,7 @@ int calculate_bounding_box(bounding_box* bbox, particle_data* atom_data) { return pdb_SUCCESS; } -int populate_lattice(particle_data* atom_data, int indices_only) { +int populate_lattice(particle_data* atom_data) { /* * This routine will populate the lattice using the * values read from the pdb and itp files. @@ -319,34 +319,29 @@ int populate_lattice(particle_data* atom_data, int indices_only) { lowernode[1] = (lowernode[1] + ek_parameters.dim_y) % ek_parameters.dim_y; lowernode[2] = (lowernode[2] + ek_parameters.dim_z) % ek_parameters.dim_z; - if ( indices_only == 0 ) { - pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],lowernode[1],lowernode[2] )] - = b->charge * ( 1 - cellpos[0] ) * ( 1 - cellpos[1] ) * ( 1 - cellpos[2] ); + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],lowernode[1],lowernode[2] )] + = b->charge * ( 1 - cellpos[0] ) * ( 1 - cellpos[1] ) * ( 1 - cellpos[2] ); - pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,lowernode[1],lowernode[2] )] - = b->charge * cellpos[0] * ( 1 - cellpos[1] ) * ( 1 - cellpos[2] ); + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,lowernode[1],lowernode[2] )] + = b->charge * cellpos[0] * ( 1 - cellpos[1] ) * ( 1 - cellpos[2] ); - pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],( lowernode[1] + 1 ) % ek_parameters.dim_y,lowernode[2] )] - = b->charge * ( 1 - cellpos[0] ) * cellpos[1] * ( 1 - cellpos[2] ); + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],( lowernode[1] + 1 ) % ek_parameters.dim_y,lowernode[2] )] + = b->charge * ( 1 - cellpos[0] ) * cellpos[1] * ( 1 - cellpos[2] ); - pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],lowernode[1],( lowernode[2] + 1 ) % ek_parameters.dim_z )] - = b->charge * ( 1 - cellpos[0] ) * ( 1 - cellpos[1] ) * cellpos[2]; + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],lowernode[1],( lowernode[2] + 1 ) % ek_parameters.dim_z )] + = b->charge * ( 1 - cellpos[0] ) * ( 1 - cellpos[1] ) * cellpos[2]; - pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,( lowernode[1] + 1 ) % ek_parameters.dim_y,lowernode[2] )] - = b->charge * cellpos[0] * cellpos[1] * ( 1 - cellpos[2] ); + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,( lowernode[1] + 1 ) % ek_parameters.dim_y,lowernode[2] )] + = b->charge * cellpos[0] * cellpos[1] * ( 1 - cellpos[2] ); - pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,lowernode[1],( lowernode[2] + 1 ) % ek_parameters.dim_z )] - = b->charge * cellpos[0] * ( 1 - cellpos[1] ) * cellpos[2]; + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,lowernode[1],( lowernode[2] + 1 ) % ek_parameters.dim_z )] + = b->charge * cellpos[0] * ( 1 - cellpos[1] ) * cellpos[2]; - pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],( lowernode[1] + 1 ) % ek_parameters.dim_y,( lowernode[2] + 1 ) % ek_parameters.dim_z )] - = b->charge * ( 1 - cellpos[0] ) * cellpos[1] * cellpos[2]; + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],( lowernode[1] + 1 ) % ek_parameters.dim_y,( lowernode[2] + 1 ) % ek_parameters.dim_z )] + = b->charge * ( 1 - cellpos[0] ) * cellpos[1] * cellpos[2]; - pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,( lowernode[1] + 1 ) % ek_parameters.dim_y,( lowernode[2] + 1 ) % ek_parameters.dim_z )] - = b->charge * cellpos[0] * cellpos[1] * cellpos[2]; - } - else if ( indices_only == 1 ) { - printf("Only indices!\n"); - } + pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,( lowernode[1] + 1 ) % ek_parameters.dim_y,( lowernode[2] + 1 ) % ek_parameters.dim_z )] + = b->charge * cellpos[0] * cellpos[1] * cellpos[2]; // Interpolate lennard-jones parameters to boundary float r = pow(2,1./6.)*c->sigma; @@ -376,12 +371,7 @@ int populate_lattice(particle_data* atom_data, int indices_only) { printf("distance: %f <= %f\n\n", pow(lowernode[0] - a_x_shifted,2) + pow(lowernode[1] - a_y_shifted,2) + pow(lowernode[2] - a_z_shifted,2), pow(r/ek_parameters.agrid,2)); #endif if ( pow(lowernode[0] - a_x_shifted,2) + pow(lowernode[1] - a_y_shifted,2) + pow(lowernode[2] - a_z_shifted,2) <= pow(r/ek_parameters.agrid,2) ) { - if ( indices_only == 0 ) { - pdb_boundary_lattice[ek_parameters.dim_y*ek_parameters.dim_x*lowernode[2] + ek_parameters.dim_x*lowernode[1] + lowernode[0]] = 1; - } - else if ( indices_only == 1 ) { - printf("Only indices!\n"); - } + pdb_boundary_lattice[ek_parameters.dim_y*ek_parameters.dim_x*lowernode[2] + ek_parameters.dim_x*lowernode[1] + lowernode[0]] = 1; } } } @@ -405,7 +395,6 @@ int pdb_parse(char* pdb_filename, char* itp_filename) { */ /* BEGIN DEPLOY */ - int indices_only = 0; galloc( (void**) &pdb_charge_lattice, ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z * sizeof(float)); galloc( (void**) &pdb_boundary_lattice, ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z * sizeof(int)); for ( unsigned int i = 0; i < ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z; i++ ) { @@ -423,7 +412,7 @@ int pdb_parse(char* pdb_filename, char* itp_filename) { pdb_parse_files(pdb_filename, itp_filename, &atom_data); - populate_lattice(&atom_data, indices_only); + populate_lattice(&atom_data); return pdb_SUCCESS; } From fd9f02ef699f2b1fdb02d1d4e80ebf69733bd765 Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Tue, 22 Apr 2014 21:53:26 +0200 Subject: [PATCH 18/34] Fixed build without EKIN. Needs proper fix in the future. --- src/electrokinetics_pdb_parse.cpp | 4 ++++ src/electrokinetics_pdb_parse.hpp | 4 ++++ src/p3m_gpu_cuda.cu | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/electrokinetics_pdb_parse.cpp b/src/electrokinetics_pdb_parse.cpp index 616bd892c1f..4f3f9f5b409 100644 --- a/src/electrokinetics_pdb_parse.cpp +++ b/src/electrokinetics_pdb_parse.cpp @@ -10,6 +10,8 @@ //#define DEBUG +#ifdef ELECTROKINETICS + /* Replacements for bool variables */ const int pdb_SUCCESS = 0; const int pdb_ERROR = 1; @@ -428,3 +430,5 @@ int pdb_parse(char* pdb_filename, char* itp_filename) { return pdb_SUCCESS; } + +#endif diff --git a/src/electrokinetics_pdb_parse.hpp b/src/electrokinetics_pdb_parse.hpp index e18f48a9ac2..6eaff809ff7 100644 --- a/src/electrokinetics_pdb_parse.hpp +++ b/src/electrokinetics_pdb_parse.hpp @@ -4,6 +4,8 @@ #include "electrokinetics.hpp" +#ifdef ELECTROKINETICS + extern float* pdb_charge_lattice; extern int* pdb_boundary_lattice; @@ -15,3 +17,5 @@ int print_charge_field(char* filename); int print_boundary_lattice(char* filename); #endif + +#endif diff --git a/src/p3m_gpu_cuda.cu b/src/p3m_gpu_cuda.cu index a8414213f9e..0e6f8e1f2ce 100644 --- a/src/p3m_gpu_cuda.cu +++ b/src/p3m_gpu_cuda.cu @@ -577,7 +577,7 @@ extern "C" { block.x = 512 - mesh*mesh; block.x -= block.x / 32; grid.x = mesh / block.x + 1; - calculate_influence_function_device<<>>(cao, mesh, box, alpha, p3m_gpu_data.G_hat); + KERNELCALL(calculate_influence_function_device,grid,block,(cao, mesh, box, alpha, p3m_gpu_data.G_hat)); cudaThreadSynchronize(); } p3m_gpu_data_initialized = 1; From 60a40e9e6b40b4a614f732673d19f5c7f4b71eb4 Mon Sep 17 00:00:00 2001 From: Axel Arnold Date: Tue, 22 Apr 2014 22:24:31 +0200 Subject: [PATCH 19/34] Fix lbgpu sanity_check --- src/lbgpu.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lbgpu.cpp b/src/lbgpu.cpp index ce3e1605c7a..eb7df236adb 100644 --- a/src/lbgpu.cpp +++ b/src/lbgpu.cpp @@ -385,20 +385,20 @@ void lb_GPU_sanity_checks() if(this_node == 0){ if(lattice_switch & LATTICE_LB_GPU) { if (lbpar_gpu.agrid < 0.0) { - errtext = runtime_error(128); + char *errtext = runtime_error(128); ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set} "); } if (lbpar_gpu.tau < 0.0) { - errtext = runtime_error(128); + char *errtext = runtime_error(128); ERROR_SPRINTF(errtext,"{099 Lattice Boltzmann time step not set} "); } for(int i=0;i Date: Tue, 22 Apr 2014 22:25:03 +0200 Subject: [PATCH 20/34] Fix unused function warning in lbgpu_cuda --- src/p3m_gpu_cuda.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/p3m_gpu_cuda.cu b/src/p3m_gpu_cuda.cu index a8414213f9e..e927ab955a3 100644 --- a/src/p3m_gpu_cuda.cu +++ b/src/p3m_gpu_cuda.cu @@ -113,6 +113,8 @@ __host__ __device__ void static Aliasing_sums_ik ( int cao, REAL_TYPE box, REAL_ } /* Calculate influence function */ +#if 0 +// host version, not used anywhere void static calculate_influence_function ( int cao, int mesh, REAL_TYPE box, REAL_TYPE alpha, REAL_TYPE *G_hat ) { int NX,NY,NZ; @@ -146,6 +148,7 @@ void static calculate_influence_function ( int cao, int mesh, REAL_TYPE box, REA } } } +#endif __global__ void calculate_influence_function_device ( int cao, int mesh, REAL_TYPE box, REAL_TYPE alpha, REAL_TYPE *G_hat ) { From 4637bcaf81da42d0c42a2aad8ec162f1c6c68f5e Mon Sep 17 00:00:00 2001 From: Axel Arnold Date: Tue, 22 Apr 2014 22:25:48 +0200 Subject: [PATCH 21/34] Fix incorrect ! == in observable parser --- src/tcl/statistics_observable_tcl.cpp | 28 +++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/tcl/statistics_observable_tcl.cpp b/src/tcl/statistics_observable_tcl.cpp index 1cee9df0a4d..41c3d88c54c 100644 --- a/src/tcl/statistics_observable_tcl.cpp +++ b/src/tcl/statistics_observable_tcl.cpp @@ -418,7 +418,7 @@ int tclcommand_observable_density_profile(Tcl_Interp* interp, int argc, char** a int temp; profile_data* pdata; obs->fun = &observable_density_profile; - if (! tclcommand_parse_profile(interp, argc-1, argv+1, &temp, &obs->n, &pdata) == TCL_OK ) + if (tclcommand_parse_profile(interp, argc-1, argv+1, &temp, &obs->n, &pdata) != TCL_OK ) return TCL_ERROR; if (pdata->id_list==0) { Tcl_AppendResult(interp, "Error in radial_profile: particle ids/types not specified\n" , (char *)NULL); @@ -437,7 +437,7 @@ int tclcommand_observable_lb_velocity_profile(Tcl_Interp* interp, int argc, char int temp; profile_data* pdata; obs->fun = &observable_lb_velocity_profile; - if (! tclcommand_parse_profile(interp, argc-1, argv+1, &temp, &obs->n, &pdata) == TCL_OK ) + if (tclcommand_parse_profile(interp, argc-1, argv+1, &temp, &obs->n, &pdata) != TCL_OK ) return TCL_ERROR; obs->args=(void*)pdata; obs->n=3*pdata->xbins*pdata->ybins*pdata->zbins; @@ -451,7 +451,7 @@ int tclcommand_observable_radial_density_profile(Tcl_Interp* interp, int argc, c int temp; radial_profile_data* rpdata; obs->fun = &observable_radial_density_profile; - if (! tclcommand_parse_radial_profile(interp, argc-1, argv+1, &temp, &obs->n, &rpdata) == TCL_OK ) + if (tclcommand_parse_radial_profile(interp, argc-1, argv+1, &temp, &obs->n, &rpdata) != TCL_OK ) return TCL_ERROR; if (rpdata->id_list==0) { Tcl_AppendResult(interp, "Error in radial_profile: particle ids/types not specified\n" , (char *)NULL); @@ -467,7 +467,7 @@ int tclcommand_observable_radial_flux_density_profile(Tcl_Interp* interp, int ar int temp; radial_profile_data* rpdata; obs->fun = &observable_radial_flux_density_profile; - if (! tclcommand_parse_radial_profile(interp, argc-1, argv+1, &temp, &obs->n, &rpdata) == TCL_OK ) + if (tclcommand_parse_radial_profile(interp, argc-1, argv+1, &temp, &obs->n, &rpdata) != TCL_OK ) return TCL_ERROR; if (rpdata->id_list==0) { Tcl_AppendResult(interp, "Error in radial_profile: particle ids/types not specified\n" , (char *)NULL); @@ -483,7 +483,7 @@ int tclcommand_observable_flux_density_profile(Tcl_Interp* interp, int argc, cha int temp; profile_data* pdata; obs->fun = &observable_flux_density_profile; - if (! tclcommand_parse_profile(interp, argc-1, argv+1, &temp, &obs->n, &pdata) == TCL_OK ) + if (tclcommand_parse_profile(interp, argc-1, argv+1, &temp, &obs->n, &pdata) != TCL_OK ) return TCL_ERROR; if (pdata->id_list==0) { Tcl_AppendResult(interp, "Error in radial_profile: particle ids/types not specified\n" , (char *)NULL); @@ -502,7 +502,7 @@ int tclcommand_observable_lb_radial_velocity_profile(Tcl_Interp* interp, int arg int temp; radial_profile_data* rpdata; obs->fun = &observable_lb_radial_velocity_profile; - if (! tclcommand_parse_radial_profile(interp, argc-1, argv+1, &temp, &obs->n, &rpdata) == TCL_OK ) + if (tclcommand_parse_radial_profile(interp, argc-1, argv+1, &temp, &obs->n, &rpdata) != TCL_OK ) return TCL_ERROR; obs->args=(void*)rpdata; obs->n=3*rpdata->rbins*rpdata->phibins*rpdata->zbins; @@ -516,7 +516,7 @@ int tclcommand_observable_particle_currents(Tcl_Interp* interp, int argc, char** int temp; IntList* ids; obs->fun = &observable_particle_currents; - if (! parse_id_list(interp, argc-1, argv+1, &temp, &ids) == TCL_OK ) + if (parse_id_list(interp, argc-1, argv+1, &temp, &ids) != TCL_OK ) return TCL_ERROR; obs->args=(void*)ids; obs->n=3*ids->n; @@ -534,7 +534,7 @@ int tclcommand_observable_currents(Tcl_Interp* interp, int argc, char** argv, in int temp; IntList* ids; obs->fun = &observable_currents; - if (! parse_id_list(interp, argc-1, argv+1, &temp, &ids) == TCL_OK ) + if (parse_id_list(interp, argc-1, argv+1, &temp, &ids) != TCL_OK ) return TCL_ERROR; obs->args=(void*)ids; obs->n=3; @@ -551,7 +551,7 @@ int tclcommand_observable_dipole_moment(Tcl_Interp* interp, int argc, char** arg int temp; IntList* ids; obs->fun = &observable_dipole_moment; - if (! parse_id_list(interp, argc-1, argv+1, &temp, &ids) == TCL_OK ) + if (parse_id_list(interp, argc-1, argv+1, &temp, &ids) != TCL_OK ) return TCL_ERROR; obs->args=(void*)ids; obs->n=3; @@ -704,7 +704,7 @@ int tclcommand_observable_interacts_with(Tcl_Interp* interp, int argc, char** ar ids1=(IntList*)malloc(sizeof(IntList)); ids2=(IntList*)malloc(sizeof(IntList)); iw_params* iw_params_p=(iw_params*) malloc(sizeof(iw_params)); - if (! parse_id_list(interp, argc-1, argv+1, &temp, &ids1) == TCL_OK ) { + if (parse_id_list(interp, argc-1, argv+1, &temp, &ids1) != TCL_OK ) { free(ids1); free(ids2); free(iw_params_p); @@ -713,7 +713,7 @@ int tclcommand_observable_interacts_with(Tcl_Interp* interp, int argc, char** ar iw_params_p=(iw_params*)malloc(sizeof(iw_params)); iw_params_p->ids1=ids1; *change=1+temp; - if (! parse_id_list(interp, argc-3, argv+3, &temp, &ids2) == TCL_OK ) { + if (parse_id_list(interp, argc-3, argv+3, &temp, &ids2) != TCL_OK ) { free(ids1); free(ids2); free(iw_params_p); @@ -780,7 +780,7 @@ int tclcommand_observable_interacts_with(Tcl_Interp* interp, int argc, char** ar #define REGISTER_OBSERVABLE(name,parser,id) \ if (ARG_IS_S(2,#name)) { \ observables[id]=(observable*)malloc(sizeof(observable)); \ - if (parser(interp, argc-2, argv+2, &temp, observables[n_observables]) ==TCL_OK) { \ + if (parser(interp, argc-2, argv+2, &temp, observables[n_observables]) == TCL_OK) { \ n_observables++; \ argc-=1+temp; \ argv+=1+temp; \ @@ -993,7 +993,7 @@ int tclcommand_parse_profile(Tcl_Interp* interp, int argc, char** argv, int* cha pdata->zbins=1; while (argc>0) { if (ARG0_IS_S("ids") || ARG0_IS_S("types") || ARG0_IS_S("all")) { - if (!parse_id_list(interp, argc, argv, &temp, &pdata->id_list )==TCL_OK) { + if (parse_id_list(interp, argc, argv, &temp, &pdata->id_list ) != TCL_OK) { Tcl_AppendResult(interp, "Error reading profile: Error parsing particle id information\n" , (char *)NULL); return TCL_ERROR; } else { @@ -1128,7 +1128,7 @@ int tclcommand_parse_radial_profile(Tcl_Interp* interp, int argc, char** argv, i } while (argc>0) { if (ARG0_IS_S("ids") || ARG0_IS_S("types") || ARG0_IS_S("all")) { - if (!parse_id_list(interp, argc, argv, &temp, &pdata->id_list )==TCL_OK) { + if (parse_id_list(interp, argc, argv, &temp, &pdata->id_list ) != TCL_OK) { Tcl_AppendResult(interp, "Error reading profile: Error parsing particle id information\n" , (char *)NULL); return TCL_ERROR; } else { From 97181f6f0f4ac1e9413483402032c4381da0ce8a Mon Sep 17 00:00:00 2001 From: Axel Arnold Date: Wed, 23 Apr 2014 09:34:16 +0200 Subject: [PATCH 22/34] refactored force calculation and added option to reuse forces --- src/communication.cpp | 15 +++-- src/communication.hpp | 3 +- src/forces.cpp | 47 +++++++++++++ src/integrate.cpp | 134 +++++++------------------------------- src/tcl/integrate_tcl.cpp | 16 +++-- src/tuning.cpp | 4 +- 6 files changed, 93 insertions(+), 126 deletions(-) diff --git a/src/communication.cpp b/src/communication.cpp index b63b0285d1a..48aab428a4e 100644 --- a/src/communication.cpp +++ b/src/communication.cpp @@ -1172,18 +1172,19 @@ void mpi_remove_particle_slave(int pnode, int part) } /********************* REQ_INTEGRATE ********/ -int mpi_integrate(int n_steps) +int mpi_integrate(int n_steps, int reuse_forces) { if (!correlations_autoupdate) { - mpi_call(mpi_integrate_slave, -1, n_steps); - integrate_vv(n_steps, 0); + mpi_call(mpi_integrate_slave, n_steps, reuse_forces); + integrate_vv(n_steps, reuse_forces); COMM_TRACE(fprintf(stderr, "%d: integration task %d done.\n", \ this_node, n_steps)); return check_runtime_errors(); } else { for (int i=0; i' for integrating n steps \n", (char *)NULL); + Tcl_AppendResult(interp, "'integrate [reuse_forces] ' for integrating n steps and reusing unconditionally the given forces for the first step \n", (char *)NULL); Tcl_AppendResult(interp, "'integrate set' for printing integrator status \n", (char *)NULL); Tcl_AppendResult(interp, "'integrate set nvt' for enabling NVT integration or \n" , (char *)NULL); #ifdef NPT @@ -194,7 +194,7 @@ int tclcommand_integrate_set_npt_isotropic(Tcl_Interp *interp, int argc, char ** int tclcommand_integrate(ClientData data, Tcl_Interp *interp, int argc, char **argv) { - int n_steps; + int n_steps, reuse_forces = 0; INTEG_TRACE(fprintf(stderr,"%d: integrate:\n",this_node)); @@ -213,15 +213,21 @@ int tclcommand_integrate(ClientData data, Tcl_Interp *interp, int argc, char **a Tcl_AppendResult(interp, "unknown integrator method:\n", (char *)NULL); return tclcommand_integrate_print_usage(interp); } - } else if ( !ARG_IS_I(1,n_steps) ) return tclcommand_integrate_print_usage(interp); - + } else { + // actual integration + if (ARG1_IS_S("reuse_forces") && (argc > 2)) { + reuse_forces = 1; + argc--; argv++; + } + if ( !ARG_IS_I(1,n_steps) ) return tclcommand_integrate_print_usage(interp); + } /* go on with integrate */ if(n_steps < 0) { Tcl_AppendResult(interp, "illegal number of steps (must be >0) \n", (char *) NULL); return tclcommand_integrate_print_usage(interp);; } /* perform integration */ - if (mpi_integrate(n_steps)) + if (mpi_integrate(n_steps, reuse_forces)) return gather_runtime_errors(interp, TCL_OK); return TCL_OK; } diff --git a/src/tuning.cpp b/src/tuning.cpp index d2ef29f84fc..fea24e5aa22 100644 --- a/src/tuning.cpp +++ b/src/tuning.cpp @@ -49,14 +49,14 @@ double time_force_calc(int default_samples) int rds = timing_samples > 0 ? timing_samples : default_samples; int i; - if (mpi_integrate(0)) + if (mpi_integrate(0, 0)) return -1; /* perform force calculation test */ markTime(); for (i = 0; i < rds; i++) { mpi_bcast_event(INVALIDATE_SYSTEM); - if (mpi_integrate(0)) + if (mpi_integrate(0, 0)) return -1; } markTime(); From d8346affebd1714a21bea642ae8a345b84f7ae76 Mon Sep 17 00:00:00 2001 From: Axel Arnold Date: Wed, 23 Apr 2014 15:04:28 +0200 Subject: [PATCH 23/34] Fixed lb_init errorhandling --- src/initialize.cpp | 8 +++-- src/lb.cpp | 85 +++++++++++++++++++++++----------------------- src/lbgpu.cpp | 28 +++++++-------- 3 files changed, 61 insertions(+), 60 deletions(-) diff --git a/src/initialize.cpp b/src/initialize.cpp index b154302d546..2997e0c1092 100644 --- a/src/initialize.cpp +++ b/src/initialize.cpp @@ -165,10 +165,14 @@ void on_integration_start() reactions_sanity_checks(); #endif #ifdef LB - lb_sanity_checks(); + if(lattice_switch & LATTICE_LB) { + lb_sanity_checks(); + } #endif #ifdef LB_GPU - lb_GPU_sanity_checks(); + if(lattice_switch & LATTICE_LB_GPU) { + lb_GPU_sanity_checks(); + } #endif /********************************************/ diff --git a/src/lb.cpp b/src/lb.cpp index 8548cad5a66..da6e05427a5 100644 --- a/src/lb.cpp +++ b/src/lb.cpp @@ -1625,48 +1625,46 @@ int lb_sanity_checks() { char *errtext; int ret = 0; - if(lattice_switch & LATTICE_LB) { - if (lbpar.agrid <= 0.0) { - errtext = runtime_error(128); - ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set} "); - ret = 1; - } - if (lbpar.tau <= 0.0) { - errtext = runtime_error(128); - ERROR_SPRINTF(errtext,"{099 Lattice Boltzmann time step not set} "); - ret = 1; - } - if (lbpar.rho[0] <= 0.0) { - errtext = runtime_error(128); - ERROR_SPRINTF(errtext,"{100 Lattice Boltzmann fluid density not set} "); - ret = 1; - } - if (lbpar.viscosity[0] <= 0.0) { - errtext = runtime_error(128); - ERROR_SPRINTF(errtext,"{101 Lattice Boltzmann fluid viscosity not set} "); - ret = 1; - } - if (dd.use_vList && skin>=lbpar.agrid/2.0) { - errtext = runtime_error(128); - ERROR_SPRINTF(errtext, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} "); - ret = 1; - } - if (cell_structure.type != CELL_STRUCTURE_DOMDEC) { - errtext = runtime_error(128); - ERROR_SPRINTF(errtext, "{103 LB requires domain-decomposition cellsystem} "); - ret = -1; - } - else if (dd.use_vList && skin>=lbpar.agrid/2.0) { - errtext = runtime_error(128); - ERROR_SPRINTF(errtext, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} "); - ret = -1; - } - - if (thermo_switch & ~THERMO_LB) { - errtext = runtime_error(128); - ERROR_SPRINTF(errtext, "{122 LB must not be used with other thermostats} "); - ret = 1; - } + if (lbpar.agrid <= 0.0) { + errtext = runtime_error(128); + ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set} "); + ret = 1; + } + if (lbpar.tau <= 0.0) { + errtext = runtime_error(128); + ERROR_SPRINTF(errtext,"{099 Lattice Boltzmann time step not set} "); + ret = 1; + } + if (lbpar.rho[0] <= 0.0) { + errtext = runtime_error(128); + ERROR_SPRINTF(errtext,"{100 Lattice Boltzmann fluid density not set} "); + ret = 1; + } + if (lbpar.viscosity[0] <= 0.0) { + errtext = runtime_error(128); + ERROR_SPRINTF(errtext,"{101 Lattice Boltzmann fluid viscosity not set} "); + ret = 1; + } + if (dd.use_vList && skin>=lbpar.agrid/2.0) { + errtext = runtime_error(128); + ERROR_SPRINTF(errtext, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} "); + ret = 1; + } + if (cell_structure.type != CELL_STRUCTURE_DOMDEC) { + errtext = runtime_error(128); + ERROR_SPRINTF(errtext, "{103 LB requires domain-decomposition cellsystem} "); + ret = -1; + } + else if (dd.use_vList && skin>=lbpar.agrid/2.0) { + errtext = runtime_error(128); + ERROR_SPRINTF(errtext, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} "); + ret = -1; + } + + if (thermo_switch & ~THERMO_LB) { + errtext = runtime_error(128); + ERROR_SPRINTF(errtext, "{122 LB must not be used with other thermostats} "); + ret = 1; } return ret; } @@ -1875,7 +1873,8 @@ double pi[6] = { 0., 0., 0., 0., 0., 0. }; void lb_init() { LB_TRACE(printf("Begin initialzing fluid on CPU\n")); - if (lb_sanity_checks()) return; + lb_sanity_checks(); + if (check_runtime_errors()) return; /* initialize the local lattice domain */ init_lattice(&lblattice,lbpar.agrid,lbpar.tau); diff --git a/src/lbgpu.cpp b/src/lbgpu.cpp index eb7df236adb..1112d7546fa 100644 --- a/src/lbgpu.cpp +++ b/src/lbgpu.cpp @@ -383,24 +383,22 @@ int lb_lbnode_set_extforce_GPU(int ind[3], double f[3]) void lb_GPU_sanity_checks() { if(this_node == 0){ - if(lattice_switch & LATTICE_LB_GPU) { - if (lbpar_gpu.agrid < 0.0) { + if (lbpar_gpu.agrid < 0.0) { + char *errtext = runtime_error(128); + ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set} "); + } + if (lbpar_gpu.tau < 0.0) { + char *errtext = runtime_error(128); + ERROR_SPRINTF(errtext,"{099 Lattice Boltzmann time step not set} "); + } + for(int i=0;i Date: Wed, 23 Apr 2014 16:07:23 +0200 Subject: [PATCH 24/34] Fixed lb_init sanity checking --- src/lb.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/lb.cpp b/src/lb.cpp index da6e05427a5..ac85e6ae6df 100644 --- a/src/lb.cpp +++ b/src/lb.cpp @@ -1873,7 +1873,11 @@ double pi[6] = { 0., 0., 0., 0., 0., 0. }; void lb_init() { LB_TRACE(printf("Begin initialzing fluid on CPU\n")); - lb_sanity_checks(); + + if (lbpar.agrid <= 0.0) { + char *errtext = runtime_error(128); + ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set when initializing fluid} "); + } if (check_runtime_errors()) return; /* initialize the local lattice domain */ From d19bf34b2abcb3968b3b4845562af249a2b9570e Mon Sep 17 00:00:00 2001 From: Axel Arnold Date: Wed, 23 Apr 2014 16:36:36 +0200 Subject: [PATCH 25/34] lb no longer touches resort_particles --- doc/ug/run.tex | 23 ++++++++++++++++++++++- src/lb.cpp | 19 +++++-------------- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/doc/ug/run.tex b/doc/ug/run.tex index 3f9420a6d69..a5cd4754f78 100644 --- a/doc/ug/run.tex +++ b/doc/ug/run.tex @@ -24,7 +24,7 @@ \section{\texttt{integrate}: Running the simulation} \newescommand{integrate} \begin{essyntax} - \variant{1} integrate \var{steps} + \variant{1} integrate \opt{reuse_forces} \var{steps} \variant{2} integrate set \opt{nvt} \variant{3} integrate set npt_isotropic \var{p_{ext}} \var{piston} \opt{\var{x\: y\: z}} \opt{-cubic_box} \end{essyntax} @@ -34,6 +34,27 @@ \section{\texttt{integrate}: Running the simulation} \texttt{steps} as parameter integrates the system for \texttt{steps} time steps. +Note that this implementation of the Velocity Verlet algorithm reuses forces, +that is, they are computed once in the middle of the time step, but used twice, +at the beginning and end. However, in the first time step after setting up, +there are no forces present yet. Therefore, \es{} has to compute them before the +first time step. That has two consequences: first, random forces are redrawn, +resulting in a narrower distribution of the random forces, which we compensate +by stretching. Second, coupling forces of e.\,g. the Lattice Boltzmann fluid cannot +be computed and are therefore lacking in the first half time step. In order to +minimize these effects, \es{} has a quite conservative heuristics to decide whether +a change makes it necessary to recompute forces before the first time step. Therefore, +calling hundred times \texttt{integrate 1} does the same as \textt{integrate 100}, +apart from some small calling overhead. + +However, for checkpointing, there is no way for \es{} to tell that the forces that you +read back in actually match the parameters that are set. Therefore, \es{} would recompute +the forces before the first time step, which makes it essentially impossible to checkpoint +LB simulations, where it is vital to keep the coupling forces. Therefore, \texttt{integrate} +has an additional parameter \opt{reuse_forces}, which tells integrate to not recalculate +the forces for the first time step, but use that the values still stored with the particles. +Use this only if you are absolutely sure that the forces stored match your current setup! + Two methods for the integration can be set: For an NVT ensemble (thermostat) and for an NPT isotropic ensemble (barostat). The current method can be detected with the command \texttt{integrate set} without diff --git a/src/lb.cpp b/src/lb.cpp index ac85e6ae6df..0f2cfef21bb 100644 --- a/src/lb.cpp +++ b/src/lb.cpp @@ -880,20 +880,6 @@ int lb_lbfluid_save_checkpoint(char* filename, int binary) { return ES_OK; } int lb_lbfluid_load_checkpoint(char* filename, int binary) { - if (lattice_switch & LATTICE_LB) { - fprintf (stderr, "Loading an LB checkpoint requires first that all particles and forces are loaded into the system and then an integrate 0, this is required for the reformation of the the neighbor lists. After this integration the particle data must then be reloaded just prior to loading the LB checkpoint file. This is a rather inelegant hack to make the checkpointing work correctly.\n"); - recalc_forces = 0; //Indicates the forces need not be recalculated - resort_particles = 0; //Prevents a call of on_resort_particles which gets called when the particle data is reset and then set recalc_forces = 1 - } - else if (lattice_switch & LATTICE_LB_GPU) { - fprintf (stderr, "Loading an LB GPU checkpoint requires first that all particles and forces are loaded into the system and then an integrate 0, this is required for the reformation of the neighbor lists. After this integration the particle data must then be reloaded just prior to loading the LB GPU checkpoint file. This is a rather inelegant hack to make the checkpointing work correctly.\n"); - recalc_forces = 0; //Indicates the forces need not be recalculated - resort_particles = 0; //Prevents a call of on_resort_particles which gets called when the particle data is reset and then set recalc_forces = 1 - } - else { - fprintf (stderr, "To load an LB checkpoint one needs to have already initialized the LB fluid with the same grid size.\n"); - return ES_ERROR; - } if(lattice_switch & LATTICE_LB_GPU) { #ifdef LB_GPU FILE* cpfile; @@ -969,6 +955,11 @@ int lb_lbfluid_load_checkpoint(char* filename, int binary) { // mpi_bcast_lb_params(0); #endif } + else { + char *errtext = runtime_error(128); + ERROR_SPRINTF(errtext,"{To load an LB checkpoint one needs to have already initialized the LB fluid with the same grid size.}"); + return ES_ERROR; + } return ES_OK; } From f35daee5cdd8a2e9f8b918722de0c391949fb93d Mon Sep 17 00:00:00 2001 From: Axel Arnold Date: Wed, 23 Apr 2014 22:42:14 +0200 Subject: [PATCH 26/34] Fixed explicit template instantation --- src/EspressoSystemInterface.cpp | 6 ++++++ src/EspressoSystemInterface.hpp | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/EspressoSystemInterface.cpp b/src/EspressoSystemInterface.cpp index 32b47e7a32f..d89c3ada9d9 100644 --- a/src/EspressoSystemInterface.cpp +++ b/src/EspressoSystemInterface.cpp @@ -6,6 +6,12 @@ #include +/* Need explicite specialization, otherwise some compilers do not produce the objects. */ + +template class EspressoSystemInterface::const_iterator; +template class EspressoSystemInterface::const_iterator; +template class EspressoSystemInterface::const_iterator; + /********************************************************************************************/ template diff --git a/src/EspressoSystemInterface.hpp b/src/EspressoSystemInterface.hpp index 79ec8848461..59111496a52 100644 --- a/src/EspressoSystemInterface.hpp +++ b/src/EspressoSystemInterface.hpp @@ -153,12 +153,6 @@ class EspressoSystemInterface : public SystemInterface { bool m_splitParticleStructGpu; }; -/* Need explicite specialization, otherwise some compilers do not produce the objects. */ - -template class EspressoSystemInterface::const_iterator; -template class EspressoSystemInterface::const_iterator; -template class EspressoSystemInterface::const_iterator; - extern EspressoSystemInterface espressoSystemInterface; #endif From 025943bade4a44b1fc1cadf034397b9bf87e7339 Mon Sep 17 00:00:00 2001 From: Axel Arnold Date: Wed, 23 Apr 2014 22:54:32 +0200 Subject: [PATCH 27/34] Fixed typo --- doc/ug/run.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/ug/run.tex b/doc/ug/run.tex index a5cd4754f78..9f3b5aa81e8 100644 --- a/doc/ug/run.tex +++ b/doc/ug/run.tex @@ -44,7 +44,7 @@ \section{\texttt{integrate}: Running the simulation} be computed and are therefore lacking in the first half time step. In order to minimize these effects, \es{} has a quite conservative heuristics to decide whether a change makes it necessary to recompute forces before the first time step. Therefore, -calling hundred times \texttt{integrate 1} does the same as \textt{integrate 100}, +calling hundred times \texttt{integrate 1} does the same as \texttt{integrate 100}, apart from some small calling overhead. However, for checkpointing, there is no way for \es{} to tell that the forces that you From 9820e7c0da1b906a9be856a2d35b5ad5eedc1c8d Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Thu, 24 Apr 2014 10:15:28 +0200 Subject: [PATCH 28/34] Removed stray debug messages from p3m_gpu. --- src/p3m_gpu_cuda.cu | 27 +++++++++++++++++++-------- src/tcl/p3m_tcl.cpp | 2 -- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/p3m_gpu_cuda.cu b/src/p3m_gpu_cuda.cu index 0e6f8e1f2ce..1675c55d28f 100644 --- a/src/p3m_gpu_cuda.cu +++ b/src/p3m_gpu_cuda.cu @@ -19,7 +19,7 @@ /** \file p3m_gpu_cuda.cu * - * Cuda (.cu) file for the Lattice Boltzmann implementation on GPUs. + * Cuda (.cu) file for the P3M electrostatics method. * Header file \ref p3m_gpu.hpp . */ @@ -33,8 +33,7 @@ #include "config.hpp" #include "p3m_gpu.hpp" #include "utils.hpp" - -//#include +#include "EspressoSystemInterface.hpp" #ifdef ELECTROSTATICS @@ -508,12 +507,19 @@ extern "C" { */ void p3m_gpu_init(int cao, int mesh, REAL_TYPE alpha, REAL_TYPE box) { - gpu_init_particle_comm(); + puts("p3m_gpu_init()"); + // gpu_init_particle_comm(); int reinit_if = 0, mesh_changed = 0; + espressoSystemInterface.requestParticleStructGpu(); + if ( this_node == 0 ) { + + p3m_gpu_data.npart = gpu_get_global_particle_vars_pointer_host()->number_of_particles; + // printf("p3m_gpu_data.npart = %d\n", p3m_gpu_data.npart); + if((p3m_gpu_data_initialized == 0) || (p3m_gpu_data.alpha != alpha)) { p3m_gpu_data.alpha = alpha; reinit_if = 1; @@ -550,6 +556,7 @@ extern "C" { p3m_gpu_data.G_hat_host = (REAL_TYPE *)malloc(mesh3*sizeof(REAL_TYPE)); + // printf("mesh3 = %d, p3m_gpu_data.charge_mesh = %p\n", mesh3, p3m_gpu_data.charge_mesh); cufftDestroy(p3m_gpu_data.fft_plan); cufftPlan3d(&(p3m_gpu_data.fft_plan), mesh, mesh, mesh, CUFFT_PLAN_FLAG); @@ -573,10 +580,14 @@ extern "C" { // cudaMemcpy( p3m_gpu_data.G_hat, p3m_gpu_data.G_hat_host, mesh3*sizeof(REAL_TYPE), cudaMemcpyHostToDevice); dim3 grid(1,1,1); dim3 block(1,1,1); - block.y = block.z = mesh; - block.x = 512 - mesh*mesh; - block.x -= block.x / 32; + block.y = mesh; + block.z = 1; + block.x = 512 / mesh + 1; grid.x = mesh / block.x + 1; + grid.z = mesh; + + // printf("mesh %d, grid (%d %d %d), block (%d %d %d)\n", mesh, grid.x, grid.y, grid.z, block.x, block.y, block.z); + KERNELCALL(calculate_influence_function_device,grid,block,(cao, mesh, box, alpha, p3m_gpu_data.G_hat)); cudaThreadSynchronize(); } @@ -612,7 +623,7 @@ void p3m_gpu_add_farfield_force() { REAL_TYPE hi = mesh/box; REAL_TYPE prefactor = 1.0/(box*box*box*2.0); - cudaMemset( p3m_gpu_data.charge_mesh, 0, mesh3*sizeof(CUFFT_TYPE_COMPLEX)); + cuda_safe_mem(cudaMemset( p3m_gpu_data.charge_mesh, 0, mesh3*sizeof(CUFFT_TYPE_COMPLEX))); KERNELCALL(assign_charges, gridAssignment, threadsAssignment, (lb_particle_gpu,p3m_gpu_data.charge_mesh,mesh,cao,pos_shift,hi)); diff --git a/src/tcl/p3m_tcl.cpp b/src/tcl/p3m_tcl.cpp index b781542357c..5de9fd18f11 100644 --- a/src/tcl/p3m_tcl.cpp +++ b/src/tcl/p3m_tcl.cpp @@ -24,8 +24,6 @@ #include "p3m_tcl.hpp" #include "p3m.hpp" - - int tclcommand_inter_coulomb_parse_p3m_tune(Tcl_Interp * interp, int argc, char ** argv, int adaptive) { int cao = -1, n_interpol = -1; From bba3881ec17c71c056b12cfa9f0a2e21b49792e1 Mon Sep 17 00:00:00 2001 From: Axel Arnold Date: Thu, 24 Apr 2014 16:10:55 +0200 Subject: [PATCH 29/34] Workaround for compiling with CUDA/openmpi on MacOSX. --- src/EspressoSystemInterface.hpp | 1 - src/tcl/Makefile.am | 22 +++++++++++++++++++++- src/tcl/cuda_workaround.cu | 3 +++ 3 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 src/tcl/cuda_workaround.cu diff --git a/src/EspressoSystemInterface.hpp b/src/EspressoSystemInterface.hpp index 59111496a52..8b0ceb7198b 100644 --- a/src/EspressoSystemInterface.hpp +++ b/src/EspressoSystemInterface.hpp @@ -4,7 +4,6 @@ #define ESIF_TRACE(A) #include "SystemInterface.hpp" -#include "particle_data.hpp" #include "cuda_interface.hpp" class EspressoSystemInterface : public SystemInterface { diff --git a/src/tcl/Makefile.am b/src/tcl/Makefile.am index 8f7f9d6ab1e..50a05db489e 100644 --- a/src/tcl/Makefile.am +++ b/src/tcl/Makefile.am @@ -153,7 +153,6 @@ Espresso_install_CPPFLAGS = -D ESPRESSO_SCRIPTS_DEFAULT=\"$(scriptsdir)\" $(AM_C Espresso_install_SOURCES = scriptsdir.cpp main.cpp Espresso_install_LDADD = libEspressoTcl.la ../libEspresso.la - ESPRESSO = `echo Espresso | sed '$(transform)'`$(EXEEXT) ESPRESSO_INSTALL = `echo Espresso.install | sed '$(transform)'`$(EXEEXT) # rename Espresso after installation @@ -164,3 +163,24 @@ install-exec-hook: uninstall-local: -rm -f $(DESTDIR)$(bindir)/$(ESPRESSO) + +################################################################# +# Build an empty CUDA object +################################################################# +# this is necessary since Xcode 4.3 otherwise builds binaries +# with a broken dyld relocation table +if CUDA + +SUFFIXES=.cu + +cuda_verbose = $(cuda_verbose_@AM_V@) +cuda_verbose_ = $(cuda_verbose_@AM_DEFAULT_V@) +cuda_verbose_0 = @echo " NVCC $@"; +.cu.o: + $(NVCC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_NVCCFLAGS) $(NVCCFLAGS) -c -o $@ $< + +Espresso_SOURCES += cuda_workaround.cu +Espresso_install_SOURCES += cuda_workaround.cu + +endif diff --git a/src/tcl/cuda_workaround.cu b/src/tcl/cuda_workaround.cu new file mode 100644 index 00000000000..59183814966 --- /dev/null +++ b/src/tcl/cuda_workaround.cu @@ -0,0 +1,3 @@ +/* Just an empty file that is compiled as workaround for CUDA with Xcode 4.3 and openmpi. + If CUDA files are only included via archives, the linking breaks and creates unstartable binaries. + */ From 843e16925479149593404d1a2b7b3ca6b5b32679 Mon Sep 17 00:00:00 2001 From: Axel Arnold Date: Fri, 25 Apr 2014 11:38:50 +0200 Subject: [PATCH 30/34] Fixed integrator debug messages --- src/communication.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/communication.cpp b/src/communication.cpp index 48aab428a4e..0d007eeb52d 100644 --- a/src/communication.cpp +++ b/src/communication.cpp @@ -1199,7 +1199,7 @@ int mpi_integrate(int n_steps, int reuse_forces) void mpi_integrate_slave(int n_steps, int reuse_forces) { integrate_vv(n_steps, reuse_forces); - COMM_TRACE(fprintf(stderr, "%d: integration task %d done.\n", this_node, task)); + COMM_TRACE(fprintf(stderr, "%d: integration for %d n_steps with %d reuse_forces done.\n", this_node, n_steps, reuse_forces)); check_runtime_errors(); } From 5da530cf08a609353bbbff5af24033309f94afcd Mon Sep 17 00:00:00 2001 From: Axel Arnold Date: Mon, 28 Apr 2014 15:22:10 +0200 Subject: [PATCH 31/34] HARMONICFORCE now part of maxset --- maintainer/jenkins/configs/maxset.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/maintainer/jenkins/configs/maxset.hpp b/maintainer/jenkins/configs/maxset.hpp index 1049791bb45..70aaa3e760a 100644 --- a/maintainer/jenkins/configs/maxset.hpp +++ b/maintainer/jenkins/configs/maxset.hpp @@ -36,6 +36,7 @@ #define ELECTROKINETICS #define EK_BOUNDARIES #define EK_REACTION +#define HARMONICFORCE #endif #define AREA_FORCE_GLOBAL From eb12ec31be9b6a08488e1a56a895654b45d91170 Mon Sep 17 00:00:00 2001 From: Axel Arnold Date: Mon, 28 Apr 2014 16:01:20 +0200 Subject: [PATCH 32/34] Bumped compute model to 1.1, fixed compiling w/o cuda --- configure.ac | 4 ++-- doc/ug/installation.tex | 7 ++++--- src/EspressoSystemInterface.hpp | 8 +++++++- src/initialize.cpp | 2 +- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/configure.ac b/configure.ac index ba211e1a2ed..ad716176d7a 100644 --- a/configure.ac +++ b/configure.ac @@ -300,10 +300,10 @@ AS_IF([test x$with_cuda != xno],[ # NVCC compile check AC_MSG_CHECKING([whether CUDA compiles]) - # if no other compute capability is defined by the user, we require at least 1.1 + # if no other compute capability is defined by the user, we default to 2.0 case "$NVCCFLAGS" in *-arch=*) ;; - *) NVCCFLAGS="$NVCCFLAGS --ptxas-options=-v -gencode arch=compute_11,code=compute_11 -gencode arch=compute_20,code=compute_20" + *) NVCCFLAGS="$NVCCFLAGS --ptxas-options=-v -gencode arch=compute_20,code=compute_20" esac # use nvcc diff --git a/doc/ug/installation.tex b/doc/ug/installation.tex index ff4b382042b..d9ed7532b8b 100644 --- a/doc/ug/installation.tex +++ b/doc/ug/installation.tex @@ -155,9 +155,10 @@ \subsection{Options} be used to define compiler flags for the NVIDIA CUDA-compiler \texttt{nvcc}. For example, \texttt{NVCCFLAGS = "{}-gencode arch=compute_20,code=sm_20"{}} will compile code only for Fermi - cards. Default is to compile for compute models 1.1 and 2.0, - i.e. everything with a G90 chip or newer. Note that we require at - least compute model 1.1. + cards. Default is to compile for compute model 2.0, + i.e. everything with a Fermi chip or newer. Note that we require at + least compute model 1.1, that is G90. However, to use G90 (e.\,g.~Tesla + C1060), you need to manually specificy compute model 1.1. \end{description} \section{\texttt{make}: Compiling, testing and installing \es} diff --git a/src/EspressoSystemInterface.hpp b/src/EspressoSystemInterface.hpp index 8b0ceb7198b..409a02220be 100644 --- a/src/EspressoSystemInterface.hpp +++ b/src/EspressoSystemInterface.hpp @@ -101,16 +101,21 @@ class EspressoSystemInterface : public SystemInterface { enableParticleCommunication(); return m_needsFGpu; }; +#endif unsigned int npart_gpu() { +#ifdef CUDA return m_gpu_npart; +#else + return 0; +#endif }; -#endif protected: void gatherParticles(); void split_particle_struct(); +#ifdef CUDA void enableParticleCommunication() { if(!gpu_get_global_particle_vars_pointer_host()->communication_enabled) { ESIF_TRACE(puts("gpu communication not enabled;")); @@ -121,6 +126,7 @@ class EspressoSystemInterface : public SystemInterface { } }; void reallocDeviceMemory(int n); +#endif Vector3Container R; #ifdef ELECTROSTATICS diff --git a/src/initialize.cpp b/src/initialize.cpp index 2997e0c1092..3ac7b25fd8e 100644 --- a/src/initialize.cpp +++ b/src/initialize.cpp @@ -274,7 +274,7 @@ void on_particle_change() #ifdef LB_GPU lb_reinit_particles_gpu = 1; #endif -#if defined(LB_GPU) || defined (ELECTROSTATICS) +#ifdef CUDA reinit_particle_comm_gpu = 1; #endif invalidate_obs(); From 772543580fde6ca60ad2ae95449060efb4990930 Mon Sep 17 00:00:00 2001 From: Axel Arnold Date: Mon, 28 Apr 2014 17:15:46 +0200 Subject: [PATCH 33/34] Made HarmonicForce.lo to be included only with CUDA --- src/Makefile.am | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 7a7a2d5149c..afb41717d29 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -107,7 +107,7 @@ libEspresso_la_SOURCES = \ Vector.hpp \ SystemInterface.hpp \ EspressoSystemInterface.hpp EspressoSystemInterface.cpp \ - HarmonicForce.hpp HarmonicForce.cu + HarmonicForce.hpp # nonbonded potentials and forces libEspresso_la_SOURCES += \ @@ -196,7 +196,8 @@ CUDA_SOURCES = \ electrokinetics.hpp \ lbgpu_cuda.cu \ p3m_gpu_cuda.cu \ - EspressoSystemInterface_cuda.cu + EspressoSystemInterface_cuda.cu \ + HarmonicForce.cu libEspresso_la_SOURCES += $(CUDA_SOURCES) EXTRA_DIST += $(CUDA_SOURCES) From b458fc8c6267d391e4664f87d796c998212dbe32 Mon Sep 17 00:00:00 2001 From: Marcello Sega Date: Tue, 29 Apr 2014 17:26:39 +0200 Subject: [PATCH 34/34] fixes VMD crash with long lines in vtf files --- scripts/vtf.tcl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/scripts/vtf.tcl b/scripts/vtf.tcl index 2030da82e19..870fae0c94d 100644 --- a/scripts/vtf.tcl +++ b/scripts/vtf.tcl @@ -195,8 +195,20 @@ proc writevsf { file args } { lappend aids "$from:$to" } unset from - - puts $file "atom [join $aids ,] $desc" + # let's group atom ranges, so that there are no more than 8 per line + # This fixes a problem with the vmd plugin, and makes the vtf more + # readable anyway. + set start 0 + set maxlen 8 + set ll [llength [lrange $aids $start end ]] + while { $ll >= $maxlen } { + puts $file "atom [join [lrange $aids $start [expr $start + $maxlen -1]] ,] $desc" + incr start $maxlen + set ll [llength [lrange $aids $start end ]] + } + if { $start < [llength $aids ] } { + puts $file "atom [join [lrange $aids $start end] ,] $desc" + } } # Print bond data