From 1625353f39903de64e8290eafcd3ab1aaea19082 Mon Sep 17 00:00:00 2001
From: Henri Menke <henri@icp.uni-stuttgart.de>
Date: Mon, 7 Apr 2014 17:20:13 +0200
Subject: [PATCH 01/34] pdb-parser inserted into TCL interface

---
 src/Makefile.am                 |  1 +
 src/tcl/electrokinetics_tcl.cpp | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)
diff --git a/src/Makefile.am b/src/Makefile.am
index f225a884bdf..daf6dbe4691 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -45,6 +45,7 @@ libEspresso_la_SOURCES = \
 	cuda_init.hpp \
 	debug.cpp debug.hpp \
 	domain_decomposition.cpp domain_decomposition.hpp \
+	electrokinetics_pdb_parse.cpp electrokinetics_pdb_parse.hpp \
 	energy.cpp energy.hpp \
 	errorhandling.cpp errorhandling.hpp \
 	fft.cpp fft.hpp \
diff --git a/src/tcl/electrokinetics_tcl.cpp b/src/tcl/electrokinetics_tcl.cpp
index 4786330e241..581a6736432 100644
--- a/src/tcl/electrokinetics_tcl.cpp
+++ b/src/tcl/electrokinetics_tcl.cpp
@@ -5,6 +5,11 @@
 #include "lb-boundaries.hpp"
 #include "lb-boundaries_tcl.hpp"
 #include "initialize.hpp"
+#include "electrokinetics_pdb_parse.hpp"
+
+#ifdef ELECTROKINETICS
+extern int ek_initialized;
+#endif
 
 int tclcommand_electrokinetics(ClientData data, Tcl_Interp *interp, int argc, char **argv) {
 #ifndef ELECTROKINETICS
@@ -84,8 +89,36 @@ int tclcommand_electrokinetics(ClientData data, Tcl_Interp *interp, int argc, ch
     Tcl_AppendResult(interp, "                     [print density vtk #string]\n", (char *)NULL);
     Tcl_AppendResult(interp, "                     [print flux vtk #string]\n", (char *)NULL);
     Tcl_AppendResult(interp, "electrokinetics #int node #int #int #int print density\n", (char *)NULL);
+    Tcl_AppendResult(interp, "electrokinetics pdb-parse #string #string\n", (char *)NULL);
     return TCL_ERROR;
   }
+  else if(ARG0_IS_S("pdb-parse"))
+  {
+#ifndef EK_BOUNDARIES
+    Tcl_AppendResult(interp, "Feature EK_BOUNDARIES required", (char *) NULL);
+    return (TCL_ERROR);
+#else
+    argc--;
+    argv++;
+
+    if (argc != 2)
+    {
+      Tcl_AppendResult(interp, "You need to specify two filenames.\n", (char *) NULL);
+      Tcl_AppendResult(interp, "electrokinetics pdb-parse #string1 #string2\n", (char *)NULL);
+      Tcl_AppendResult(interp, "#string1 is the pdb-filename, #string2 is the itp-filename.\n", (char *)NULL);
+    }
+    if (!ek_initialized)
+    {
+      Tcl_AppendResult(interp, "Please initialize EK before you attempt parsing.", (char *) NULL);
+      return (TCL_ERROR);
+    }
+    if(pdb_parse(argv[0], argv[1]) != 0)
+    {
+      Tcl_AppendResult(interp, "Could not parse pdb- or itp-file. Please, check your format and filenames.", (char *) NULL);
+      return (TCL_ERROR);
+    }
+#endif
+  }
   else if(ARG0_IS_S("boundary")) 
   {
 #ifndef EK_BOUNDARIES

From 5c82cc8e1275165a718f19bd2ddaf5805cbaf126 Mon Sep 17 00:00:00 2001
From: Florian Weik <fweik@icp.uni-stuttgart.de>
Date: Wed, 9 Apr 2014 16:35:25 +0200
Subject: [PATCH 02/34] Added missing file.

---
 src/HarmonicPotential.h | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 src/HarmonicPotential.h

diff --git a/src/HarmonicPotential.h b/src/HarmonicPotential.h
new file mode 100644
index 00000000000..05dd156ac72
--- /dev/null
+++ b/src/HarmonicPotential.h
@@ -0,0 +1,38 @@
+#include "OneParticleForce.h"
+
+template<class Adapter>
+class HarmonicPotential : public OneParticleForce<Adapter>
+{
+ public:
+  void init(double _x, double _k) {
+    x0 = _x;
+    k = _k;
+  };
+
+  
+  void run(void);
+  void computeForces();
+  void writeForces();
+
+ private:
+  double x0;
+  double k;
+};
+
+template<class Adapter>
+void HarmonicPotential<Adapter>::writeForces() {
+}
+
+template<class Adapter>
+void HarmonicPotential<Adapter>::computeForces() {
+  typename Adapter::RType::const_iterator it;
+  for(it = this->a.getR().begin(); it != this->a.getR().end(); ++it) {
+    
+  }
+}
+
+template<class Adapter>
+void HarmonicPotential<Adapter>::run(void) {
+  this->computeForces();
+  this->writeForces();
+}

From 9cd192095cd87886f502c1f209a1d4d37f329b84 Mon Sep 17 00:00:00 2001
From: Florian Weik <fweik@icp.uni-stuttgart.de>
Date: Wed, 9 Apr 2014 17:06:56 +0200
Subject: [PATCH 03/34] Added access to gpu forces to SystemInterface and
 implementation.

---
 src/EspressoSystemInterface.hpp |  7 ++++++
 src/HarmonicPotential.h         | 38 ---------------------------------
 src/SystemInterface.hpp         |  4 ++++
 3 files changed, 11 insertions(+), 38 deletions(-)
 delete mode 100644 src/HarmonicPotential.h

diff --git a/src/EspressoSystemInterface.hpp b/src/EspressoSystemInterface.hpp
index 06261edcbb7..c1dc83e218b 100644
--- a/src/EspressoSystemInterface.hpp
+++ b/src/EspressoSystemInterface.hpp
@@ -45,6 +45,7 @@ class EspressoSystemInterface : public SystemInterface {
   bool hasQ() { return true; };
 #endif
 
+#ifdef CUDA
   float *rGpuBegin() { return m_r_gpu_begin; };
   float *rGpuEnd() { return m_r_gpu_end; };
   bool hasRGpu() { return true; };
@@ -81,6 +82,12 @@ class EspressoSystemInterface : public SystemInterface {
     return true;
   }
 
+  float *fGpuBegin() { return (float *)gpu_get_particle_force_pointer(); };
+  float *fGpuEnd() { return (float *)(gpu_get_particle_force_pointer()) + 3*m_gpu_npart; };
+  bool hasFGpu() { return true; };
+
+#endif
+
 protected:
   void gatherParticles();
   void split_particle_struct();
diff --git a/src/HarmonicPotential.h b/src/HarmonicPotential.h
deleted file mode 100644
index 05dd156ac72..00000000000
--- a/src/HarmonicPotential.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#include "OneParticleForce.h"
-
-template<class Adapter>
-class HarmonicPotential : public OneParticleForce<Adapter>
-{
- public:
-  void init(double _x, double _k) {
-    x0 = _x;
-    k = _k;
-  };
-
-  
-  void run(void);
-  void computeForces();
-  void writeForces();
-
- private:
-  double x0;
-  double k;
-};
-
-template<class Adapter>
-void HarmonicPotential<Adapter>::writeForces() {
-}
-
-template<class Adapter>
-void HarmonicPotential<Adapter>::computeForces() {
-  typename Adapter::RType::const_iterator it;
-  for(it = this->a.getR().begin(); it != this->a.getR().end(); ++it) {
-    
-  }
-}
-
-template<class Adapter>
-void HarmonicPotential<Adapter>::run(void) {
-  this->computeForces();
-  this->writeForces();
-}
diff --git a/src/SystemInterface.hpp b/src/SystemInterface.hpp
index 17b1bc51823..1cd4b55ea57 100644
--- a/src/SystemInterface.hpp
+++ b/src/SystemInterface.hpp
@@ -109,6 +109,10 @@ class SystemInterface {
   virtual bool hasQGpu() { return false; };
   virtual bool requestQGpu() { m_needsQGpu = hasQGpu(); return m_needsQGpu; }
 
+  virtual float *fGpuBegin() { return 0; };
+  virtual float *fGpuEnd() { return 0; };
+  virtual bool hasFGpu() { return false; };
+
   virtual const_real_iterator &qBegin() { return null_scalar; };
   virtual const const_real_iterator &qEnd() { return null_scalar; };
   virtual bool hasQ() { return false; };

From 1060d42a2b045e6a90930be914b3a275bac6a90a Mon Sep 17 00:00:00 2001
From: Florian Weik <fweik@icp.uni-stuttgart.de>
Date: Wed, 9 Apr 2014 22:13:49 +0200
Subject: [PATCH 04/34] Make Interface enable particle comm if needed.

---
 src/EspressoSystemInterface.cpp     |  1 +
 src/EspressoSystemInterface.hpp     | 25 +++++++++++++++++++++++++
 src/EspressoSystemInterface_cuda.cu |  1 +
 src/Makefile.am                     |  3 ++-
 src/SystemInterface.hpp             |  5 ++++-
 src/features.def                    |  3 +++
 src/forces.cpp                      |  6 ++++++
 src/tcl/Makefile.am                 |  3 ++-
 src/tcl/initialize_interpreter.cpp  |  4 ++++
 src/tcl/p3m_tcl.cpp                 |  1 -
 10 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/src/EspressoSystemInterface.cpp b/src/EspressoSystemInterface.cpp
index a01f614e3e3..e22ca247b7c 100644
--- a/src/EspressoSystemInterface.cpp
+++ b/src/EspressoSystemInterface.cpp
@@ -54,6 +54,7 @@ void EspressoSystemInterface::gatherParticles() {
   {
     if(gpu_get_global_particle_vars_pointer_host()->communication_enabled) {
       copy_part_data_to_gpu();
+      m_gpu_npart = gpu_get_global_particle_vars_pointer_host()->number_of_particles;
       if(m_splitParticleStructGpu && (this_node == 0)) 
 	split_particle_struct();
     }
diff --git a/src/EspressoSystemInterface.hpp b/src/EspressoSystemInterface.hpp
index c1dc83e218b..21a5d8338b6 100644
--- a/src/EspressoSystemInterface.hpp
+++ b/src/EspressoSystemInterface.hpp
@@ -53,6 +53,8 @@ class EspressoSystemInterface : public SystemInterface {
     m_needsRGpu = hasRGpu();
     m_splitParticleStructGpu |= m_needsRGpu;
     m_gpu |= m_needsRGpu;
+    if(m_gpu)
+      enableParticleCommunication();
     return m_needsRGpu; 
   };
 
@@ -63,6 +65,8 @@ class EspressoSystemInterface : public SystemInterface {
     m_needsVGpu = hasVGpu();
     m_splitParticleStructGpu |= m_needsVGpu;
     m_gpu |= m_needsVGpu;
+    if(m_gpu)
+      enableParticleCommunication();
     return m_needsVGpu; 
   };
 
@@ -73,24 +77,45 @@ class EspressoSystemInterface : public SystemInterface {
     m_needsQGpu = hasQGpu(); 
     m_splitParticleStructGpu |= m_needsQGpu;
     m_gpu |= m_needsQGpu;
+    if(m_gpu)
+      enableParticleCommunication();
     return m_needsQGpu; 
   };
 
   bool requestParticleStructGpu() {
     m_needsParticleStructGpu = true;
     m_gpu |= m_needsParticleStructGpu;
+    if(m_gpu)
+      enableParticleCommunication();
     return true;
   }
 
   float *fGpuBegin() { return (float *)gpu_get_particle_force_pointer(); };
   float *fGpuEnd() { return (float *)(gpu_get_particle_force_pointer()) + 3*m_gpu_npart; };
   bool hasFGpu() { return true; };
+  bool requestFGpu() {
+    m_needsFGpu = hasFGpu();
+    m_gpu |= m_needsFGpu;
+    if(m_gpu)
+      enableParticleCommunication();
+    return m_needsFGpu;
+  };
+
+  unsigned int npart_gpu() {
+    return m_gpu_npart;
+  };
 
 #endif
 
 protected:
   void gatherParticles();
   void split_particle_struct();
+  void enableParticleCommunication() {
+    if(!gpu_get_global_particle_vars_pointer_host()->communication_enabled) {
+      gpu_init_particle_comm();
+      cuda_bcast_global_part_params();
+    }
+  };
 
   Vector3Container R;
   #ifdef ELECTROSTATICS
diff --git a/src/EspressoSystemInterface_cuda.cu b/src/EspressoSystemInterface_cuda.cu
index 33e42199890..5ce1aee85b7 100644
--- a/src/EspressoSystemInterface_cuda.cu
+++ b/src/EspressoSystemInterface_cuda.cu
@@ -1,3 +1,4 @@
+
 #include "EspressoSystemInterface.hpp"
 #include "cuda_interface.hpp"
 #include "cuda_utils.hpp"
diff --git a/src/Makefile.am b/src/Makefile.am
index f225a884bdf..6896838df61 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -105,7 +105,8 @@ libEspresso_la_SOURCES = \
 	ghmc.cpp ghmc.hpp \
 	Vector.hpp \
 	SystemInterface.hpp \
-	EspressoSystemInterface.hpp EspressoSystemInterface.cpp
+	EspressoSystemInterface.hpp EspressoSystemInterface.cpp \
+	HarmonicForce.hpp HarmonicForce.cu
 
 # nonbonded potentials and forces
 libEspresso_la_SOURCES += \
diff --git a/src/SystemInterface.hpp b/src/SystemInterface.hpp
index 1cd4b55ea57..bf124e098d6 100644
--- a/src/SystemInterface.hpp
+++ b/src/SystemInterface.hpp
@@ -112,6 +112,7 @@ class SystemInterface {
   virtual float *fGpuBegin() { return 0; };
   virtual float *fGpuEnd() { return 0; };
   virtual bool hasFGpu() { return false; };
+  virtual bool requestFGpu() { m_needsFGpu = hasFGpu(); return m_needsFGpu; }
 
   virtual const_real_iterator &qBegin() { return null_scalar; };
   virtual const const_real_iterator &qEnd() { return null_scalar; };
@@ -119,12 +120,14 @@ class SystemInterface {
   virtual bool requestQ() { m_needsQ = hasQ(); return m_needsQ; }
 
   virtual unsigned int npart() = 0;
+  virtual unsigned int npart_gpu() = 0;
   virtual Vector3 box() = 0;
 
   virtual bool needsR() { return m_needsR; };
   virtual bool needsRGpu() { return m_needsRGpu;};
   virtual bool needsQGpu() { return m_needsQGpu;};
   virtual bool needsQ() { return m_needsQ;};
+  virtual bool needsFGpu() { return m_needsFGpu; };
   
 protected:
   bool m_needsR;
@@ -133,7 +136,7 @@ class SystemInterface {
   bool m_needsRGpu;
   bool m_needsVGpu;
   bool m_needsQGpu;
-
+  bool m_needsFGpu;
 };
 
 #endif
diff --git a/src/features.def b/src/features.def
index ea38a1b30a7..1967622eae5 100644
--- a/src/features.def
+++ b/src/features.def
@@ -28,6 +28,8 @@ NPT
 GHMC
 CATALYTIC_REACTIONS
 
+HARMONICFORCE requires CUDA
+
 /* Rotation */
 ROTATION
 ROTATIONAL_INERTIA    implies ROTATION
@@ -134,6 +136,7 @@ OLD_DIHEDRAL notest
 /* turn off nonbonded interactions within molecules */
 NO_INTRA_NB	notest
 
+
 /* Debugging */
 ADDITIONAL_CHECKS
 ASYNC_BARRIER
diff --git a/src/forces.cpp b/src/forces.cpp
index 88a51954227..e7e1f07badd 100644
--- a/src/forces.cpp
+++ b/src/forces.cpp
@@ -54,6 +54,7 @@
 #include "iccp3m.hpp"
 #include "p3m_gpu.hpp"
 #include "cuda_interface.hpp"
+#include "HarmonicForce.hpp"
 
 #include "EspressoSystemInterface.hpp"
 
@@ -75,6 +76,11 @@ void force_calc()
 
   espressoSystemInterface.update();
 
+#ifdef HARMONICFORCE
+  if(harmonicForce) 
+    harmonicForce->calc(espressoSystemInterface);
+#endif
+
 #ifdef LB_GPU
 #ifdef SHANCHEN
   if (lattice_switch & LATTICE_LB_GPU && this_node == 0) lattice_boltzmann_calc_shanchen_gpu();
diff --git a/src/tcl/Makefile.am b/src/tcl/Makefile.am
index a0a5c273713..8f7f9d6ab1e 100644
--- a/src/tcl/Makefile.am
+++ b/src/tcl/Makefile.am
@@ -90,7 +90,8 @@ libEspressoTcl_la_SOURCES += \
 	soft_sphere_tcl.cpp soft_sphere_tcl.hpp \
 	steppot_tcl.cpp steppot_tcl.hpp \
 	tab_tcl.cpp tab_tcl.hpp \
-	tunable_slip_tcl.cpp tunable_slip_tcl.hpp
+	tunable_slip_tcl.cpp tunable_slip_tcl.hpp \
+	harmonic_force_tcl.cpp harmonic_force_tcl.hpp
 
 # bonded potentials and forces
 libEspressoTcl_la_SOURCES += \
diff --git a/src/tcl/initialize_interpreter.cpp b/src/tcl/initialize_interpreter.cpp
index 8cdfbf55e66..32f265af331 100644
--- a/src/tcl/initialize_interpreter.cpp
+++ b/src/tcl/initialize_interpreter.cpp
@@ -55,6 +55,7 @@
 #include "ghmc_tcl.hpp"
 #include "tuning.hpp"
 #include "electrokinetics_tcl.hpp"
+#include "harmonic_force_tcl.hpp"
 
 
 #ifdef TK
@@ -213,6 +214,9 @@ static void register_tcl_commands(Tcl_Interp* interp) {
   REGISTER_COMMAND("galilei_transform", tclcommand_galilei_transform);
   REGISTER_COMMAND("time_integration", tclcommand_time_integration);
   REGISTER_COMMAND("electrokinetics", tclcommand_electrokinetics);
+#ifdef HARMONICFORCE
+  REGISTER_COMMAND("harmonic_force", tclcommand_harmonic_force);
+#endif
 }
 
 static void register_global_variables(Tcl_Interp *interp)
diff --git a/src/tcl/p3m_tcl.cpp b/src/tcl/p3m_tcl.cpp
index e06e553a313..b781542357c 100644
--- a/src/tcl/p3m_tcl.cpp
+++ b/src/tcl/p3m_tcl.cpp
@@ -134,7 +134,6 @@ int tclcommand_inter_coulomb_parse_p3m(Tcl_Interp * interp, int argc, char ** ar
   }
 
   if (ARG0_IS_S("gpu")) {
-    puts("Setting coulomb method to COULOMB_P3M_GPU");
     coulomb.method = COULOMB_P3M_GPU;
     
     argc--;

From 4b20ec8b4fc84ae7fdb4fce96121df8563679734 Mon Sep 17 00:00:00 2001
From: Florian Weik <fweik@icp.uni-stuttgart.de>
Date: Wed, 9 Apr 2014 22:16:50 +0200
Subject: [PATCH 05/34] Added simple test force to illustrate interface usage.

---
 src/HarmonicForce.cu           | 44 +++++++++++++++++++++++++++
 src/HarmonicForce.hpp          | 37 +++++++++++++++++++++++
 src/tcl/harmonic_force_tcl.cpp | 55 ++++++++++++++++++++++++++++++++++
 src/tcl/harmonic_force_tcl.hpp | 33 ++++++++++++++++++++
 4 files changed, 169 insertions(+)
 create mode 100644 src/HarmonicForce.cu
 create mode 100644 src/HarmonicForce.hpp
 create mode 100644 src/tcl/harmonic_force_tcl.cpp
 create mode 100644 src/tcl/harmonic_force_tcl.hpp

diff --git a/src/HarmonicForce.cu b/src/HarmonicForce.cu
new file mode 100644
index 00000000000..54411557271
--- /dev/null
+++ b/src/HarmonicForce.cu
@@ -0,0 +1,44 @@
+#include "HarmonicForce.hpp"
+
+#include <stdio.h>
+
+#ifdef HARMONICFORCE
+
+HarmonicForce *harmonicForce = 0;
+
+__global__ void HarmonicForce_kernel(float x, float y, float z, float k,
+				     int n, float *pos, float *f) {
+
+  int id = blockIdx.x * blockDim.x + threadIdx.x;
+
+  if(id <= n)
+    return;
+
+  f[3*id + 0] = k*(x - pos[3*id + 0]);
+  f[3*id + 1] = k*(y - pos[3*id + 1]);
+  f[3*id + 2] = k*(z - pos[3*id + 2]);
+}
+
+
+void HarmonicForce_kernel_wrapper(float x, float y, float z, float k, int n, float *pos, float *f) {
+  dim3 grid(1,1,1);
+  dim3 block(1,1,1);
+  
+  printf("n %d\n", n);
+
+  if(n <= 512) {
+    puts("n <= 512");
+    grid.x = 1;
+    block.x = n;
+  } else {
+    grid.x = n/512 + 1;
+    block.x = 512;
+  }
+
+  printf("Calling HarmonicForce_kernel<<<(%d %d %d),(%d %d %d)>>>\n",
+	 grid.x, grid.y, grid.z, block.x, block.y, block.z);
+
+  HarmonicForce_kernel<<<grid,block>>>(x, y, z, k, n, pos, f);
+}
+
+#endif
diff --git a/src/HarmonicForce.hpp b/src/HarmonicForce.hpp
new file mode 100644
index 00000000000..c7e563d43e4
--- /dev/null
+++ b/src/HarmonicForce.hpp
@@ -0,0 +1,37 @@
+#include "config.hpp"
+
+#ifdef HARMONICFORCE
+
+#include "SystemInterface.hpp"
+#include <iostream>
+
+void HarmonicForce_kernel_wrapper(float x, float y, float z, float k,
+		     int n, float *pos, float *f);
+
+class HarmonicForce {
+public:
+  HarmonicForce(float x1, float x2, float x3, float _k, SystemInterface &s) {
+    x = x1;
+    y = x2;
+    z = x3;
+    k = _k;
+
+    if(!s.requestFGpu())
+      std::cerr << "HarmonicForce needs access to forces on GPU!" << std::endl;
+
+    if(!s.requestRGpu())
+      std::cerr << "HarmonicForce needs access to positions on GPU!" << std::endl;
+
+  }; 
+  void calc(SystemInterface &s) {
+    HarmonicForce_kernel_wrapper(x,y,z,k,s.npart_gpu(),
+					 s.rGpuBegin(), s.fGpuBegin());
+  };
+protected:
+  float x,y,z;
+  float k;
+};
+
+extern HarmonicForce *harmonicForce;
+
+#endif
diff --git a/src/tcl/harmonic_force_tcl.cpp b/src/tcl/harmonic_force_tcl.cpp
new file mode 100644
index 00000000000..b341fdc6420
--- /dev/null
+++ b/src/tcl/harmonic_force_tcl.cpp
@@ -0,0 +1,55 @@
+/*
+  Copyright (C) 2010,2011,2012,2013 The ESPResSo project
+  Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 
+    Max-Planck-Institute for Polymer Research, Theory Group
+  
+  This file is part of ESPResSo.
+  
+  ESPResSo is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+  
+  ESPResSo is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+  
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+*/
+
+#include "harmonic_force_tcl.hpp"
+
+#ifdef HARMONICFORCE
+
+#include "HarmonicForce.hpp"
+#include "EspressoSystemInterface.hpp"
+
+int tclcommand_harmonic_force(ClientData data, Tcl_Interp *interp, int argc, char **argv) {
+  DoubleList dl;
+
+  init_doublelist(&dl);
+
+  if(!ARG1_IS_DOUBLELIST(dl)) {
+    puts("Expected double list");
+    return TCL_ERROR;
+  }
+
+  if(dl.n != 4) {
+    puts("Wrong # of args");
+    for(int i = 0; i < dl.n; i++)
+      printf("%d %e\n", i, dl.e[i]);
+
+    return TCL_ERROR;
+  }
+
+  printf("x %e %e %e, k %e\n", dl.e[0], dl.e[1],dl.e[2],dl.e[3]);
+
+  harmonicForce =  new HarmonicForce(dl.e[0], dl.e[1],dl.e[2],dl.e[3], espressoSystemInterface);
+
+  return TCL_OK;
+}
+
+
+#endif
diff --git a/src/tcl/harmonic_force_tcl.hpp b/src/tcl/harmonic_force_tcl.hpp
new file mode 100644
index 00000000000..4015a8525f5
--- /dev/null
+++ b/src/tcl/harmonic_force_tcl.hpp
@@ -0,0 +1,33 @@
+/*
+  Copyright (C) 2010,2011,2012,2013 The ESPResSo project
+  Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 
+    Max-Planck-Institute for Polymer Research, Theory Group
+  
+  This file is part of ESPResSo.
+  
+  ESPResSo is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+  
+  ESPResSo is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+  
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+*/
+
+#ifndef HARMONIC_FORCE_TCL_H
+#define HARMONIC_FORCE_TCL_H
+
+#include "parser.hpp"
+
+#ifdef HARMONICFORCE
+
+int tclcommand_harmonic_force(ClientData data, Tcl_Interp *interp, int argc, char **argv);
+
+#endif
+
+#endif

From 0d3a911cc80ad1fe4ea0737eb35538d5574e51f2 Mon Sep 17 00:00:00 2001
From: Florian Weik <fweik@icp.uni-stuttgart.de>
Date: Tue, 15 Apr 2014 10:35:46 +0200
Subject: [PATCH 06/34] Removed stray debug messages.

---
 src/HarmonicForce.cu           | 8 +++++---
 src/tcl/harmonic_force_tcl.cpp | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/HarmonicForce.cu b/src/HarmonicForce.cu
index 54411557271..2db93211037 100644
--- a/src/HarmonicForce.cu
+++ b/src/HarmonicForce.cu
@@ -1,5 +1,6 @@
 #include "HarmonicForce.hpp"
 
+#include "cuda_utils.hpp"
 #include <stdio.h>
 
 #ifdef HARMONICFORCE
@@ -24,7 +25,8 @@ void HarmonicForce_kernel_wrapper(float x, float y, float z, float k, int n, flo
   dim3 grid(1,1,1);
   dim3 block(1,1,1);
   
-  printf("n %d\n", n);
+  if(n == 0)
+    return;
 
   if(n <= 512) {
     puts("n <= 512");
@@ -35,8 +37,8 @@ void HarmonicForce_kernel_wrapper(float x, float y, float z, float k, int n, flo
     block.x = 512;
   }
 
-  printf("Calling HarmonicForce_kernel<<<(%d %d %d),(%d %d %d)>>>\n",
-	 grid.x, grid.y, grid.z, block.x, block.y, block.z);
+  // printf("Calling HarmonicForce_kernel<<<(%d %d %d),(%d %d %d)>>>\n",
+  // 	 grid.x, grid.y, grid.z, block.x, block.y, block.z);
 
   HarmonicForce_kernel<<<grid,block>>>(x, y, z, k, n, pos, f);
 }
diff --git a/src/tcl/harmonic_force_tcl.cpp b/src/tcl/harmonic_force_tcl.cpp
index b341fdc6420..6f351185cde 100644
--- a/src/tcl/harmonic_force_tcl.cpp
+++ b/src/tcl/harmonic_force_tcl.cpp
@@ -44,7 +44,7 @@ int tclcommand_harmonic_force(ClientData data, Tcl_Interp *interp, int argc, cha
     return TCL_ERROR;
   }
 
-  printf("x %e %e %e, k %e\n", dl.e[0], dl.e[1],dl.e[2],dl.e[3]);
+  // printf("x %e %e %e, k %e\n", dl.e[0], dl.e[1],dl.e[2],dl.e[3]);
 
   harmonicForce =  new HarmonicForce(dl.e[0], dl.e[1],dl.e[2],dl.e[3], espressoSystemInterface);
 

From 8d1a568310aca498b5b4cbe5ec10ee3ea0b0d77c Mon Sep 17 00:00:00 2001
From: Florian Weik <fweik@icp.uni-stuttgart.de>
Date: Tue, 15 Apr 2014 10:37:51 +0200
Subject: [PATCH 07/34] HarmonicForce: Added Cuda wrappers.

---
 src/HarmonicForce.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/HarmonicForce.cu b/src/HarmonicForce.cu
index 2db93211037..fd86fad4580 100644
--- a/src/HarmonicForce.cu
+++ b/src/HarmonicForce.cu
@@ -40,7 +40,7 @@ void HarmonicForce_kernel_wrapper(float x, float y, float z, float k, int n, flo
   // printf("Calling HarmonicForce_kernel<<<(%d %d %d),(%d %d %d)>>>\n",
   // 	 grid.x, grid.y, grid.z, block.x, block.y, block.z);
 
-  HarmonicForce_kernel<<<grid,block>>>(x, y, z, k, n, pos, f);
+KERNELCALL(HarmonicForce_kernel,grid,block,(x, y, z, k, n, pos, f))
 }
 
 #endif

From 24acd6ac3d7c5336b9fa718e6d3e2a964bce3142 Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Tue, 15 Apr 2014 11:01:40 +0200
Subject: [PATCH 08/34] Fixed time_step check in correlator

---
 src/statistics_correlation.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/statistics_correlation.cpp b/src/statistics_correlation.cpp
index e1d0105f728..a1064811448 100644
--- a/src/statistics_correlation.cpp
+++ b/src/statistics_correlation.cpp
@@ -157,7 +157,7 @@ int double_correlation_init(double_correlation* self, double dt, unsigned int ta
     return 15;
   }
   // check if dt is a multiple of the md timestep
-  if ( abs(dt/time_step - round(dt/time_step)>1e-6 ) ) 
+  if ( abs(dt/time_step - round(dt/time_step)) >1e-6 ) 
     return 16;
   self->dt = dt;
   self->update_frequency = (int) floor(dt/time_step);

From dae0b8197a7538c15063b1ecd6acb50396a19197 Mon Sep 17 00:00:00 2001
From: Florian Weik <fweik@icp.uni-stuttgart.de>
Date: Tue, 15 Apr 2014 11:13:19 +0200
Subject: [PATCH 09/34] Fix build errror when LB_BOUDARIES not set.

---
 src/statistics_fluid.cpp | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/statistics_fluid.cpp b/src/statistics_fluid.cpp
index 8985ebd143e..a4d80ff8a0e 100644
--- a/src/statistics_fluid.cpp
+++ b/src/statistics_fluid.cpp
@@ -97,19 +97,22 @@ void lb_calc_fluid_temp(double *result) {
   int number_of_non_boundary_nodes = 0;
 
   for (x=1; x<=lblattice.grid[0]; x++) {
-  for (y=1; y<=lblattice.grid[1]; y++) {
-  for (z=1; z<=lblattice.grid[2]; z++) {
+    for (y=1; y<=lblattice.grid[1]; y++) {
+      for (z=1; z<=lblattice.grid[2]; z++) {
 
-    index = get_linear_index(x,y,z,lblattice.halo_grid);
+	index = get_linear_index(x,y,z,lblattice.halo_grid);
 
-    if ( !lbfields[index].boundary )
-    {
+#ifdef LB_BOUNDARIES
+	if ( !lbfields[index].boundary )
+#endif
+	  {
 	    lb_calc_local_fields(index, &rho, j, NULL);
-      temp += scalar(j,j);
-      number_of_non_boundary_nodes++;
+	    temp += scalar(j,j);
+	    number_of_non_boundary_nodes++;
+	  }
+      }
     }
-
-  }}}
+  }
 
   temp *= 1./(3.*lbpar.rho[0]*number_of_non_boundary_nodes*lbpar.tau*lbpar.tau*lblattice.agrid)/n_nodes;
 

From 7252c7008e2e24eb436051a02c3e7d34bc8ed174 Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Tue, 15 Apr 2014 12:01:16 +0200
Subject: [PATCH 10/34] refactored on_integrate_start

---
 src/communication.cpp    |   6 +-
 src/energy.cpp           |   2 +-
 src/initialize.cpp       | 142 ++++++---------------------------------
 src/integrate.cpp        |  67 +++++++++++++++++-
 src/integrate.hpp        |  10 ++-
 src/interaction_data.cpp |   2 +-
 src/interaction_data.hpp |   2 +-
 src/lb.cpp               |  47 +++++++++----
 src/lbgpu.cpp            |  26 +++++++
 src/lbgpu.hpp            |   1 +
 src/pressure.cpp         |   2 +-
 src/python/integrate.pxd |   2 +-
 src/python/integrate.pyx |   2 +-
 src/reaction.cpp         |  20 ++++++
 src/reaction.hpp         |   2 +
 15 files changed, 187 insertions(+), 146 deletions(-)

diff --git a/src/communication.cpp b/src/communication.cpp
index 69b4c259532..b63b0285d1a 100644
--- a/src/communication.cpp
+++ b/src/communication.cpp
@@ -1176,14 +1176,14 @@ int mpi_integrate(int n_steps)
 {
   if (!correlations_autoupdate) {
     mpi_call(mpi_integrate_slave, -1, n_steps);
-    integrate_vv(n_steps);
+    integrate_vv(n_steps, 0);
     COMM_TRACE(fprintf(stderr, "%d: integration task %d done.\n", \
                        this_node, n_steps));
     return check_runtime_errors();
   } else {
     for (int i=0; i<n_steps; i++) {
       mpi_call(mpi_integrate_slave, -1, 1);
-      integrate_vv(1);
+      integrate_vv(1, 0);
       COMM_TRACE(fprintf(stderr, "%d: integration task %d done.\n",     \
                          this_node, i));
       if (check_runtime_errors())
@@ -1197,7 +1197,7 @@ int mpi_integrate(int n_steps)
 
 void mpi_integrate_slave(int pnode, int task)
 {
-  integrate_vv(task);
+  integrate_vv(task, 0);
   COMM_TRACE(fprintf(stderr, "%d: integration task %d done.\n", this_node, task));
 
   check_runtime_errors();
diff --git a/src/energy.cpp b/src/energy.cpp
index da9ebe5482b..45b8180d3d4 100644
--- a/src/energy.cpp
+++ b/src/energy.cpp
@@ -46,7 +46,7 @@ void calc_long_range_energies();
 
 void energy_calc(double *result)
 {
-  if (!check_obs_calc_initialized())
+  if (!interactions_sanity_checks())
     return;
 
   init_energies(&energy);
diff --git a/src/initialize.cpp b/src/initialize.cpp
index 4e02e9012bf..c856555f3c2 100644
--- a/src/initialize.cpp
+++ b/src/initialize.cpp
@@ -148,8 +148,6 @@ void on_program_start()
 
 void on_integration_start()
 {
-  char *errtext;
-
   EVENT_TRACE(fprintf(stderr, "%d: on_integration_start\n", this_node));
   INTEG_TRACE(fprintf(stderr,"%d: on_integration_start: reinit_thermo = %d, resort_particles=%d\n",
 		      this_node,reinit_thermo,resort_particles));
@@ -158,113 +156,35 @@ void on_integration_start()
   /* sanity checks                            */
   /********************************************/
 
-  if ( time_step < 0.0 ) {
-    errtext = runtime_error(128);
-    ERROR_SPRINTF(errtext, "{010 time_step not set} ");
-  }
-  if ( skin < 0.0 ) {
-    errtext = runtime_error(128);
-    ERROR_SPRINTF(errtext,"{011 skin not set} ");
-  }
-  if ( temperature < 0.0 ) {
-    errtext = runtime_error(128);
-    ERROR_SPRINTF(errtext,"{012 thermostat not initialized} ");
-  }
-  
+  integrator_sanity_checks();
 #ifdef NPT
-  if (integ_switch == INTEG_METHOD_NPT_ISO) {
-    if (nptiso.piston <= 0.0) {
-      char *errtext = runtime_error(128);
-      ERROR_SPRINTF(errtext,"{014 npt on, but piston mass not set} ");
-    }
-  
-#ifdef ELECTROSTATICS
-
-    switch(coulomb.method) {
-      case COULOMB_NONE:  break;
-      case COULOMB_DH:    break;
-      case COULOMB_RF:    break;
-#ifdef P3M
-      case COULOMB_P3M:   break;
-#endif /*P3M*/
-      default: {
-        char *errtext = runtime_error(128);
-        ERROR_SPRINTF(errtext,"{014 npt only works with P3M, Debye-Huckel or reaction field} ");
-      }
-    }
-#endif /*ELECTROSTATICS*/
-
-#ifdef DIPOLES
+  integrator_npt_sanity_checks();
+#endif
+  interactions_sanity_checks();
+#ifdef CATALYTIC_REACTIONS
+  reactions_sanity_checks();
+#endif
+#ifdef LB
+  lb_sanity_checks();
+#endif
+#ifdef LB_GPU
+  lb_GPU_sanity_checks();
+#endif
 
-    switch (coulomb.Dmethod) {
-      case DIPOLAR_NONE: break;
-#ifdef DP3M
-      case DIPOLAR_P3M: break;
-#endif /* DP3M */
-      default: {
-        char *errtext = runtime_error(128);
-        ERROR_SPRINTF(errtext,"NpT does not work with your dipolar method, please use P3M.");
-      }
-    }
-#endif  /* ifdef DIPOLES */
-  }
-#endif /*NPT*/
+  /********************************************/
+  /* end sanity checks                        */
+  /********************************************/
 
-  if (!check_obs_calc_initialized()) return;
 
-#ifdef LB
-  if(lattice_switch & LATTICE_LB) {
-    if (lbpar.agrid <= 0.0) {
-      errtext = runtime_error(128);
-      ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set} ");
-    }
-    if (lbpar.tau <= 0.0) {
-      errtext = runtime_error(128);
-      ERROR_SPRINTF(errtext,"{099 Lattice Boltzmann time step not set} ");
-    }
-    if (lbpar.rho[0] <= 0.0) {
-      errtext = runtime_error(128);
-      ERROR_SPRINTF(errtext,"{100 Lattice Boltzmann fluid density not set} ");
-    }
-    if (lbpar.viscosity[0] <= 0.0) {
-      errtext = runtime_error(128);
-      ERROR_SPRINTF(errtext,"{101 Lattice Boltzmann fluid viscosity not set} ");
-    }
-    if (dd.use_vList && skin>=lbpar.agrid/2.0) {
-      errtext = runtime_error(128);
-      ERROR_SPRINTF(errtext, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} ");
-    }
-  }
-#endif
 #ifdef LB_GPU
   if(this_node == 0){
-    if(lattice_switch & LATTICE_LB_GPU) {
-      if (lbpar_gpu.agrid < 0.0) {
-        errtext = runtime_error(128);
-        ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set} ");
-      }
-      if (lbpar_gpu.tau < 0.0) {
-        errtext = runtime_error(128);
-        ERROR_SPRINTF(errtext,"{099 Lattice Boltzmann time step not set} ");
-      }
-      for(int i=0;i<LB_COMPONENTS;i++){
-        if (lbpar_gpu.rho[0] < 0.0) {
-          errtext = runtime_error(128);
-          ERROR_SPRINTF(errtext,"{100 Lattice Boltzmann fluid density not set} ");
-        }
-        if (lbpar_gpu.viscosity[0] < 0.0) {
-          errtext = runtime_error(128);
-          ERROR_SPRINTF(errtext,"{101 Lattice Boltzmann fluid viscosity not set} ");
-        }
-      }
-
-      if (lb_reinit_particles_gpu) {
-        lb_realloc_particles_gpu();
-        lb_reinit_particles_gpu = 0;
-      }
+    if (lb_reinit_particles_gpu) {
+      lb_realloc_particles_gpu();
+      lb_reinit_particles_gpu = 0;
     }
   }
 #endif
+
 #if defined(LB_GPU) || (defined (ELECTROSTATICS) && defined (CUDA))
   if (reinit_particle_comm_gpu){
     gpu_change_number_of_part_to_comm();
@@ -273,32 +193,10 @@ void on_integration_start()
   MPI_Bcast(gpu_get_global_particle_vars_pointer_host(), sizeof(CUDA_global_part_vars), MPI_BYTE, 0, comm_cart);
 #endif
 
-
 #ifdef METADYNAMICS
   meta_init();
 #endif
 
-#ifdef CATALYTIC_REACTIONS
-  if(reaction.ct_rate != 0.0) {
-
-    if( dd.use_vList == 0 || cell_structure.type != CELL_STRUCTURE_DOMDEC) {
-        errtext = runtime_error(128);
-        ERROR_SPRINTF(errtext,"{105 The CATALYTIC_REACTIONS feature requires verlet lists and domain decomposition} ");
-        check_runtime_errors();
-    }
-
-    if(max_cut < reaction.range) {
-      errtext = runtime_error(128);
-      ERROR_SPRINTF(errtext,"{106 Reaction range of %f exceeds maximum cutoff of %f} ", reaction.range, max_cut);
-    }
-  }
-#endif
-
-  /********************************************/
-  /* end sanity checks                        */
-  /********************************************/
-
-
   /* Prepare the thermostat */
   if (reinit_thermo) {
     thermo_init();
diff --git a/src/integrate.cpp b/src/integrate.cpp
index 2ea34a638f4..8bf63b7aa6c 100644
--- a/src/integrate.cpp
+++ b/src/integrate.cpp
@@ -123,6 +123,67 @@ void finalize_p_inst_npt();
 
 /*@}*/
 
+void integrator_sanity_checks()
+{
+  char *errtext;
+
+  if ( time_step < 0.0 ) {
+    errtext = runtime_error(128);
+    ERROR_SPRINTF(errtext, "{010 time_step not set} ");
+  }
+  if ( skin < 0.0 ) {
+    errtext = runtime_error(128);
+    ERROR_SPRINTF(errtext,"{011 skin not set} ");
+  }
+  if ( temperature < 0.0 ) {
+    errtext = runtime_error(128);
+    ERROR_SPRINTF(errtext,"{012 thermostat not initialized} ");
+  }
+}
+
+#ifdef NPT
+
+void integrator_npt_sanity_checks()
+{  
+  if (integ_switch == INTEG_METHOD_NPT_ISO) {
+    if (nptiso.piston <= 0.0) {
+      char *errtext = runtime_error(128);
+      ERROR_SPRINTF(errtext,"{014 npt on, but piston mass not set} ");
+    }
+
+#ifdef ELECTROSTATICS
+
+    switch(coulomb.method) {
+      case COULOMB_NONE:  break;
+      case COULOMB_DH:    break;
+      case COULOMB_RF:    break;
+#ifdef P3M
+      case COULOMB_P3M:   break;
+#endif /*P3M*/
+      default: {
+        char *errtext = runtime_error(128);
+        ERROR_SPRINTF(errtext,"{014 npt only works with P3M, Debye-Huckel or reaction field} ");
+      }
+    }
+#endif /*ELECTROSTATICS*/
+
+#ifdef DIPOLES
+
+    switch (coulomb.Dmethod) {
+      case DIPOLAR_NONE: break;
+#ifdef DP3M
+      case DIPOLAR_P3M: break;
+#endif /* DP3M */
+      default: {
+        char *errtext = runtime_error(128);
+        ERROR_SPRINTF(errtext,"NpT does not work with your dipolar method, please use P3M.");
+      }
+    }
+#endif  /* ifdef DIPOLES */
+  }
+}
+#endif /*NPT*/
+
 /************************************************************/
 
 
@@ -158,7 +219,7 @@ void integrate_ensemble_init()
 
 /************************************************************/
 
-void integrate_vv(int n_steps)
+void integrate_vv(int n_steps, int reuse_forces)
 {
   int i;
 
@@ -177,8 +238,9 @@ void integrate_vv(int n_steps)
    
   /* Integration Step: Preparation for first integration step:
      Calculate forces f(t) as function of positions p(t) ( and velocities v(t) ) */
-  if (recalc_forces) {
+  if (recalc_forces && !reuse_forces) {
     thermo_heat_up();
+
 #ifdef LB
     transfer_momentum = 0;
     if (lattice_switch & LATTICE_LB && this_node == 0)
@@ -189,6 +251,7 @@ void integrate_vv(int n_steps)
     if (lattice_switch & LATTICE_LB_GPU && this_node == 0)
       if (warnings) fprintf (stderr, "Warning: Recalculating forces, so the LB coupling forces are not included in the particle force the first time step. This only matters if it happens frequently during sampling.\n");
 #endif
+
 //VIRTUAL_SITES pos (and vel for DPD) update for security reason !!!
 #ifdef VIRTUAL_SITES
     update_mol_vel_pos();
diff --git a/src/integrate.hpp b/src/integrate.hpp
index 5a7f574156d..efc7a5cc59f 100644
--- a/src/integrate.hpp
+++ b/src/integrate.hpp
@@ -69,13 +69,21 @@ extern double verlet_reuse;
 /************************************************************/
 /*@{*/
 
+/** check sanity of integrator params */
+void integrator_sanity_checks();
+
+/** check sanity of npt params */
+void integrator_npt_sanity_checks();
+
 /** Initialize the used thermodynamic Ensemble (NVT or NPT) */
 void integrate_ensemble_init();
 
 /** integrate with velocity verlet integrator.
     \param n_steps number of steps to integrate.
+    \param reuse_forces if nonzero, blindly trust
+    the forces still stored with the particles for the first time step.
  */
-void integrate_vv(int n_steps);
+void integrate_vv(int n_steps, int reuse_forces);
 
 /** function that rescales all velocities on one node according to a
     new time step. */
diff --git a/src/interaction_data.cpp b/src/interaction_data.cpp
index c35ee939c90..bc6f414b93c 100644
--- a/src/interaction_data.cpp
+++ b/src/interaction_data.cpp
@@ -777,7 +777,7 @@ void make_bond_type_exist(int type)
   n_bonded_ia = ns;
 }
 
-int check_obs_calc_initialized()
+int interactions_sanity_checks()
 {
   /* set to zero if initialization was not successful. */
   int state = 1;
diff --git a/src/interaction_data.hpp b/src/interaction_data.hpp
index ab9489ff91b..7b0de260f48 100644
--- a/src/interaction_data.hpp
+++ b/src/interaction_data.hpp
@@ -1017,7 +1017,7 @@ void recalc_maximal_cutoff();
 void recalc_coulomb_prefactor();
 
 /** check whether all force calculation routines are properly initialized. */
-int check_obs_calc_initialized();
+int interactions_sanity_checks();
 
 /**  check if a non bonded interaction is defined */
 inline int checkIfInteraction(IA_parameters *data) {
diff --git a/src/lb.cpp b/src/lb.cpp
index ec318d0ee2d..8548cad5a66 100644
--- a/src/lb.cpp
+++ b/src/lb.cpp
@@ -1622,30 +1622,53 @@ static void halo_push_communication() {
 
 /** Performs basic sanity checks. */
 int lb_sanity_checks() {
-
-  char *errtxt;
+  char *errtext;
   int ret = 0;
 
+  if(lattice_switch & LATTICE_LB) {
+    if (lbpar.agrid <= 0.0) {
+      errtext = runtime_error(128);
+      ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set} ");
+      ret = 1;
+    }
+    if (lbpar.tau <= 0.0) {
+      errtext = runtime_error(128);
+      ERROR_SPRINTF(errtext,"{099 Lattice Boltzmann time step not set} ");
+      ret = 1;
+    }
+    if (lbpar.rho[0] <= 0.0) {
+      errtext = runtime_error(128);
+      ERROR_SPRINTF(errtext,"{100 Lattice Boltzmann fluid density not set} ");
+      ret = 1;
+    }
+    if (lbpar.viscosity[0] <= 0.0) {
+      errtext = runtime_error(128);
+      ERROR_SPRINTF(errtext,"{101 Lattice Boltzmann fluid viscosity not set} ");
+      ret = 1;
+    }
+    if (dd.use_vList && skin>=lbpar.agrid/2.0) {
+      errtext = runtime_error(128);
+      ERROR_SPRINTF(errtext, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} ");
+      ret = 1;
+    }
     if (cell_structure.type != CELL_STRUCTURE_DOMDEC) {
-      errtxt = runtime_error(128);
-      ERROR_SPRINTF(errtxt, "{103 LB requires domain-decomposition cellsystem} ");
+      errtext = runtime_error(128);
+      ERROR_SPRINTF(errtext, "{103 LB requires domain-decomposition cellsystem} ");
       ret = -1;
     } 
     else if (dd.use_vList && skin>=lbpar.agrid/2.0) {
-      errtxt = runtime_error(128);
-      ERROR_SPRINTF(errtxt, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} ");
+      errtext = runtime_error(128);
+      ERROR_SPRINTF(errtext, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} ");
       ret = -1;
     }
 
-
     if (thermo_switch & ~THERMO_LB) {
-      errtxt = runtime_error(128);
-      ERROR_SPRINTF(errtxt, "{122 LB must not be used with other thermostats} ");
+      errtext = runtime_error(128);
+      ERROR_SPRINTF(errtext, "{122 LB must not be used with other thermostats} ");
       ret = 1;
     }
-
-    return ret;
-
+  }
+  return ret;
 }
 
 /***********************************************************************/
diff --git a/src/lbgpu.cpp b/src/lbgpu.cpp
index 314871a69c2..ce3e1605c7a 100644
--- a/src/lbgpu.cpp
+++ b/src/lbgpu.cpp
@@ -380,4 +380,30 @@ int lb_lbnode_set_extforce_GPU(int ind[3], double f[3])
   return ES_OK;
 }
 
+void lb_GPU_sanity_checks()
+{
+  if(this_node == 0){
+    if(lattice_switch & LATTICE_LB_GPU) {
+      if (lbpar_gpu.agrid < 0.0) {
+        errtext = runtime_error(128);
+        ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set} ");
+      }
+      if (lbpar_gpu.tau < 0.0) {
+        errtext = runtime_error(128);
+        ERROR_SPRINTF(errtext,"{099 Lattice Boltzmann time step not set} ");
+      }
+      for(int i=0;i<LB_COMPONENTS;i++){
+        if (lbpar_gpu.rho[0] < 0.0) {
+          errtext = runtime_error(128);
+          ERROR_SPRINTF(errtext,"{100 Lattice Boltzmann fluid density not set} ");
+        }
+        if (lbpar_gpu.viscosity[0] < 0.0) {
+          errtext = runtime_error(128);
+          ERROR_SPRINTF(errtext,"{101 Lattice Boltzmann fluid viscosity not set} ");
+        }
+      }
+    }
+  }
+}
+
 #endif /* LB_GPU */
diff --git a/src/lbgpu.hpp b/src/lbgpu.hpp
index ff9a0277275..0f1c687efea 100644
--- a/src/lbgpu.hpp
+++ b/src/lbgpu.hpp
@@ -220,6 +220,7 @@ extern int ek_initialized;
 /************************************************************/
 /*@{*/
 
+void lb_GPU_sanity_checks();
 
 void lb_get_device_values_pointer(LB_rho_v_gpu** pointeradress);
 void lb_get_boundary_force_pointer(float** pointeradress);
diff --git a/src/pressure.cpp b/src/pressure.cpp
index 7da66d31ba2..0cbd79c05fe 100644
--- a/src/pressure.cpp
+++ b/src/pressure.cpp
@@ -85,7 +85,7 @@ void pressure_calc(double *result, double *result_t, double *result_nb, double *
   int n, i;
   double volume = box_l[0]*box_l[1]*box_l[2];
 
-  if (!check_obs_calc_initialized())
+  if (!interactions_sanity_checks())
     return;
 
   init_virials(&virials);
diff --git a/src/python/integrate.pxd b/src/python/integrate.pxd
index 46e193c8e99..5eac32c86be 100644
--- a/src/python/integrate.pxd
+++ b/src/python/integrate.pxd
@@ -2,4 +2,4 @@ cdef extern from "config.hpp":
     pass
 
 cdef extern from "integrate.hpp":
-    cdef void integrate_vv(int n_steps)
+    cdef void integrate_vv(int n_steps, int reuse_forces)
diff --git a/src/python/integrate.pyx b/src/python/integrate.pyx
index 368cc684a42..e2b17e530a2 100644
--- a/src/python/integrate.pyx
+++ b/src/python/integrate.pyx
@@ -1,3 +1,3 @@
 def integrate(nSteps):
-    integrate_vv(nSteps)
+    integrate_vv(nSteps, 0)
     
diff --git a/src/reaction.cpp b/src/reaction.cpp
index 3b67b946a07..8c06ade91f5 100644
--- a/src/reaction.cpp
+++ b/src/reaction.cpp
@@ -31,6 +31,26 @@
 reaction_struct reaction;
 
 #ifdef CATALYTIC_REACTIONS
+
+void reactions_sanity_checks()
+{
+  char *errtext;
+
+  if(reaction.ct_rate != 0.0) {
+
+    if( dd.use_vList == 0 || cell_structure.type != CELL_STRUCTURE_DOMDEC) {
+      errtext = runtime_error(128);
+      ERROR_SPRINTF(errtext,"{105 The CATALYTIC_REACTIONS feature requires verlet lists and domain decomposition} ");
+    }
+
+    if(max_cut < reaction.range) {
+      errtext = runtime_error(128);
+      ERROR_SPRINTF(errtext,"{106 Reaction range of %f exceeds maximum cutoff of %f} ", reaction.range, max_cut);
+    }
+  }
+}
+
+
 void local_setup_reaction() {
   
   /* Make available the various reaction parameters */
diff --git a/src/reaction.hpp b/src/reaction.hpp
index c0d547b3c69..a58cac66e7f 100644
--- a/src/reaction.hpp
+++ b/src/reaction.hpp
@@ -40,6 +40,8 @@ typedef struct {
 extern reaction_struct reaction;
 
 #ifdef CATALYTIC_REACTIONS
+/** sanity checks for the reaction code */
+void reactions_sanity_checks();
 /** broadcasts reaction parameters and sets up an entry in the ia_params, so
     that the verlet radius is equal or bigger than the reaction range.
 **/

From d22335d47cfa247b6fae89531fc7d2b7852549a9 Mon Sep 17 00:00:00 2001
From: Florian Weik <fweik@icp.uni-stuttgart.de>
Date: Tue, 15 Apr 2014 14:19:00 +0200
Subject: [PATCH 11/34] Cuda communication cleanup., small fix to
 HarmonicForce.

---
 src/EspressoSystemInterface.cpp     |  3 +-
 src/EspressoSystemInterface.hpp     |  6 ++++
 src/EspressoSystemInterface_cuda.cu | 51 ++++++++++++++++-------------
 src/HarmonicForce.cu                | 17 ++++------
 src/cuda_common_cuda.cu             |  3 +-
 src/cuda_interface.cpp              |  1 -
 src/forces.cpp                      |  4 +--
 src/initialize.cpp                  |  6 ++--
 8 files changed, 50 insertions(+), 41 deletions(-)

diff --git a/src/EspressoSystemInterface.cpp b/src/EspressoSystemInterface.cpp
index e22ca247b7c..7ea6d1160df 100644
--- a/src/EspressoSystemInterface.cpp
+++ b/src/EspressoSystemInterface.cpp
@@ -53,8 +53,9 @@ void EspressoSystemInterface::gatherParticles() {
   if (m_gpu)
   {
     if(gpu_get_global_particle_vars_pointer_host()->communication_enabled) {
+      ESIF_TRACE(puts("Calling copy_part_data_to_gpu()"));
       copy_part_data_to_gpu();
-      m_gpu_npart = gpu_get_global_particle_vars_pointer_host()->number_of_particles;
+      reallocDeviceMemory(gpu_get_global_particle_vars_pointer_host()->number_of_particles);
       if(m_splitParticleStructGpu && (this_node == 0)) 
 	split_particle_struct();
     }
diff --git a/src/EspressoSystemInterface.hpp b/src/EspressoSystemInterface.hpp
index 21a5d8338b6..d1767940d48 100644
--- a/src/EspressoSystemInterface.hpp
+++ b/src/EspressoSystemInterface.hpp
@@ -1,6 +1,8 @@
 #ifndef ESPRESSOSYSTEMINTERFACE_H
 #define ESPRESSOSYSTEMINTERFACE_H
 
+#define ESIF_TRACE(A)
+
 #include "SystemInterface.hpp"
 #include "particle_data.hpp"
 #include "cuda_interface.hpp"
@@ -112,10 +114,14 @@ class EspressoSystemInterface : public SystemInterface {
   void split_particle_struct();
   void enableParticleCommunication() {
     if(!gpu_get_global_particle_vars_pointer_host()->communication_enabled) {
+      ESIF_TRACE(puts("gpu communication not enabled;"));
+      ESIF_TRACE(puts("enableParticleCommunication"));
       gpu_init_particle_comm();
       cuda_bcast_global_part_params();
+      reallocDeviceMemory(gpu_get_global_particle_vars_pointer_host()->number_of_particles);
     }
   };
+  void reallocDeviceMemory(int n);
 
   Vector3Container R;
   #ifdef ELECTROSTATICS
diff --git a/src/EspressoSystemInterface_cuda.cu b/src/EspressoSystemInterface_cuda.cu
index 5ce1aee85b7..e005686111c 100644
--- a/src/EspressoSystemInterface_cuda.cu
+++ b/src/EspressoSystemInterface_cuda.cu
@@ -58,34 +58,39 @@ __global__ void split_kernel_v(CUDA_particle_data *particles, float *v, int n) {
   v[idx + 2] = p.v[2];
 }
 
+void EspressoSystemInterface::reallocDeviceMemory(int n) {
+
+  if(m_needsRGpu && ((n != m_gpu_npart) || (m_r_gpu_begin == 0))) {
+    if(m_r_gpu_begin != 0)
+      cuda_safe_mem(cudaFree(m_r_gpu_begin));
+    cuda_safe_mem(cudaMalloc(&m_r_gpu_begin, 3*n*sizeof(float)));
+    m_r_gpu_end = m_r_gpu_begin + 3*n;
+  }
+
+  if(m_needsVGpu && ((n != m_gpu_npart) || (m_v_gpu_begin == 0))) {
+    if(m_v_gpu_begin != 0)
+      cuda_safe_mem(cudaFree(m_v_gpu_begin));
+    cuda_safe_mem(cudaMalloc(&m_v_gpu_begin, 3*n*sizeof(float)));
+    m_v_gpu_end = m_v_gpu_begin + 3*n;
+  }
+
+  if(m_needsQGpu && ((n != m_gpu_npart) || (m_q_gpu_begin == 0))) {
+    if(m_q_gpu_begin != 0)
+      cuda_safe_mem(cudaFree(m_q_gpu_begin));
+    cuda_safe_mem(cudaMalloc(&m_q_gpu_begin, 3*n*sizeof(float)));
+    m_q_gpu_end = m_q_gpu_begin + 3*n;
+  }
+
+  m_gpu_npart = n;
+}
+
 void EspressoSystemInterface::split_particle_struct() {
   int n = gpu_get_global_particle_vars_pointer_host()->number_of_particles;
   if(n == 0) 
     return;
-  
-  if( (n != m_gpu_npart) ) {
-    if(m_needsRGpu) {
-      if(m_r_gpu_begin != 0)
-	cuda_safe_mem(cudaFree(m_r_gpu_begin));
-      cuda_safe_mem(cudaMalloc(&m_r_gpu_begin, 3*n*sizeof(float)));
-      m_r_gpu_end = m_r_gpu_begin + 3*n;
-    }
-    if(m_needsVGpu) {
-      if(m_v_gpu_begin != 0)
-	cuda_safe_mem(cudaFree(m_v_gpu_begin));
-      cuda_safe_mem(cudaMalloc(&m_v_gpu_begin, 3*n*sizeof(float)));
-      m_v_gpu_end = m_v_gpu_begin + 3*n;
-    }
-    if(m_needsQGpu) {
-      if(m_q_gpu_begin != 0)
-	cuda_safe_mem(cudaFree(m_q_gpu_begin));
-      cuda_safe_mem(cudaMalloc(&m_q_gpu_begin, n*sizeof(float)));
-      m_q_gpu_end = m_q_gpu_begin + n;
-    }
-  }
 
-  m_gpu_npart = n;
-  
+  ESIF_TRACE(printf("n = %d, m_gpu_npart = %d\n", n, m_gpu_npart));
+    
   dim3 grid(n/512+1,1,1);
   dim3 block(512,1,1);
 
diff --git a/src/HarmonicForce.cu b/src/HarmonicForce.cu
index fd86fad4580..0aaf0cfe313 100644
--- a/src/HarmonicForce.cu
+++ b/src/HarmonicForce.cu
@@ -12,24 +12,23 @@ __global__ void HarmonicForce_kernel(float x, float y, float z, float k,
 
   int id = blockIdx.x * blockDim.x + threadIdx.x;
 
-  if(id <= n)
+  if(id >= n)
     return;
 
-  f[3*id + 0] = k*(x - pos[3*id + 0]);
-  f[3*id + 1] = k*(y - pos[3*id + 1]);
-  f[3*id + 2] = k*(z - pos[3*id + 2]);
+  f[3*id + 0] += k*(x - pos[3*id + 0]);
+  f[3*id + 1] += k*(y - pos[3*id + 1]);
+  f[3*id + 2] += k*(z - pos[3*id + 2]);
 }
 
 
 void HarmonicForce_kernel_wrapper(float x, float y, float z, float k, int n, float *pos, float *f) {
   dim3 grid(1,1,1);
   dim3 block(1,1,1);
-  
+
   if(n == 0)
     return;
 
   if(n <= 512) {
-    puts("n <= 512");
     grid.x = 1;
     block.x = n;
   } else {
@@ -37,10 +36,6 @@ void HarmonicForce_kernel_wrapper(float x, float y, float z, float k, int n, flo
     block.x = 512;
   }
 
-  // printf("Calling HarmonicForce_kernel<<<(%d %d %d),(%d %d %d)>>>\n",
-  // 	 grid.x, grid.y, grid.z, block.x, block.y, block.z);
-
-KERNELCALL(HarmonicForce_kernel,grid,block,(x, y, z, k, n, pos, f))
+  KERNELCALL(HarmonicForce_kernel,grid,block,(x, y, z, k, n, pos, f))
 }
-
 #endif
diff --git a/src/cuda_common_cuda.cu b/src/cuda_common_cuda.cu
index eaf53ee1376..534635ccb15 100644
--- a/src/cuda_common_cuda.cu
+++ b/src/cuda_common_cuda.cu
@@ -180,6 +180,7 @@ void gpu_change_number_of_part_to_comm() {
 #endif
       
       cuda_safe_mem(cudaMalloc((void**)&particle_forces_device, global_part_vars_host.number_of_particles * sizeof(CUDA_particle_force)));
+
       cuda_safe_mem(cudaMalloc((void**)&particle_data_device, global_part_vars_host.number_of_particles * sizeof(CUDA_particle_data)));
       cuda_safe_mem(cudaMalloc((void**)&particle_seeds_device, global_part_vars_host.number_of_particles * sizeof(CUDA_particle_seed)));
 #ifdef SHANCHEN
@@ -249,8 +250,8 @@ CUDA_fluid_composition* gpu_get_fluid_composition_pointer() {
 }
 
 void copy_part_data_to_gpu() {
+  COMM_TRACE(printf("global_part_vars_host.communication_enabled = %d && global_part_vars_host.number_of_particles = %d\n", global_part_vars_host.communication_enabled, global_part_vars_host.number_of_particles));
   if ( global_part_vars_host.communication_enabled == 1 && global_part_vars_host.number_of_particles ) {
-    
     cuda_mpi_get_particles(particle_data_host);
     
     /** get espresso md particle values*/
diff --git a/src/cuda_interface.cpp b/src/cuda_interface.cpp
index 63e68c9fbba..5dc50cae06f 100644
--- a/src/cuda_interface.cpp
+++ b/src/cuda_interface.cpp
@@ -38,7 +38,6 @@ void cuda_mpi_get_particles(CUDA_particle_data *particle_data_host)
     sizes = (int*) malloc(sizeof(int)*n_nodes);
 
     n_part = cells_get_n_particles();
-
     
     /* first collect number of particles on each node */
     MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, comm_cart);
diff --git a/src/forces.cpp b/src/forces.cpp
index e7e1f07badd..fddba00f0f0 100644
--- a/src/forces.cpp
+++ b/src/forces.cpp
@@ -88,7 +88,7 @@ void force_calc()
 
   // transfer_momentum_gpu check makes sure the LB fluid doesn't get updated on integrate 0
   // this_node==0 makes sure it is the master node where the gpu exists
-  if (lattice_switch & LATTICE_LB_GPU && transfer_momentum_gpu && this_node==0 ) lb_calc_particle_lattice_ia_gpu();
+  if (lattice_switch & LATTICE_LB_GPU && transfer_momentum_gpu && (this_node == 0) ) lb_calc_particle_lattice_ia_gpu();
 #endif // LB_GPU
 
 #ifdef ELECTROSTATICS
@@ -152,7 +152,7 @@ void force_calc()
   meta_perform();
 #endif
 
-#if defined(LB_GPU) || (defined(ELECTROSTATICS) && defined(CUDA))
+#ifdef CUDA
   copy_forces_from_GPU();
 #endif
 
diff --git a/src/initialize.cpp b/src/initialize.cpp
index 4e02e9012bf..74455e72524 100644
--- a/src/initialize.cpp
+++ b/src/initialize.cpp
@@ -74,7 +74,7 @@ static int reinit_magnetostatics = 0;
 static int lb_reinit_particles_gpu = 1;
 #endif
 
-#if defined(LB_GPU) || defined(ELECTROSTATICS)
+#ifdef CUDA
 static int reinit_particle_comm_gpu = 1;
 #endif
 
@@ -265,13 +265,15 @@ void on_integration_start()
     }
   }
 #endif
-#if defined(LB_GPU) || (defined (ELECTROSTATICS) && defined (CUDA))
+  //#if defined(LB_GPU) || (defined (ELECTROSTATICS) && defined (CUDA))
+#ifdef CUDA
   if (reinit_particle_comm_gpu){
     gpu_change_number_of_part_to_comm();
     reinit_particle_comm_gpu = 0;
   }
   MPI_Bcast(gpu_get_global_particle_vars_pointer_host(), sizeof(CUDA_global_part_vars), MPI_BYTE, 0, comm_cart);
 #endif
+  //#endif
 
 
 #ifdef METADYNAMICS

From 80fdfdf086102dc4737ac2a036004057795c9460 Mon Sep 17 00:00:00 2001
From: Florian Weik <fweik@icp.uni-stuttgart.de>
Date: Tue, 15 Apr 2014 14:59:15 +0200
Subject: [PATCH 12/34] HarmonicForce: Doc and NEWS.

---
 NEWS            |  2 ++
 doc/ug/part.tex | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/NEWS b/NEWS
index e1f4fac373e..c9d0cbff02d 100644
--- a/NEWS
+++ b/NEWS
@@ -29,6 +29,8 @@ New user-visible features
 * New command time_integration to get the runtime of the integration
   loop.
 
+* New harmoic force that runs on the GPU.
+
 User-visible changes
 --------------------
 * Comfixed now works with periodic boundary conditions.
diff --git a/doc/ug/part.tex b/doc/ug/part.tex
index ce3aebdde7d..b225f0cc84c 100644
--- a/doc/ug/part.tex
+++ b/doc/ug/part.tex
@@ -865,6 +865,20 @@ \subsection{Getting the currently defined constraints}
 Prints out all constraint information. If \var{num} is specified only this
 constraint is displayed, otherwise all constraints will be printed.
 
+\subsection{\texttt{harmonic_force}: Creating a harmonic trap.}
+ \newescommand{harmonic_trap}
+ \begin{essyntax}
+   harmonic_force \{ \var{x} \var{y} \var{z} \} \var{k}
+  \begin{features}
+    \required{HARMONICFORCE}
+    \required{CUDA}
+  \end{features}
+ \end{essyntax}
+
+ Calculates a spring force for all particles, where the equilibrium position
+ of the spring is at \var{x y z} and it's force constant is \var{k}. A more
+ flexible trap can be constructed with constraints, but this one runs on the GPU.
+
 \section{Virtual sites}
 \label{sec:virtual}
 \index{virtual sites|mainindex}

From 104da462599798e41da6f7c31648bc85624e7ab0 Mon Sep 17 00:00:00 2001
From: Henri Menke <henri@icp.uni-stuttgart.de>
Date: Tue, 15 Apr 2014 16:20:15 +0200
Subject: [PATCH 13/34] Electrokinetics pdb-parser

---
 src/electrokinetics_pdb_parse.cpp | 429 ++++++++++++++++++++++++++++++
 src/electrokinetics_pdb_parse.hpp |  17 ++
 src/lb-boundaries.cpp             |  30 ++-
 src/lbgpu_cuda.cu                 |   3 +-
 src/tcl/electrokinetics_tcl.cpp   |   3 +-
 5 files changed, 474 insertions(+), 8 deletions(-)
 create mode 100644 src/electrokinetics_pdb_parse.cpp
 create mode 100644 src/electrokinetics_pdb_parse.hpp

diff --git a/src/electrokinetics_pdb_parse.cpp b/src/electrokinetics_pdb_parse.cpp
new file mode 100644
index 00000000000..3984127c6c8
--- /dev/null
+++ b/src/electrokinetics_pdb_parse.cpp
@@ -0,0 +1,429 @@
+/* vim: set ts=8 sts=2 sw=2 et: */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "electrokinetics_pdb_parse.hpp"
+
+//#define DEBUG
+
+/* Replacements for bool variables */
+const int pdb_SUCCESS = 0;
+const int pdb_ERROR = 1;
+
+float* pdb_charge_lattice = NULL;
+int* pdb_boundary_lattice = NULL;
+
+typedef struct {
+  int i; // index
+  int m; // model index
+  float x,y,z;
+} pdb_ATOM;
+
+typedef struct {
+  int i;
+  char type[2];
+  float charge;
+} itp_atoms;
+
+typedef struct {
+  char type[2];
+  float sigma,epsilon;
+} itp_atomtypes;
+
+typedef struct {
+  unsigned int pdb_n_particles;
+  pdb_ATOM* pdb_array_ATOM;
+  unsigned int itp_n_particles;
+  itp_atoms* itp_array_atoms;
+  unsigned int itp_n_parameters;
+  itp_atomtypes* itp_array_atomtypes;
+} particle_data;
+
+typedef struct {
+  float max_x;
+  float max_y;
+  float max_z;
+  float min_x;
+  float min_y;
+  float min_z;
+  float center[3];
+} bounding_box;
+
+/* BEGIN CODE */
+
+void galloc(void** ptr, size_t size) {
+  if (!*ptr) {
+    if (size > 0) {
+      *ptr = (void*) malloc(size);
+    }
+    else {
+      printf("You cannot malloc to size 0\n");
+    }
+  }
+  else {
+    if (size > 0) {
+      *ptr = (void*) realloc(*ptr, size);
+    }
+    else {
+      free(*ptr);
+    }
+  }
+}
+
+unsigned int pdb_rhoindex_cartesian2linear(unsigned int x, unsigned int y, unsigned int z) {
+  return z * ek_parameters.dim_y * ek_parameters.dim_x + y * ek_parameters.dim_x + x;
+}
+
+int print_charge_field(char* filename) {
+  FILE* fp;
+  if ((fp = fopen(filename,"w")) == NULL) return pdb_ERROR;
+  
+  if( fp == NULL ) {
+    return 1;
+  }
+  
+  fprintf( fp, "\
+# vtk DataFile Version 2.0\n\
+charge_density\n\
+ASCII\n\
+\n\
+DATASET STRUCTURED_POINTS\n\
+DIMENSIONS %u %u %u\n\
+ORIGIN %f %f %f\n\
+SPACING %f %f %f\n\
+\n\
+POINT_DATA %u\n\
+SCALARS charge_density float 1\n\
+LOOKUP_TABLE default\n",
+  ek_parameters.dim_x, ek_parameters.dim_y, ek_parameters.dim_z,
+  ek_parameters.agrid*0.5f, ek_parameters.agrid*0.5f, ek_parameters.agrid*0.5f,
+  ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid,
+  ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z);
+  
+  for( unsigned int i = 0; i < (ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z); i++ ) {
+    fprintf( fp, "%e ", pdb_charge_lattice[i] );
+  }
+  
+  fclose( fp );
+  return pdb_SUCCESS;
+}
+
+int print_boundary_lattice(char* filename) {
+  FILE* fp;
+  if ((fp = fopen(filename,"w")) == NULL) return pdb_ERROR;
+  
+  if( fp == NULL ) {
+    return 1;
+  }
+  
+  fprintf( fp, "\
+# vtk DataFile Version 2.0\n\
+boundary_flag\n\
+ASCII\n\
+\n\
+DATASET STRUCTURED_POINTS\n\
+DIMENSIONS %u %u %u\n\
+ORIGIN %f %f %f\n\
+SPACING %f %f %f\n\
+\n\
+POINT_DATA %u\n\
+SCALARS boundary_flag float 1\n\
+LOOKUP_TABLE default\n",
+  ek_parameters.dim_x, ek_parameters.dim_y, ek_parameters.dim_z,
+  ek_parameters.agrid*0.5f, ek_parameters.agrid*0.5f, ek_parameters.agrid*0.5f,
+  ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid,
+  ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z);
+
+  for( unsigned int i = 0; i < (ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z); i++ ) {
+    fprintf( fp, "%d ", pdb_boundary_lattice[i] );
+  }
+  
+  fclose( fp );
+  return pdb_SUCCESS;
+}
+
+int pdb_parse_files(char* pdb_filename, char* itp_filename, particle_data* atom_data) {
+  /*
+   * This routine parses the pdb- and itp-file to extract
+   * the relevant parameters. These are stored in arrays.
+   */
+
+  // Parse pdb-file
+  int model = 0;
+  char pdb_line[256];
+  FILE* pdb_file;
+  if ((pdb_file = fopen(pdb_filename,"r")) == NULL) return pdb_ERROR;
+#ifdef DEBUG
+  printf("### Reading pdb-file \"%s\" ###\n",pdb_filename);
+#endif  
+  while (fgets(pdb_line, sizeof(pdb_line), pdb_file)) {
+    if (strncmp(pdb_line,"MODEL",5) == 0) {
+      // read the MODEL identifier
+      sscanf(pdb_line,"MODEL %d",&model);
+#ifdef DEBUG
+      printf("MODEL m=%d\n", model);
+#endif
+    }
+    if ( strncmp(pdb_line,"ATOM",4) == 0) {
+      // read all ATOMs
+      galloc( (void**) &atom_data->pdb_array_ATOM , (atom_data->pdb_n_particles+1)*sizeof(pdb_ATOM) );
+      pdb_ATOM* a = &atom_data->pdb_array_ATOM[atom_data->pdb_n_particles];
+      // See http://deposit.rcsb.org/adit/docs/pdb_atom_format.html#ATOM for the meaning of the format string
+      sscanf(pdb_line,"ATOM %5d %*4s%*c%*3s%*c%*4d%*c %8f %8f %8f %*6f %*6f %*4s%*2s%*2s",&a->i,&a->x,&a->y,&a->z);
+      a->x /= 10.0;
+      a->y /= 10.0;
+      a->z /= 10.0;
+#ifdef DEBUG
+      // Print all local variables
+      printf("ATOM i=%d x=%f y=%f z=%f\n",a->i,a->x,a->y,a->z);
+#endif
+      atom_data->pdb_n_particles++;
+    }
+  }
+  fclose(pdb_file);
+
+  // Parse itp-file
+  char itp_line[256];
+  FILE* itp_file;
+  if ((itp_file = fopen(itp_filename,"r")) == NULL) return pdb_ERROR;
+#ifdef DEBUG
+  printf("### Reading itp-file \"%s\" ###\n",itp_filename);
+#endif  
+  while (fgets(itp_line, sizeof(itp_line), itp_file)) {
+    // get section
+    char section[256];
+    sscanf(itp_line,"[ %s ]",section);
+    // only read non-comment, non-section and non-empty lines
+    // TODO: Handling of lines consiting whitespace only (e.g. '\t' and ' ')
+    if (itp_line[0] != '[' && itp_line[0] != ';' && itp_line[0] != '\r' && itp_line[0] != '\n') {
+      if (strcmp(section,"atoms") == 0) {
+        // section [ atoms ]
+        galloc( (void**) &atom_data->itp_array_atoms , (atom_data->itp_n_particles+1)*sizeof(pdb_ATOM) );
+        itp_atoms* a = &atom_data->itp_array_atoms[atom_data->itp_n_particles];
+        // FIXME: no source :( Reverse engineered from the itp-file
+        sscanf(itp_line," %d %2s %*d %*s %*s %*d %f %*f ; %*s %*f",&a->i,a->type,&a->charge);
+#ifdef DEBUG
+        // Print all local variables
+        printf("[ atoms ] i=%d type=%s charge=%f\n",a->i,a->type,a->charge);
+#endif
+        atom_data->itp_n_particles++;
+      }
+      if (strcmp(section,"atomtypes") == 0) {
+        // section [ atomtypes ]
+        galloc( (void**) &atom_data->itp_array_atomtypes , (atom_data->itp_n_parameters+1)*sizeof(pdb_ATOM) );
+        itp_atomtypes* a = &atom_data->itp_array_atomtypes[atom_data->itp_n_parameters];
+        // FIXME: no source :( Reverse engineered from the itp-file
+        sscanf(itp_line," %2s %*s %*f %*f %*c %f %f ; %*f %*f",a->type,&a->sigma,&a->epsilon);
+#ifdef DEBUG
+        // Print all local variables
+        printf("[ atomtypes ] name=%s sigma=%f epsilon=%f\n",a->type,a->sigma,a->epsilon);
+#endif
+        atom_data->itp_n_parameters++;
+      }
+    }
+  }
+  fclose(itp_file);
+
+  if (atom_data->pdb_n_particles != atom_data->itp_n_particles) return pdb_ERROR;
+  return pdb_SUCCESS;
+}
+
+int calculate_bounding_box(bounding_box* bbox, particle_data* atom_data) {
+  // prototype for joining the arrays
+  if (atom_data->pdb_n_particles-1 == 0) return pdb_ERROR;
+  pdb_ATOM* a = &atom_data->pdb_array_ATOM[0];
+  bbox->max_x = a->x;
+  bbox->max_y = a->y;
+  bbox->max_z = a->z;
+  bbox->min_x = a->x;
+  bbox->min_y = a->y;
+  bbox->min_z = a->z;
+
+  for (unsigned int i = 1; i <= atom_data->pdb_n_particles-1; i++) {
+    a = &atom_data->pdb_array_ATOM[i];
+    if (bbox->max_x < a->x) bbox->max_x = a->x;
+    if (bbox->max_y < a->y) bbox->max_y = a->y;
+    if (bbox->max_z < a->z) bbox->max_z = a->z;
+    if (bbox->min_x > a->x) bbox->min_x = a->x;
+    if (bbox->min_y > a->y) bbox->min_y = a->y;
+    if (bbox->min_z > a->z) bbox->min_z = a->z;
+  }
+
+  bbox->center[0] = ( bbox->max_x + bbox->min_x )/2;
+  bbox->center[1] = ( bbox->max_y + bbox->min_y )/2;
+  bbox->center[2] = ( bbox->max_z + bbox->min_z )/2;
+
+  return pdb_SUCCESS;
+}
+
+int populate_lattice(particle_data* atom_data, int indices_only) {
+  /*
+   * This routine will populate the lattice using the
+   * values read from the pdb and itp files.
+   * WARNING: It contains much logic and interpolation stuff!
+   */
+#ifdef DEBUG
+  printf("pdb_n_particles=%u, itp_n_particles=%u, itp_n_parameters=%u\n",atom_data->pdb_n_particles,atom_data->itp_n_particles,atom_data->itp_n_parameters);
+#endif
+  // TODO: Check if bounding box fits into simbox
+  bounding_box bbox;
+  calculate_bounding_box(&bbox, atom_data);
+
+  // calculate the shift of the bounding box
+  float shift[3];
+  shift[0] = ek_parameters.agrid / 2.0 * ek_parameters.dim_x - bbox.center[0];
+  shift[1] = ek_parameters.agrid / 2.0 * ek_parameters.dim_y - bbox.center[1];
+  shift[2] = ek_parameters.agrid / 2.0 * ek_parameters.dim_z - bbox.center[2];
+
+#ifdef DEBUG
+  printf("bbox.max_x=%f, bbox.max_y=%f, bbox.max_z=%f, bbox.min_x=%f, bbox.min_y=%f, bbox.min_z=%f, bbox->center=[%f; %f; %f]\n", bbox.max_x, bbox.max_y, bbox.max_z, bbox.min_x, bbox.min_y, bbox.min_z, bbox.center[0], bbox.center[1], bbox.center[2]);
+  printf("agrid=%f, dim_x=%d, dim_y=%d, dim_z=%d\n",ek_parameters.agrid, ek_parameters.dim_x, ek_parameters.dim_y, ek_parameters.dim_z);
+  printf("shift=[%f; %f; %f]\n",shift[0], shift[1], shift[2]);
+#endif
+
+  // joining the array
+  int lowernode[3];
+  float cellpos[3];
+  float gridpos;
+  float a_x_shifted, a_y_shifted, a_z_shifted;
+
+  for (unsigned int i = 0; i <= atom_data->pdb_n_particles-1; i++) {
+    pdb_ATOM* a = &atom_data->pdb_array_ATOM[i];
+    itp_atoms* b;
+    itp_atomtypes* c;
+    for (unsigned int j = 0; j <= atom_data->itp_n_particles-1; j++) {
+      b = &atom_data->itp_array_atoms[j];
+      if (a->i == b->i) {
+        for (unsigned int k = 0; k <= atom_data->itp_n_parameters-1; k++) {
+          c = &atom_data->itp_array_atomtypes[k];
+          if (strcmp(b->type,c->type) == 0) {
+#ifdef DEBUG
+            printf("i=%d x=%f y=%f z=%f type=%s charge=%f sigma=%f epsilon=%f\n",a->i,a->x,a->y,a->z,b->type,b->charge,c->sigma,c->epsilon);
+#endif
+
+            // Interpolate the charge to the lattice
+            gridpos      = (a->x + shift[0]) / ek_parameters.agrid - 0.5f;
+            lowernode[0] = (int) floorf( gridpos );
+            cellpos[0]   = gridpos - lowernode[0];
+                                                
+            gridpos      = (a->y + shift[1]) / ek_parameters.agrid - 0.5f;
+            lowernode[1] = (int) floorf( gridpos );
+            cellpos[1]   = gridpos - lowernode[1];
+                                                
+            gridpos      = (a->z + shift[2]) / ek_parameters.agrid - 0.5f;
+            lowernode[2] = (int) floorf( gridpos );
+            cellpos[2]   = gridpos - lowernode[2];
+                                                
+            lowernode[0] = (lowernode[0] + ek_parameters.dim_x) % ek_parameters.dim_x;
+            lowernode[1] = (lowernode[1] + ek_parameters.dim_y) % ek_parameters.dim_y;
+            lowernode[2] = (lowernode[2] + ek_parameters.dim_z) % ek_parameters.dim_z;
+
+            if ( indices_only == 0 ) {
+              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],lowernode[1],lowernode[2] )]
+                = b->charge * ( 1 - cellpos[0] ) * ( 1 - cellpos[1] ) * ( 1 - cellpos[2] );
+
+              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,lowernode[1],lowernode[2] )]
+                = b->charge * cellpos[0] * ( 1 - cellpos[1] ) * ( 1 - cellpos[2] );
+
+              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],( lowernode[1] + 1 ) % ek_parameters.dim_y,lowernode[2] )]
+                = b->charge * ( 1 - cellpos[0] ) * cellpos[1] * ( 1 - cellpos[2] );
+
+              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],lowernode[1],( lowernode[2] + 1 ) % ek_parameters.dim_z )]
+                = b->charge * ( 1 - cellpos[0] ) * ( 1 - cellpos[1] ) * cellpos[2];
+
+              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,( lowernode[1] + 1 ) % ek_parameters.dim_y,lowernode[2] )]
+                = b->charge * cellpos[0] * cellpos[1] * ( 1 - cellpos[2] );
+
+              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,lowernode[1],( lowernode[2] + 1 ) % ek_parameters.dim_z )]
+                = b->charge * cellpos[0] * ( 1 - cellpos[1] ) * cellpos[2];
+
+              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],( lowernode[1] + 1 ) % ek_parameters.dim_y,( lowernode[2] + 1 ) % ek_parameters.dim_z )]
+                = b->charge * ( 1 - cellpos[0] ) * cellpos[1] * cellpos[2];
+
+              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,( lowernode[1] + 1 ) % ek_parameters.dim_y,( lowernode[2] + 1 ) % ek_parameters.dim_z )]
+                = b->charge * cellpos[0] * cellpos[1] * cellpos[2];
+            }
+	    else if ( indices_only == 1 ) {
+              printf("Only indices!\n");
+            }
+            // Interpolate lennard-jones parameters to boundary
+            float r = pow(2,1./6.)*c->sigma;
+
+            a_x_shifted = (a->x + shift[0]) / ek_parameters.agrid - 0.5f;
+            a_y_shifted = (a->y + shift[1]) / ek_parameters.agrid - 0.5f;
+            a_z_shifted = (a->z + shift[2]) / ek_parameters.agrid - 0.5f;
+
+            for (float z = a->z - r; z <= a->z + r + ek_parameters.agrid; z += ek_parameters.agrid) {
+              for (float y = a->y - r; y <= a->y + r + ek_parameters.agrid; y += ek_parameters.agrid) {
+                for (float x = a->x - r; x <= a->x + r + ek_parameters.agrid; x += ek_parameters.agrid) {
+                  gridpos      = (x + shift[0]) / ek_parameters.agrid - 0.5f;
+                  lowernode[0] = (int) floorf( gridpos );
+
+                  gridpos      = (y + shift[1]) / ek_parameters.agrid - 0.5f;
+                  lowernode[1] = (int) floorf( gridpos );
+
+                  gridpos      = (z + shift[2]) / ek_parameters.agrid - 0.5f;
+                  lowernode[2] = (int) floorf( gridpos );
+
+                  lowernode[0] = (lowernode[0] + ek_parameters.dim_x) % ek_parameters.dim_x;
+                  lowernode[1] = (lowernode[1] + ek_parameters.dim_y) % ek_parameters.dim_y;
+                  lowernode[2] = (lowernode[2] + ek_parameters.dim_z) % ek_parameters.dim_z;
+#ifdef DEBUG
+                  printf("shifted: %f %f %f\n", a_x_shifted, a_y_shifted, a_z_shifted);
+                  printf("lowernode: %d %d %d\n", lowernode[0], lowernode[1], lowernode[2]);
+                  printf("distance: %f %f %f\n", lowernode[0] - a_x_shifted, lowernode[1] - a_y_shifted, lowernode[2] - a_z_shifted);
+                  printf("distance: %f <= %f\n\n", pow(lowernode[0] - a_x_shifted,2) + pow(lowernode[1] - a_y_shifted,2) + pow(lowernode[2] - a_z_shifted,2), pow(r/ek_parameters.agrid,2));
+#endif
+                  if ( pow(lowernode[0] - a_x_shifted,2) + pow(lowernode[1] - a_y_shifted,2) + pow(lowernode[2] - a_z_shifted,2) <= pow(r/ek_parameters.agrid,2) ) {
+                    if ( indices_only == 0 ) {
+                      pdb_boundary_lattice[ek_parameters.dim_y*ek_parameters.dim_x*lowernode[2] + ek_parameters.dim_x*lowernode[1] + lowernode[0]] = 1;
+                    }
+                    else if ( indices_only == 1 ) {
+                      printf("Only indices!\n");
+                    }
+                  }
+                }
+              }
+            }
+
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  return pdb_SUCCESS;
+}
+
+int pdb_parse(char* pdb_filename, char* itp_filename) {
+  /*
+   * This is the main parsing routine, which is visible to the outside
+   * through the header electrokinetics_pdb_parse.h. It doesn't contain any logic and just
+   * deploys the input to the subroutines.
+   */
+
+  /* BEGIN DEPLOY */
+  int indices_only = 0;
+  galloc( (void**) &pdb_charge_lattice, ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z * sizeof(float));
+  galloc( (void**) &pdb_boundary_lattice, ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z * sizeof(int));
+  for ( unsigned int i = 0; i < ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z; i++ ) {
+    pdb_charge_lattice[i] = 0.0;
+    pdb_boundary_lattice[i] = 0;
+  }
+
+  particle_data atom_data;
+  atom_data.pdb_n_particles = 0;
+  atom_data.pdb_array_ATOM = NULL;
+  atom_data.itp_n_particles = 0;
+  atom_data.itp_array_atoms = NULL;
+  atom_data.itp_n_parameters = 0;
+  atom_data.itp_array_atomtypes = NULL;
+
+  pdb_parse_files(pdb_filename, itp_filename, &atom_data);
+
+  populate_lattice(&atom_data, indices_only);
+
+  return pdb_SUCCESS;
+}
diff --git a/src/electrokinetics_pdb_parse.hpp b/src/electrokinetics_pdb_parse.hpp
new file mode 100644
index 00000000000..e18f48a9ac2
--- /dev/null
+++ b/src/electrokinetics_pdb_parse.hpp
@@ -0,0 +1,17 @@
+/* vim: set ts=8 sts=2 sw=2 et: */
+#ifndef _PDB_PARSER_H
+#define _PDB_PARSER_H
+
+#include "electrokinetics.hpp"
+
+extern float* pdb_charge_lattice;
+extern int* pdb_boundary_lattice;
+
+/* Returns 0/1 if reading the files was successful/unsuccessful */
+int pdb_parse(char* pdb_filename, char* itp_filename);
+
+int print_charge_field(char* filename);
+
+int print_boundary_lattice(char* filename);
+
+#endif
diff --git a/src/lb-boundaries.cpp b/src/lb-boundaries.cpp
index 4e7c5675b30..6224b2fd606 100644
--- a/src/lb-boundaries.cpp
+++ b/src/lb-boundaries.cpp
@@ -31,6 +31,7 @@
 #include "lbgpu.hpp"
 #include "lb.hpp"
 #include "electrokinetics.hpp"
+#include "electrokinetics_pdb_parse.hpp"
 #include "interaction_data.hpp"
 #include "communication.hpp"
 
@@ -120,6 +121,9 @@ void lb_init_boundaries() {
           break;
         }
       }
+      if (pdb_charge_lattice) {
+        charged_boundaries = 1;
+      }
         
       for(n = 0; n < int(ek_parameters.number_of_species); n++)
         if(ek_parameters.valency[n] != 0.0) {
@@ -137,7 +141,7 @@ void lb_init_boundaries() {
 
     for(z=0; z<int(lbpar_gpu.dim_z); z++) {
       for(y=0; y<int(lbpar_gpu.dim_y); y++) {
-        for (x=0; x<int(lbpar_gpu.dim_x); x++) {	    
+        for (x=0; x<int(lbpar_gpu.dim_x); x++) {
           pos[0] = (x+0.5)*lbpar_gpu.agrid;
           pos[1] = (y+0.5)*lbpar_gpu.agrid;
           pos[2] = (z+0.5)*lbpar_gpu.agrid;
@@ -192,7 +196,6 @@ void lb_init_boundaries() {
               dist = dist_tmp;
               boundary_number = n;
             }
-          
 #ifdef EK_BOUNDARIES
             if (ek_initialized)
             {
@@ -204,20 +207,31 @@ void lb_init_boundaries() {
 #endif
           }
           
-          if (dist <= 0 && boundary_number >= 0 && n_lb_boundaries > 0) {
+          if(pdb_boundary_lattice && 
+             pdb_boundary_lattice[ek_parameters.dim_y*ek_parameters.dim_x*z + ek_parameters.dim_x*y + x]) {
+            dist = -1;
+            boundary_number = n_lb_boundaries; // Make sure that boundary_number is not used by a constraint
+          }
+
+          if (dist <= 0 && boundary_number >= 0 && (n_lb_boundaries > 0 || pdb_boundary_lattice)) {
             size_of_index = (number_of_boundnodes+1)*sizeof(int);
             host_boundary_node_list = (int *) realloc(host_boundary_node_list, size_of_index);
             host_boundary_index_list = (int *) realloc(host_boundary_index_list, size_of_index);
             host_boundary_node_list[number_of_boundnodes] = x + lbpar_gpu.dim_x*y + lbpar_gpu.dim_x*lbpar_gpu.dim_y*z;
             host_boundary_index_list[number_of_boundnodes] = boundary_number + 1; 
             number_of_boundnodes++;  
-            // printf("boundindex %i: \n", number_of_boundnodes);  
+            //printf("boundindex %i: \n", number_of_boundnodes);  
           }
         
 #ifdef EK_BOUNDARIES
           if (ek_initialized)
           {
             if(wallcharge_species != -1) {
+              if(pdb_charge_lattice &&
+                 pdb_charge_lattice[ek_parameters.dim_y*ek_parameters.dim_x*z + ek_parameters.dim_x*y + x] != 0.0f) {
+                node_charged = 1;
+                node_wallcharge += pdb_charge_lattice[ek_parameters.dim_y*ek_parameters.dim_x*z + ek_parameters.dim_x*y + x];
+              }
               if(node_charged)
                 host_wallcharge_species_density[ek_parameters.dim_y*ek_parameters.dim_x*z + ek_parameters.dim_x*y + x] = node_wallcharge / ek_parameters.valency[wallcharge_species];
               else if(dist <= 0)
@@ -232,7 +246,7 @@ void lb_init_boundaries() {
     }
 
     /**call of cuda fkt*/
-    float* boundary_velocity = (float *) malloc(3*n_lb_boundaries*sizeof(float));
+    float* boundary_velocity = (float *) malloc(3*(n_lb_boundaries+1)*sizeof(float));
 
     for (n=0; n<n_lb_boundaries; n++) {
       boundary_velocity[3*n+0]=lb_boundaries[n].velocity[0];
@@ -240,7 +254,11 @@ void lb_init_boundaries() {
       boundary_velocity[3*n+2]=lb_boundaries[n].velocity[2];
     }
 
-    if (n_lb_boundaries)
+    boundary_velocity[3*n_lb_boundaries+0] = 0.0f;
+    boundary_velocity[3*n_lb_boundaries+1] = 0.0f;
+    boundary_velocity[3*n_lb_boundaries+2] = 0.0f;
+
+    if (n_lb_boundaries || pdb_boundary_lattice)
       lb_init_boundaries_GPU(n_lb_boundaries, number_of_boundnodes, host_boundary_node_list, host_boundary_index_list, boundary_velocity);
 
     free(boundary_velocity);
diff --git a/src/lbgpu_cuda.cu b/src/lbgpu_cuda.cu
index f499e2a6808..6dcf77bbeec 100644
--- a/src/lbgpu_cuda.cu
+++ b/src/lbgpu_cuda.cu
@@ -31,6 +31,7 @@
 #include <stdlib.h>
 
 #include "electrokinetics.hpp"
+#include "electrokinetics_pdb_parse.hpp"
 #include "lbgpu.hpp"
 #include "cuda_interface.hpp"
 #include "cuda_utils.hpp"
@@ -2987,7 +2988,7 @@ void lb_init_boundaries_GPU(int host_n_lb_boundaries, int number_of_boundnodes,
 
   KERNELCALL(reset_boundaries, dim_grid, threads_per_block, (nodes_a, nodes_b));
 
-  if (n_lb_boundaries == 0)
+  if (n_lb_boundaries == 0 && !pdb_boundary_lattice)
   {
     cudaThreadSynchronize();
     return;
diff --git a/src/tcl/electrokinetics_tcl.cpp b/src/tcl/electrokinetics_tcl.cpp
index 581a6736432..729d561927d 100644
--- a/src/tcl/electrokinetics_tcl.cpp
+++ b/src/tcl/electrokinetics_tcl.cpp
@@ -46,7 +46,7 @@ int tclcommand_electrokinetics(ClientData data, Tcl_Interp *interp, int argc, ch
 
   if(argc < 2) 
   {
-    Tcl_AppendResult(interp, "Usage of \"electrokinetics\":", (char *)NULL);
+    Tcl_AppendResult(interp, "Usage of \"electrokinetics\":\n\n", (char *)NULL);
     Tcl_AppendResult(interp, "electrokinetics [agrid #float] [lb_density #float] [viscosity #float] [friction #float]\n", (char *)NULL);
     Tcl_AppendResult(interp, "                [bulk_viscosity #float] [gamma_even #float] [gamma_odd #float] [T #float] [bjerrum_length #float]\n", (char *)NULL);
     Tcl_AppendResult(interp, "electrokinetics print <density|velocity|potential|boundary|pressure|lbforce|reaction_tags|mass_flux> vtk #string]\n", (char *)NULL);
@@ -117,6 +117,7 @@ int tclcommand_electrokinetics(ClientData data, Tcl_Interp *interp, int argc, ch
       Tcl_AppendResult(interp, "Could not parse pdb- or itp-file. Please, check your format and filenames.", (char *) NULL);
       return (TCL_ERROR);
     }
+    lb_init_boundaries();
 #endif
   }
   else if(ARG0_IS_S("boundary")) 

From 3675cee3178968781bc4da26916e9cc37b33d514 Mon Sep 17 00:00:00 2001
From: Florian Weik <fweik@icp.uni-stuttgart.de>
Date: Tue, 15 Apr 2014 16:24:44 +0200
Subject: [PATCH 14/34] Fixed typo in ug.

---
 doc/ug/part.tex | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/ug/part.tex b/doc/ug/part.tex
index b225f0cc84c..7765874c1e4 100644
--- a/doc/ug/part.tex
+++ b/doc/ug/part.tex
@@ -865,8 +865,8 @@ \subsection{Getting the currently defined constraints}
 Prints out all constraint information. If \var{num} is specified only this
 constraint is displayed, otherwise all constraints will be printed.
 
-\subsection{\texttt{harmonic_force}: Creating a harmonic trap.}
- \newescommand{harmonic_trap}
+\subsection{\texttt{harmonic_force}: Creating a harmonic trap}
+ \newescommand{harmonic-force}
  \begin{essyntax}
    harmonic_force \{ \var{x} \var{y} \var{z} \} \var{k}
   \begin{features}

From 70287e6778671103bfcb31b6c3d88cf1b0805ae8 Mon Sep 17 00:00:00 2001
From: Florian Weik <fweik@icp.uni-stuttgart.de>
Date: Tue, 15 Apr 2014 16:43:09 +0200
Subject: [PATCH 15/34] No more compiler warnings from SystemInterface.

---
 src/EspressoSystemInterface.cpp | 2 +-
 src/EspressoSystemInterface.hpp | 2 +-
 src/SystemInterface.hpp         | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/EspressoSystemInterface.cpp b/src/EspressoSystemInterface.cpp
index 7ea6d1160df..32b47e7a32f 100644
--- a/src/EspressoSystemInterface.cpp
+++ b/src/EspressoSystemInterface.cpp
@@ -9,7 +9,7 @@
 /********************************************************************************************/
 
 template<class value_type>
-const value_type EspressoSystemInterface::const_iterator<value_type>::operator*() const {
+value_type EspressoSystemInterface::const_iterator<value_type>::operator*() const {
   return (*m_const_iterator);
 }
 
diff --git a/src/EspressoSystemInterface.hpp b/src/EspressoSystemInterface.hpp
index d1767940d48..79ec8848461 100644
--- a/src/EspressoSystemInterface.hpp
+++ b/src/EspressoSystemInterface.hpp
@@ -22,7 +22,7 @@ class EspressoSystemInterface : public SystemInterface {
   template<class value_type>
   class const_iterator : public SystemInterface::const_iterator<value_type> {
   public:
-    const value_type operator*() const;
+    value_type operator*() const;
     SystemInterface::const_iterator<value_type> &operator=(const SystemInterface::const_iterator<value_type> &rhs);
     EspressoSystemInterface::const_iterator<value_type> &operator=(typename std::vector<value_type>::const_iterator rhs);
     bool operator==(SystemInterface::const_iterator<value_type> const &rhs) const;
diff --git a/src/SystemInterface.hpp b/src/SystemInterface.hpp
index bf124e098d6..35a0c26dc97 100644
--- a/src/SystemInterface.hpp
+++ b/src/SystemInterface.hpp
@@ -18,7 +18,7 @@ class SystemInterface {
   template<class value_type>
   class const_iterator {
   public:
-    virtual const value_type operator*() const = 0;
+    virtual value_type operator*() const = 0;
     virtual const_iterator<value_type> &operator=(const const_iterator<value_type> &rhs) = 0;
     virtual bool operator==(const_iterator<value_type> const &rhs) const = 0;
     virtual bool operator!=(const_iterator<value_type> const &rhs) const = 0;
@@ -27,7 +27,7 @@ class SystemInterface {
 
   class null_vec_iterator : public SystemInterface::const_iterator<Vector3> {
   public:
-    const Vector3 operator*() const { return Vector3(); };
+    Vector3 operator*() const { return Vector3(); };
     const_iterator<Vector3> &operator=(const const_iterator<Vector3> &rhs) { return *this; };
     bool operator==(const_iterator<Vector3> const &rhs) const { return true; };
     bool operator!=(const_iterator<Vector3> const &rhs) const { return false; };
@@ -38,7 +38,7 @@ class SystemInterface {
 
   class null_real_iterator : public SystemInterface::const_iterator<Real> {
   public:
-    const Real operator*() const { return Real(); };
+    Real operator*() const { return Real(); };
     const_iterator<Real> &operator=(const const_iterator<Real> &rhs) { return *this; };
     bool operator==(const_iterator<Real> const &rhs) const { return true; };
     bool operator!=(const_iterator<Real> const &rhs) const { return false; };

From f3461cd033927bdc8d300a9fb4ed279482904d69 Mon Sep 17 00:00:00 2001
From: Georg Rempfer <github@wo-geht-was.net>
Date: Tue, 15 Apr 2014 21:06:20 +0200
Subject: [PATCH 16/34] More robust string parsing.

---
 src/electrokinetics_pdb_parse.cpp | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/electrokinetics_pdb_parse.cpp b/src/electrokinetics_pdb_parse.cpp
index 3984127c6c8..51e35a3f96c 100644
--- a/src/electrokinetics_pdb_parse.cpp
+++ b/src/electrokinetics_pdb_parse.cpp
@@ -4,6 +4,9 @@
 #include <string.h>
 #include <math.h>
 #include "electrokinetics_pdb_parse.hpp"
+#include <iostream>
+#include <string>
+#include <sstream>
 
 //#define DEBUG
 
@@ -170,7 +173,16 @@ int pdb_parse_files(char* pdb_filename, char* itp_filename, particle_data* atom_
       galloc( (void**) &atom_data->pdb_array_ATOM , (atom_data->pdb_n_particles+1)*sizeof(pdb_ATOM) );
       pdb_ATOM* a = &atom_data->pdb_array_ATOM[atom_data->pdb_n_particles];
       // See http://deposit.rcsb.org/adit/docs/pdb_atom_format.html#ATOM for the meaning of the format string
-      sscanf(pdb_line,"ATOM %5d %*4s%*c%*3s%*c%*4d%*c %8f %8f %8f %*6f %*6f %*4s%*2s%*2s",&a->i,&a->x,&a->y,&a->z);
+//  #    sscanf(pdb_line,"ATOM %6d %*4s%*c%*4s%*c%*4d%*c %8f %8f %8f %*6f %*6f %*4s%*2s%*2s",&a->i,&a->x,&a->y,&a->z);
+    std::istringstream str(pdb_line);
+
+    std::string tmp;   
+ 
+    str.ignore(246,' ');
+    str >> a->i;
+    str >> tmp >> tmp >> tmp >> tmp;    
+    str >> a->x >> a->y >> a->z;
+
       a->x /= 10.0;
       a->y /= 10.0;
       a->z /= 10.0;

From d32869c021702a82a85c663cd907448e6ab7c0d2 Mon Sep 17 00:00:00 2001
From: Henri Menke <henri@icp.uni-stuttgart.de>
Date: Wed, 16 Apr 2014 10:23:56 +0200
Subject: [PATCH 17/34] Removed indices_only from pdb-parser and wrote
 documentation for the User Guide

---
 doc/ug/electrokinetics.tex        | 15 ++++++++++
 src/electrokinetics_pdb_parse.cpp | 49 ++++++++++++-------------------
 2 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/doc/ug/electrokinetics.tex b/doc/ug/electrokinetics.tex
index 1401bc9b89d..c4c05289c1b 100644
--- a/doc/ug/electrokinetics.tex
+++ b/doc/ug/electrokinetics.tex
@@ -473,6 +473,21 @@ \subsection{\label{ssec:ek-reac-init}Initialization and Geometry Definition}
 a reaction, as described above. To successfully specify a region all the 
 relevant arguments that go with the shape constraints must be provided.
 
+\subsubsection{\label{sssec:ek-pdb-parse}Parsing PDB Files}
+
+\begin{essyntax}
+  \require{1 or 2 or 3}{electrokinetics}
+  \require{2}{pdb-parse}
+  \var{pdb\_filename}
+  \var{itp\_filename}
+  \begin{features}
+  \required[1]{ELECTROKINETICS}
+  \required[2]{EK_BOUNDARIES}
+  \required[3]{EK_REACTIONS}
+  \end{features}
+\end{essyntax}
+The \lit{electrokinetics pdb-parse} feature allows the user to parse simple PDB files, a file format introduced by the protein database to encode molecular structures. Together with a topology file (here \var{itp\_filename}) the structure gets interpolated to the \lit{electrokinetics} grid. For the input you will need to prepare a PDB file with a \lit{gromacs} force field to generate the topology file. Normally the PDB file extension is \lit{.pdb}, the topology file extension is \lit{.itp}. Obviously the PDB file is placed instead of \var{pdb\_filename} and the topology file instead of \var{itp\_filename}. \todo{At the moment this fails badly, if you try to parse incorrectly formatted files. This will be fixed in the future.}
+
 \subsection{\label{ssec:ek-reac-output}Reaction-Specific Output}
 
 \begin{essyntax}
diff --git a/src/electrokinetics_pdb_parse.cpp b/src/electrokinetics_pdb_parse.cpp
index 3984127c6c8..9b7277a19ca 100644
--- a/src/electrokinetics_pdb_parse.cpp
+++ b/src/electrokinetics_pdb_parse.cpp
@@ -257,7 +257,7 @@ int calculate_bounding_box(bounding_box* bbox, particle_data* atom_data) {
   return pdb_SUCCESS;
 }
 
-int populate_lattice(particle_data* atom_data, int indices_only) {
+int populate_lattice(particle_data* atom_data) {
   /*
    * This routine will populate the lattice using the
    * values read from the pdb and itp files.
@@ -319,34 +319,29 @@ int populate_lattice(particle_data* atom_data, int indices_only) {
             lowernode[1] = (lowernode[1] + ek_parameters.dim_y) % ek_parameters.dim_y;
             lowernode[2] = (lowernode[2] + ek_parameters.dim_z) % ek_parameters.dim_z;
 
-            if ( indices_only == 0 ) {
-              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],lowernode[1],lowernode[2] )]
-                = b->charge * ( 1 - cellpos[0] ) * ( 1 - cellpos[1] ) * ( 1 - cellpos[2] );
+            pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],lowernode[1],lowernode[2] )]
+              = b->charge * ( 1 - cellpos[0] ) * ( 1 - cellpos[1] ) * ( 1 - cellpos[2] );
 
-              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,lowernode[1],lowernode[2] )]
-                = b->charge * cellpos[0] * ( 1 - cellpos[1] ) * ( 1 - cellpos[2] );
+            pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,lowernode[1],lowernode[2] )]
+              = b->charge * cellpos[0] * ( 1 - cellpos[1] ) * ( 1 - cellpos[2] );
 
-              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],( lowernode[1] + 1 ) % ek_parameters.dim_y,lowernode[2] )]
-                = b->charge * ( 1 - cellpos[0] ) * cellpos[1] * ( 1 - cellpos[2] );
+            pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],( lowernode[1] + 1 ) % ek_parameters.dim_y,lowernode[2] )]
+              = b->charge * ( 1 - cellpos[0] ) * cellpos[1] * ( 1 - cellpos[2] );
 
-              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],lowernode[1],( lowernode[2] + 1 ) % ek_parameters.dim_z )]
-                = b->charge * ( 1 - cellpos[0] ) * ( 1 - cellpos[1] ) * cellpos[2];
+            pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],lowernode[1],( lowernode[2] + 1 ) % ek_parameters.dim_z )]
+              = b->charge * ( 1 - cellpos[0] ) * ( 1 - cellpos[1] ) * cellpos[2];
 
-              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,( lowernode[1] + 1 ) % ek_parameters.dim_y,lowernode[2] )]
-                = b->charge * cellpos[0] * cellpos[1] * ( 1 - cellpos[2] );
+            pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,( lowernode[1] + 1 ) % ek_parameters.dim_y,lowernode[2] )]
+              = b->charge * cellpos[0] * cellpos[1] * ( 1 - cellpos[2] );
 
-              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,lowernode[1],( lowernode[2] + 1 ) % ek_parameters.dim_z )]
-                = b->charge * cellpos[0] * ( 1 - cellpos[1] ) * cellpos[2];
+            pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,lowernode[1],( lowernode[2] + 1 ) % ek_parameters.dim_z )]
+              = b->charge * cellpos[0] * ( 1 - cellpos[1] ) * cellpos[2];
 
-              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],( lowernode[1] + 1 ) % ek_parameters.dim_y,( lowernode[2] + 1 ) % ek_parameters.dim_z )]
-                = b->charge * ( 1 - cellpos[0] ) * cellpos[1] * cellpos[2];
+            pdb_charge_lattice[pdb_rhoindex_cartesian2linear( lowernode[0],( lowernode[1] + 1 ) % ek_parameters.dim_y,( lowernode[2] + 1 ) % ek_parameters.dim_z )]
+              = b->charge * ( 1 - cellpos[0] ) * cellpos[1] * cellpos[2];
 
-              pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,( lowernode[1] + 1 ) % ek_parameters.dim_y,( lowernode[2] + 1 ) % ek_parameters.dim_z )]
-                = b->charge * cellpos[0] * cellpos[1] * cellpos[2];
-            }
-	    else if ( indices_only == 1 ) {
-              printf("Only indices!\n");
-            }
+            pdb_charge_lattice[pdb_rhoindex_cartesian2linear( ( lowernode[0] + 1 ) % ek_parameters.dim_x,( lowernode[1] + 1 ) % ek_parameters.dim_y,( lowernode[2] + 1 ) % ek_parameters.dim_z )]
+              = b->charge * cellpos[0] * cellpos[1] * cellpos[2];
             // Interpolate lennard-jones parameters to boundary
             float r = pow(2,1./6.)*c->sigma;
 
@@ -376,12 +371,7 @@ int populate_lattice(particle_data* atom_data, int indices_only) {
                   printf("distance: %f <= %f\n\n", pow(lowernode[0] - a_x_shifted,2) + pow(lowernode[1] - a_y_shifted,2) + pow(lowernode[2] - a_z_shifted,2), pow(r/ek_parameters.agrid,2));
 #endif
                   if ( pow(lowernode[0] - a_x_shifted,2) + pow(lowernode[1] - a_y_shifted,2) + pow(lowernode[2] - a_z_shifted,2) <= pow(r/ek_parameters.agrid,2) ) {
-                    if ( indices_only == 0 ) {
-                      pdb_boundary_lattice[ek_parameters.dim_y*ek_parameters.dim_x*lowernode[2] + ek_parameters.dim_x*lowernode[1] + lowernode[0]] = 1;
-                    }
-                    else if ( indices_only == 1 ) {
-                      printf("Only indices!\n");
-                    }
+                    pdb_boundary_lattice[ek_parameters.dim_y*ek_parameters.dim_x*lowernode[2] + ek_parameters.dim_x*lowernode[1] + lowernode[0]] = 1;
                   }
                 }
               }
@@ -405,7 +395,6 @@ int pdb_parse(char* pdb_filename, char* itp_filename) {
    */
 
   /* BEGIN DEPLOY */
-  int indices_only = 0;
   galloc( (void**) &pdb_charge_lattice, ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z * sizeof(float));
   galloc( (void**) &pdb_boundary_lattice, ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z * sizeof(int));
   for ( unsigned int i = 0; i < ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z; i++ ) {
@@ -423,7 +412,7 @@ int pdb_parse(char* pdb_filename, char* itp_filename) {
 
   pdb_parse_files(pdb_filename, itp_filename, &atom_data);
 
-  populate_lattice(&atom_data, indices_only);
+  populate_lattice(&atom_data);
 
   return pdb_SUCCESS;
 }

From fd9f02ef699f2b1fdb02d1d4e80ebf69733bd765 Mon Sep 17 00:00:00 2001
From: Florian Weik <fweik@icp.uni-stuttgart.de>
Date: Tue, 22 Apr 2014 21:53:26 +0200
Subject: [PATCH 18/34] Fixed build without EKIN. Needs proper fix in the
 future.

---
 src/electrokinetics_pdb_parse.cpp | 4 ++++
 src/electrokinetics_pdb_parse.hpp | 4 ++++
 src/p3m_gpu_cuda.cu               | 2 +-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/electrokinetics_pdb_parse.cpp b/src/electrokinetics_pdb_parse.cpp
index 616bd892c1f..4f3f9f5b409 100644
--- a/src/electrokinetics_pdb_parse.cpp
+++ b/src/electrokinetics_pdb_parse.cpp
@@ -10,6 +10,8 @@
 
 //#define DEBUG
 
+#ifdef ELECTROKINETICS
+
 /* Replacements for bool variables */
 const int pdb_SUCCESS = 0;
 const int pdb_ERROR = 1;
@@ -428,3 +430,5 @@ int pdb_parse(char* pdb_filename, char* itp_filename) {
 
   return pdb_SUCCESS;
 }
+
+#endif
diff --git a/src/electrokinetics_pdb_parse.hpp b/src/electrokinetics_pdb_parse.hpp
index e18f48a9ac2..6eaff809ff7 100644
--- a/src/electrokinetics_pdb_parse.hpp
+++ b/src/electrokinetics_pdb_parse.hpp
@@ -4,6 +4,8 @@
 
 #include "electrokinetics.hpp"
 
+#ifdef ELECTROKINETICS
+
 extern float* pdb_charge_lattice;
 extern int* pdb_boundary_lattice;
 
@@ -15,3 +17,5 @@ int print_charge_field(char* filename);
 int print_boundary_lattice(char* filename);
 
 #endif
+
+#endif
diff --git a/src/p3m_gpu_cuda.cu b/src/p3m_gpu_cuda.cu
index a8414213f9e..0e6f8e1f2ce 100644
--- a/src/p3m_gpu_cuda.cu
+++ b/src/p3m_gpu_cuda.cu
@@ -577,7 +577,7 @@ extern "C" {
 	block.x = 512 - mesh*mesh;
 	block.x -= block.x / 32;
 	grid.x = mesh / block.x + 1;
-	calculate_influence_function_device<<<grid,block>>>(cao, mesh, box, alpha, p3m_gpu_data.G_hat);
+	KERNELCALL(calculate_influence_function_device,grid,block,(cao, mesh, box, alpha, p3m_gpu_data.G_hat));
 	cudaThreadSynchronize();
       }
       p3m_gpu_data_initialized = 1;

From 60a40e9e6b40b4a614f732673d19f5c7f4b71eb4 Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Tue, 22 Apr 2014 22:24:31 +0200
Subject: [PATCH 19/34] Fix lbgpu sanity_check

---
 src/lbgpu.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/lbgpu.cpp b/src/lbgpu.cpp
index ce3e1605c7a..eb7df236adb 100644
--- a/src/lbgpu.cpp
+++ b/src/lbgpu.cpp
@@ -385,20 +385,20 @@ void lb_GPU_sanity_checks()
   if(this_node == 0){
     if(lattice_switch & LATTICE_LB_GPU) {
       if (lbpar_gpu.agrid < 0.0) {
-        errtext = runtime_error(128);
+        char *errtext = runtime_error(128);
         ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set} ");
       }
       if (lbpar_gpu.tau < 0.0) {
-        errtext = runtime_error(128);
+        char *errtext = runtime_error(128);
         ERROR_SPRINTF(errtext,"{099 Lattice Boltzmann time step not set} ");
       }
       for(int i=0;i<LB_COMPONENTS;i++){
         if (lbpar_gpu.rho[0] < 0.0) {
-          errtext = runtime_error(128);
+          char *errtext = runtime_error(128);
           ERROR_SPRINTF(errtext,"{100 Lattice Boltzmann fluid density not set} ");
         }
         if (lbpar_gpu.viscosity[0] < 0.0) {
-          errtext = runtime_error(128);
+          char *errtext = runtime_error(128);
           ERROR_SPRINTF(errtext,"{101 Lattice Boltzmann fluid viscosity not set} ");
         }
       }

From 8ef47e2be6a673903d5cc67f969796a227444adf Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Tue, 22 Apr 2014 22:25:03 +0200
Subject: [PATCH 20/34] Fix unused function warning in lbgpu_cuda

---
 src/p3m_gpu_cuda.cu | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/p3m_gpu_cuda.cu b/src/p3m_gpu_cuda.cu
index a8414213f9e..e927ab955a3 100644
--- a/src/p3m_gpu_cuda.cu
+++ b/src/p3m_gpu_cuda.cu
@@ -113,6 +113,8 @@ __host__ __device__ void static Aliasing_sums_ik ( int cao, REAL_TYPE box, REAL_
 }
 
 /* Calculate influence function */
+#if 0
+// host version, not used anywhere
 void static calculate_influence_function ( int cao, int mesh, REAL_TYPE box, REAL_TYPE alpha, REAL_TYPE *G_hat ) {
 
   int    NX,NY,NZ;
@@ -146,6 +148,7 @@ void static calculate_influence_function ( int cao, int mesh, REAL_TYPE box, REA
     }
   }
 }
+#endif
 
 __global__ void calculate_influence_function_device ( int cao, int mesh, REAL_TYPE box, REAL_TYPE alpha, REAL_TYPE *G_hat ) {
 

From 4637bcaf81da42d0c42a2aad8ec162f1c6c68f5e Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Tue, 22 Apr 2014 22:25:48 +0200
Subject: [PATCH 21/34] Fix incorrect ! == in observable parser

---
 src/tcl/statistics_observable_tcl.cpp | 28 +++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/tcl/statistics_observable_tcl.cpp b/src/tcl/statistics_observable_tcl.cpp
index 1cee9df0a4d..41c3d88c54c 100644
--- a/src/tcl/statistics_observable_tcl.cpp
+++ b/src/tcl/statistics_observable_tcl.cpp
@@ -418,7 +418,7 @@ int tclcommand_observable_density_profile(Tcl_Interp* interp, int argc, char** a
   int temp;
   profile_data* pdata;
   obs->fun = &observable_density_profile;
-  if (! tclcommand_parse_profile(interp, argc-1, argv+1, &temp, &obs->n, &pdata) == TCL_OK ) 
+  if (tclcommand_parse_profile(interp, argc-1, argv+1, &temp, &obs->n, &pdata) != TCL_OK ) 
     return TCL_ERROR;
   if (pdata->id_list==0) {
     Tcl_AppendResult(interp, "Error in radial_profile: particle ids/types not specified\n" , (char *)NULL);
@@ -437,7 +437,7 @@ int tclcommand_observable_lb_velocity_profile(Tcl_Interp* interp, int argc, char
   int temp;
   profile_data* pdata;
   obs->fun = &observable_lb_velocity_profile;
-  if (! tclcommand_parse_profile(interp, argc-1, argv+1, &temp, &obs->n, &pdata) == TCL_OK ) 
+  if (tclcommand_parse_profile(interp, argc-1, argv+1, &temp, &obs->n, &pdata) != TCL_OK ) 
     return TCL_ERROR;
   obs->args=(void*)pdata;
   obs->n=3*pdata->xbins*pdata->ybins*pdata->zbins;
@@ -451,7 +451,7 @@ int tclcommand_observable_radial_density_profile(Tcl_Interp* interp, int argc, c
   int temp;
   radial_profile_data* rpdata;
   obs->fun = &observable_radial_density_profile;
-  if (! tclcommand_parse_radial_profile(interp, argc-1, argv+1, &temp, &obs->n, &rpdata) == TCL_OK ) 
+  if (tclcommand_parse_radial_profile(interp, argc-1, argv+1, &temp, &obs->n, &rpdata) != TCL_OK ) 
      return TCL_ERROR;
   if (rpdata->id_list==0) {
     Tcl_AppendResult(interp, "Error in radial_profile: particle ids/types not specified\n" , (char *)NULL);
@@ -467,7 +467,7 @@ int tclcommand_observable_radial_flux_density_profile(Tcl_Interp* interp, int ar
   int temp;
   radial_profile_data* rpdata;
   obs->fun = &observable_radial_flux_density_profile;
-  if (! tclcommand_parse_radial_profile(interp, argc-1, argv+1, &temp, &obs->n, &rpdata) == TCL_OK ) 
+  if (tclcommand_parse_radial_profile(interp, argc-1, argv+1, &temp, &obs->n, &rpdata) != TCL_OK ) 
      return TCL_ERROR;
   if (rpdata->id_list==0) {
     Tcl_AppendResult(interp, "Error in radial_profile: particle ids/types not specified\n" , (char *)NULL);
@@ -483,7 +483,7 @@ int tclcommand_observable_flux_density_profile(Tcl_Interp* interp, int argc, cha
   int temp;
   profile_data* pdata;
   obs->fun = &observable_flux_density_profile;
-  if (! tclcommand_parse_profile(interp, argc-1, argv+1, &temp, &obs->n, &pdata) == TCL_OK ) 
+  if (tclcommand_parse_profile(interp, argc-1, argv+1, &temp, &obs->n, &pdata) != TCL_OK ) 
      return TCL_ERROR;
   if (pdata->id_list==0) {
     Tcl_AppendResult(interp, "Error in radial_profile: particle ids/types not specified\n" , (char *)NULL);
@@ -502,7 +502,7 @@ int tclcommand_observable_lb_radial_velocity_profile(Tcl_Interp* interp, int arg
   int temp;
   radial_profile_data* rpdata;
   obs->fun = &observable_lb_radial_velocity_profile;
-  if (! tclcommand_parse_radial_profile(interp, argc-1, argv+1, &temp, &obs->n, &rpdata) == TCL_OK ) 
+  if (tclcommand_parse_radial_profile(interp, argc-1, argv+1, &temp, &obs->n, &rpdata) != TCL_OK ) 
      return TCL_ERROR;
   obs->args=(void*)rpdata;
   obs->n=3*rpdata->rbins*rpdata->phibins*rpdata->zbins;
@@ -516,7 +516,7 @@ int tclcommand_observable_particle_currents(Tcl_Interp* interp, int argc, char**
   int temp;
   IntList* ids;
   obs->fun = &observable_particle_currents;
-  if (! parse_id_list(interp, argc-1, argv+1, &temp, &ids) == TCL_OK ) 
+  if (parse_id_list(interp, argc-1, argv+1, &temp, &ids) != TCL_OK ) 
     return TCL_ERROR;
   obs->args=(void*)ids;
   obs->n=3*ids->n;
@@ -534,7 +534,7 @@ int tclcommand_observable_currents(Tcl_Interp* interp, int argc, char** argv, in
   int temp;
   IntList* ids;
   obs->fun = &observable_currents;
-  if (! parse_id_list(interp, argc-1, argv+1, &temp, &ids) == TCL_OK ) 
+  if (parse_id_list(interp, argc-1, argv+1, &temp, &ids) != TCL_OK ) 
     return TCL_ERROR;
   obs->args=(void*)ids;
   obs->n=3;
@@ -551,7 +551,7 @@ int tclcommand_observable_dipole_moment(Tcl_Interp* interp, int argc, char** arg
   int temp;
   IntList* ids;
   obs->fun = &observable_dipole_moment;
-  if (! parse_id_list(interp, argc-1, argv+1, &temp, &ids) == TCL_OK ) 
+  if (parse_id_list(interp, argc-1, argv+1, &temp, &ids) != TCL_OK ) 
     return TCL_ERROR;
   obs->args=(void*)ids;
   obs->n=3;
@@ -704,7 +704,7 @@ int tclcommand_observable_interacts_with(Tcl_Interp* interp, int argc, char** ar
   ids1=(IntList*)malloc(sizeof(IntList));
   ids2=(IntList*)malloc(sizeof(IntList));
   iw_params* iw_params_p=(iw_params*) malloc(sizeof(iw_params));
-  if (! parse_id_list(interp, argc-1, argv+1, &temp, &ids1) == TCL_OK ) {
+  if (parse_id_list(interp, argc-1, argv+1, &temp, &ids1) != TCL_OK ) {
     free(ids1);
     free(ids2);
     free(iw_params_p);
@@ -713,7 +713,7 @@ int tclcommand_observable_interacts_with(Tcl_Interp* interp, int argc, char** ar
   iw_params_p=(iw_params*)malloc(sizeof(iw_params));
   iw_params_p->ids1=ids1;
   *change=1+temp;
-  if (! parse_id_list(interp, argc-3, argv+3, &temp, &ids2) == TCL_OK ) {
+  if (parse_id_list(interp, argc-3, argv+3, &temp, &ids2) != TCL_OK ) {
     free(ids1);
     free(ids2);
     free(iw_params_p);
@@ -780,7 +780,7 @@ int tclcommand_observable_interacts_with(Tcl_Interp* interp, int argc, char** ar
 #define REGISTER_OBSERVABLE(name,parser,id) \
   if (ARG_IS_S(2,#name)) { \
     observables[id]=(observable*)malloc(sizeof(observable));            \
-    if (parser(interp, argc-2, argv+2, &temp, observables[n_observables]) ==TCL_OK) { \
+    if (parser(interp, argc-2, argv+2, &temp, observables[n_observables]) == TCL_OK) { \
       n_observables++; \
       argc-=1+temp; \
       argv+=1+temp; \
@@ -993,7 +993,7 @@ int tclcommand_parse_profile(Tcl_Interp* interp, int argc, char** argv, int* cha
   pdata->zbins=1;
   while (argc>0) {
     if (ARG0_IS_S("ids") || ARG0_IS_S("types") || ARG0_IS_S("all")) {
-      if (!parse_id_list(interp, argc, argv, &temp, &pdata->id_list )==TCL_OK) {
+      if (parse_id_list(interp, argc, argv, &temp, &pdata->id_list ) != TCL_OK) {
         Tcl_AppendResult(interp, "Error reading profile: Error parsing particle id information\n" , (char *)NULL);
         return TCL_ERROR;
       } else {
@@ -1128,7 +1128,7 @@ int tclcommand_parse_radial_profile(Tcl_Interp* interp, int argc, char** argv, i
   }
   while (argc>0) {
     if (ARG0_IS_S("ids") || ARG0_IS_S("types") || ARG0_IS_S("all")) {
-      if (!parse_id_list(interp, argc, argv, &temp, &pdata->id_list )==TCL_OK) {
+      if (parse_id_list(interp, argc, argv, &temp, &pdata->id_list ) != TCL_OK) {
         Tcl_AppendResult(interp, "Error reading profile: Error parsing particle id information\n" , (char *)NULL);
         return TCL_ERROR;
       } else {

From 97181f6f0f4ac1e9413483402032c4381da0ce8a Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Wed, 23 Apr 2014 09:34:16 +0200
Subject: [PATCH 22/34] refactored force calculation and added option to reuse
 forces

---
 src/communication.cpp     |  15 +++--
 src/communication.hpp     |   3 +-
 src/forces.cpp            |  47 +++++++++++++
 src/integrate.cpp         | 134 +++++++-------------------------------
 src/tcl/integrate_tcl.cpp |  16 +++--
 src/tuning.cpp            |   4 +-
 6 files changed, 93 insertions(+), 126 deletions(-)

diff --git a/src/communication.cpp b/src/communication.cpp
index b63b0285d1a..48aab428a4e 100644
--- a/src/communication.cpp
+++ b/src/communication.cpp
@@ -1172,18 +1172,19 @@ void mpi_remove_particle_slave(int pnode, int part)
 }
 
 /********************* REQ_INTEGRATE ********/
-int mpi_integrate(int n_steps)
+int mpi_integrate(int n_steps, int reuse_forces)
 {
   if (!correlations_autoupdate) {
-    mpi_call(mpi_integrate_slave, -1, n_steps);
-    integrate_vv(n_steps, 0);
+    mpi_call(mpi_integrate_slave, n_steps, reuse_forces);
+    integrate_vv(n_steps, reuse_forces);
     COMM_TRACE(fprintf(stderr, "%d: integration task %d done.\n", \
                        this_node, n_steps));
     return check_runtime_errors();
   } else {
     for (int i=0; i<n_steps; i++) {
-      mpi_call(mpi_integrate_slave, -1, 1);
-      integrate_vv(1, 0);
+      mpi_call(mpi_integrate_slave, 1, reuse_forces);
+      integrate_vv(1, reuse_forces);
+      reuse_forces = 0; // makes even less sense after the first time step
       COMM_TRACE(fprintf(stderr, "%d: integration task %d done.\n",     \
                          this_node, i));
       if (check_runtime_errors())
@@ -1195,9 +1196,9 @@ int mpi_integrate(int n_steps)
   return 0;
 }
 
-void mpi_integrate_slave(int pnode, int task)
+void mpi_integrate_slave(int n_steps, int reuse_forces)
 {
-  integrate_vv(task, 0);
+  integrate_vv(n_steps, reuse_forces);
   COMM_TRACE(fprintf(stderr, "%d: integration task %d done.\n", this_node, task));
 
   check_runtime_errors();
diff --git a/src/communication.hpp b/src/communication.hpp
index f4e2c34fcc6..c2bb54a8bbb 100644
--- a/src/communication.hpp
+++ b/src/communication.hpp
@@ -310,9 +310,10 @@ void mpi_recv_part(int node, int part, Particle *part_data);
 
 /** Issue REQ_INTEGRATE: start integrator.
     @param n_steps how many steps to do.
+    @param reuse_forces whether to trust the old forces for the first half step
     @return nonzero on error
 */
-int mpi_integrate(int n_steps);
+int mpi_integrate(int n_steps, int reuse_forces);
 
 /** Issue REQ_BCAST_IA: send new ia params.
     Also calls \ref on_short_range_ia_change.
diff --git a/src/forces.cpp b/src/forces.cpp
index fddba00f0f0..8a0a96f1452 100644
--- a/src/forces.cpp
+++ b/src/forces.cpp
@@ -73,6 +73,27 @@ void init_forces();
 
 void force_calc()
 {
+  // Communication step: distribute ghost positions
+  cells_update_ghosts();
+
+  // VIRTUAL_SITES pos (and vel for DPD) update for security reason !!!
+#ifdef VIRTUAL_SITES
+  update_mol_vel_pos();
+  ghost_communicator(&cell_structure.update_ghost_pos_comm);
+#endif
+
+#if defined(VIRTUAL_SITES_RELATIVE) && defined(LB) 
+  // This is on a workaround stage: 
+  // When using virtual sites relative and LB at the same time, it is necessary 
+  // to reassemble the cell lists after all position updates, also of virtual
+  // particles. 
+  if ((lattice_switch & LATTICE_LB) && cell_structure.type == CELL_STRUCTURE_DOMDEC && (!dd.use_vList) ) 
+    cells_update_ghosts();
+#endif
+  
+#ifdef COLLISION_DETECTION
+  prepare_collision_queue();
+#endif
 
   espressoSystemInterface.update();
 
@@ -156,6 +177,32 @@ void force_calc()
   copy_forces_from_GPU();
 #endif
 
+  // VIRTUAL_SITES distribute forces
+#ifdef VIRTUAL_SITES
+  ghost_communicator(&cell_structure.collect_ghost_force_comm);
+  init_forces_ghosts();
+  distribute_mol_force();
+#endif
+
+  // Communication Step: ghost forces
+  ghost_communicator(&cell_structure.collect_ghost_force_comm);
+
+  // apply trap forces to trapped molecules
+#ifdef MOLFORCES         
+  calc_and_apply_mol_constraints();
+#endif
+
+  // should be pretty late, since it needs to zero out the total force
+#ifdef COMFIXED
+  calc_comfixed();
+#endif
+
+  // mark that forces are now up-to-date
+  recalc_forces = 0;
+
+#ifdef COLLISION_DETECTION
+  handle_collisions();
+#endif
 }
 
 /************************************************************/
diff --git a/src/integrate.cpp b/src/integrate.cpp
index 8bf63b7aa6c..e37a451c81d 100644
--- a/src/integrate.cpp
+++ b/src/integrate.cpp
@@ -252,58 +252,19 @@ void integrate_vv(int n_steps, int reuse_forces)
       if (warnings) fprintf (stderr, "Warning: Recalculating forces, so the LB coupling forces are not included in the particle force the first time step. This only matters if it happens frequently during sampling.\n");
 #endif
 
-//VIRTUAL_SITES pos (and vel for DPD) update for security reason !!!
-#ifdef VIRTUAL_SITES
-    update_mol_vel_pos();
-    ghost_communicator(&cell_structure.update_ghost_pos_comm);
-    if (check_runtime_errors()) return;
-#endif
-#ifdef COLLISION_DETECTION
-    prepare_collision_queue();
-#endif
-
-   force_calc();
-   
-   //VIRTUAL_SITES distribute forces
-#ifdef VIRTUAL_SITES
-   ghost_communicator(&cell_structure.collect_ghost_force_comm);
-   init_forces_ghosts();
-   distribute_mol_force();
-   if (check_runtime_errors()) return;
-#endif
-
-   ghost_communicator(&cell_structure.collect_ghost_force_comm);
+    force_calc();
 
+    rescale_forces();
 #ifdef ROTATION
     convert_initial_torques();
 #endif
-
-    thermo_cool_down();
-
-    /* Communication Step: ghost forces */
-
-
-    /*apply trap forces to trapped molecules*/
-#ifdef MOLFORCES         
-    calc_and_apply_mol_constraints();
-#endif
-
-    /* should be pretty late, since it needs to zero out the total force */
-#ifdef COMFIXED
-    calc_comfixed();
-#endif
-
-    rescale_forces();
-    recalc_forces = 0;
     
-#ifdef COLLISION_DETECTION
-    handle_collisions();
-#endif
+    thermo_cool_down();
   }
 
 #ifdef GHMC
-    if(thermo_switch & THERMO_GHMC)
-      ghmc_init();
+  if(thermo_switch & THERMO_GHMC)
+    ghmc_init();
 #endif
 
   if (check_runtime_errors())
@@ -355,29 +316,11 @@ void integrate_vv(int n_steps, int reuse_forces)
 #ifdef NPT
     if (check_runtime_errors())
       break;
-#endif
-
-    cells_update_ghosts();
-
-    //VIRTUAL_SITES update pos and vel (for DPD)
-#ifdef VIRTUAL_SITES
-    update_mol_vel_pos();
-    ghost_communicator(&cell_structure.update_ghost_pos_comm);
-
-    if (check_runtime_errors()) break;
-#if  defined(VIRTUAL_SITES_RELATIVE) && defined(LB) 
-    // This is on a workaround stage: 
-    // When using virtual sites relative and LB at the same time, it is necessary 
-    // to reassemble the cell lists after all position updates, also of virtual
-    // particles. 
-    if (cell_structure.type == CELL_STRUCTURE_DOMDEC && (!dd.use_vList) ) 
-      cells_update_ghosts();
-#endif
-
 #endif
 
     /* Integration Step: Step 3 of Velocity Verlet scheme:
        Calculate f(t+dt) as function of positions p(t+dt) ( and velocities v(t+0.5*dt) ) */
+
 #ifdef LB
     transfer_momentum = 1;
 #endif
@@ -385,35 +328,10 @@ void integrate_vv(int n_steps, int reuse_forces)
     transfer_momentum_gpu = 1;
 #endif
 
-#ifdef COLLISION_DETECTION
-    prepare_collision_queue();
-#endif
-
     force_calc();
 
-    //VIRTUAL_SITES distribute forces
-#ifdef VIRTUAL_SITES
-    ghost_communicator(&cell_structure.collect_ghost_force_comm);
-    init_forces_ghosts();
-    distribute_mol_force();
-    if (check_runtime_errors()) break;
-#endif
-
 #ifdef CATALYTIC_REACTIONS
-  integrate_reaction();
-#endif
-
-    /* Communication step: ghost forces */
-    ghost_communicator(&cell_structure.collect_ghost_force_comm);
-
-    /*apply trap forces to trapped molecules*/
-#ifdef MOLFORCES         
-    calc_and_apply_mol_constraints();
-#endif
-
-    /* should be pretty late, since it needs to zero out the total force */
-#ifdef COMFIXED
-    calc_comfixed();
+    integrate_reaction();
 #endif
 
     if (check_runtime_errors())
@@ -422,8 +340,22 @@ void integrate_vv(int n_steps, int reuse_forces)
     /* Integration Step: Step 4 of Velocity Verlet scheme:
        v(t+dt) = v(t+0.5*dt) + 0.5*dt * f(t+dt) */
     rescale_forces_propagate_vel();
-    recalc_forces = 0;
-    
+#ifdef ROTATION
+    convert_torques_propagate_omega();
+#endif
+    // SHAKE velocity updates
+#ifdef BOND_CONSTRAINT
+    ghost_communicator(&cell_structure.update_ghost_pos_comm);
+    correct_vel_shake();
+#endif
+    //VIRTUAL_SITES update vel
+#ifdef VIRTUAL_SITES
+    ghost_communicator(&cell_structure.update_ghost_pos_comm);
+    update_mol_vel();
+    if (check_runtime_errors()) break;
+#endif
+
+    // progagate one-step functionalities
 #ifdef LB
     if (lattice_switch & LATTICE_LB)
       lattice_boltzmann_update();
@@ -448,32 +380,12 @@ void integrate_vv(int n_steps, int reuse_forces)
     }
 #endif //LB_GPU
 
-#ifdef BOND_CONSTRAINT
-    ghost_communicator(&cell_structure.update_ghost_pos_comm);
-    correct_vel_shake();
-#endif
-
-#ifdef ROTATION
-    convert_torques_propagate_omega();
-#endif
-
-    //VIRTUAL_SITES update vel
-#ifdef VIRTUAL_SITES
-    ghost_communicator(&cell_structure.update_ghost_pos_comm);
-    update_mol_vel();
-    if (check_runtime_errors()) break;
-#endif
-
 #ifdef ELECTROSTATICS
     if(coulomb.method == COULOMB_MAGGS) {
       maggs_propagate_B_field(0.5*time_step); 
     }
 #endif
 
-#ifdef COLLISION_DETECTION
-    handle_collisions();
-#endif
-
 #ifdef NPT
     if((this_node==0) && (integ_switch == INTEG_METHOD_NPT_ISO))
       nptiso.p_inst_av += nptiso.p_inst;
diff --git a/src/tcl/integrate_tcl.cpp b/src/tcl/integrate_tcl.cpp
index e3782f7b712..7a65c3385bc 100644
--- a/src/tcl/integrate_tcl.cpp
+++ b/src/tcl/integrate_tcl.cpp
@@ -43,7 +43,7 @@ int tclcommand_invalidate_system(ClientData data, Tcl_Interp *interp, int argc,
 int tclcommand_integrate_print_usage(Tcl_Interp *interp) 
 {
   Tcl_AppendResult(interp, "Usage of tcl-command integrate:\n", (char *)NULL);
-  Tcl_AppendResult(interp, "'integrate <INT n steps>' for integrating n steps \n", (char *)NULL);
+  Tcl_AppendResult(interp, "'integrate [reuse_forces] <INT n steps>' for integrating n steps and reusing unconditionally the given forces for the first step \n", (char *)NULL);
   Tcl_AppendResult(interp, "'integrate set' for printing integrator status \n", (char *)NULL);
   Tcl_AppendResult(interp, "'integrate set nvt' for enabling NVT integration or \n" , (char *)NULL);
 #ifdef NPT
@@ -194,7 +194,7 @@ int tclcommand_integrate_set_npt_isotropic(Tcl_Interp *interp, int argc, char **
 
 int tclcommand_integrate(ClientData data, Tcl_Interp *interp, int argc, char **argv) 
 {
-  int  n_steps;
+  int  n_steps, reuse_forces = 0;
   
   INTEG_TRACE(fprintf(stderr,"%d: integrate:\n",this_node));
 
@@ -213,15 +213,21 @@ int tclcommand_integrate(ClientData data, Tcl_Interp *interp, int argc, char **a
       Tcl_AppendResult(interp, "unknown integrator method:\n", (char *)NULL);
       return tclcommand_integrate_print_usage(interp);
     }
-  } else if ( !ARG_IS_I(1,n_steps) ) return tclcommand_integrate_print_usage(interp);
-
+  } else {
+     // actual integration
+     if (ARG1_IS_S("reuse_forces") && (argc > 2)) {
+       reuse_forces = 1;
+       argc--; argv++;
+     }
+     if ( !ARG_IS_I(1,n_steps) ) return tclcommand_integrate_print_usage(interp);
+  }
   /* go on with integrate <n_steps> */
   if(n_steps < 0) {
     Tcl_AppendResult(interp, "illegal number of steps (must be >0) \n", (char *) NULL);
     return tclcommand_integrate_print_usage(interp);;
   }
   /* perform integration */
-  if (mpi_integrate(n_steps))
+  if (mpi_integrate(n_steps, reuse_forces))
     return gather_runtime_errors(interp, TCL_OK);
   return TCL_OK;
 }
diff --git a/src/tuning.cpp b/src/tuning.cpp
index d2ef29f84fc..fea24e5aa22 100644
--- a/src/tuning.cpp
+++ b/src/tuning.cpp
@@ -49,14 +49,14 @@ double time_force_calc(int default_samples)
   int rds = timing_samples > 0 ? timing_samples : default_samples;
   int i;
 
-  if (mpi_integrate(0))
+  if (mpi_integrate(0, 0))
     return -1;
 
   /* perform force calculation test */
   markTime();
   for (i = 0; i < rds; i++) {
     mpi_bcast_event(INVALIDATE_SYSTEM);
-    if (mpi_integrate(0))
+    if (mpi_integrate(0, 0))
       return -1;
   }
   markTime();

From d8346affebd1714a21bea642ae8a345b84f7ae76 Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Wed, 23 Apr 2014 15:04:28 +0200
Subject: [PATCH 23/34] Fixed lb_init errorhandling

---
 src/initialize.cpp |  8 +++--
 src/lb.cpp         | 85 +++++++++++++++++++++++-----------------------
 src/lbgpu.cpp      | 28 +++++++--------
 3 files changed, 61 insertions(+), 60 deletions(-)

diff --git a/src/initialize.cpp b/src/initialize.cpp
index b154302d546..2997e0c1092 100644
--- a/src/initialize.cpp
+++ b/src/initialize.cpp
@@ -165,10 +165,14 @@ void on_integration_start()
   reactions_sanity_checks();
 #endif
 #ifdef LB
-  lb_sanity_checks();
+  if(lattice_switch & LATTICE_LB) {
+    lb_sanity_checks();
+  }
 #endif
 #ifdef LB_GPU
-  lb_GPU_sanity_checks();
+  if(lattice_switch & LATTICE_LB_GPU) {
+    lb_GPU_sanity_checks();
+  }
 #endif
 
   /********************************************/
diff --git a/src/lb.cpp b/src/lb.cpp
index 8548cad5a66..da6e05427a5 100644
--- a/src/lb.cpp
+++ b/src/lb.cpp
@@ -1625,48 +1625,46 @@ int lb_sanity_checks() {
   char *errtext;
   int ret = 0;
 
-  if(lattice_switch & LATTICE_LB) {
-    if (lbpar.agrid <= 0.0) {
-      errtext = runtime_error(128);
-      ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set} ");
-      ret = 1;
-    }
-    if (lbpar.tau <= 0.0) {
-      errtext = runtime_error(128);
-      ERROR_SPRINTF(errtext,"{099 Lattice Boltzmann time step not set} ");
-      ret = 1;
-    }
-    if (lbpar.rho[0] <= 0.0) {
-      errtext = runtime_error(128);
-      ERROR_SPRINTF(errtext,"{100 Lattice Boltzmann fluid density not set} ");
-      ret = 1;
-    }
-    if (lbpar.viscosity[0] <= 0.0) {
-      errtext = runtime_error(128);
-      ERROR_SPRINTF(errtext,"{101 Lattice Boltzmann fluid viscosity not set} ");
-      ret = 1;
-    }
-    if (dd.use_vList && skin>=lbpar.agrid/2.0) {
-      errtext = runtime_error(128);
-      ERROR_SPRINTF(errtext, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} ");
-      ret = 1;
-    }
-    if (cell_structure.type != CELL_STRUCTURE_DOMDEC) {
-      errtext = runtime_error(128);
-      ERROR_SPRINTF(errtext, "{103 LB requires domain-decomposition cellsystem} ");
-      ret = -1;
-    } 
-    else if (dd.use_vList && skin>=lbpar.agrid/2.0) {
-      errtext = runtime_error(128);
-      ERROR_SPRINTF(errtext, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} ");
-      ret = -1;
-    }
-
-    if (thermo_switch & ~THERMO_LB) {
-      errtext = runtime_error(128);
-      ERROR_SPRINTF(errtext, "{122 LB must not be used with other thermostats} ");
-      ret = 1;
-    }
+  if (lbpar.agrid <= 0.0) {
+    errtext = runtime_error(128);
+    ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set} ");
+    ret = 1;
+  }
+  if (lbpar.tau <= 0.0) {
+    errtext = runtime_error(128);
+    ERROR_SPRINTF(errtext,"{099 Lattice Boltzmann time step not set} ");
+    ret = 1;
+  }
+  if (lbpar.rho[0] <= 0.0) {
+    errtext = runtime_error(128);
+    ERROR_SPRINTF(errtext,"{100 Lattice Boltzmann fluid density not set} ");
+    ret = 1;
+  }
+  if (lbpar.viscosity[0] <= 0.0) {
+    errtext = runtime_error(128);
+    ERROR_SPRINTF(errtext,"{101 Lattice Boltzmann fluid viscosity not set} ");
+    ret = 1;
+  }
+  if (dd.use_vList && skin>=lbpar.agrid/2.0) {
+    errtext = runtime_error(128);
+    ERROR_SPRINTF(errtext, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} ");
+    ret = 1;
+  }
+  if (cell_structure.type != CELL_STRUCTURE_DOMDEC) {
+    errtext = runtime_error(128);
+    ERROR_SPRINTF(errtext, "{103 LB requires domain-decomposition cellsystem} ");
+    ret = -1;
+  } 
+  else if (dd.use_vList && skin>=lbpar.agrid/2.0) {
+    errtext = runtime_error(128);
+    ERROR_SPRINTF(errtext, "{104 LB requires either no Verlet lists or that the skin of the verlet list to be less than half of lattice-Boltzmann grid spacing.} ");
+    ret = -1;
+  }
+  
+  if (thermo_switch & ~THERMO_LB) {
+    errtext = runtime_error(128);
+    ERROR_SPRINTF(errtext, "{122 LB must not be used with other thermostats} ");
+    ret = 1;
   }
   return ret;
 }
@@ -1875,7 +1873,8 @@ double pi[6] = { 0., 0., 0., 0., 0., 0. };
 void lb_init() {
 
   LB_TRACE(printf("Begin initialzing fluid on CPU\n"));
-  if (lb_sanity_checks()) return;
+  lb_sanity_checks();
+  if (check_runtime_errors()) return;
 
   /* initialize the local lattice domain */
   init_lattice(&lblattice,lbpar.agrid,lbpar.tau);  
diff --git a/src/lbgpu.cpp b/src/lbgpu.cpp
index eb7df236adb..1112d7546fa 100644
--- a/src/lbgpu.cpp
+++ b/src/lbgpu.cpp
@@ -383,24 +383,22 @@ int lb_lbnode_set_extforce_GPU(int ind[3], double f[3])
 void lb_GPU_sanity_checks()
 {
   if(this_node == 0){
-    if(lattice_switch & LATTICE_LB_GPU) {
-      if (lbpar_gpu.agrid < 0.0) {
+    if (lbpar_gpu.agrid < 0.0) {
+      char *errtext = runtime_error(128);
+      ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set} ");
+    }
+    if (lbpar_gpu.tau < 0.0) {
+      char *errtext = runtime_error(128);
+      ERROR_SPRINTF(errtext,"{099 Lattice Boltzmann time step not set} ");
+    }
+    for(int i=0;i<LB_COMPONENTS;i++){
+      if (lbpar_gpu.rho[0] < 0.0) {
         char *errtext = runtime_error(128);
-        ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set} ");
+        ERROR_SPRINTF(errtext,"{100 Lattice Boltzmann fluid density not set} ");
       }
-      if (lbpar_gpu.tau < 0.0) {
+      if (lbpar_gpu.viscosity[0] < 0.0) {
         char *errtext = runtime_error(128);
-        ERROR_SPRINTF(errtext,"{099 Lattice Boltzmann time step not set} ");
-      }
-      for(int i=0;i<LB_COMPONENTS;i++){
-        if (lbpar_gpu.rho[0] < 0.0) {
-          char *errtext = runtime_error(128);
-          ERROR_SPRINTF(errtext,"{100 Lattice Boltzmann fluid density not set} ");
-        }
-        if (lbpar_gpu.viscosity[0] < 0.0) {
-          char *errtext = runtime_error(128);
-          ERROR_SPRINTF(errtext,"{101 Lattice Boltzmann fluid viscosity not set} ");
-        }
+        ERROR_SPRINTF(errtext,"{101 Lattice Boltzmann fluid viscosity not set} ");
       }
     }
   }

From 3c72b607f60dbe223e6b26369a598c9518d821f9 Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Wed, 23 Apr 2014 16:07:23 +0200
Subject: [PATCH 24/34] Fixed lb_init sanity checking

---
 src/lb.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/lb.cpp b/src/lb.cpp
index da6e05427a5..ac85e6ae6df 100644
--- a/src/lb.cpp
+++ b/src/lb.cpp
@@ -1873,7 +1873,11 @@ double pi[6] = { 0., 0., 0., 0., 0., 0. };
 void lb_init() {
 
   LB_TRACE(printf("Begin initialzing fluid on CPU\n"));
-  lb_sanity_checks();
+
+  if (lbpar.agrid <= 0.0) {
+    char *errtext = runtime_error(128);
+    ERROR_SPRINTF(errtext,"{098 Lattice Boltzmann agrid not set when initializing fluid} ");
+  }
   if (check_runtime_errors()) return;
 
   /* initialize the local lattice domain */

From d19bf34b2abcb3968b3b4845562af249a2b9570e Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Wed, 23 Apr 2014 16:36:36 +0200
Subject: [PATCH 25/34] lb no longer touches resort_particles

---
 doc/ug/run.tex | 23 ++++++++++++++++++++++-
 src/lb.cpp     | 19 +++++--------------
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/doc/ug/run.tex b/doc/ug/run.tex
index 3f9420a6d69..a5cd4754f78 100644
--- a/doc/ug/run.tex
+++ b/doc/ug/run.tex
@@ -24,7 +24,7 @@ \section{\texttt{integrate}: Running the simulation}
 \newescommand{integrate}
 
 \begin{essyntax}
-  \variant{1} integrate \var{steps}
+  \variant{1} integrate \opt{reuse_forces} \var{steps}
   \variant{2} integrate set \opt{nvt}
   \variant{3} integrate set npt_isotropic \var{p_{ext}} \var{piston} \opt{\var{x\: y\: z}} \opt{-cubic_box}
 \end{essyntax}
@@ -34,6 +34,27 @@ \section{\texttt{integrate}: Running the simulation}
 \texttt{steps} as parameter integrates the system for \texttt{steps}
 time steps.
 
+Note that this implementation of the Velocity Verlet algorithm reuses forces,
+that is, they are computed once in the middle of the time step, but used twice,
+at the beginning and end. However, in the first time step after setting up,
+there are no forces present yet. Therefore, \es{} has to compute them before the
+first time step. That has two consequences: first, random forces are redrawn,
+resulting in a narrower distribution of the random forces, which we compensate
+by stretching. Second, coupling forces of e.\,g. the Lattice Boltzmann fluid cannot
+be computed and are therefore lacking in the first half time step. In order to
+minimize these effects, \es{} has a quite conservative heuristics to decide whether
+a change makes it necessary to recompute forces before the first time step. Therefore,
+calling hundred times \texttt{integrate 1} does the same as \textt{integrate 100},
+apart from some small calling overhead.
+
+However, for checkpointing, there is no way for \es{} to tell that the forces that you
+read back in actually match the parameters that are set. Therefore, \es{} would recompute
+the forces before the first time step, which makes it essentially impossible to checkpoint
+LB simulations, where it is vital to keep the coupling forces. Therefore, \texttt{integrate}
+has an additional parameter \opt{reuse_forces}, which tells integrate to not recalculate
+the forces for the first time step, but use that the values still stored with the particles.
+Use this only if you are absolutely sure that the forces stored match your current setup!
+
 Two methods for the integration can be set: For an NVT ensemble
 (thermostat) and for an NPT isotropic ensemble (barostat). The current
 method can be detected with the command \texttt{integrate set} without
diff --git a/src/lb.cpp b/src/lb.cpp
index ac85e6ae6df..0f2cfef21bb 100644
--- a/src/lb.cpp
+++ b/src/lb.cpp
@@ -880,20 +880,6 @@ int lb_lbfluid_save_checkpoint(char* filename, int binary) {
   return ES_OK;
 }
 int lb_lbfluid_load_checkpoint(char* filename, int binary) {
-  if (lattice_switch & LATTICE_LB) {
-    fprintf (stderr, "Loading an LB checkpoint requires first that all particles and forces are loaded into the system and then an integrate 0, this is required for the reformation of the the neighbor lists. After this integration the particle data must then be reloaded just prior to loading the LB checkpoint file. This is a rather inelegant hack to make the checkpointing work correctly.\n");
-    recalc_forces = 0;      //Indicates the forces need not be recalculated
-    resort_particles = 0;   //Prevents a call of on_resort_particles which gets called when the particle data is reset and then set recalc_forces = 1
-  }
-  else if (lattice_switch & LATTICE_LB_GPU) {
-    fprintf (stderr, "Loading an LB GPU checkpoint requires first that all particles and forces are loaded into the system and then an integrate 0, this is required for the reformation of the neighbor lists. After this integration the particle data must then be reloaded just prior to loading the LB GPU checkpoint file. This is a rather inelegant hack to make the checkpointing work correctly.\n");
-    recalc_forces = 0;      //Indicates the forces need not be recalculated
-    resort_particles = 0;   //Prevents a call of on_resort_particles which gets called when the particle data is reset and then set recalc_forces = 1
-  }
-  else {
-    fprintf (stderr, "To load an LB checkpoint one needs to have already initialized the LB fluid with the same grid size.\n");
-    return ES_ERROR;
-  }
   if(lattice_switch & LATTICE_LB_GPU) {
 #ifdef LB_GPU
     FILE* cpfile;
@@ -969,6 +955,11 @@ int lb_lbfluid_load_checkpoint(char* filename, int binary) {
 //  mpi_bcast_lb_params(0);
 #endif
   }
+  else {
+    char *errtext = runtime_error(128);
+    ERROR_SPRINTF(errtext,"{To load an LB checkpoint one needs to have already initialized the LB fluid with the same grid size.}");
+    return ES_ERROR;
+  }
   return ES_OK;
 }
 

From f35daee5cdd8a2e9f8b918722de0c391949fb93d Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Wed, 23 Apr 2014 22:42:14 +0200
Subject: [PATCH 26/34] Fixed explicit template instantation

---
 src/EspressoSystemInterface.cpp | 6 ++++++
 src/EspressoSystemInterface.hpp | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/EspressoSystemInterface.cpp b/src/EspressoSystemInterface.cpp
index 32b47e7a32f..d89c3ada9d9 100644
--- a/src/EspressoSystemInterface.cpp
+++ b/src/EspressoSystemInterface.cpp
@@ -6,6 +6,12 @@
 
 #include <iostream>
 
+/* Need explicite specialization, otherwise some compilers do not produce the objects. */
+
+template class EspressoSystemInterface::const_iterator<SystemInterface::Real>;
+template class EspressoSystemInterface::const_iterator<SystemInterface::Vector3>;
+template class EspressoSystemInterface::const_iterator<int>;
+
 /********************************************************************************************/
 
 template<class value_type>
diff --git a/src/EspressoSystemInterface.hpp b/src/EspressoSystemInterface.hpp
index 79ec8848461..59111496a52 100644
--- a/src/EspressoSystemInterface.hpp
+++ b/src/EspressoSystemInterface.hpp
@@ -153,12 +153,6 @@ class EspressoSystemInterface : public SystemInterface {
   bool m_splitParticleStructGpu;
 };
 
-/* Need explicite specialization, otherwise some compilers do not produce the objects. */
-
-template class EspressoSystemInterface::const_iterator<SystemInterface::Real>;
-template class EspressoSystemInterface::const_iterator<SystemInterface::Vector3>;
-template class EspressoSystemInterface::const_iterator<int>;
-
 extern EspressoSystemInterface espressoSystemInterface;
 
 #endif

From 025943bade4a44b1fc1cadf034397b9bf87e7339 Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Wed, 23 Apr 2014 22:54:32 +0200
Subject: [PATCH 27/34] Fixed typo

---
 doc/ug/run.tex | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/ug/run.tex b/doc/ug/run.tex
index a5cd4754f78..9f3b5aa81e8 100644
--- a/doc/ug/run.tex
+++ b/doc/ug/run.tex
@@ -44,7 +44,7 @@ \section{\texttt{integrate}: Running the simulation}
 be computed and are therefore lacking in the first half time step. In order to
 minimize these effects, \es{} has a quite conservative heuristics to decide whether
 a change makes it necessary to recompute forces before the first time step. Therefore,
-calling hundred times \texttt{integrate 1} does the same as \textt{integrate 100},
+calling hundred times \texttt{integrate 1} does the same as \texttt{integrate 100},
 apart from some small calling overhead.
 
 However, for checkpointing, there is no way for \es{} to tell that the forces that you

From 9820e7c0da1b906a9be856a2d35b5ad5eedc1c8d Mon Sep 17 00:00:00 2001
From: Florian Weik <fweik@icp.uni-stuttgart.de>
Date: Thu, 24 Apr 2014 10:15:28 +0200
Subject: [PATCH 28/34] Removed stray debug messages from p3m_gpu.

---
 src/p3m_gpu_cuda.cu | 27 +++++++++++++++++++--------
 src/tcl/p3m_tcl.cpp |  2 --
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/src/p3m_gpu_cuda.cu b/src/p3m_gpu_cuda.cu
index 0e6f8e1f2ce..1675c55d28f 100644
--- a/src/p3m_gpu_cuda.cu
+++ b/src/p3m_gpu_cuda.cu
@@ -19,7 +19,7 @@
 
 /** \file p3m_gpu_cuda.cu
  *
- * Cuda (.cu) file for the Lattice Boltzmann implementation on GPUs.
+ * Cuda (.cu) file for the P3M electrostatics method.
  * Header file \ref p3m_gpu.hpp .
  */ 
 
@@ -33,8 +33,7 @@
 #include "config.hpp"
 #include "p3m_gpu.hpp"
 #include "utils.hpp"
-
-//#include <iostream>
+#include "EspressoSystemInterface.hpp"
 
 #ifdef ELECTROSTATICS
 
@@ -508,12 +507,19 @@ extern "C" {
    */
 
   void p3m_gpu_init(int cao, int mesh, REAL_TYPE alpha, REAL_TYPE box) {
-    gpu_init_particle_comm();
+    puts("p3m_gpu_init()");
+    //    gpu_init_particle_comm();
     int reinit_if = 0, mesh_changed = 0;
  
+    espressoSystemInterface.requestParticleStructGpu();
+
     if ( this_node == 0 ) {
+      
+
       p3m_gpu_data.npart = gpu_get_global_particle_vars_pointer_host()->number_of_particles;
       
+      //      printf("p3m_gpu_data.npart = %d\n", p3m_gpu_data.npart);
+
       if((p3m_gpu_data_initialized == 0) || (p3m_gpu_data.alpha != alpha)) {
 	p3m_gpu_data.alpha = alpha;
 	reinit_if = 1;
@@ -550,6 +556,7 @@ extern "C" {
 
 	p3m_gpu_data.G_hat_host = (REAL_TYPE *)malloc(mesh3*sizeof(REAL_TYPE));
       
+	//	printf("mesh3 = %d, p3m_gpu_data.charge_mesh = %p\n", mesh3, p3m_gpu_data.charge_mesh);
 
 	cufftDestroy(p3m_gpu_data.fft_plan);
 	cufftPlan3d(&(p3m_gpu_data.fft_plan), mesh, mesh, mesh, CUFFT_PLAN_FLAG);
@@ -573,10 +580,14 @@ extern "C" {
       // cudaMemcpy( p3m_gpu_data.G_hat, p3m_gpu_data.G_hat_host, mesh3*sizeof(REAL_TYPE), cudaMemcpyHostToDevice);
 	dim3 grid(1,1,1);
 	dim3 block(1,1,1);
-        block.y = block.z = mesh;
-	block.x = 512 - mesh*mesh;
-	block.x -= block.x / 32;
+        block.y = mesh;
+	block.z = 1;
+	block.x = 512 / mesh + 1;
 	grid.x = mesh / block.x + 1;
+	grid.z = mesh;
+
+	//	printf("mesh %d, grid (%d %d %d), block (%d %d %d)\n", mesh, grid.x, grid.y, grid.z, block.x, block.y, block.z);
+
 	KERNELCALL(calculate_influence_function_device,grid,block,(cao, mesh, box, alpha, p3m_gpu_data.G_hat));
 	cudaThreadSynchronize();
       }
@@ -612,7 +623,7 @@ void p3m_gpu_add_farfield_force() {
   REAL_TYPE hi = mesh/box;
   REAL_TYPE prefactor = 1.0/(box*box*box*2.0);
 
-  cudaMemset( p3m_gpu_data.charge_mesh, 0, mesh3*sizeof(CUFFT_TYPE_COMPLEX));
+  cuda_safe_mem(cudaMemset( p3m_gpu_data.charge_mesh, 0, mesh3*sizeof(CUFFT_TYPE_COMPLEX)));
 
   KERNELCALL(assign_charges, gridAssignment, threadsAssignment, (lb_particle_gpu,p3m_gpu_data.charge_mesh,mesh,cao,pos_shift,hi));
 
diff --git a/src/tcl/p3m_tcl.cpp b/src/tcl/p3m_tcl.cpp
index b781542357c..5de9fd18f11 100644
--- a/src/tcl/p3m_tcl.cpp
+++ b/src/tcl/p3m_tcl.cpp
@@ -24,8 +24,6 @@
 #include "p3m_tcl.hpp"
 #include "p3m.hpp"
 
-
-
 int tclcommand_inter_coulomb_parse_p3m_tune(Tcl_Interp * interp, int argc, char ** argv, int adaptive)
 {
   int cao = -1, n_interpol = -1;

From bba3881ec17c71c056b12cfa9f0a2e21b49792e1 Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Thu, 24 Apr 2014 16:10:55 +0200
Subject: [PATCH 29/34] Workaround for compiling with CUDA/openmpi on MacOSX.

---
 src/EspressoSystemInterface.hpp |  1 -
 src/tcl/Makefile.am             | 22 +++++++++++++++++++++-
 src/tcl/cuda_workaround.cu      |  3 +++
 3 files changed, 24 insertions(+), 2 deletions(-)
 create mode 100644 src/tcl/cuda_workaround.cu

diff --git a/src/EspressoSystemInterface.hpp b/src/EspressoSystemInterface.hpp
index 59111496a52..8b0ceb7198b 100644
--- a/src/EspressoSystemInterface.hpp
+++ b/src/EspressoSystemInterface.hpp
@@ -4,7 +4,6 @@
 #define ESIF_TRACE(A)
 
 #include "SystemInterface.hpp"
-#include "particle_data.hpp"
 #include "cuda_interface.hpp"
 
 class EspressoSystemInterface : public SystemInterface {
diff --git a/src/tcl/Makefile.am b/src/tcl/Makefile.am
index 8f7f9d6ab1e..50a05db489e 100644
--- a/src/tcl/Makefile.am
+++ b/src/tcl/Makefile.am
@@ -153,7 +153,6 @@ Espresso_install_CPPFLAGS = -D ESPRESSO_SCRIPTS_DEFAULT=\"$(scriptsdir)\" $(AM_C
 Espresso_install_SOURCES = scriptsdir.cpp main.cpp
 Espresso_install_LDADD = libEspressoTcl.la ../libEspresso.la 
 
-
 ESPRESSO = `echo Espresso | sed '$(transform)'`$(EXEEXT)
 ESPRESSO_INSTALL = `echo Espresso.install | sed '$(transform)'`$(EXEEXT)
 # rename Espresso after installation
@@ -164,3 +163,24 @@ install-exec-hook:
 
 uninstall-local:
 	-rm -f $(DESTDIR)$(bindir)/$(ESPRESSO)
+
+#################################################################
+# Build an empty CUDA object
+#################################################################
+# this is necessary since Xcode 4.3 otherwise builds binaries
+# with a broken dyld relocation table
+if CUDA
+
+SUFFIXES=.cu
+
+cuda_verbose = $(cuda_verbose_@AM_V@)
+cuda_verbose_ = $(cuda_verbose_@AM_DEFAULT_V@)
+cuda_verbose_0 = @echo "  NVCC     $@";
+.cu.o:
+	$(NVCC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+		$(AM_NVCCFLAGS) $(NVCCFLAGS) -c -o $@ $<
+
+Espresso_SOURCES += cuda_workaround.cu
+Espresso_install_SOURCES += cuda_workaround.cu
+
+endif
diff --git a/src/tcl/cuda_workaround.cu b/src/tcl/cuda_workaround.cu
new file mode 100644
index 00000000000..59183814966
--- /dev/null
+++ b/src/tcl/cuda_workaround.cu
@@ -0,0 +1,3 @@
+/* Just an empty file that is compiled as workaround for CUDA with Xcode 4.3 and openmpi.
+   If CUDA files are only included via archives, the linking breaks and creates unstartable binaries.
+ */

From 843e16925479149593404d1a2b7b3ca6b5b32679 Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Fri, 25 Apr 2014 11:38:50 +0200
Subject: [PATCH 30/34] Fixed integrator debug messages

---
 src/communication.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/communication.cpp b/src/communication.cpp
index 48aab428a4e..0d007eeb52d 100644
--- a/src/communication.cpp
+++ b/src/communication.cpp
@@ -1199,7 +1199,7 @@ int mpi_integrate(int n_steps, int reuse_forces)
 void mpi_integrate_slave(int n_steps, int reuse_forces)
 {
   integrate_vv(n_steps, reuse_forces);
-  COMM_TRACE(fprintf(stderr, "%d: integration task %d done.\n", this_node, task));
+  COMM_TRACE(fprintf(stderr, "%d: integration for %d n_steps with %d reuse_forces done.\n", this_node, n_steps, reuse_forces));
 
   check_runtime_errors();
 }

From 5da530cf08a609353bbbff5af24033309f94afcd Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Mon, 28 Apr 2014 15:22:10 +0200
Subject: [PATCH 31/34] HARMONICFORCE now part of maxset

---
 maintainer/jenkins/configs/maxset.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/maintainer/jenkins/configs/maxset.hpp b/maintainer/jenkins/configs/maxset.hpp
index 1049791bb45..70aaa3e760a 100644
--- a/maintainer/jenkins/configs/maxset.hpp
+++ b/maintainer/jenkins/configs/maxset.hpp
@@ -36,6 +36,7 @@
 #define ELECTROKINETICS
 #define EK_BOUNDARIES
 #define EK_REACTION
+#define HARMONICFORCE
 #endif
 
 #define AREA_FORCE_GLOBAL   

From eb12ec31be9b6a08488e1a56a895654b45d91170 Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Mon, 28 Apr 2014 16:01:20 +0200
Subject: [PATCH 32/34] Bumped compute model to 1.1, fixed compiling w/o cuda

---
 configure.ac                    | 4 ++--
 doc/ug/installation.tex         | 7 ++++---
 src/EspressoSystemInterface.hpp | 8 +++++++-
 src/initialize.cpp              | 2 +-
 4 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/configure.ac b/configure.ac
index ba211e1a2ed..ad716176d7a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -300,10 +300,10 @@ AS_IF([test x$with_cuda != xno],[
     # NVCC compile check
     AC_MSG_CHECKING([whether CUDA compiles])
     
-    # if no other compute capability is defined by the user, we require at least 1.1
+    # if no other compute capability is defined by the user, we default to 2.0
     case "$NVCCFLAGS" in
        *-arch=*) ;;
-       *) NVCCFLAGS="$NVCCFLAGS --ptxas-options=-v -gencode arch=compute_11,code=compute_11 -gencode arch=compute_20,code=compute_20"
+       *) NVCCFLAGS="$NVCCFLAGS --ptxas-options=-v -gencode arch=compute_20,code=compute_20"
     esac
     
     # use nvcc
diff --git a/doc/ug/installation.tex b/doc/ug/installation.tex
index ff4b382042b..d9ed7532b8b 100644
--- a/doc/ug/installation.tex
+++ b/doc/ug/installation.tex
@@ -155,9 +155,10 @@ \subsection{Options}
   be used to define compiler flags for the NVIDIA CUDA-compiler
   \texttt{nvcc}. For example, \texttt{NVCCFLAGS = "{}-gencode
     arch=compute_20,code=sm_20"{}} will compile code only for Fermi
-  cards.  Default is to compile for compute models 1.1 and 2.0,
-  i.e. everything with a G90 chip or newer.  Note that we require at
-  least compute model 1.1.
+  cards.  Default is to compile for compute model 2.0,
+  i.e. everything with a Fermi chip or newer.  Note that we require at
+  least compute model 1.1, that is G90. However, to use G90 (e.\,g.~Tesla
+  C1060), you need to manually specificy compute model 1.1.
 \end{description}
 
 \section{\texttt{make}: Compiling,  testing and installing \es}
diff --git a/src/EspressoSystemInterface.hpp b/src/EspressoSystemInterface.hpp
index 8b0ceb7198b..409a02220be 100644
--- a/src/EspressoSystemInterface.hpp
+++ b/src/EspressoSystemInterface.hpp
@@ -101,16 +101,21 @@ class EspressoSystemInterface : public SystemInterface {
       enableParticleCommunication();
     return m_needsFGpu;
   };
+#endif
 
   unsigned int npart_gpu() {
+#ifdef CUDA
     return m_gpu_npart;
+#else
+    return 0;
+#endif
   };
 
-#endif
 
 protected:
   void gatherParticles();
   void split_particle_struct();
+#ifdef CUDA
   void enableParticleCommunication() {
     if(!gpu_get_global_particle_vars_pointer_host()->communication_enabled) {
       ESIF_TRACE(puts("gpu communication not enabled;"));
@@ -121,6 +126,7 @@ class EspressoSystemInterface : public SystemInterface {
     }
   };
   void reallocDeviceMemory(int n);
+#endif
 
   Vector3Container R;
   #ifdef ELECTROSTATICS
diff --git a/src/initialize.cpp b/src/initialize.cpp
index 2997e0c1092..3ac7b25fd8e 100644
--- a/src/initialize.cpp
+++ b/src/initialize.cpp
@@ -274,7 +274,7 @@ void on_particle_change()
 #ifdef LB_GPU
   lb_reinit_particles_gpu = 1;
 #endif
-#if defined(LB_GPU) || defined (ELECTROSTATICS)
+#ifdef CUDA
   reinit_particle_comm_gpu = 1;
 #endif
   invalidate_obs();

From 772543580fde6ca60ad2ae95449060efb4990930 Mon Sep 17 00:00:00 2001
From: Axel Arnold <arnolda@icp.uni-stuttgart.de>
Date: Mon, 28 Apr 2014 17:15:46 +0200
Subject: [PATCH 33/34] Made HarmonicForce.lo to be included only with CUDA

---
 src/Makefile.am | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/Makefile.am b/src/Makefile.am
index 7a7a2d5149c..afb41717d29 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -107,7 +107,7 @@ libEspresso_la_SOURCES = \
 	Vector.hpp \
 	SystemInterface.hpp \
 	EspressoSystemInterface.hpp EspressoSystemInterface.cpp \
-	HarmonicForce.hpp HarmonicForce.cu
+	HarmonicForce.hpp
 
 # nonbonded potentials and forces
 libEspresso_la_SOURCES += \
@@ -196,7 +196,8 @@ CUDA_SOURCES = \
 	electrokinetics.hpp \
 	lbgpu_cuda.cu \
 	p3m_gpu_cuda.cu \
-	EspressoSystemInterface_cuda.cu
+	EspressoSystemInterface_cuda.cu \
+        HarmonicForce.cu
 
 libEspresso_la_SOURCES += $(CUDA_SOURCES)
 EXTRA_DIST += $(CUDA_SOURCES)

From b458fc8c6267d391e4664f87d796c998212dbe32 Mon Sep 17 00:00:00 2001
From: Marcello Sega <sega@roma2.infn.it>
Date: Tue, 29 Apr 2014 17:26:39 +0200
Subject: [PATCH 34/34] fixes VMD crash with long lines in vtf files

---
 scripts/vtf.tcl | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/scripts/vtf.tcl b/scripts/vtf.tcl
index 2030da82e19..870fae0c94d 100644
--- a/scripts/vtf.tcl
+++ b/scripts/vtf.tcl
@@ -195,8 +195,20 @@ proc writevsf { file args } {
             lappend aids "$from:$to"
         }
         unset from
-        
-        puts $file "atom [join $aids ,] $desc"
+        # let's group atom ranges, so that there are no more than 8 per line
+        # This fixes a problem with the vmd plugin, and makes the vtf more 
+        # readable anyway.
+        set start 0
+        set maxlen 8
+        set ll  [llength [lrange $aids $start end ]]
+        while { $ll >= $maxlen } {
+              puts $file "atom [join [lrange $aids $start [expr $start + $maxlen -1]] ,] $desc"
+              incr start $maxlen
+              set ll  [llength [lrange $aids $start end ]]
+        }
+        if { $start < [llength $aids ] } { 
+          puts $file "atom [join [lrange $aids  $start  end] ,] $desc"
+        }
     }
     
     # Print bond data