from argparse import ArgumentParser

from utils.args import parse_args
from systems import System
import matplotlib.pyplot as plt

parser = ArgumentParser()
parser.add_argument('--out', type=str, default=None, help="Specify a path where the data will be stored.")
parser.add_argument('--config', type=str, help='Path to the config yaml file')

# system configuration
parser.add_argument('--test_system', type=str,
                    choices=['double_well', 'double_well_hard', 'double_well_dual_channel', 'mueller_brown'])
parser.add_argument('--start', type=str, help="Path to a PDB file with the start structure A")
parser.add_argument('--target', type=str, help="Path to a PDB file with the target structure B")

parser.add_argument('--T', type=float, required=True,
                    help="Transition time in the base unit of the system. For molecular simulations, this is in picoseconds.")
parser.add_argument('--xi', type=float, required=True,
                    help="Noise scale xi used in the loss and as the diffusion coefficient when sampling stochastic paths.")

# training
parser.add_argument('--epochs', type=int, default=10_000, help="Number of epochs to train for.")
parser.add_argument('--BS', type=int, default=512, help="Batch size used for training.")
parser.add_argument('--lr', type=float, default=1e-4, help="Learning rate")

parser.add_argument('--seed', type=int, default=1, help="The seed used for initialization")

# inference
parser.add_argument('--num_paths', type=int, default=1000, help="The number of paths that will be generated.")
parser.add_argument('--dt', type=float, required=True,
                    help="Step size used when integrating the learned paths.")
# TODO: add a sampling method. It would be easy to just do a few MD steps from A and then use those. Might also be out of distribution, not sure.
# TODO: I think this could also be a reason why the paths are all the same.
# TODO: maybe we could also use MD_STEP(A) and MD_STEP(B) as a dynamic input to the neural network instead of using fixed A and B.


# TODO: remove this
# parser.add_argument('--mechanism', type=str, choices=['one-way-shooting', 'two-way-shooting'], required=True)
# parser.add_argument('--states', type=str, default='phi-psi', choices=['phi-psi', 'rmsd'])
# parser.add_argument('--fixed_length', type=int, default=0)
# parser.add_argument('--warmup', type=int, default=0)
# parser.add_argument('--num_steps', type=int, default=10,
#                     help='The number of MD steps taken at once. More takes longer to compile but runs faster in the end.')
# parser.add_argument('--resume', action='store_true')
# parser.add_argument('--override', action='store_true')
# parser.add_argument('--ensure_connected', action='store_true',
#                     help='Ensure that the initial path connects A with B by prepending A and appending B.')

if __name__ == '__main__':
    args = parse_args(parser)
    assert args.test_system or (args.start and args.target), \
        "Either specify a test system or provide start and target structures"
    assert not (args.test_system and args.start and args.target), \
        "Specify either a test system or provide start and target structures, not both"

    print(f'Config: {args}')

    if args.test_system:
        system = System.from_name(args.test_system)
    else:
        raise NotImplementedError
        # system = System.from_forcefield(args.start, args.target)

    # Deferred imports: JAX and the model code are only loaded once the arguments have been parsed.
    import jax.numpy as jnp
    import jax
    from tqdm import trange
    from flax.training import train_state
    import optax
    import model.diagonal as diagonal
    from model.train import train
    from model import MLPq

    N = int(args.T / args.dt)  # number of integration steps of size dt covering the transition time T

    # You can play around with any model here
    model = MLPq([128, 128, 128])
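    # The model maps a batch of times t of shape (BS, 1) to the parameters mu_t and
    # sigma_t of a Gaussian path; its third output is unused here.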

    # TODO: parameterize mixtures, weights, and base_sigma
    base_sigma = 2.5 * 1e-2
    # Presumably 1 mixture component with fixed (non-trainable) weights; see the TODO above.
    setup = diagonal.FirstOrderSetup(system, model, args.T, 1, False, base_sigma)

    key = jax.random.PRNGKey(args.seed)
    key, init_key = jax.random.split(key)
    # Initialize the model parameters with a dummy batch of time inputs of shape (BS, 1).
    params_q = setup.model_q.init(init_key, jnp.ones([args.BS, 1]))

    optimizer_q = optax.adam(learning_rate=args.lr)
    state_q = train_state.TrainState.create(apply_fn=setup.model_q.apply, params=params_q, tx=optimizer_q)
    loss_fn = setup.construct_loss(state_q, args.xi, args.BS)

    key, train_key = jax.random.split(key)
    state_q, loss_plot = train(loss_fn, state_q, args.epochs, train_key)
    print("Number of potential evaluations:", args.BS * args.epochs)

    plt.plot(loss_plot)
    plt.show()

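    # Visualize the learned marginals: draw one sample x ~ N(mu_t, sigma_t^2 I) at each
    # of BS time points spread uniformly over [0, T], via the reparameterization
    # x = mu_t + sigma_t * eps with eps ~ N(0, I).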
    t = args.T * jnp.linspace(0, 1, args.BS).reshape((-1, 1))
    key, path_key = jax.random.split(key)
    eps = jax.random.normal(path_key, [args.BS, 2])
    mu_t, sigma_t, _ = state_q.apply_fn(state_q.params, t)
    samples = mu_t + sigma_t * eps
    # plot_energy_surface()
    # plt.scatter(samples[:, 0], samples[:, 1])
    # plt.scatter(A[0, 0], A[0, 1], color='red')
    # plt.scatter(B[0, 0], B[0, 1], color='orange')
    # plt.show()

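    # Reconstruct the deterministic flow that transports samples along the learned
    # Gaussian path q_t = N(mu(t), sigma(t)^2 I): matching these marginals requires
    # the drift u(t, x) = mu'(t) + (sigma'(t) / sigma(t)) * (x - mu(t)).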
    # Redefine mu_t and sigma_t as functions of time (shadowing the arrays above).
    mu_t = lambda _t: state_q.apply_fn(state_q.params, _t)[0]
    sigma_t = lambda _t: state_q.apply_fn(state_q.params, _t)[1]


    def dmudt(_t):
        # Each row of mu_t(_t) depends only on its own time entry, so the Jacobian of
        # the summed output recovers the per-sample time derivatives in one jacrev call.
        _dmudt = jax.jacrev(lambda _t: mu_t(_t).sum(0), argnums=0)
        return _dmudt(_t).squeeze().T


    def dsigmadt(_t):
        _dsigmadt = jax.jacrev(lambda _t: sigma_t(_t).sum(0))
        return _dsigmadt(_t).squeeze().T


    u_t = jax.jit(lambda _t, _x: dmudt(_t) + dsigmadt(_t) / sigma_t(_t) * (_x - mu_t(_t)))

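    # Integrate the ODE dx/dt = u_t(t, x) with explicit Euler steps of size dt,
    # starting from x_0 ~ N(A, sigma(0)^2 I), to obtain deterministic paths.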
    key, loc_key = jax.random.split(key)
    x_t = jnp.ones((args.BS, N + 1, 2)) * system.A[None, :]
    eps = jax.random.normal(loc_key, shape=(args.BS, 2))
    x_t = x_t.at[:, 0, :].set(x_t[:, 0, :] + sigma_t(jnp.zeros((args.BS, 1))) * eps)
    t = jnp.zeros((args.BS, 1))
    for i in trange(N):
        dx = args.dt * u_t(t, x_t[:, i, :])
        x_t = x_t.at[:, i + 1, :].set(x_t[:, i, :] + dx)
        t += args.dt

    x_t_det = x_t.copy()

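    # Stochastic version: with noise scale xi, the drift gains a correction term,
    # u(t, x) = mu'(t) + (sigma'(t) / sigma(t) - xi^2 / (2 sigma(t)^2)) * (x - mu(t)),
    # so that the SDE dx = u_t dt + xi dW has the same marginals q_t as the ODE above.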
    u_t = jax.jit(
        lambda _t, _x: dmudt(_t) + (dsigmadt(_t) / sigma_t(_t) - 0.5 * (args.xi / sigma_t(_t)) ** 2) * (_x - mu_t(_t)))

    # TODO: find a better way than resetting BS
    BS = args.num_paths
    key, loc_key = jax.random.split(key)
    x_t = jnp.ones((BS, N + 1, 2)) * system.A[None, :]
    eps = jax.random.normal(loc_key, shape=(BS, 2))
    x_t = x_t.at[:, 0, :].set(x_t[:, 0, :] + sigma_t(jnp.zeros((BS, 1))) * eps)
    t = jnp.zeros((BS, 1))
    # Euler-Maruyama integration of the SDE with fresh noise at every step.
    for i in trange(N):
        key, loc_key = jax.random.split(key)
        eps = jax.random.normal(loc_key, shape=(BS, 2))
        dx = args.dt * u_t(t, x_t[:, i, :]) + jnp.sqrt(args.dt) * args.xi * eps
        x_t = x_t.at[:, i + 1, :].set(x_t[:, i, :] + dx)
        t += args.dt

    x_t_stoch = x_t.copy()
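    # x_t_stoch now holds num_paths stochastic transition paths of shape
    # (num_paths, N + 1, 2), analogous to the deterministic paths in x_t_det.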