In [1]:
from refine import *
import project
import mdtraj as mdt

import multiprocessing
from multiprocessing import Process, Queue, Lock
from queue import Empty
# multiprocessing.set_start_method("fork")

import io

%matplotlib
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import style

# Apply the "fivethirtyeight" matplotlib style sheet to the figures created in this notebook.
style.use("fivethirtyeight")

Using matplotlib backend: TkAgg


In [2]:
# Project-level initialisation from the local `project` module.
# NOTE(review): its effect is not visible in this notebook; presumably it prepares
# `project.data_path` / `project.output_path`, which the cells below use — confirm.
project.setup()

In [3]:
def confined_gradient_descent(
        obj_q, decrement=0.9, termination="growth",
        absolute_bound=float("inf"), relative_bounds=(0.01, 7.0), etol=1, max_iter=100, return_traj=False):
    """
    Builds the 1dfj test system, randomly perturbs it along its normal modes and then
    performs a step-bounded gradient descent in normal-mode space.

    The function is self-contained (it constructs its own DRSystem / NMSpaceWrapper), so it
    can be used as the target of a separate process. Progress is streamed through ``obj_q``:
        {"energies": [...], "states": [...], "forces": [...]} after every main iteration
        (and once before the first one), and
        {"energies_line": [...]} after every line-search evaluation.
    When the function runs as a process target its return value is discarded, so the queue
    is the only way to observe the results.

    @param obj_q: queue that receives the progress records described above.
    @type obj_q: multiprocessing.Queue
    @param decrement: fold step when choosing optimal step.
    @type decrement: float
    @param termination: termination condition, either "growth" (stop when the accepted energy
        is higher than the energy of the previous iteration; that last record is dropped)
        or "etol" (see etol).
    @type termination: str
    @param absolute_bound: maximum rmsd between initial state and any intermediate state.
        NOTE: accepted for interface compatibility but currently not enforced.
    @type absolute_bound: float
    @param relative_bounds: minimum and maximum rmsd between actual intermediate state and the next one.
    @type relative_bounds: tuple
    @param etol: terminates when |E(i+1) - E(i)| < etol
    @type etol: float
    @param max_iter: maximum number of iterations
    @type max_iter: int
    @param return_traj: if true all intermediate states, energies and forces are returned.
        Otherwise, the function returns only final record.
    @type return_traj: bool
    @return: dictionary containing all the results.
        "states" - list of all states along optimization path.
        "energies" - list of all energies along optimization path.
        "forces" - list of all forces along optimization path.
        If return_traj is false only the last state and the last energy are returned
        (no "forces" key).
    @rtype: dict
    @raise ValueError: if termination is neither "growth" nor "etol".
    """

    # Reject a bad criterion before the (slow) system construction instead of after
    # the first full iteration.
    if termination not in ("growth", "etol"):
        raise ValueError(f"Wrong termination criterion: {termination}")

    # number of normal modes requested from every NMSpaceWrapper below
    n_modes = 10
    # rmsd of the random initial perturbation
    preset_rmsd = .7

    path = str((project.data_path / "may_complex") / "1dfj.pdb")
    print(path)
    tmp_system_path = str(project.output_path / "tmp_system.pdb")
    create_system(path, tmp_file=tmp_system_path)
    t0 = time.time()
    drs = DRSystem(tmp_system_path, 'charmm36.xml', refine="chain A", static="chain B")
    print("construction of a system:", -t0 + time.time(), "sec")
    nmw1 = NMSpaceWrapper(drs, n_modes=n_modes)
    print("INIT ENERGY:", nmw1.get_energy())

    # Perturb the system and rebuild the mode space around the perturbed structure.
    # The rebuilt wrapper can report fewer modes than requested; in that case a new
    # random perturbation is drawn and the construction is repeated.
    m = 0
    while m != n_modes:

        natoms = len(nmw1.get_system_position())
        weights = np.ones((natoms,))

        # random noise
        # mu = a * mu_0
        # preset_rmsd = |a| (np.dot(mu_0, mu_0) / natoms) ** 0.5 =>
        # => a = (preset_rmsd ** 2 * natoms / np.dot(mu_0, mu_0)) ** 0.5

        mu = np.random.normal(0, 1, n_modes)
        mu = (preset_rmsd ** 2 * natoms / np.dot(mu, mu)) ** 0.5 * mu
        nmw1.set_position(np.zeros(n_modes,))
        x1 = nmw1.get_system_position().copy()
        nmw1.set_position(mu)
        x2 = nmw1.get_system_position().copy()
        print("rmsd init:", rmsd(x1, x2, weights))
        print("RANDOM ENERGY:", nmw1.get_energy())
        pdy.writePDB(str(project.output_path / "shifted_6.0.pdb"), drs._refine_prot)

        nmw2 = NMSpaceWrapper(drs, n_modes=n_modes)
        m = len(nmw2.get_eigenvalues())

    nmw2.set_position(np.zeros((n_modes,)))
    nmw = nmw2

    # number of atoms
    n = len(nmw.get_system_position())
    # squared step-length limits corresponding to the relative rmsd bounds
    ru_bound_value = n * relative_bounds[1] ** 2
    rl_bound_value = n * relative_bounds[0] ** 2
    # initial state
    iteration_count = 0
    states = []
    energies = []
    forces = []
    energy_init = nmw.get_energy()
    position_init = nmw.get_position().copy()
    energies.append(energy_init)
    states.append(nmw.get_system_position().copy())

    # it is a weighted anti-gradient!
    anti_gradient = nmw.get_force().copy()
    forces.append(anti_gradient)

    # update data (snapshots, so that later appends/pops cannot alter a queued record)
    obj_q.put({
        "energies": list(energies),
        "states": list(states),
        "forces": list(forces),
    })

    # main cycle
    while True:
        a = np.dot(anti_gradient, anti_gradient)
        if a == 0:
            # Zero force: no descent direction exists and the step bounds below
            # would divide by zero, so stop at the current state.
            break

        print("\n(main) CYCLE START\n")

        # find a step
        # relative bound: step multipliers of the anti-gradient that correspond to
        # the largest / smallest allowed displacement
        upper_bound = (ru_bound_value / a) ** 0.5
        lower_bound = (rl_bound_value / a) ** 0.5

        # find optimal step: start from the largest allowed step and shrink it
        # by `decrement` on every trial
        dec = [1]
        eng = []

        nmw.set_position(dec[0] * upper_bound * anti_gradient + position_init)

        eng.append(nmw.get_energy())
        score = 0
        while True:
            dec.append(dec[-1] * decrement)
            if dec[-1] * upper_bound <= lower_bound:
                break

            nmw.set_position(dec[-1] * upper_bound * anti_gradient + position_init)
            eng.append(nmw.get_energy())
            print("\t(line) energy:", eng[-1])
            # update data
            obj_q.put({
                "energies_line": list(eng),
            })

            # exit condition: three consecutive trials that are below the starting
            # energy but rising again mean the minimum along the line was passed
            if energy_init > eng[-1] > eng[-2]:
                score += 1
            elif eng[-1] < eng[-2] or energy_init <= eng[-1]:
                score = 0
            if score >= 3:
                break

        # update state list
        j = np.argmin(eng)
        mu = dec[j] * upper_bound * anti_gradient

        # Remember the energy before the step: the termination tests below have to
        # compare the new energy against it, not against itself.
        energy_prev = energy_init

        # new initial states
        position_init = mu + position_init
        energy_init = eng[j]
        nmw.set_position(position_init)

        # state energy force
        states.append(nmw.get_system_position().copy())
        energies.append(eng[j])
        anti_gradient = nmw.get_force().copy()
        forces.append(anti_gradient)

        # print results
        print("(main) optimum:")
        print("       index:", j)
        print("       energy:", energies[-1])
        print("       force:", np.linalg.norm(forces[-1]))

        # adaptive upper bound: the next step may be at most decrement ** -1.5 times
        # longer than the accepted one, and never longer than the configured maximum
        ru_bound_value = min(np.dot(mu, mu) / decrement ** 3, n * relative_bounds[1] ** 2)
        print("       upper_bound:", (ru_bound_value / n) ** 0.5)

        # termination
        iteration_count += 1

        # update data
        obj_q.put({
            "energies": list(energies),
            "states": list(states),
            "forces": list(forces),
        })

        if termination == "growth":
            if energy_prev < eng[j]:
                # the accepted step raised the energy: discard it and stop
                states.pop()
                energies.pop()
                forces.pop()
                break
        elif abs(eng[j] - energy_prev) < etol:
            # termination == "etol" (validated at the top of the function)
            break
        if iteration_count >= max_iter:
            break
    if not return_traj:
        return {"states": states[-1:], "energies": energies[-1:]}
    else:
        return {"states": states, "energies": energies, "forces": forces}

In [4]:
# Build the refinement system for 1dfj (chain A refined, chain B static) and wrap it
# in a 10-mode normal-mode space; `drs` and `nmw1` are used by the cells below.
path = str((project.data_path / "may_complex") / "1dfj.pdb")
print(path)
# The same temporary PDB is written by create_system and read back by DRSystem.
tmp_system_path = str(project.output_path / "tmp_system.pdb")
create_system(path, tmp_file=tmp_system_path)
t0 = time.time()
drs = DRSystem(tmp_system_path, 'charmm36.xml', refine="chain A", static="chain B")
print("construction of a system:", -t0 + time.time(), "sec")
nmw1 = NMSpaceWrapper(drs, n_modes=10)
print("INIT ENERGY:", nmw1.get_energy())

@> 4416 atoms and 1 coordinate set(s) were parsed in 0.04s.


/home/semyon/PycharmProjects/DiplomaPython/data/may_complex/1dfj.pdb
write PDB(prody): 0.0438 sec
read PDB(openmm): 0.2772839069366455 sec


@> 8726 atoms and 1 coordinate set(s) were parsed in 0.06s.


add hydrogens and extra particles(openmm): 19.37400507926941 sec
write PDB(openmm): 0.0868675708770752 sec
1856 6870
construction of a system: 10.496461153030396 sec


@> Hessian was built in 8.08s.
@> 10 modes were calculated in 17.17s.


INIT ENERGY: -10889.754022221883


In [5]:
# Number of entries in the system position array and a uniform (all ones) weight
# per entry, used by the rmsd() calls below.
natoms = len(nmw1.get_system_position())
weights = np.ones(natoms)

In [24]:
# Random perturbation in normal-mode space, rescaled to a preset rmsd.
preset_rmsd = .7

# Scale factor derivation (mu = a * mu_0):
#   preset_rmsd = |a| * (np.dot(mu_0, mu_0) / natoms) ** 0.5
#   =>        a = (preset_rmsd ** 2 * natoms / np.dot(mu_0, mu_0)) ** 0.5
mu_0 = np.random.normal(0, 1, 10)
scale = (preset_rmsd ** 2 * natoms / np.dot(mu_0, mu_0)) ** 0.5
mu = scale * mu_0

# Coordinates at the mode-space origin ...
nmw1.set_position(np.zeros(10))
x1 = nmw1.get_system_position().copy()
# ... and after applying the perturbation.
nmw1.set_position(mu)
x2 = nmw1.get_system_position().copy()

print("RMSD INIT:", rmsd(x1, x2, weights))
print("RANDOM ENERGY:", nmw1.get_energy())
print("NM VECTOR:", mu)
# Last expression of the cell: its return value is displayed as the cell output.
pdy.writePDB(str(project.output_path / "shifted_6.0.pdb"), drs._refine_prot)

rmsd init: 0.6999999999999991
RANDOM ENERGY: 58085336.94942912


'/home/semyon/PycharmProjects/DiplomaPython/output/shifted_6.0.pdb'

In [26]:
# Second 10-mode wrapper built on the same `drs` after it was moved through nmw1 above.
# NOTE(review): presumably the modes are computed around the current (perturbed)
# coordinates of `drs` — confirm against NMSpaceWrapper.
nmw2 = NMSpaceWrapper(drs, n_modes=10)

@> Hessian was built in 7.74s.
@> 10 modes were calculated in 14.41s.


In [27]:
# Independent copy of the system coordinates as currently reported by nmw2.
old_coords = nmw2.get_system_position().copy()

In [29]:
# Put nmw2 at the origin of its 10-dimensional mode space. The value displayed under
# the cell is whatever set_position returns.
# NOTE(review): it looks like the energy at that position — confirm.
nmw2.set_position(np.zeros((10,)))

58085336.94943367


In [23]:
# multiprocessing
# The DRSystem cannot be pickled, so it cannot be transferred between processes;
# confined_gradient_descent therefore builds its own system inside the worker and
# reports back only through this queue.
obj_q = Queue(maxsize=20)
# records received from the worker (filled by the animation cell)
full_list = []

# gradient descent process
descent_options = {
    "relative_bounds": (0.001, 0.25),
    "decrement": 0.9,
    "max_iter": 100,
    "return_traj": True,
    "termination": "growth",
}
uprocess = Process(
    target=confined_gradient_descent,
    args=(obj_q,),
    kwargs=descent_options,
    daemon=True,
)
uprocess.start()

/home/semyon/PycharmProjects/DiplomaPython/data/may_complex/1dfj.pdb


@> 4416 atoms and 1 coordinate set(s) were parsed in 0.06s.


write PDB(prody): 0.0351 sec
read PDB(openmm): 0.6412689685821533 sec
add hydrogens and extra particles(openmm): 24.39337158203125 sec
write PDB(openmm): 0.1241302490234375 sec


@> 8726 atoms and 1 coordinate set(s) were parsed in 0.08s.


1856 6870
construction of a system: 12.669026136398315 sec


@> Hessian was built in 9.31s.
@> 10 modes were calculated in 28.28s.


INIT ENERGY: -11240.6403321988
rmsd init: 0.6999999999999995
RANDOM ENERGY: 5965426.495094404


@> Hessian was built in 8.08s.
@> 7 modes were calculated in 25.47s.


rmsd init: 0.7
RANDOM ENERGY: 638436222.5459614


@> Hessian was built in 8.27s.
@> 10 modes were calculated in 24.28s.



(main) CYCLE START

	(line) energy: 426016.8179755698
	(line) energy: 762776.0533407386
	(line) energy: 1760929.141309363
(main) optimum:
       index: 0
       energy: 342000.9902069886
       force: 7142083.236224925
       upper_bound: 0.25

(main) CYCLE START

	(line) energy: 313016.7040178206
	(line) energy: 311427.99610408925
	(line) energy: 310147.18766524526
	(line) energy: 309116.37624447705
	(line) energy: 308288.21360558714
	(line) energy: 307624.7125508394
	(line) energy: 307094.63576821494
	(line) energy: 306672.7250922553
	(line) energy: 306338.43111178745
	(line) energy: 306075.09654505266
	(line) energy: 305868.8254764337
	(line) energy: 305708.5818426554
	(line) energy: 305585.56408268306
	(line) energy: 305492.0412262463
	(line) energy: 305422.3646483691
	(line) energy: 305371.4785889023
	(line) energy: 305335.7745360642
	(line) energy: 305311.52167696087
	(line) energy: 305296.4597112595
	(line) energy: 305288.4951198534
	(line) energy: 305286.07666636736
	(line) en


(main) CYCLE START

	(line) energy: 79143.2387579895
	(line) energy: 79152.74623145026
	(line) energy: 79165.52097647924
(main) optimum:
       index: 0
       energy: 79138.47191782367
       force: 7185.566081726955
       upper_bound: 0.027354747282878112

(main) CYCLE START

	(line) energy: 78855.86915927098
	(line) energy: 78840.89674777936
	(line) energy: 78835.65678206974
	(line) energy: 78837.60077335927
	(line) energy: 78844.74229007297
	(line) energy: 78855.53357186634
(main) optimum:
       index: 3
       energy: 78835.65678206974
       force: 6189.8065618738165
       upper_bound: 0.023355892682693228

(main) CYCLE START

	(line) energy: 78549.79843825563
	(line) energy: 78553.6968725373
	(line) energy: 78561.89332238487
	(line) energy: 78573.06717631183
(main) optimum:
       index: 1
       energy: 78549.79843825563
       force: 6563.408165396679
       upper_bound: 0.024619272554590302

(main) CYCLE START

	(line) energy: 78283.46866249712
	(line) energy: 78277.62798


(main) CYCLE START

	(line) energy: 74334.01615218096
	(line) energy: 74330.70981132661
	(line) energy: 74330.2188841946
	(line) energy: 74331.78867833257
	(line) energy: 74334.83403625301
	(line) energy: 74338.89269356313
(main) optimum:
       index: 3
       energy: 74330.2188841946
       force: 3488.7546163637107
       upper_bound: 0.013083690825684027

(main) CYCLE START

	(line) energy: 74231.73220959542
	(line) energy: 74234.49964223946
	(line) energy: 74238.33689306889
(main) optimum:
       index: 0
       energy: 74230.51352856267
       force: 4040.8549983716994
       upper_bound: 0.015323801189115025

(main) CYCLE START

	(line) energy: 74143.1068426059
	(line) energy: 74138.15321659138
	(line) energy: 74136.29288322404
	(line) energy: 74136.72085048837
	(line) energy: 74138.81148475586
	(line) energy: 74142.07298317476
(main) optimum:
       index: 3
       energy: 74136.29288322404
       force: 3466.058787507642
       upper_bound: 0.013083690825684025

(main) CYCLE 

       upper_bound: 0.010053945960178364

(main) CYCLE START

	(line) energy: 72661.78517154459
	(line) energy: 72659.88164576101
	(line) energy: 72659.26988269608
	(line) energy: 72659.60959455607
	(line) energy: 72660.63767491964
	(line) energy: 72662.1540730187
(main) optimum:
       index: 3
       energy: 72659.26988269608
       force: 2298.857121866894
       upper_bound: 0.008584209550731286

(main) CYCLE START



In [41]:
# Terminate the gradient-descent worker process started above. Records it already put
# on obj_q are not collected by this call.
uprocess.terminate()

In [31]:
# connect to descent output
%matplotlib
fig, ax = plt.subplots(2, 2, figsize=(10, 10))

# Toggled by the pause / resume cells below.
animation_on = True


def animate(i):
    """
    Animation callback: takes at most one record from obj_q and redraws the matching panels.

    A record with an "energies" key is also stored in full_list and refreshes the three
    "(main)" panels; a record with an "energies_line" key refreshes the "(line)" panel.
    Nothing happens when the animation is paused or the queue is empty.

    @param i: frame number supplied by FuncAnimation (unused).
    @type i: int
    """
    if not animation_on:
        return
    try:
        record = obj_q.get_nowait()
    except Empty:
        return

    if "energies" in record:
        full_list.append(record)
        main_panels = (
            (ax[0, 0], "Energy (main)"),
            (ax[0, 1], "RMSD (main)"),
            (ax[1, 0], "Force (main)"),
        )
        for panel, _ in main_panels:
            panel.clear()
        for panel, title in main_panels:
            panel.set_title(title)

        energy_path = record["energies"]
        force_path = record["forces"]
        state_path = record["states"]
        if len(energy_path) == 0:
            return

        # energy per main iteration
        ax[0, 0].scatter(np.arange(len(energy_path)), energy_path)

        # rmsd of every state with respect to the first one (uniform weights)
        reference = state_path[0]
        unit_weights = np.ones(len(reference))
        ax[0, 1].scatter(
            np.arange(len(state_path)),
            [rmsd(reference, state, unit_weights) for state in state_path],
        )

        # cosine of the angle between each force and the previous one
        cosines = [
            np.dot(current, previous) / np.linalg.norm(current) / np.linalg.norm(previous)
            for previous, current in zip(force_path, force_path[1:])
        ]
        ax[1, 0].scatter(np.arange(len(force_path) - 1), cosines)
    elif "energies_line" in record:
        line_panel = ax[1, 1]
        line_panel.clear()
        line_panel.set_title("Energy (line)")
        line_energies = record["energies_line"]
        if len(line_energies) > 0:
            line_panel.scatter(np.arange(len(line_energies)), line_energies)


# The third positional argument of FuncAnimation is `frames`, so 500 is the number of
# frames per cycle; the delay between frames is left at the library default.
anim = animation.FuncAnimation(fig, animate, frames=500)

Using matplotlib backend: TkAgg


In [34]:
# Pause the animation: animate() returns immediately while this flag is False, so
# nothing is read from obj_q. The queue is bounded, so the descent worker blocks in
# put() once it is full.
animation_on = False

In [35]:
# Resume the animation: animate() reads from obj_q and redraws again.
animation_on = True

In [121]:
# CA comparison: collect three C-alpha selections to compare pairwise.
#   calpha_0 - taken from drs._refine_prot_init
#   calpha_1 - taken from drs._refine_prot after nmw2 is set to the zero mode vector
#   calpha_2 - taken from drs._refine_prot after nmw2 is set to opt_pos
# The statement order matters: each selection is copied right after the matching
# set_position call (presumably set_position updates drs._refine_prot in place — confirm).
# NOTE(review): `opt_pos` is not defined in any visible cell of this notebook, so this
# cell depends on hidden kernel state and fails on a fresh kernel.
calpha_0 = drs._refine_prot_init.select("calpha").copy()
nmw2.set_position(np.zeros(10,))
calpha_1 = drs._refine_prot.select("calpha").copy()
nmw2.set_position(opt_pos)
calpha_2 = drs._refine_prot.select("calpha").copy()

# number of selected C-alpha atoms and uniform weights for the rmsd() calls below
nca = len(calpha_0)
print(nca)
weights1 = np.ones((nca,))

124


In [122]:
# Pairwise C-alpha rmsd between the three selections; the label names the compared pair.
ca_pairs = (
    ("01", calpha_0, calpha_1),
    ("02", calpha_0, calpha_2),
    ("12", calpha_1, calpha_2),
)
for label, first, second in ca_pairs:
    print(label, rmsd(first.getCoords(), second.getCoords(), weights1))

01 0.3893572220770151
02 0.1114435447861749
12 0.4136979486083795


## Save trajectories
 1) save all states
 
 2) read them with mdtraj
 
 3) save them as a trajectory file (`.pdb` in the cell below; `.xtc` is a more compact alternative)
 
 4) enjoy your trajectories in VMD!!!

In [36]:
# Rebuild the system from the temporary PDB (the same tmp_system.pdb path that was
# passed to create_system earlier). This rebinds `drs`, so wrappers created from the
# previous `drs` object no longer refer to this instance.
drs = DRSystem(str(project.output_path / "tmp_system.pdb"), 'charmm36.xml', refine="chain A", static="chain B")

@> 8726 atoms and 1 coordinate set(s) were parsed in 0.19s.


1856 6870


In [38]:
# Drop the first 27 collected records.
# NOTE(review): not idempotent — every re-run of this cell drops 27 more records; the
# reason for the value 27 is not recorded in the notebook.
full_list = full_list[27:]

In [39]:
# Assemble an mdtraj trajectory: every state is applied to `drs`, written to an
# intermediate PDB file and loaded back as a one-frame trajectory that is appended to `trj`.
n_states = len(full_list)
inter_pdb_path = project.output_path / "inter_pdb.pdb"

prot_bytes = io.StringIO()  # not used by this cell
trj = None
for i in range(n_states):
    # frame i is taken from position i of the state list stored in the i-th record
    drs.set_position(full_list[i]["states"][i])
    with open(inter_pdb_path, "w") as pdb_file:
        drs._omm_protein.writeFile(
            positions=drs._omm_protein.positions,
            topology=drs._omm_protein.topology,
            file=pdb_file,
        )
    frame = mdt.load(str(inter_pdb_path))
    trj = frame if trj is None else trj.join(frame)
    print(trj.xyz.shape)

(1, 8726, 3)
(2, 8726, 3)
(3, 8726, 3)
(4, 8726, 3)
(5, 8726, 3)
(6, 8726, 3)
(7, 8726, 3)
(8, 8726, 3)
(9, 8726, 3)
(10, 8726, 3)
(11, 8726, 3)
(12, 8726, 3)
(13, 8726, 3)
(14, 8726, 3)
(15, 8726, 3)
(16, 8726, 3)
(17, 8726, 3)
(18, 8726, 3)
(19, 8726, 3)
(20, 8726, 3)
(21, 8726, 3)
(22, 8726, 3)
(23, 8726, 3)
(24, 8726, 3)
(25, 8726, 3)
(26, 8726, 3)
(27, 8726, 3)
(28, 8726, 3)
(29, 8726, 3)
(30, 8726, 3)
(31, 8726, 3)
(32, 8726, 3)
(33, 8726, 3)
(34, 8726, 3)
(35, 8726, 3)
(36, 8726, 3)
(37, 8726, 3)
(38, 8726, 3)
(39, 8726, 3)
(40, 8726, 3)
(41, 8726, 3)
(42, 8726, 3)
(43, 8726, 3)
(44, 8726, 3)
(45, 8726, 3)
(46, 8726, 3)
(47, 8726, 3)
(48, 8726, 3)
(49, 8726, 3)
(50, 8726, 3)
(51, 8726, 3)
(52, 8726, 3)
(53, 8726, 3)
(54, 8726, 3)


In [40]:
# Write the assembled trajectory to trajectory.pdb in the project output directory.
trj.save_pdb(str(project.output_path / "trajectory.pdb"))

## WARNING: the code below this point is abandoned.