# Analysis of the HIV-1 viral capsid protein p24

The simulation we will analyse has been produced in: [M. T. Degiacomi and M. Dal Peraro, "Macromolecular Symmetric Assembly Prediction Using Swarm Intelligence
Dynamic Modeling", Structure 21, 1097 (2013)](https://www.cell.com/structure/fulltext/S0969-2126(13)00196-2?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS0969212613001962%3Fshowall%3Dtrue).


In [None]:
import os
import numpy as np
import numpy.linalg
import matplotlib.pyplot as plt
import MDAnalysis as mda
from MDAnalysis.analysis import rms, align, diffusionmap

Load the simulation. To keep the data size small, we will load a downsampled version of the trajectory (1 frame every 2 ns). In the slides, we had a simulation sampled every 1 ns.

In [None]:
# Build the Universe from the PDB file in ../data; the path components are
# joined with the platform's own separator.
u = mda.Universe(os.path.join("..", "data", "p24_formatted.pdb"))

# Root Mean Square Deviations (RMSD)

In [None]:
# Selection strings shared by the three analyses below.
sel_all = "backbone"
sel_d1 = "backbone and resid 1-136"
sel_d2 = "backbone and not resid 1-136"

# Superimpose on the whole backbone; also report Domain 1 and Domain 2.
R_all = rms.RMSD(u, u, select=sel_all, groupselections=[sel_d1, sel_d2])
R_all.run()

# Superimpose on Domain 1 (residues 1 to 136); also report whole backbone and Domain 2.
R_D1 = rms.RMSD(u, u, select=sel_d1, groupselections=[sel_all, sel_d2])
R_D1.run()

# Superimpose on Domain 2 (everything but residues 1 to 136); also report Domain 1 and whole backbone.
R_D2 = rms.RMSD(u, u, select=sel_d2, groupselections=[sel_d1, sel_all])
R_D2.run()

In [None]:
def _draw_rmsd_panel(fig, position, time, rmsd, rows, title, y_max, ylabel=None):
    """Add one RMSD subplot to *fig* and return its axes.

    ``rows`` holds the indices into ``rmsd`` of the curves labelled "all",
    "Domain 1" and "Domain 2", in the order in which they are drawn.
    The y-axis label is only set when ``ylabel`` is given.
    """
    ax = fig.add_subplot(position)
    styles = (("gray", "all"), ("mediumblue", "Domain 1"), ("deepskyblue", "Domain 2"))
    for row, (colour, label) in zip(rows, styles):
        ax.plot(time, rmsd[row], '-', c=colour, label=label)
    ax.legend(loc="best", frameon=False)
    ax.set_xlabel("time (ns)")
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xlim([0, 1000])
    ax.set_ylim([0, y_max])
    return ax


fig = plt.figure(figsize=(14, 4))

# Panel 1: RMSDs after whole-backbone alignment.
rmsd = R_all.results.rmsd.T
# The time column is doubled to obtain ns: the loaded trajectory holds
# 1 frame every 2 ns. This time axis is shared by all three panels.
time = rmsd[1]*2
ax1 = _draw_rmsd_panel(fig, 131, time, rmsd, (2, 3, 4), "align all", 16,
                       ylabel=r"RMSD ($\AA$)")

# Panel 2: RMSDs after Domain 1 alignment (row 2 is the aligned group, Domain 1).
rmsd = R_D1.results.rmsd.T
ax2 = _draw_rmsd_panel(fig, 132, time, rmsd, (3, 2, 4), "align Domain 1", 42)

# Panel 3: RMSDs after Domain 2 alignment (row 2 is the aligned group, Domain 2).
rmsd = R_D2.results.rmsd.T
ax3 = _draw_rmsd_panel(fig, 133, time, rmsd, (4, 3, 2), "align Domain 2", 54)

plt.tight_layout()
for filename in ("rmsd_p24.pdf", "rmsd_p24.png"):
    fig.savefig(filename)

For the first slide on RMSD, let's also plot only a single RMSD profile

In [None]:
# Single-panel figure: backbone RMSD after whole-backbone alignment.
rmsd = R_all.results.rmsd.T
fig, ax1 = plt.subplots(figsize=(6, 4))
ax1.plot(time, rmsd[2], '-', c="steelblue")

ax1.set_xlabel("time (ns)")
ax1.set_ylabel(r"RMSD ($\AA$)")
ax1.set_xlim([-10, 1000])
ax1.set_ylim([0, 12])

# Save both a vector and a raster copy.
for filename in ("rmsd_single.pdf", "rmsd_single.png"):
    fig.savefig(filename)

# Pairwise RMSD

In [None]:
# Superimpose every frame on the C-alpha atoms (the coordinates are modified
# in memory), then compute the pairwise distance matrix on the same atoms.
ca_selection = 'name CA'
aligner = align.AlignTraj(u, u, select=ca_selection, in_memory=True)
aligner.run()
matrix = diffusionmap.DistanceMatrix(u, select=ca_selection)
matrix.run()

In [None]:
# Heat map of the pairwise RMSD between the first 500 frames.
n_frames_shown = 500
ns_per_frame = 2  # the loaded trajectory holds 1 frame every 2 ns
pairwise = matrix.results.dist_matrix[:n_frames_shown, :n_frames_shown]

# The axes are labelled in ns, but imshow would otherwise number them by frame
# index. Map indices to time with `extent`; the half-frame padding keeps each
# pixel centred on its own frame's time.
t_min = -0.5 * ns_per_frame
t_max = (pairwise.shape[0] - 0.5) * ns_per_frame
plt.imshow(pairwise, cmap='viridis', origin="lower", vmax=16,
           extent=(t_min, t_max, t_min, t_max))
plt.xlabel('time (ns)')
plt.ylabel('time (ns)')
plt.colorbar(label=r'RMSD ($\AA$)')

plt.savefig("pairwise_all.png")
plt.savefig("pairwise_all.pdf")

# Root Mean Square Fluctuations (RMSF)

We start by defining a function that aligns the trajectory and calculates the RMSF of a selection of interest

In [None]:
def get_RMSF(u, select, rmsd_atoms):
    """Align the trajectory of *u* on *select* and compute the RMSF of *rmsd_atoms*.

    The trajectory is first fitted to its initial frame, the average protein
    structure of that fitted trajectory is built, and the trajectory is then
    fitted again onto this average structure before the RMSF is computed.

    Parameters
    ----------
    u : MDAnalysis Universe
        Universe holding the trajectory. Both alignments use
        ``in_memory=True``, so the coordinates of *u* are modified.
    select : str
        Selection string of the atoms used for the superposition.
    rmsd_atoms : str
        Selection string (applied to the protein atoms of *u*) of the atoms
        whose RMSF is computed.

    Returns
    -------
    The ``rms.RMSF`` analysis object after ``run()``.

    Side effects
    ------------
    The aligned trajectory is written to ``rmsfit.xtc`` in the working
    directory.
    """
    protein = u.select_atoms("protein")

    # Fit to the initial frame to get a better average structure
    # (the trajectory is changed in memory).
    align.AlignTraj(u, u, select=select, in_memory=True).run()

    # Average structure over frames. The axis order is requested explicitly
    # (atoms, frames, coordinates) so that axis 1 is the frame axis.
    ref_coordinates = u.trajectory.timeseries(asel=protein, order="afc").mean(axis=1)
    # Make a reference structure (need to reshape into a 1-frame "trajectory").
    ref = mda.Merge(protein).load_new(ref_coordinates[:, None, :], order="afc")

    # Second pass: fit onto the average structure.
    align.AlignTraj(u, ref, select=select, in_memory=True).run()

    # NOTE(review): this export is kept from the original workflow (its comment
    # referred to PMDA 0.3.0, issue #15); the file is not read back here.
    with mda.Writer("rmsfit.xtc", n_atoms=u.atoms.n_atoms) as W:
        for ts in u.trajectory:
            W.write(u.atoms)

    # Use the atoms requested by the caller, not a module-level selection.
    atoms = protein.select_atoms(rmsd_atoms)

    return rms.RMSF(atoms).run()

Now we can call our function using different selections

In [None]:
# C-alpha selection: used both as the fitting group for the "all" case and as
# the RMSF selection passed in every call.
ca_sel = "protein and name CA"
calphas = u.select_atoms(ca_sel)

# Fit on all C-alphas, on Domain 1 (resid 1-136) and on Domain 2 (the rest).
rmsfer_all = get_RMSF(u, ca_sel, ca_sel)
rmsfer_D1 = get_RMSF(u, "protein and name CA and resid 1-136", ca_sel)
rmsfer_D2 = get_RMSF(u, "protein and name CA and not resid 1-136", ca_sel)

Now, let's plot!

In [None]:
# First version of the figure: RMSF after fitting on all C-alpha atoms.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(calphas.resnums, rmsfer_all.results.rmsf, c="gray", label="all")

ax.set_xlabel("resid (#)")
# Raw string: "\A" is not a valid Python escape sequence and triggers a
# warning in a normal string literal.
ax.set_ylabel(r"RMSF ($\AA$)")
ax.set_xlim([0, 210])
ax.set_ylim([0, 13])
ax.legend(frameon=False)

fig.savefig("rmsf_p24_all.pdf")
fig.savefig("rmsf_p24_all.png")

# Second version: overlay each domain's RMSF obtained after fitting on that
# domain only. The first 136 entries are taken as Domain 1, the rest as Domain 2.
ax.plot(calphas.resnums[0:136], rmsfer_D1.results.rmsf[0:136], c="mediumblue", label="Domain 1")
ax.plot(calphas.resnums[136:], rmsfer_D2.results.rmsf[136:], c="deepskyblue", label="Domain 2")
ax.legend(frameon=False)  # refresh the legend so it lists the new curves

fig.savefig("rmsf_p24.pdf")
fig.savefig("rmsf_p24.png")

# Radius of gyration

In [None]:
# Groups reused at every frame: all protein atoms, plus the two atoms taken as
# chain ends (first atom named N, last atom named C).
bb = u.select_atoms('protein')
nterm = u.select_atoms('name N')[0]
cterm = u.select_atoms('name C')[-1]

# Per-frame end-to-end distance and radius of gyration.
dist, rg = [], []
for ts in u.trajectory:
    # vector joining the two terminal atoms, and its length
    r = cterm.position - nterm.position
    d = np.linalg.norm(r)
    # radius of gyration of the protein atoms in the current frame
    rgyr = bb.radius_of_gyration()
    print("frame = {0}: d = {1} A, Rgyr = {2} A".format(ts.frame, d, rgyr))
    dist.append(d)
    rg.append(rgyr)

In [None]:
# Two stacked panels sharing the time axis: radius of gyration (top) and
# end-to-end distance (bottom).
fig, (ax1, ax2) = plt.subplots(nrows=2, sharex=True, figsize=(5, 7))

ax1.plot(time, rg, c="firebrick")
ax2.plot(time, dist, c="firebrick")

ax2.set_xlabel("time (ns)")
# Raw strings: "\A" is not a valid Python escape sequence and triggers a
# warning in a normal string literal.
ax1.set_ylabel(r"Rgyr ($\AA$)")
ax2.set_ylabel(r"end-to-end distance ($\AA$)")
ax1.set_xlim([0, 1000])

# No vertical gap between the two panels.
plt.subplots_adjust(hspace=0)

fig.savefig("rg_dist_p24.pdf")
fig.savefig("rg_dist_p24.png")

# Hydrogen bonds

In [None]:
def hbonds(hydrogens, acceptors):
    """Find hydrogen bonds between *hydrogens* and *acceptors* in the current frame.

    A pair is kept when the acceptor-hydrogen distance is within the 3.0
    cutoff passed to ``capped_distance`` (MDAnalysis length units, presumably
    Angstrom) and the angle computed by ``calc_angles`` from the acceptor,
    hydrogen and donor positions is at least 130 degrees. The donor of a
    hydrogen is taken to be the first atom bonded to it, so the topology must
    contain bond information.

    Parameters
    ----------
    hydrogens : AtomGroup
        Candidate hydrogen atoms.
    acceptors : AtomGroup
        Candidate acceptor atoms.

    Returns
    -------
    tuple of three AtomGroups ``(acceptors, hydrogens, donors)``, aligned so
    that entry ``i`` of each describes one hydrogen bond. All three are empty
    when no pair satisfies the criteria.
    """
    # Same periodic box for the distance and the angle calculations.
    box = acceptors.dimensions

    # Acceptor/hydrogen index pairs closer than the cutoff.
    idx, _ = mda.lib.distances.capped_distance(acceptors.positions,
                                               hydrogens.positions,
                                               max_cutoff=3.0,
                                               box=box)
    acc_idx, hyd_idx = idx.T

    # select potential hydrogen bonds to check angles
    potential_hbond_acceptors = acceptors[acc_idx]
    potential_hbond_hydrogens = hydrogens[hyd_idx]

    # Donor of each hydrogen = first atom bonded to it. Collecting indices and
    # indexing once also yields an (empty) AtomGroup when there are no pairs.
    donor_ix = np.array([h.bonded_atoms[0].ix for h in potential_hbond_hydrogens],
                        dtype=np.intp)
    potential_hbond_donors = hydrogens.universe.atoms[donor_ix]

    if len(donor_ix) == 0:
        # Nothing within the cutoff: return three empty groups.
        return (potential_hbond_acceptors,
                potential_hbond_hydrogens,
                potential_hbond_donors)

    angles = mda.lib.distances.calc_angles(potential_hbond_acceptors.positions,
                                           potential_hbond_hydrogens.positions,
                                           potential_hbond_donors.positions,
                                           box=box)
    # convert to degrees
    angles = np.rad2deg(angles)

    # keep only the pairs whose angle is at least 130 degrees
    angle_idx = np.flatnonzero(angles >= 130.0)

    hbond_acceptors = potential_hbond_acceptors[angle_idx]
    hbond_hydrogens = potential_hbond_hydrogens[angle_idx]
    hbond_donors = potential_hbond_donors[angle_idx]

    return hbond_acceptors, hbond_hydrogens, hbond_donors

---