In [None]:
# Mount Google Drive at /content/drive/ so the notebook can read and write files stored there.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
# Data directory used by the cells below for the downloaded PDB file and the CSV dumps.
# It is joined to file names with `+`, so it must keep its trailing slash.
path = 'drive/MyDrive/SB_practical_notebooks/data/'
# Alternative data directory (disabled):
#path = 'drive/MyDrive/data/'

In [None]:
!pip install biopython
!pip install scipy
!pip install matplotlib
!pip install biopython



In [None]:
import numpy as np
from Bio.PDB import PDBList, Superimposer
from Bio.PDB.PDBParser import PDBParser

Superimpose all models of an NMR structure of an intrinsically disordered protein onto the first model, fragment by fragment (sliding window).

In [None]:
pdb_id = '2k0e'

# Download the PDB entry into the data directory `path` (stored as pdbXXXX.ent)
pdbl = PDBList()
pdbl.retrieve_pdb_file(pdb_id, pdir=path, file_format='pdb')

# Parse the downloaded file into a structure object
pdb_file = path + "pdb" + pdb_id + ".ent"
structure = PDBParser(QUIET=True).get_structure(pdb_id, pdb_file)

# State shared by the fragment-by-fragment (sliding window) superimposition
super_imposer = Superimposer()
window_size = 9  # number of consecutive CA atoms per fragment
structure_rmsd_fragments = []  # filled later: no_models X no_fragments X fragment_size

# Atoms named "CA" in the first model [0] are used as the reference
ref_model = list(filter(lambda atom: atom.get_name() == "CA", structure[0].get_atoms()))

Structure exists: 'drive/MyDrive/SB_practical_notebooks/data/pdb2k0e.ent' 


In [None]:
def _alpha_carbons(model):
    """Return the alpha-carbon atoms of ``model`` in iteration order.

    Matching on the atom name alone is ambiguous, because a calcium ion is
    also named "CA"; the element is therefore checked as well.
    """
    return [
        atom
        for atom in model.get_atoms()
        if atom.get_name() == "CA" and atom.element == "C"
    ]


# Start from a clean list so that re-running this cell does not append every model twice.
structure_rmsd_fragments = []

# Reference: alpha carbons of the first model, selected with the same rule as every other model.
ref_model = _alpha_carbons(structure[0])

# A chain of length L contains L - window_size + 1 windows; the last one ends on the last residue.
n_fragments = max(0, len(ref_model) - window_size + 1)

# Iterate all models except the reference
for i, model in enumerate(structure):
    if i == 0:
        continue  # the first model is the reference itself

    alt_model = _alpha_carbons(model)
    if len(alt_model) != len(ref_model):
        # Fragments are paired by index, so differing lengths would silently misalign residues.
        raise ValueError(
            "Model {} has {} CA atoms, but the reference model has {}".format(
                i, len(alt_model), len(ref_model)
            )
        )

    model_rmsd = []  # one entry per fragment, each of length window_size

    # Iterate fragments (sliding window, step 1)
    for start in range(n_fragments):
        end = start + window_size
        ref_fragment = ref_model[start:end]
        alt_fragment = alt_model[start:end]

        # Calculate the rotation/translation that best fits alt_fragment onto ref_fragment
        super_imposer.set_atoms(ref_fragment, alt_fragment)
        rotation, translation = super_imposer.rotran

        # Apply the transformation to a copy of the coordinates (the atoms themselves are not moved).
        # The rotation matrix right-multiplies the coordinates.
        alt_fragment_coord = np.array([atom.get_coord() for atom in alt_fragment])
        alt_fragment_coord = np.dot(alt_fragment_coord, rotation) + translation

        # Per-residue deviation after the fit (Euclidean distance between paired CA atoms)
        # https://en.wikipedia.org/wiki/Root-mean-square_deviation_of_atomic_positions
        ref_fragment_coord = np.array([atom.get_coord() for atom in ref_fragment])
        dist = ref_fragment_coord - alt_fragment_coord
        rmsd_res = np.sqrt(np.sum(dist * dist, axis=1))

        model_rmsd.append(rmsd_res)

    structure_rmsd_fragments.append(model_rmsd)



In [None]:
# Turn the nested per-model / per-fragment lists into a single NumPy array and display it
structure_rmsd_fragments = np.asarray(structure_rmsd_fragments)
structure_rmsd_fragments

array([[[ 5.37964399,  1.72985716,  1.82060651, ...,  1.89442856,
          1.1984524 ,  1.52670937],
        [ 1.3287223 ,  0.4035973 ,  1.22148376, ...,  0.62762375,
          0.44381392,  0.67192986],
        [ 0.81784585,  0.68444057,  0.51051992, ...,  0.11196707,
          0.45306161,  0.35479716],
        ...,
        [ 3.27940663,  2.75634176,  4.12690453, ...,  2.00588969,
          4.39911122,  3.11144575],
        [ 4.58306951,  3.94331269,  2.253897  , ...,  5.32299864,
          3.96045958,  3.78318973],
        [ 4.5011226 ,  1.54070192,  3.20511548, ...,  5.4870849 ,
          8.94952887, 10.14532134]],

       [[ 5.77319101,  1.93267722,  2.23815712, ...,  1.85442814,
          1.37588836,  0.25610338],
        [ 1.47066618,  0.39360431,  1.00467571, ...,  0.74586855,
          0.22910674,  0.49518258],
        [ 0.70980421,  0.47609092,  0.40413568, ...,  0.11689006,
          0.18418273,  0.28067923],
        ...,
        [ 5.32195091,  4.12309924,  3.43682565, ...,  

In [None]:

# Collapse the model axis: mean value of every fragment position across all models
structure_rmsd_fragments = np.mean(structure_rmsd_fragments, axis=0)  # no_fragments X fragment_size

# Keep a copy of this intermediate matrix on disk
np.savetxt(path + 'structure_rmsd_fragments_1.csv', structure_rmsd_fragments, delimiter=",")

In [None]:
# Right-pad every fragment row with zeros until the row is as long as the sequence
# (number of reference CA atoms). Computing the pad from the target width, instead of
# from the number of fragments, keeps the width correct whatever the fragment count is
# and makes the cell safe to re-run (a second run pads by zero columns).
sequence_length = len(ref_model)
missing_columns = max(0, sequence_length - structure_rmsd_fragments.shape[1])
structure_rmsd_fragments = np.pad(structure_rmsd_fragments, ((0, 0), (0, missing_columns)))
print(structure_rmsd_fragments)
np.savetxt(path + 'structure_rmsd_fragments_2.csv', structure_rmsd_fragments, delimiter = ",")

[[4.04653072 1.44908325 1.61631458 ... 0.         0.         0.        ]
 [1.17545726 0.64401035 0.74328906 ... 0.         0.         0.        ]
 [0.60019824 0.37954893 0.33318836 ... 0.         0.         0.        ]
 ...
 [3.66753353 2.56773599 3.25767366 ... 0.         0.         0.        ]
 [3.73294551 3.66542637 2.57363945 ... 0.         0.         0.        ]
 [4.84010086 2.02569805 3.2361159  ... 0.         0.         0.        ]]


In [None]:
# Shift row k to the right by k positions so that every fragment sits under the
# residues it covers (the zeros padded on the right wrap around to the front)
for shift in range(len(structure_rmsd_fragments)):
    structure_rmsd_fragments[shift] = np.roll(structure_rmsd_fragments[shift], shift)

print(structure_rmsd_fragments)
np.savetxt(path + 'structure_rmsd_fragments_3.csv', structure_rmsd_fragments, delimiter = ",")

[[ 4.04653072  1.44908325  1.61631458 ...  0.          0.
   0.        ]
 [ 0.          1.17545726  0.64401035 ...  0.          0.
   0.        ]
 [ 0.          0.          0.60019824 ...  0.          0.
   0.        ]
 ...
 [ 0.          0.          0.         ...  0.          0.
   0.        ]
 [ 0.          0.          0.         ...  6.29615627  0.
   0.        ]
 [ 0.          0.          0.         ...  8.40751981 11.04979497
   0.        ]]


In [None]:
# Average RMSD per residue over the fragments that actually overlap it.
# Row i of the rolled matrix occupies columns i .. i + window_size - 1; everything else
# is zero padding. A plain column mean would divide by the total number of fragments and
# thereby shrink every value (terminal residues most of all), so each column sum is
# divided by the number of fragments covering that column instead.
n_fragments, n_columns = structure_rmsd_fragments.shape
columns = np.arange(n_columns)
first_fragment = np.maximum(0, columns - window_size + 1)
last_fragment = np.minimum(n_fragments - 1, columns)
coverage = np.clip(last_fragment - first_fragment + 1, 0, None)

column_sums = structure_rmsd_fragments.sum(axis=0)
# Columns not covered by any fragment keep the value 0 instead of triggering a division by zero.
structure_rmsd_average = np.divide(
    column_sums,
    coverage,
    out=np.zeros_like(column_sums, dtype=float),
    where=coverage > 0,
)

print(np.sum(structure_rmsd_average))

6.219772927371118


# Exercise

Compare the RMSD calculated fragment by fragment with the RMSD calculated after aligning the complete models.