In [1]:
import quippy
import ase
from ase.atoms import Atoms as AseAtoms 

In [2]:
# `import ase` alone does not guarantee that the `ase.io` submodule is loaded;
# import it explicitly so that `ase.io.read` does not depend on another package
# having imported it as a side effect.
import ase.io

# Read 1ubq.pdb with ASE's 'proteindatabank' reader and wrap the result in a quippy Atoms object.
protein = quippy.Atoms(ase.io.read("1ubq.pdb", format='proteindatabank'))


In [3]:
# Read 1ubq.pdb into a list of lines, each with surrounding whitespace removed,
# so that the lines can be randomly permuted in a later cell.
with open("1ubq.pdb") as flines:
    data = list(map(str.strip, flines))

In [4]:
# Display the stripped lines read from 1ubq.pdb (still in their original order here).
data

In [5]:
import numpy as np

# Shuffle the PDB lines in place. A fixed seed makes the permutation -- and therefore
# 1ubq_2.pdb and everything derived from it -- identical after "Restart Kernel -> Run All".
# A private RandomState is used so NumPy's global random state is left untouched.
# NOTE(review): this permutes every line of the file, not only the atom records --
# confirm that the PDB reader used afterwards tolerates arbitrarily ordered records.
SHUFFLE_SEED = 0
np.random.RandomState(SHUFFLE_SEED).shuffle(data)


In [6]:
# Write the (shuffled) lines to 1ubq_2.pdb, separated by newlines.
# Note that no newline is written after the last line.
with open("1ubq_2.pdb", 'w') as outflines:
    outflines.write("\n".join(data))

In [7]:
# Print the shuffled file to check what was written.
!cat 1ubq_2.pdb

In [8]:
# Load the shuffled copy through the same ASE reader and wrap it in a quippy Atoms object.
proteinShuffled = quippy.Atoms(
    ase.io.read("1ubq_2.pdb", format="proteindatabank")
)

In [9]:
# Bundle the original and the shuffled structure into a single AtomsList;
# it is written to an xyz file in a later cell.
twoUbiquitins = quippy.AtomsList([
    protein,
    proteinShuffled,
])

In [10]:
# Write both structures to twoproteins.xyz (relative to the working directory).
twoUbiquitins.write("twoproteins.xyz")

In [11]:
# Print the generated xyz file to check what was written.
!cat twoproteins.xyz

In [12]:
# Show the command-line help of the glosim script.
!python /usr/local/src/glosim/glosim.py --help

In [13]:
!python /usr/local/src/glosim/glosim.py /root/twoproteins.xyz

In [14]:
!python /usr/local/src/glosim/glosim.py /root/twoproteins.xyz --kernel average

In [15]:
# Get similarities for all ubq-like proteins.
# Collect every PDB file of the ubq-like test set and load each one through ASE
# into a quippy Atoms object; `proteins` is in the same order as `proteinPaths`.
import glob
proteinPaths = glob.glob("testproteins/ubqLike/*.pdb")
proteins = [
    quippy.Atoms(ase.io.read(path, format='proteindatabank'))
    for path in proteinPaths
]


In [16]:
# Display the list of loaded structures.
proteins

In [17]:
# Bundle all loaded structures into one AtomsList and write it to ubqLike.xyz
# (relative to the working directory).
ubqLike = quippy.AtomsList(proteins)
ubqLike.write("ubqLike.xyz")


In [18]:
!python /usr/local/src/glosim/glosim.py /root/ubqLike.xyz

In [19]:
# Reuse the exact list the structures were loaded from: a fresh glob.glob() call is not
# guaranteed to return the files in the same order.
proteinPaths # The rows of the matrix

In [20]:
# run TM align, compare
import glob
tmalignscores = []
for pdb in glob.glob("testproteins/ubqLike/*.pdb"):
    tmalignscore = []
    for pdb2 in glob.glob("testproteins/ubqLike/*.pdb"):
        data = !./testproteins/TMalign -a T {pdb} {pdb2}
        TMScore = float([line for line in data if line.startswith("TM-score")][2][9:18])
        tmalignscore.append(TMScore)
    tmalignscores.append(tmalignscore)
    

In [21]:
# Display the TM-score matrix as nested lists (one inner list per structure).
tmalignscores

In [22]:
def plotKernelMatrix(inputArray):
    """Print a similarity matrix and draw it as a heatmap.

    inputArray is converted with ``np.array(inputArray, dtype=float)``, so nested
    lists of numbers or of numeric strings are both accepted. The converted matrix
    is printed, then plotted with seaborn using the palettable cubehelix
    ``purple_16`` colormap and a colour scale fixed to the range 0..1.
    Returns None; the figure is shown with ``plt.show()``.
    """
    import palettable
    import numpy as np
    import seaborn as sns
    import matplotlib.pyplot as plt

    matrix = np.array(inputArray, dtype=float)
    print(matrix)
    sns.heatmap(
        matrix,
        vmin=0,
        vmax=1,
        cmap=palettable.cubehelix.purple_16.mpl_colormap,
    )
    plt.show()
    


In [23]:
# Render figures inline in the notebook, then plot the TM-score matrix as a heatmap.
%matplotlib inline
plotKernelMatrix(tmalignscores)


In [24]:
# Read the kernel matrix file, dropping its first line (presumably a header -- confirm),
# split every remaining line into its whitespace-separated fields and plot the result.
with open("ubqLike-n8-l6-c5.0-g0.5_match.k") as flines:
    data = [line.split() for line in flines.readlines()[1:]]

plotKernelMatrix(data)


In [26]:
# Show the row labels again. Reuse the exact list the structures were loaded from:
# a fresh glob.glob() call is not guaranteed to return the files in the same order.
proteinPaths

In [27]:
# The difference between the two matrices plotted above lies in the match between
# 1wz0 and 2k1f, both of which have long tails.

In [28]:
# Display the image stored alongside the ubq-like test structures.
from IPython.display import Image
Image("testproteins/ubqLike/ubqLike.png")