In [1]:
# stdlib
import os
import time
# science
import numpy as np
import torch
from einops import repeat, rearrange

In [2]:
# data / process
import joblib
import sidechainnet
# Protein vocabulary object from sidechainnet; handed to get_prot as `vocab_` further down.
VOCAB = sidechainnet.utils.sequence.ProteinVocabulary()

In [3]:
# self module
import mp_nerf

### Load a protein in SCN format (you can skip this section if you load the provided joblib file instead)

In [4]:
# Load the CASP7 release of SidechainNet; with_pytorch="dataloaders" yields a
# dict-like object keyed by split name.
dataloaders = sidechainnet.load(
    casp_version=7,
    with_pytorch="dataloaders",
)
# Display the available splits: 'train', 'train-eval', 'test' and
# 'valid-10' ... 'valid-90'.
dataloaders.keys()

SidechainNet was loaded from ./sidechainnet_data/sidechainnet_casp7_30.pkl.


dict_keys(['train', 'train-eval', 'test', 'valid-10', 'valid-20', 'valid-30', 'valid-40', 'valid-50', 'valid-70', 'valid-90'])

In [6]:
# Short alias for the protein-selection helper.
get_prot = mp_nerf.utils.get_prot

# Pull a single protein out of the dataloaders.
# NOTE(review): min_len / max_len presumably bound the accepted sequence length
# (the helper reports the length it stopped at) - confirm in mp_nerf.utils.get_prot.
(
    seq,
    int_seq,
    true_coords,
    angles,
    padding_seq,
    mask,
    pid,
) = get_prot(
    dataloader_=dataloaders,
    vocab_=VOCAB,
    min_len=700,
    max_len=1000,
)

stopping at sequence of length 907


### Load joblib file

In [7]:
# Shell escape: list the contents of the local `experiments` directory.
!ls experiments

profile_csv


In [8]:
# Disabled: cache the selected protein to a joblib file, then restore it from that file.
# NOTE(review): `batch` and `i` are not defined anywhere in the visible notebook
# (get_prot already returns `pid` and `true_coords`) - adapt these names and make sure
# the .joblib file exists before re-enabling.
# joblib.dump({"seq": seq, "int_seq": int_seq, "angles": angles,
#              "id": batch.pids[i], "true_coords": batch.crds[i]}, "experiments/727_aas_seq_and_angles.joblib")
# info = joblib.load("experiments/727_aas_seq_and_angles.joblib")
# seq, int_seq, angles, id_, true_coords = info["seq"], info["int_seq"], info["angles"], info["id"], info["true_coords"]

### Test the algorithm

In [9]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [10]:
%%timeit
# measure time to featurize
mp_nerf.proteins.build_scaffolds_from_scn_angles(seq, angles.to(device))

60.7 ms ± 2.84 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [11]:
# Featurize: build the scaffolds that are later unpacked as keyword arguments
# into protein_fold.
scaffolds = mp_nerf.proteins.build_scaffolds_from_scn_angles(
    seq,
    angles.to(device),
)

In [14]:
%%timeit
# convert coords - fold
coords, mask = mp_nerf.proteins.protein_fold(**scaffolds, device=device)
coords_flat  = rearrange(coords, 'l c d -> (l c) d') 

16.8 ms ± 56.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [12]:
# Fold for real (outside %%timeit) so the results stay in the notebook namespace.
# NOTE: this rebinds `mask`, which previously held the value returned by get_prot.
coords, mask = mp_nerf.proteins.protein_fold(device=device, **scaffolds)

# Merge the first two axes so the coordinates form a flat (l*c, d) array.
coords_flat = rearrange(coords, 'l c d -> (l c) d')

#### Profiling

In [13]:
# Load the snakeviz IPython extension, then profile a single protein_fold call with it;
# the profile is embedded in the notebook output.
%load_ext snakeviz
%snakeviz mp_nerf.proteins.protein_fold(**scaffolds, device=device)

 
*** Profile stats marshalled to file '/var/folders/lh/zgndpx8x755_lcsq48lp_5t40000gn/T/tmp59qcg8si'. 
Embedding SnakeViz in this document...


#### Display

In [21]:
# Wrap the folded (predicted) coordinates in a StructureBuilder and render them
# with py3Dmol; the view is the cell's displayed output.
sb = sidechainnet.StructureBuilder(
    int_seq,
    crd=coords_flat,
)
sb.to_3Dmol()

<py3Dmol.view at 0x1329d42d0>

In [22]:
# Base structure built from the reference coordinates
# (pass crd=coords_flat instead to look at the prediction).
sb = sidechainnet.StructureBuilder(int_seq, crd=true_coords) # or: crd=coords_flat

# scn custom nerf (disabled): build the structure from the angles instead of coordinates.
# NOTE(review): `padding_angles` is not defined anywhere in the visible notebook - confirm
# the intended slice bounds before re-enabling.
# sb = sidechainnet.StructureBuilder(int_seq[:-padding_angles], angles[:-padding_seq])

# Put the structure coords in the wrapper, then read them back from `sb.coords`.
# NOTE(review): this calls a private StructureBuilder method and rebinds `true_coords`
# (originally returned by get_prot), so re-running the cell feeds the previous result
# back into StructureBuilder.
sb._initialize_coordinates_and_PdbCreator()
true_coords = sb.coords
# Disabled alternative: un-flatten the coordinates with 14 entries along the `c` axis.
# NOTE(review): `scn_struct_coords` is not defined in the visible notebook.
# true_coords = rearrange(scn_struct_coords, '(l c) d -> l c d', c=14)

# Render with py3Dmol; the view is the cell's displayed output.
sb.to_3Dmol()

<py3Dmol.view at 0x1329d4950>

### Save oriented to manually diagnose

In [23]:
# Write the predicted and reference structures to PDB files so they can be
# superimposed in an external viewer.

# Make sure the output directory exists before writing.
# NOTE(review): to_pdb presumably does not create missing parent directories,
# so on a fresh checkout without `preds/` the writes below would fail.
os.makedirs("preds", exist_ok=True)

# save predicted
sb = sidechainnet.StructureBuilder(int_seq, crd=coords_flat)
sb.to_pdb("preds/predicted.pdb")

# save labels (`sb` stays bound to this builder afterwards, as before)
sb = sidechainnet.StructureBuilder(int_seq, crd=true_coords)
sb.to_pdb("preds/labels.pdb")

# go here: https://molstar.org/viewer/
# load chains and use superimposition tool