# Дизайн белок-белковых взаимодействий

*весь код - для запуска в колабе. перед запуском включите gpu*

## 0. Импорты

In [None]:
#@title Install dependencies for RFDiffusion

import os, time, signal
import sys, random, string, re
if not os.path.isdir("params"):
  os.system("apt-get install aria2")
  os.system("mkdir params")
  # send param download into background
  os.system("(\
  aria2c -q -x 16 https://files.ipd.uw.edu/krypton/schedules.zip; \
  aria2c -q -x 16 http://files.ipd.uw.edu/pub/RFdiffusion/6f5902ac237024bdd0c176cb93063dc4/Base_ckpt.pt; \
  aria2c -q -x 16 http://files.ipd.uw.edu/pub/RFdiffusion/e29311f6f1bf1af907f9ef9f44b8328b/Complex_base_ckpt.pt; \
  aria2c -q -x 16 http://files.ipd.uw.edu/pub/RFdiffusion/f572d396fae9206628714fb2ce00f72e/Complex_beta_ckpt.pt; \
  aria2c -q -x 16 http://files.ipd.uw.edu/pub/RFdiffusion/5532d2e1f3a4738decd58b19d633b3c3/ActiveSite_ckpt.pt; \
  aria2c -q -x 16 http://files.ipd.uw.edu/pub/RFdiffusion/60f09a193fb5e5ccdc4980417708dbab/Complex_Fold_base_ckpt.pt; \
  aria2c -q -x 16 https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar; \
  tar -xf alphafold_params_2022-12-06.tar -C params; \
  touch params/done.txt) &")

if not os.path.isdir("RFdiffusion"):
  print("installing RFdiffusion...")
  os.system("git clone https://github.com/sokrypton/RFdiffusion.git")
  os.system("pip -q install jedi omegaconf hydra-core icecream pyrsistent")
  os.system("pip install dgl -f https://data.dgl.ai/wheels/cu121/repo.html")
  os.system("cd RFdiffusion/env/SE3Transformer; pip -q install --no-cache-dir -r requirements.txt; pip -q install .")
  os.system("wget -qnc https://files.ipd.uw.edu/krypton/ananas")
  os.system("chmod +x ananas")

if not os.path.isdir("colabdesign"):
  print("installing ColabDesign...")
  os.system("pip -q install git+https://github.com/sokrypton/ColabDesign.git")
  os.system("ln -s /usr/local/lib/python3.*/dist-packages/colabdesign colabdesign")

if not os.path.isdir("RFdiffusion/models"):
  print("downloading RFdiffusion params...")
  os.system("mkdir RFdiffusion/models")
  models = ["Base_ckpt.pt","Complex_base_ckpt.pt","Complex_beta_ckpt.pt",'ActiveSite_ckpt.pt','Complex_Fold_base_ckpt.pt']
  for m in models:
    while os.path.isfile(f"{m}.aria2"):
      time.sleep(5)
  os.system(f"mv {' '.join(models)} RFdiffusion/models")
  os.system("unzip schedules.zip; rm schedules.zip")

if 'RFdiffusion' not in sys.path:
  os.environ["DGLBACKEND"] = "pytorch"
  sys.path.append('RFdiffusion')

if not os.path.isdir('pyrosetta'):
    os.system("gdown 1Pts0USd16GAtqz5v_4ErTu_7YBpysF9G; \
               pip install pyrosetta-2023.45+release.a6d9ba8-cp310-cp310-linux_x86_64.whl")

os.system("pip install py3Dmol")
os.system("pip install Bio" )

In [None]:
from Bio import PDB
from Bio.PDB.PDBParser import PDBParser
import py3Dmol
import numpy as np

## 1. Постановка задачи

Будем генерировать белок, способный связывать эктодомен рецептора инсулина (https://www.rcsb.org/structure/5KQV)

Загрузим файл со структурой и визуализируем ее.

In [None]:
! wget https://files.rcsb.org/download/5KQV.pdb

In [None]:
parser=PDBParser(QUIET=True)
structure=parser.get_structure("5KQV",'5KQV.pdb')
for model in structure:
    for chain in model:
        print(chain.get_id(), len(chain))
        ln=[]
        li=[]
        for residue in chain:
            if residue.get_resname()!='HOH':
                li.append('{0:3}'.format(residue.id[1]))
                ln.append(residue.get_resname())
        print(' '.join(li))
        print(' '.join(ln))

Вырежем только цепь Е.

In [None]:
!cat 5KQV.pdb | grep ' E ' > 5KQV_E.pdb

In [None]:
p = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js')

p.addModel(open('/content/5KQV_E.pdb','r').read(),'pdb')
p.setStyle({'chain':'E'},{'cartoon':{'color':'green'}})
p.setStyle({'resi':list(range(6,156))},{'cartoon':{'color':'yellow'}})

p.zoomTo() # центрировать структуру
p.show()

Вырежем первые 150 остатков

In [55]:
from Bio.PDB.StructureBuilder import StructureBuilder
from Bio.PDB.PDBIO import PDBIO


bb=StructureBuilder()

bb.init_structure('part')
bb.init_model(0)
bb.init_chain('A')
pdb=bb.get_structure()

parser=PDBParser(QUIET=True)
structure=parser.get_structure("5KQV",'5KQV.pdb')

for residue in structure[0]['E']:
    if (residue.get_id()[1] in list(range(4, 154))):
        pdb[0]['A'].add(residue)

io = PDBIO()
io.set_structure(pdb)
io.save("5KQVpart.pdb")


## 2. Binder design с помощью RFDiffusion

Используем RFdiffusion (https://github.com/RosettaCommons/RFdiffusion) для дизайна байндера.

In [None]:
%%time
! RFdiffusion/run_inference.py \
inference.input_pdb=5KQV_E.pdb \
'contigmap.contigs=[E4-153/0 200-200]' \
inference.output_prefix=result/rf_diff  \
inference.num_designs=1

In [None]:
p = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js')

p.addModel(open('/content/result/rf_diff_0.pdb','r').read(),'pdb')

p.setStyle({'chain':'B'},{'cartoon':{'color':'green'}})
p.setStyle({'chain':'A'},{'cartoon':{'color':'blue'}})

p.zoomTo() # центрировать структуру
p.show()

## 3. Определение hotspots

Для генерации байндера крайне желательно задать т.н. hotspots, то есть участки белка, которые должны взаимодействовать с байндером.

Для определения хотспотов можно использовать инструменты для предсказания участков белок-белкового взаимодействия. Мы используем dMaSIF (https://github.com/FreyrS/dMaSIF).

### MaSIF Colab

Источник: https://colab.research.google.com/github/casperg92/MaSIF_colab/blob/main/dMaSIF_Colab_V1.ipynb

In [53]:
#@title Install dependencies for dMaSIF

%cd -q /content
!rm -fr MaSIF_colab > /dev/null
!git clone --quiet https://github.com/casperg92/MaSIF_colab.git > /dev/null

import sys
sys.path.append("MaSIF_colab")
sys.path.append("MaSIF_colab/data_preprocessing")


!pip install torch_geometric &> /dev/null
print('Installing PyKeops..')
!pip install pykeops &> /dev/null
print('Installing plyfile..')
!pip install plyfile &> /dev/null
print('Installing pyvtk..')
!pip install pyvtk &> /dev/null
print('Installing nglview..')
!pip install -q nglview &> /dev/null
print('Installing pdbparser..')
!pip install pdbparser &> /dev/null
print('Installing reduce..')
!git clone --quiet https://github.com/rlabduke/reduce > /dev/null
!cmake reduce &> /dev/null
!make &> /dev/null
!sudo make install &> /dev/null

#Костыли

with open('/content/MaSIF_colab/benchmark_models.py','r', encoding="utf-8") as f:
    b=f.read().replace('''from torch_geometric.nn import (
    DynamicEdgeConv,
    PointConv,
    XConv,
    fps,
    radius,
    global_max_pool,
    knn_interpolate,
)''','''from torch_geometric.nn import (
    DynamicEdgeConv,
    XConv,
    fps,
    radius,
    global_max_pool,
    knn_interpolate,
)

from typing import Callable, Optional, Union

from torch import Tensor

from torch_geometric.nn.conv import MessagePassing
from torch_geometric.nn.inits import reset
from torch_geometric.typing import (
    Adj,
    OptTensor,
    PairOptTensor,
    PairTensor,
    SparseTensor,
    torch_sparse,
)
from torch_geometric.utils import add_self_loops, remove_self_loops

class PointNetConv(MessagePassing):
    def __init__(self, local_nn: Optional[Callable] = None,
                 global_nn: Optional[Callable] = None,
                 add_self_loops: bool = True, **kwargs):
        kwargs.setdefault('aggr', 'max')
        super().__init__(**kwargs)

        self.local_nn = local_nn
        self.global_nn = global_nn
        self.add_self_loops = add_self_loops

        self.reset_parameters()

    def reset_parameters(self):
        super().reset_parameters()
        reset(self.local_nn)
        reset(self.global_nn)

    def forward(self, x: Union[OptTensor, PairOptTensor],
                pos: Union[Tensor, PairTensor], edge_index: Adj) -> Tensor:

        if not isinstance(x, tuple):
            x: PairOptTensor = (x, None)

        if isinstance(pos, Tensor):
            pos: PairTensor = (pos, pos)

        if self.add_self_loops:
            if isinstance(edge_index, Tensor):
                edge_index, _ = remove_self_loops(edge_index)
                edge_index, _ = add_self_loops(
                    edge_index, num_nodes=min(pos[0].size(0), pos[1].size(0)))
            elif isinstance(edge_index, SparseTensor):
                edge_index = torch_sparse.set_diag(edge_index)

        # propagate_type: (x: PairOptTensor, pos: PairTensor)
        out = self.propagate(edge_index, x=x, pos=pos, size=None)

        if self.global_nn is not None:
            out = self.global_nn(out)

        return out


    def message(self, x_j: Optional[Tensor], pos_i: Tensor,
                pos_j: Tensor) -> Tensor:
        msg = pos_j - pos_i
        if x_j is not None:
            msg = torch.cat([x_j, msg], dim=1)
        if self.local_nn is not None:
            msg = self.local_nn(msg)
        return msg

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}(local_nn={self.local_nn}, '
                f'global_nn={self.global_nn})')

''')
with open('/content/MaSIF_colab/benchmark_models.py','w', encoding="utf-8") as f:
    f.write(b)

with open('/content/MaSIF_colab/benchmark_layers.py','r', encoding="utf-8") as f:
    b=f.read()
    b=b.replace('from torch_cluster import knn','')
    b=b.replace('knns = {"torch": knn, "keops": keops_knn}','knns = {"keops": keops_knn}')

with open('/content/MaSIF_colab/benchmark_layers.py','w', encoding="utf-8") as f:
    f.write(b)

with open('/content/MaSIF_colab/data.py','r', encoding="utf-8") as f:
    b=f.read()
    b=b.replace('def __cat_dim__(self, key, value)',
                'def __cat_dim__(self, key, value, *args, **kwargs)')
    b=b.replace('def __inc__(self, key, value)',
                'def __inc__(self, key, value, *args, **kwargs)')
with open('/content/MaSIF_colab/data.py','w', encoding="utf-8") as f:
    f.write(b)

with open('/content/MaSIF_colab/data_iteration.py','r', encoding="utf-8") as f:
    b=f.read()
    b=b.replace('in protein_pair.keys',
                'in protein_pair.keys()')
with open('/content/MaSIF_colab/data_iteration.py','w', encoding="utf-8") as f:
    f.write(b)

with open('/content/MaSIF_colab/geometry_processing.py','r', encoding="utf-8") as f:
    b=f.read()
    b=b.replace('S = torch.solve(PQt, PPt).solution',
                'S = torch.linalg.solve(PQt, PPt)')
with open('/content/MaSIF_colab/geometry_processing.py','w', encoding="utf-8") as f:
    f.write(b)


Installing PyKeops..
Installing plyfile..
Installing pyvtk..
Installing nglview..
Installing pdbparser..
Installing reduce..
fatal: destination path 'reduce' already exists and is not an empty directory.


In [56]:
#@title Change pdb path and chain name(s), then hit `Runtime` -> `Run all`
#@markdown Note: the pdb file cannot contain an underscore ('_') in its name.
import os
import glob
from google.colab import files
import os

# Create folder for the pdbs
pred_dir = '/content/pdbs'
isExist = os.path.exists(pred_dir)
if not isExist:
  os.makedirs(pred_dir)

# target pdb
target_pdb = "5KQVpart.pdb" #@param {type:"string"}

target_name = target_pdb.split('/')
shutil.copyfile(target_pdb, pred_dir+target_name[-1])
target_name = target_name[-1].split('.')

if target_name[-1] == 'pdb':
  target_name = target_name[0]
else:
  print('Please upload a valid .pdb file!')

chain_name = 'A' #@param {type:"string"}
chains = [chain_name]

# Path to MaSIF weights
#@markdown A resolution of 0.7 Angstrom gives a higher point cloud density and a higher performance. Different radii settings do not seem to impact performance.
model_resolution = '0.7 Angstrom' #@param ["1 Angstrom", "0.7 Angstrom"]
patch_radius = '9 Angstrom' #@param ["9 Angstrom", "12 Angstrom"]


if patch_radius == '9 Angstrom':
  if model_resolution == '1 Angstrom':
    model_path = '/content/MaSIF_colab/models/dMaSIF_site_3layer_16dims_9A_100sup_epoch64'
    resolution = 1.0
    radius = 9
    sup_sampling = 100
  else:
    model_path = '/content/MaSIF_colab/models/dMaSIF_site_3layer_16dims_9A_0.7res_150sup_epoch85'
    resolution = 0.7
    radius = 9
    supsampling = 150

elif patch_radius == '12 Angstrom':
  if model_resolution == '1 Angstrom':
    model_path = '/content/MaSIF_colab/models/dMaSIF_site_3layer_16dims_12A_100sup_epoch71'
    resolution = 1.0
    radius = 12
    supsampling = 100
  else:
    model_path = '/content/MaSIF_colab/models/dMaSIF_site_3layer_16dims_12A_0.7res_150sup_epoch59'
    resolution = 0.7
    radius = 12
    supsampling = 100


# create new folders
# chain dir
chains_dir = '/content/chains'
isExist = os.path.exists(chains_dir)
if not isExist:
  os.makedirs(chains_dir)
else:
  files = glob.glob(chains_dir + '/*')
  for f in files:
    os.remove(f)

# npy folder
npy_dir = '/content/npys'
isExist = os.path.exists(npy_dir)
if not isExist:
  os.makedirs(npy_dir)
else:
  files = glob.glob(npy_dir + '/*')
  for f in files:
    os.remove(f)

# Create folder for the embeddings
pred_dir = '/content/preds'
isExist = os.path.exists(pred_dir)
if not isExist:
  os.makedirs(pred_dir)
else:
  files = glob.glob(pred_dir + '/*')
  for f in files:
    os.remove(f)

In [57]:
#@title Load functions

import numpy as np
import pykeops
import torch
from Bio.PDB import *
from data_preprocessing.download_pdb import convert_to_npy
from torch_geometric.data import DataLoader
from torch_geometric.transforms import Compose
import argparse
import shutil
import locale

# Custom data loader and model:
from data import ProteinPairsSurfaces, PairData, CenterPairAtoms, load_protein_npy
from data import RandomRotationPairAtoms, NormalizeChemFeatures, iface_valid_filter
from model import dMaSIF
from data_iteration import iterate
from helper import *

# For showing the plot in nglview
from google.colab import output
output.enable_custom_widget_manager()
import nglview as ng
#import ipywidgets as widgets

# For downloading files
from google.colab import files

def my_load_protein_pair(pdb_id, data_dir,single_pdb=False):
    """Loads a protein surface mesh and its features"""
    pspl = pdb_id.split("_")
    p1_id = pspl[0] + "_" + pspl[1]
    p2_id = pspl[0] + "_" + pspl[2]

    p1 = load_protein_npy(p1_id, data_dir, center=False,single_pdb=single_pdb)
    p2 = load_protein_npy(p2_id, data_dir, center=False,single_pdb=single_pdb)


    protein_pair_data = PairData(
        xyz_p1=None,
        xyz_p2=None,
        face_p1=None,
        face_p2=None,
        chemical_features_p1=None,
        chemical_features_p2=None,
        y_p1=None,
        y_p2=None,
        normals_p1=None,
        normals_p2=None,
        center_location_p1=None,
        center_location_p2=None,
        atom_coords_p1=p1["atom_coords"],
        atom_coords_p2=p2["atom_coords"],
        atom_types_p1=p1["atom_types"],
        atom_types_p2=p2["atom_types"],
    )
    return protein_pair_data

def generate_descr(model_path, output_path, pdb_file, npy_directory, radius, resolution,supsampling):
    """Generat descriptors for a MaSIF site model"""
    parser = argparse.ArgumentParser(description="Network parameters")
    parser.add_argument("--experiment_name", type=str, default=model_path)
    parser.add_argument("--use_mesh", type=bool, default=False)
    parser.add_argument("--embedding_layer",type=str,default="dMaSIF")
    parser.add_argument("--curvature_scales",type=list,default=[1.0, 2.0, 3.0, 5.0, 10.0])
    parser.add_argument("--resolution",type=float,default=resolution)
    parser.add_argument("--distance",type=float,default=1.05)
    parser.add_argument("--variance",type=float,default=0.1)
    parser.add_argument("--sup_sampling", type=int, default=supsampling)
    parser.add_argument("--atom_dims",type=int,default=6)
    parser.add_argument("--emb_dims",type=int,default=16)
    parser.add_argument("--in_channels",type=int,default=16)
    parser.add_argument("--orientation_units",type=int,default=16)
    parser.add_argument("--unet_hidden_channels",type=int,default=8)
    parser.add_argument("--post_units",type=int,default=8)
    parser.add_argument("--n_layers", type=int, default=3)
    parser.add_argument("--radius", type=float, default=radius)
    parser.add_argument("--k",type=int,default=40)
    parser.add_argument("--dropout",type=float,default=0.0)
    parser.add_argument("--site", type=bool, default=True) # set to true for site model
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--search",type=bool,default=False) # Set to true for search model
    parser.add_argument("--single_pdb",type=str,default=pdb_file)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--random_rotation",type=bool,default=False)
    parser.add_argument("--device", type=str, default="cpu")
    #parser.add_argument("--single_protein",type=bool,default=True)
    parser.add_argument("--single_protein",type=bool,default=True) # set to false for site
    parser.add_argument("--no_chem", type=bool, default=False)
    parser.add_argument("--no_geom", type=bool, default=False)

    args = parser.parse_args("")

    model_path = args.experiment_name
    save_predictions_path = Path(output_path)

    # Ensure reproducability:
    torch.backends.cudnn.deterministic = True
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)

    # Load the train and test datasets:
    transformations = (
        Compose([NormalizeChemFeatures(), CenterPairAtoms(), RandomRotationPairAtoms()])
        if args.random_rotation
        else Compose([NormalizeChemFeatures()])
    )

    if args.single_pdb != "":
        single_data_dir = Path(npy_directory)
        test_dataset = [my_load_protein_pair(args.single_pdb, single_data_dir, single_pdb=True)]
        test_pdb_ids = [args.single_pdb]

    # PyTorch geometric expects an explicit list of "batched variables":
    batch_vars = ["xyz_p1", "xyz_p2", "atom_coords_p1", "atom_coords_p2"]
    test_loader = DataLoader(
        test_dataset, batch_size=args.batch_size, follow_batch=batch_vars
    )

    net = dMaSIF(args)
    # net.load_state_dict(torch.load(model_path, map_location=args.device))
    net.load_state_dict(torch.load(model_path, map_location=args.device)["model_state_dict"])
    net = net.to(args.device)

    # Perform one pass through the data:
    info = iterate(
        net,
        test_loader,
        None,
        args,
        test=True,
        save_path=save_predictions_path,
        pdb_ids=test_pdb_ids,
    )
    return info



def show_pointcloud(main_pdb, coord_file, emb_file):
  # Normalize embedding to represent a b-factor value between 0-100
  b_factor = []
  for emb in emb_file:
      b_factor.append(emb[-2])

  # b_factor = [(float(i)-min(b_factor))/(max(b_factor)-min(b_factor)) for i in b_factor]

  # writing a pseudo pdb of all points using their coordinates and H atom.
  with open("pointcloud.pdb",'w') as f:
    for i in range(len(coord_file)):

      points = coord_file[i]
      s="ATOM  {:5d}  H   XYZ A   1    {:8.3f}{:8.3f}{:8.3f}  1.00{:6.2f}           H  \n".format(i+1,i+1,
                                                                                                   points[0],
                                                                                                   points[1],
                                                                                                   points[2],
                                                                                                   b_factor[i]*100)
      f.write(s)

  # reading the psudo PDB we generated above for the point cloud.
  coordPDB = "pointcloud.pdb"
  view = ng.NGLWidget()
  view.add_component(ng.FileStructure(os.path.join("/content", coordPDB)), defaultRepresentation=False)

  # representation with our customized colorscheme.
  view.add_representation('point',
                          useTexture = 1,
                          pointSize = 2,
                          colorScheme = "bfactor",
                          colorDomain = [100.0, 0.0],
                          colorScale = 'rwb',
                          selection='_H')

  view.add_component(ng.FileStructure(os.path.join("/content", main_pdb)))
  view.background = 'black'
  return view

def show_structure(main_pdb):
  # reading the psudo PDB we generated above for the point cloud.
  view = ng.NGLWidget()

  view.add_component(ng.FileStructure(main_pdb), defaultRepresentation=False)
  view.add_representation("cartoon", colorScheme = "bfactor", colorScale = 'rwb', colorDomain = [100.0, 0.0])
  view.add_representation("ball+stick", colorScheme = "bfactor", colorScale = 'rwb', colorDomain = [100.0, 0.0])
  view.background = 'black'
  return view

In [58]:
#@title Run MaSIF

locale.getpreferredencoding = lambda: "UTF-8"

tmp_pdb = '/content/pdbs/tmp_1.pdb'
shutil.copyfile(target_pdb, tmp_pdb)

# Remove protons if there are any
!reduce -Trim -Quiet /content/pdbs/tmp_1.pdb > /content/pdbs/tmp_2.pdb
# Add protons
!reduce -HIS -Quiet /content/pdbs/tmp_2.pdb > /content/pdbs/tmp_3.pdb

tmp_pdb = '/content/pdbs/tmp_3.pdb'
shutil.copyfile(tmp_pdb, target_pdb)
# Generate the surface features
convert_to_npy(target_pdb, chains_dir, npy_dir, chains)

# Generate the embeddings
pdb_name = "{n}_{c}_{c}".format(n= target_name, c=chain_name)
info = generate_descr(model_path, pred_dir, pdb_name, npy_dir, radius, resolution, supsampling)

# In info I hardcoded memory usage to 0 so MaSIF would run on the CPU. We might want to change this.

100%|██████████| 1/1 [00:01<00:00,  1.98s/it]


In [59]:
#@title Generate PDBs for hotspot atoms and residues
list_top_n_hotspot_residues = 50 #@param {type:"integer"}

from Bio.PDB.PDBParser import PDBParser
from pykeops.numpy import LazyTensor

parser=PDBParser(PERMISSIVE=1)
structure=parser.get_structure("structure", target_pdb)

coord = np.load("preds/{n}_{c}_predcoords.npy".format(n= target_name, c=chain_name))
embedding = np.load("/content/preds/{n}_{c}_predfeatures_emb1.npy".format(n= target_name, c=chain_name))
atom_coords = np.stack([atom.get_coord() for atom in structure.get_atoms()])

b_factor = embedding[:, -2]
# b_factor = (b_factor - min(b_factor)) / (max(b_factor) - min(b_factor))

x_i = LazyTensor(atom_coords[:, None, :])
y_j = LazyTensor(coord[None, :, :])
dists = ((x_i - y_j) ** 2).sum(-1)
nn_ind = dists.argmin(dim=1)
dists=dists.min(dim=1)

atom_b_factor = b_factor[nn_ind]
dist_thresh = 2.0
atom_b_factor[dists > dist_thresh] = 0.0

for i, atom in enumerate(structure.get_atoms()):
    atom.set_bfactor(atom_b_factor[i] * 100)

# Create folder for the embeddings
pred_dir = '/content/output'
os.makedirs(pred_dir, exist_ok=True)

# Save pdb file with per-atom b-factors
io = PDBIO()
io.set_structure(structure)
io.save("/content/output/per_atom_binding.pdb")

atom_residues = np.array([atom.get_parent().id[1] for atom in structure.get_atoms()])

hotspot_res = {}
for residue in structure.get_residues():
    res_id = residue.id[1]
    res_b_factor = np.max(atom_b_factor[atom_residues == res_id])
    hotspot_res[res_id] = res_b_factor
    for atom in residue.get_atoms():
        atom.set_bfactor(res_b_factor * 100)

# Save pdb file with per-residue b-factors
io = PDBIO()
io.set_structure(structure)
io.save("/content/output/per_resi_binding.pdb")

if list_top_n_hotspot_residues>0:
  print('Sorted on residue contribution (high to low')
  for w in sorted(hotspot_res, key=hotspot_res.get, reverse=True)[:list_top_n_hotspot_residues]:
    print(w, hotspot_res[w])
    if hotspot_res[w]==0:
      break

Sorted on residue contribution (high to low
118 0.9089528
88 0.9005107
96 0.88948977
94 0.8855943
109 0.8851982
107 0.86690444
113 0.86447966
89 0.8602897
14 0.83060646
46 0.8148788
108 0.80655044
110 0.7966335
121 0.7882006
128 0.7850416
144 0.78006583
97 0.77719116
83 0.7698663
4 0.7564565
130 0.75584483
80 0.7531414
62 0.7491793
7 0.744514
86 0.7407951
36 0.7354739
73 0.72647625
147 0.72275585
129 0.72260046
53 0.7150244
127 0.7115184
105 0.69669217
78 0.69633853
39 0.69068295
47 0.6887554
153 0.6860921
114 0.6834681
138 0.68142754
102 0.6803827
141 0.6761352
90 0.6674489
37 0.6660379
139 0.66483307
67 0.6593434
70 0.6587157
143 0.6577749
131 0.6518266
133 0.64116365
65 0.64060944
71 0.6394651
81 0.6362723
30 0.6346817




In [None]:
#@title Plot output
#@markdown Blue identifies non-binding and red identifies binding interaction sites. Rerun this cell if you want to change the plotted structure.

from google.colab import output
output.enable_custom_widget_manager()

plot_structure = 'Residues' #@param ["Pointcloud", "Residues", "Atoms"]

## file addresses
if plot_structure == 'Pointcloud':
  view = show_pointcloud(target_pdb, coord, embedding)
elif plot_structure == "Residues":
  view = show_structure('/content/output/per_resi_binding.pdb')
elif plot_structure == "Atoms":
  view = show_structure('/content/output/per_atom_binding.pdb')

view

## 4. Fold conditioning

Попробуем задизайнить белок-связывающее антитело. Для этого используем функционал fold conditioning.

Скачаем структуру антитела.

In [None]:
!wget https://files.rcsb.org/download/7DET.pdb
!cat 7DET.pdb  | grep ' B ' > 7DET_B.pdb

In [None]:
p = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js')

p.addModel(open('/content/7DET_B.pdb','r').read(),'pdb')
p.setStyle({'chain':'B'},{'cartoon':{'color':'blue'}})

p.zoomTo() # центрировать структуру
p.show()

Сгенерируем файл со вторичной структурой

In [None]:
! RFdiffusion/helper_scripts/make_secstruc_adj.py --input_pdb ./7DET_B.pdb --out_dir antibody_secstruc/

Запустим RFDiffusion. Синтаксис здесь будет несколько отличаться.

In [None]:
%%time
! RFdiffusion/run_inference.py \
scaffoldguided.scaffoldguided=True \
scaffoldguided.target_pdb=True \
scaffoldguided.target_path=5KQVpart.pdb \
'ppi.hotspot_res=[E57,E81,E89]'  \
scaffoldguided.scaffold_dir=antibody_secstruc/ \
inference.num_designs=1 \
inference.output_prefix=result/rf_diff_fold_hotspot

In [None]:
p = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js')

p.addModel(open('/content/result/rf_diff_fold_hotspot_0.pdb','r').read(),'pdb')
p.setStyle({'chain':'A'},{'cartoon':{'color':'green'}})
p.setStyle({'chain':'B'},{'cartoon':{'color':'blue'}})
p.setStyle({'chain':'B','resi':[231+57,231+81,231+89]},
 {'cartoon':{'color':'yellow'},'stick':{'colorscheme':'yellowCarbon'}})

p.zoomTo()
p.show()

In [None]:
parser=PDBParser(QUIET=True)
structure=parser.get_structure("5KQV",'result/rf_diff_fold_hotspot_0.pdb')
for model in structure:
    for chain in model:
        print(chain.get_id(), len(chain))
        ln=[]
        li=[]
        for residue in chain:
            if residue.get_resname()!='HOH':
                li.append('{0:3}'.format(residue.id[1]))
                ln.append(residue.get_resname())
        print(' '.join(li))
        print(' '.join(ln))