In [None]:
%%bash
# Stop at the first failing command so a failed step cannot cascade
# (e.g. `pip install .` running in the wrong directory after a failed `cd`).
set -e

# Setup environment
conda install pytorch cudatoolkit -c pytorch -y # cudatoolkit=11.3 recommended but not required
conda install -c pyg pyg -y
conda install -c conda-forge pip -y

# Download and install
# -p: do not fail when the directory is left over from an earlier run
mkdir -p antifold && cd antifold
# -O: always write to antifold.zip instead of creating antifold.zip.1 on a re-run
wget -O antifold.zip https://opig.stats.ox.ac.uk/data/downloads/AntiFold/antifold.zip
# -o: overwrite previously extracted files without prompting (a notebook cell cannot answer the prompt)
unzip -o antifold.zip

pip install .

In [1]:
import os
import sys
import numpy as np
import pandas as pd

import antifold.main as antifold

%load_ext autoreload
%autoreload 2

#### Sample antibody sequences in a notebook

In [4]:
# Regions to mutate, given as IMGT region names.
# NOTE(review): the recorded run output logs regions=['CDR1', 'CDR2', 'CDR3H'],
# while this list spells the last entry "CDRH3" — confirm which spelling the
# installed AntiFold version expects.
regions_to_mutate = [
    "CDR1",
    "CDR2",
    "CDRH3",
]

# Directory containing the PDB files, and the table defining the PDBs and chains to use
pdb_dir = "data/pdbs"
df_pdbs = pd.read_csv("data/example_pdbs.csv")

In [5]:
# Load the model from the local checkpoint file
model = antifold.load_IF1_model("models/model.pt")

# Sample from the PDBs: 10 sequences each at temperature 0.50, mutating only
# the regions listed in regions_to_mutate
pdb_output_dict = antifold.sample_pdbs(
    model,
    pdbs_csv_or_dataframe=df_pdbs,  # Path to CSV file, or a DataFrame
    regions_to_mutate=regions_to_mutate,
    pdb_dir=pdb_dir,  # reuse the configured directory rather than repeating the literal path
    sample_n=10,
    sampling_temp=0.50,
    limit_expected_variation=False,
)

# Output dictionary with sequences, and residue probabilities or log-odds.
# The cells that follow index it by PDB name and then by "logprobs" / "sequences".
pdb_output_dict.keys()

[2023-10-08 20:53:50,275] Loading checkpoint from models/model.pt...
[2023-10-08 20:53:50,873] Loaded model to cpu.
[2023-10-08 20:53:50,936] Reading in (3) PDBs from DataFrame
[2023-10-08 20:53:51,054] Predicting batch 1/3: PDBs 1-1 out of 3 total
[2023-10-08 20:53:58,238] Predicting batch 2/3: PDBs 2-2 out of 3 total
[2023-10-08 20:54:05,447] Predicting batch 3/3: PDBs 3-3 out of 3 total
[2023-10-08 20:54:10,256] 6y1l_imgt: , score=1.0545, global_score=1.0545, regions=['CDR1', 'CDR2', 'CDR3H'], model_name=AntiFold, seed=42
[2023-10-08 20:54:10,267] 6y1l_imgt__1: T=0.50, sample=1, score=0.5581, global_score=1.0089, seq_recovery=0.9369, mutations=14
[2023-10-08 20:54:10,279] 6y1l_imgt__2: T=0.50, sample=2, score=0.5667, global_score=1.0098, seq_recovery=0.9414, mutations=13
[2023-10-08 20:54:10,293] 6y1l_imgt__3: T=0.50, sample=3, score=0.5226, global_score=1.0049, seq_recovery=0.9414, mutations=13
[2023-10-08 20:54:10,306] 6y1l_imgt__4: T=0.50, sample=4, score=0.5376, global_score=1.0

dict_keys(['6y1l_imgt', '8ee8_imgt', 'C143_immunebuilder'])

In [7]:
# Display the "logprobs" table for 8ee8_imgt: one row per residue (position, chain,
# original and predicted amino acid, perplexity) plus one value column per amino acid.
pdb_output_dict["8ee8_imgt"]["logprobs"]

Unnamed: 0,pdb_pos,pdb_chain,aa_orig,aa_pred,pdb_posins,perplexity,A,C,D,E,...,M,N,P,Q,R,S,T,V,W,Y
0,2,D,V,M,2,1.548110,-5.291447,-6.706945,-6.018933,-6.282560,...,-0.072037,-6.027358,-5.083813,-6.409261,-6.198010,-4.862206,-5.209980,-4.373082,-7.594067,-6.746268
1,3,D,Q,Q,3,2.263210,-6.741985,-9.515722,-5.650609,-3.242996,...,-5.869053,-4.840443,-10.704819,-0.186739,-4.778812,-6.288636,-5.502401,-3.422149,-9.788743,-7.559047
2,4,D,L,L,4,1.004697,-15.512800,-13.386675,-18.231085,-12.137146,...,-7.887784,-16.396826,-16.016512,-13.089780,-14.650309,-17.041676,-17.819057,-9.947091,-13.954269,-14.525408
3,5,D,V,V,5,1.297059,-8.409853,-13.558203,-11.227506,-7.905055,...,-7.646740,-10.803213,-18.145462,-6.590911,-10.402006,-11.347192,-9.086602,-0.067211,-13.491364,-12.463367
4,6,D,E,E,6,1.018104,-9.534863,-12.929004,-8.245716,-0.002272,...,-14.025725,-13.189263,-11.846736,-6.324562,-12.891207,-11.860312,-11.707385,-10.978434,-16.713793,-18.207766
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
429,226,C,T,T,226,9.947654,-2.161382,-5.816751,-4.478692,-3.382313,...,-6.188020,-4.145348,-8.057058,-3.999777,-4.028618,-2.232061,-1.344917,-2.203058,-5.577538,-4.060819
430,227,C,V,V,227,1.223926,-4.725660,-7.114549,-13.447206,-10.106342,...,-8.631004,-11.763630,-14.287686,-11.212550,-10.907963,-8.704516,-8.394817,-0.033433,-8.825924,-7.483999
431,228,C,A,S,228,4.343285,-6.911370,-7.239207,-3.915302,-5.361331,...,-5.427602,-3.769648,-11.450274,-5.275705,-3.532100,-0.473178,-2.115455,-4.114420,-6.829089,-5.882618
432,229,C,P,V,229,3.119908,-3.790304,-7.937728,-7.531168,-5.954879,...,-5.178323,-7.532415,-3.036758,-7.490414,-6.182585,-5.849082,-4.384080,-0.320649,-5.417379,-7.379792


In [8]:
# Display the "sequences" entry for 8ee8_imgt: an OrderedDict of SeqRecord objects,
# keyed "8ee8_imgt" for the original and "8ee8_imgt__<n>" for each sampled sequence.
pdb_output_dict["8ee8_imgt"]["sequences"]

OrderedDict([('8ee8_imgt',
              SeqRecord(seq=Seq('VQLVESGGGLVQPGGSLRLSCAASGFTFSSDGMSWVRQAPGKGLEWVSYISSGG...APA'), id='8ee8_imgt', name='', description=", score=1.3685, global_score=1.3685, regions=['CDR1', 'CDR2', 'CDR3H'], model_name=AntiFold, seed=42", dbxrefs=[])),
             ('8ee8_imgt__1',
              SeqRecord(seq=Seq('VQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLEWVSYISSGG...APA'), id='', name='', description='T=0.50, sample=1, score=0.9159, global_score=1.3006, seq_recovery=0.9444, mutations=12', dbxrefs=[])),
             ('8ee8_imgt__2',
              SeqRecord(seq=Seq('VQLVESGGGLVQPGGSLRLSCAASGFTFSSYGMSWVRQAPGKGLEWVSYISSGG...APA'), id='', name='', description='T=0.50, sample=2, score=0.8424, global_score=1.2913, seq_recovery=0.9537, mutations=10', dbxrefs=[])),
             ('8ee8_imgt__3',
              SeqRecord(seq=Seq('VQLVESGGGLVQPGGSLRLSCAASGFTFSSYGMSWVRQAPGKGLEWVSYISSGG...APA'), id='', name='', description='T=0.50, sample=3, score=0.8473, global_score=1.

In [9]:
# Compare the original 8ee8_imgt sequence with its first sampled sequence.
sequences_8ee8 = pdb_output_dict["8ee8_imgt"]["sequences"]

# Each record's sequence is split on "/" into two parts, unpacked here as the
# H and L chains (matching the chain= labels passed below).
H_orig, L_orig = sequences_8ee8["8ee8_imgt"].seq.split("/")
H_mut, L_mut = sequences_8ee8["8ee8_imgt__1"].seq.split("/")

# Log a mutation mask together with the original and mutated sequence of each chain
antifold.visualize_mutations(H_orig, H_mut, chain="H")
antifold.visualize_mutations(L_orig, L_mut, chain="L")

[2023-10-08 20:54:25,396] Mutations (12):	______________________________XX______________________X_________________________________________XXXXXX_XX___X___________________________________________________________________________________________________________
[2023-10-08 20:54:25,397] Original H:		VQLVESGGGLVQPGGSLRLSCAASGFTFSSDGMSWVRQAPGKGLEWVSYISSGGATTYYADSVKGRFTISRDNSKNTLSLQMNSLRGEDTAVYYCAKDITAPGRNGLDSWGQGVVVTVSSASTKGPSVFPLAPSSRSTSESTAALGCLVKDYFPEPVTVSWNSGSLTSGVHTFPAVLQSSGLYSLSSVVTVPSSSLGTQTYVCNVNHKPSNTKVDK
[2023-10-08 20:54:25,399] Mutated H:		VQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLEWVSYISSGGSTTYYADSVKGRFTISRDNSKNTLSLQMNSLRGEDTAVYYCARHYLLAGYYGLDYWGQGVVVTVSSASTKGPSVFPLAPSSRSTSESTAALGCLVKDYFPEPVTVSWNSGSLTSGVHTFPAVLQSSGLYSLSSVVTVPSSSLGTQTYVCNVNHKPSNTKVDK

[2023-10-08 20:54:25,400] Mutations (8):	__________________________XXX__XXX____________________XX____________________________________________________________________________________________________________________________________