### Objective: Download structures of antibodies from SAbDab.

We want to compare the effect of using an experimentally solved structure versus a structure predicted computationally with the AntibodyBuilder tool.

We are getting the antibodies from SAbDab. The raw PDB file for an entry is served at a URL of the following form:


https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabdab/pdb/6r8x/?raw=true


#### Load modules

In [127]:
import pandas as pd
from biopandas.pdb import PandasPdb
import matplotlib.pyplot as plt
from pathlib import Path
import seaborn
import numpy as np
import requests
from developability.utils import ls
import nglview as ngl

#### Get the table of therapeutic antibodies from Thera-SAbDab and the table of antibodies that I calculated descriptors for.

In [128]:
# Data directory. NOTE: this is resolved relative to the kernel's working
# directory (three levels up, then 'Desktop'), so the notebook must be started
# from the expected location for the paths below to exist.
desktop = Path().cwd().parent.parent.parent/'Desktop'

# Short, attribute-friendly names for the structure columns.
# ("SI" presumably stands for sequence identity -- TODO confirm.)
new_structure_map = {
    '100% SI Structure': 'structure100',
    '99% SI Structure': 'structure99',
    '95-98% SI Structure': 'structure95_98',
}
structure_columns = list(new_structure_map.values())
cols = ['antibody'] + structure_columns

# Load the therapeutic antibody table, rename the columns we use, and
# lower-case the antibody names so they can be matched against other tables.
thab_df = (
    pd.read_csv(desktop/'TSAbDb.csv')
    .rename(columns={'Therapeutic': 'antibody', **new_structure_map})
    .assign(antibody=lambda df: df['antibody'].str.lower())
)

# Keep only antibodies with at least one structure entry. Cells that are exactly
# 'None;None' carry no structure, so they are turned into missing values first.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
thabs_with_structure = (
    thab_df
    .replace('None;None', np.nan)
    .dropna(subset=structure_columns, how='all')
)

thabs_with_structure.head(10)[cols]

Unnamed: 0,antibody,structure100,structure99,structure95_98
1,abciximab,6v4p:CD,,
2,abelacimab,6r8x:CB,,
9,acimtamig,,,7seg:AB:HL;None
12,adalimumab,6cr1:HL/4nyl:AB:CD:EF:HL,3wd5:HL,
15,adintrevimab,,7u2d:HL,7u2e:HL
16,aducanumab,6cnr:HL/6co3:HL,,
17,afasevikumab,6ppg:HL:BA,,
19,alemtuzumab,1bey:HL/1ce1:HL,,
22,alomfilimab,7joo:HL,,
25,amatuximab,7ued:HL,4f33:BA:DC:FE:HG/4f3f:BA,


In [129]:
# Descriptor results computed for the renumbered antibodies.
renumbered_abs = desktop / 'renumbered_abs'
descriptor_csv = renumbered_abs / 'antibody_descriptor_results.csv'
desc_df = pd.read_csv(descriptor_csv)
desc_df

Unnamed: 0,antibody,Heparin,FcRn,HCDR1_APBS_pos,LCDR1_APBS_pos,HCDR2_APBS_pos,LCDR2_APBS_pos,HCDR3_APBS_pos,LCDR3_APBS_pos,HFR1_APBS_pos,...,LFR2_APBS_net,HFR3_APBS_net,LFR3_APBS_net,HFR4_APBS_net,LFR4_APBS_net,HFR5_APBS_net,LFR5_APBS_net,CDR_APBS_pos,CDR_APBS_neg,CDR_APBS_net
0,adalimumab,0.79,1.04,0.000000,74.602083,0.000000,0.000000,72.789843,0.000000,153.383089,...,0.000000,0.000000,-748.778909,0.000000,0.000000,0.0,0.0,2082.976016,-1474.165517,608.810499
1,daclizumab,0.84,0.02,0.000000,0.000000,16.410573,496.853471,0.000000,0.000000,194.535384,...,-38.688851,758.857720,251.433303,0.000000,259.370668,0.0,0.0,3517.979296,-148.965592,3369.013704
2,ganitumab,0.79,3.20,0.000000,1784.197304,0.000000,541.321847,0.000000,101.209025,28.650125,...,1566.472993,326.574716,0.000000,0.000000,0.000000,0.0,0.0,5662.864570,-420.970374,5241.894197
3,vedolizumab,0.54,0.23,128.403964,141.894377,0.000000,0.000000,22.714955,0.000000,362.117973,...,255.914384,-116.203043,0.000000,0.000000,0.000000,0.0,0.0,1230.650483,-2715.406171,-1484.755688
4,natalizumab,0.54,0.27,0.000000,95.545055,2053.962629,0.000000,176.628811,0.000000,42.893355,...,0.000000,1223.740443,1002.670235,0.000000,145.016342,0.0,0.0,5948.233746,-98.765519,5849.468227
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,gevokizumab,0.63,0.40,0.000000,626.184917,1896.478472,0.000000,0.000000,0.000000,1746.311576,...,1645.095706,1171.407803,1263.330452,0.000000,0.000000,0.0,0.0,9959.805306,-249.899850,9709.905457
124,parsatuzumab,0.67,0.61,0.000000,0.000000,508.074484,0.000000,0.000000,0.000000,222.579599,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,981.647846,-222.873216,758.774630
125,otelixizumab,0.69,1.05,0.000000,0.000000,64.435047,0.000000,0.000000,0.000000,0.000000,...,-202.226299,-177.694637,548.158190,-62.743093,70.444820,0.0,0.0,721.834666,-2911.410455,-2189.575789
126,veltuzumab,0.54,0.39,0.000000,0.000000,0.000000,0.000000,0.000000,109.847684,300.291467,...,471.310616,0.000000,-316.003682,35.450280,0.000000,0.0,0.0,2129.911265,-525.927830,1603.983435


##### Check whether all the antibodies in the descriptor data are contained in the therapeutic antibody table.
They are, as expected.

In [130]:
# Antibodies that have descriptors but are absent from the therapeutic table.
# A set difference is used (instead of comparing the intersection size with
# len(desc_df)) so repeated names in desc_df cannot cause a false negative, and
# the result is not stored under `abs`, which would shadow the builtin.
missing_from_thab = set(desc_df['antibody']) - set(thab_df['antibody'])
len(missing_from_thab) == 0

True

In [131]:
# Preview the first 20 antibodies that have at least one structure entry.
thabs_with_structure.loc[:, cols].head(20)

Unnamed: 0,antibody,structure100,structure99,structure95_98
1,abciximab,6v4p:CD,,
2,abelacimab,6r8x:CB,,
9,acimtamig,,,7seg:AB:HL;None
12,adalimumab,6cr1:HL/4nyl:AB:CD:EF:HL,3wd5:HL,
15,adintrevimab,,7u2d:HL,7u2e:HL
16,aducanumab,6cnr:HL/6co3:HL,,
17,afasevikumab,6ppg:HL:BA,,
19,alemtuzumab,1bey:HL/1ce1:HL,,
22,alomfilimab,7joo:HL,,
25,amatuximab,7ued:HL,4f33:BA:DC:FE:HG/4f3f:BA,


In [132]:
def download_therapeutic_ab_pdb(code, name, output_dir=None, timeout=30):
    """Download the SAbDab structure file for a PDB code and save it to disk.

    Args:
        code: PDB code inserted into the SAbDab download URL.
        name: Label appended to the PDB code in the output file name.
        output_dir: Directory to write into; defaults to the current working
            directory.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Path of the file that was written (``<output_dir>/<code>_<name>.pdb``).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    target_dir = Path(output_dir) if output_dir else Path.cwd()

    url = f'https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabdab/pdb/{code}/?raw=true'

    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx responses; otherwise the body of an error page
    # would be saved as if it were a structure file.
    response.raise_for_status()

    file = target_dir/f'{code}_{name}.pdb'
    file.write_text(response.text)
    return file

def download_pdb(pdb_id, protein_name='', output_path=None, pdb=None, atoms_only=False):
    """Fetch a structure by PDB id with BioPandas and write it to a .pdb file.

    Args:
        pdb_id: PDB identifier passed to ``fetch_pdb``.
        protein_name: Label used as the prefix of the output file name.
        output_path: Directory to write into (``str`` or ``Path``); defaults to
            the current working directory.
        pdb: Optional object providing ``fetch_pdb`` (e.g. a ``PandasPdb``
            instance to reuse); a new ``PandasPdb`` is created when omitted.
        atoms_only: If True, write only the 'ATOM' records.

    Returns:
        Path of the file that was written
        (``<output_path>/<protein_name>_<pdb_id>.pdb``).
    """
    if pdb is None:
        pdb = PandasPdb()
    structure = pdb.fetch_pdb(pdb_id)

    # Coerce to Path so that a plain string directory also works with `/`.
    target_dir = Path(output_path) if output_path else Path.cwd()
    target = target_dir/f'{protein_name}_{pdb_id}.pdb'

    # Only pass `records` when filtering, so the default write is unchanged.
    write_kwargs = {'records': ['ATOM']} if atoms_only else {}
    structure.to_pdb(target, **write_kwargs)
    return target
    

def parse_structure_string(string):
    """Yield the PDB ids contained in one structure cell of the antibody table.

    A cell lists entries such as ``'6cr1:HL/4nyl:AB:CD:EF:HL'``: entries are
    separated by ``/`` and each entry starts with the PDB id, followed by
    ``:``-separated chain labels. Cells may also contain ``;`` with ``None``
    placeholders (e.g. ``'7seg:AB:HL;None'``); these are treated as additional
    separators and the placeholders are skipped.
    (Presumably ``;`` separates the parts of a multi-part antibody -- TODO
    confirm against the table's documentation.)

    Args:
        string: Raw cell value.

    Yields:
        PDB id strings, in the order they appear.
    """
    # Treat ';' like '/' so 'None;7seg:HL' does not produce the id 'None;7seg'.
    for entry in string.replace(';', '/').split('/'):
        pdb_id = entry.split(':')[0].strip()
        # Skip empty tokens and the literal 'None' placeholder.
        if pdb_id and pdb_id != 'None':
            yield pdb_id



In [133]:
# Directory that receives the downloaded, experimentally solved structures.
output_path = desktop/'solved_therapeutic_abs'
output_path.mkdir(parents=True, exist_ok=True)

pdb = PandasPdb()
proteins = []          # antibody name for each successful download
pdb_ids = []           # PDB id for each successful download (parallel to `proteins`)
failed_downloads = []  # (antibody, pdb_id, error) for every download that failed

# NOTE: .head(1) restricts the run to the first antibody in the table.
for _, row in thabs_with_structure.head(1).iterrows():
    protein = row.antibody
    for string in [row.structure100, row.structure99, row.structure95_98]:
        # Missing structure cells are NaN (float), not str -- skip them.
        if not isinstance(string, str):
            continue
        for pdb_id in parse_structure_string(string):
            try:
                download_pdb(pdb_id, protein, output_path, pdb=pdb)
            except Exception as err:
                # Best effort: keep going, but record and report the failure
                # instead of silently discarding it.
                failed_downloads.append((protein, pdb_id, repr(err)))
                print(f'Failed to download {pdb_id} for {protein}: {err!r}')
            else:
                proteins.append(protein)
                pdb_ids.append(pdb_id)