In [83]:
from mimotree_surface import *
from mimotree_alignment import *
from mimotree_matching import *
from mimotree_clustering import *

## Customization

In [84]:
# agab: PDB ID of the complex; used below as the prefix of every input/output file name
# (e.g. agab+'.pdb', agab+'_mimotopes.txt')
agab = '1HX1'
# chain: ID of the antigen chain (see the SASA file naming convention agabPDBID_antigenChainID_SASA.txt)
chain = 'B'
# PD_cutoff: cutoff handed to generate_seeds() during the epitope search
# NOTE(review): presumably a distance threshold on the same scale as PD_th -- confirm against generate_seeds
PD_cutoff = 8
#distance_param: the maximum alpha-carbon to alpha-carbon distance between two adjacent residues
distance_param = 4
#mimo_distance_param: an estimated distance between adjacent residues in the mimotope
mimo_distance_param = 3.5
#maximum PD allowed for filtering seed connections
# (connections with a non-zero avg_PD above this value are dropped in the epitope search)
PD_th = 2

## Surface Map

In [85]:
# Prerequisite: export the per-residue SASA values of the antigen with Chimera
# and save them as <agab PDB ID>_<antigen chain ID>_SASA.txt next to the PDB file.
pdb_file = f'{agab}.pdb'
sasa_file = f'{agab}_{chain}_SASA.txt'

# Identify the surface residues, then rebuild the PDB dataframe for them.
surface_residue, pdb_df = surface_residue_id(pdb_file, chain, sasa_file)
pdb_df = generate_df(pdb_df, surface_residue, chain)

# Start the surface map with one key per surface residue and an empty neighbour list.
surface_dict = initial_graph(surface_residue, pdb_df, chain)
surface_dict

{'GLY150': [],
 'ASN151': [],
 'SER152': [],
 'PRO153': [],
 'GLN154': [],
 'GLU156': [],
 'VAL157': [],
 'GLU158': [],
 'LEU159': [],
 'LYS160': [],
 'LYS161': [],
 'LYS163': [],
 'HIS164': [],
 'LEU165': [],
 'GLU166': [],
 'LYS167': [],
 'SER168': [],
 'GLU170': [],
 'LYS171': [],
 'ALA173': [],
 'ASP174': [],
 'GLN175': [],
 'GLU177': [],
 'GLU178': [],
 'ASN180': [],
 'LYS181': [],
 'GLU182': [],
 'LEU183': [],
 'THR184': [],
 'GLY185': [],
 'ILE186': [],
 'GLN187': [],
 'GLN188': [],
 'GLY189': [],
 'PHE190': [],
 'LEU191': [],
 'PRO192': [],
 'LYS193': [],
 'ASP194': [],
 'LEU195': [],
 'GLN196': [],
 'ALA197': [],
 'GLU198': [],
 'ALA199': [],
 'LEU200': [],
 'CYS201': [],
 'LYS202': [],
 'ASP204': [],
 'ARG205': [],
 'ARG206': [],
 'LYS208': [],
 'ALA209': [],
 'ILE211': [],
 'GLU212': [],
 'GLN213': [],
 'MET215': [],
 'LYS216': [],
 'ILE217': [],
 'LEU218': [],
 'GLU219': [],
 'GLU220': [],
 'ASP222': [],
 'THR223': [],
 'LEU224': [],
 'ILE225': [],
 'PRO227': [],
 'GLU228':

In [99]:
# Calculate the CA-CA distance between every pair of residues and save one
# residue_distance() line per pair to 'pairs_CA_distance_<agab>.txt'.
# The same path is passed to surface_map() when the surface map is finalized,
# so this cell has to be run before that one.

# Unique residue numbers in order of first appearance. dict.fromkeys() keeps
# insertion order and gives the same result as sorted(set(x), key=x.index)
# without the quadratic list.index() lookups.
idx_list = list(dict.fromkeys(pdb_df['residue_number'].tolist()))

# The context manager guarantees the file is flushed and closed even if
# residue_distance() or a write raises part-way through the loop.
with open('pairs_CA_distance_'+agab+'.txt','w') as pdtxt:
    for k in tqdm(range(len(idx_list)-1)):
        # only pairs with h > k, so each unordered pair is written exactly once
        for h in range(k+1, len(idx_list)):
            pdtxt.write(residue_distance(idx_list[k],idx_list[h],pdb_df)+'\n')

100%|███████████████████████████████████████████| 80/80 [00:22<00:00,  3.58it/s]


In [86]:
# Finalize the surface map: surface_map() is given the pairwise CA-CA distance file
# (written by the distance-calculation cell), the adjacency cutoff distance_param and
# the initialized surface_dict.
# NOTE(review): surface_dict may be modified in place by surface_map -- confirm; it is
# reused later by generate_seeds().
surface_graph = surface_map('pairs_CA_distance_'+agab+'.txt',distance_param,surface_dict)

In [87]:
surface_graph  # finalized surface map: each surface residue mapped to its list of adjacent surface residues

{'GLY150': ['ASN151'],
 'ASN151': ['GLY150', 'SER152'],
 'SER152': ['ASN151', 'PRO153'],
 'PRO153': ['SER152', 'GLN154'],
 'GLN154': ['PRO153'],
 'GLU156': ['VAL157'],
 'VAL157': ['GLU156', 'GLU158'],
 'GLU158': ['VAL157', 'LEU159'],
 'LEU159': ['GLU158', 'LYS160'],
 'LYS160': ['LEU159', 'LYS161'],
 'LYS161': ['LYS160'],
 'LYS163': ['HIS164'],
 'HIS164': ['LYS163', 'LEU165'],
 'LEU165': ['HIS164', 'GLU166'],
 'GLU166': ['LEU165', 'LYS167'],
 'LYS167': ['GLU166', 'SER168'],
 'SER168': ['LYS167'],
 'GLU170': ['LYS171'],
 'LYS171': ['GLU170'],
 'ALA173': ['ASP174'],
 'ASP174': ['ALA173', 'GLN175'],
 'GLN175': ['ASP174'],
 'GLU177': ['GLU178'],
 'GLU178': ['GLU177'],
 'ASN180': ['LYS181'],
 'LYS181': ['ASN180', 'GLU182'],
 'GLU182': ['LYS181', 'LEU183'],
 'LEU183': ['GLU182', 'THR184'],
 'THR184': ['LEU183', 'GLY185'],
 'GLY185': ['THR184', 'ILE186'],
 'ILE186': ['GLY185', 'GLN187'],
 'GLN187': ['ILE186', 'GLN188'],
 'GLN188': ['GLN187', 'GLY189'],
 'GLY189': ['GLN188', 'PHE190'],
 'PHE190

## Epitope Searching

In [88]:
# Read the mimotope sequences that belong to this complex.
mimo = import_mimo_data(agab+'_mimotopes.txt')


final = []  # residue numbers (as strings) that make up the final prediction
prediction = []  # placeholder for the final prediction as residue name + residue number
all_connection = {}  # mimotope sequence -> list of its accepted connections

for mimo_seq in mimo:
    all_connection[mimo_seq] = []
    final_connection = []

    # Seeds found on the surface map for the current mimotope.
    seeds = generate_seeds(mimo_seq, PD_cutoff, surface_dict)

    # Remove (in place) every seed whose residue numbers fail the order() check.
    # Each residue entry carries its number from index 2 onwards.
    unordered_seeds = [
        seed for seed in seeds
        if order([int(residue[2:]) for residue in seed]) == False
    ]
    for seed in unordered_seeds:
        seeds.remove(seed)

    # Connections built from the remaining seeds.
    seed_s = generate_connections(seeds, mimo_seq, mimo_distance_param, pdb_df)

    # Keep a connection only if its residue letters (first character of each
    # entry) occur in the mimotope in the same order, gaps allowed.
    for connection in seed_s:
        letters = ''.join(residue[0] for residue in connection)
        ordered_pattern = '.*'.join(letters)
        if re.search(ordered_pattern, mimo_seq) is not None:
            final_connection.append(connection)

    # Nothing more to record when no seed survived the filtering.
    if seeds == []:
        continue

    # Seeds of maximal length are also accepted as connections.
    longestseed = max(seeds, key=len)
    for seed in seeds:
        if len(seed) == len(longestseed) and seed not in final_connection:
            final_connection.append(seed)

    # Store the accepted connections for this mimotope without duplicates.
    for connection in final_connection:
        if connection not in all_connection[mimo_seq]:
            all_connection[mimo_seq].append(connection)


# Flatten the per-mimotope connection lists into a single list.
con_list = []
for connections in list(all_connection.values()):
    con_list.extend(connections)

# Score every connection with avg_PD and rank them from lowest to highest score.
scored_connections = [(connection, avg_PD(connection, pdb_df)) for connection in con_list]
rank_list = sorted(scored_connections, key=lambda pair: pair[1])

# Connections with an average PD of exactly 0 are always kept; the others
# must not exceed the PD_th threshold.
zero_list = [connection for connection, score in rank_list if score == 0]
nonzero_list = [
    connection for connection, score in rank_list
    if score != 0 and score <= PD_th
]

# Every residue number of the zero-PD connections goes into the prediction.
for connection in zero_list:
    for residue in connection:
        residue_number = residue[2:]
        if residue_number not in final:
            final.append(residue_number)

# From the non-zero connections, only the longest ones contribute residues.
if nonzero_list != []:
    longest_non = max(len(connection) for connection in nonzero_list)
    non_selected = [connection for connection in nonzero_list if len(connection) == longest_non]
    for connection in non_selected:
        for residue in connection:
            residue_number = residue[2:]
            if residue_number not in final:
                final.append(residue_number)

## Prediction

In [89]:
# Turn the predicted residue numbers in `final` into residue name + residue number
# labels (e.g. 'ARG205') using the PDB dataframe.
prediction = show_aa_type(pdb_df, final)

In [90]:
prediction  # predicted epitope residues, in the order they were added to `final`

['ARG205',
 'ARG206',
 'PRO192',
 'LEU191',
 'PHE247',
 'LEU248',
 'LYS160',
 'LEU159',
 'LYS236',
 'LEU235',
 'ASP204',
 'LYS163',
 'HIS164',
 'VAL241',
 'LYS242',
 'LYS243',
 'LYS161',
 'GLU158',
 'LEU165',
 'GLU166',
 'LEU218',
 'GLU219',
 'GLU182',
 'LEU183',
 'ARG234',
 'ILE217',
 'ILE225',
 'LEU224',
 'LYS216',
 'ASN229',
 'GLU228']