In [1]:
import json
import numpy as np
import pandas as pd
from collections import defaultdict
from typing import List
import re
# %matplotlib widget

In [24]:
#######################################################
# flatten
####################################################### 

def flatten2list(object):
    """Return the leaves of a nested list/set structure as one flat list.

    Only ``list`` and ``set`` elements are descended into; every other
    element (strings, tuples, dicts, ...) is kept as a single leaf.
    Leaves appear in iteration order of the containers.
    """
    def _leaves(container):
        # Depth-first walk that yields leaves in encounter order.
        for element in container:
            if isinstance(element, (list, set)):
                yield from _leaves(element)
            else:
                yield element

    return list(_leaves(object))

def flatten2set(object) -> set:
    """Flatten a nested list/set structure and return its distinct leaves.

    Delegates the flattening to ``flatten2list``; leaves must be hashable.
    """
    flat_items = flatten2list(object)
    return set(flat_items)

#######################################################
# comparison operator dispatch
####################################################### 
def dispatch_dict(operator, x, value):
    """Apply the comparison named by `operator` to `x` and `value`.

    Supported operators are '==', '>' and '<'. Any other operator
    yields ``None``. Only the selected comparison is evaluated.
    """
    comparisons = {
        '==': lambda left, right: left == right,
        ">": lambda left, right: left > right,
        "<": lambda left, right: left < right,
    }
    try:
        compare = comparisons[operator]
    except KeyError:
        return None
    return compare(x, value)

#######################################################
# epitope dataset
####################################################### 
class Epitope_DB:
    """Chainable view over the epitope-vs-HLA table.

    The table is read from a pickle into ``self.df``. The filter methods
    (``epitope``, ``ellipro``) narrow ``self.df`` IN PLACE and return
    ``self`` so that calls can be chained; create a new instance to start
    again from the full table.
    """
    
    def __init__(self, path:str='../data/20201123_EpitopevsHLA.pickle'):
        # NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
        self.df = pd.read_pickle(path)
        
    def __repr__(self):
        return f""" Epitope_DB(records={len(self.df)}, columns={self.df.columns}) """
    
    def __str__(self):
        # The bare name `__repr__` is not defined at module level; go through self.
        return self.__repr__()
    
    def epitope(self, value):
        """Keep rows whose ``Epitope`` equals `value` (str) or is contained in
        `value` (any other collection). Returns ``self``."""
        if isinstance(value, str):
            ind = self.df.Epitope == value
        else:
            # isin always yields a boolean mask, also for an empty frame.
            ind = self.df.Epitope.isin(list(value))
        self.df = self.df[ind]
        return self
    
    def ellipro(self, value):
        """Keep rows whose ``ElliPro Score`` equals `value` (str) or is contained
        in `value` (any other collection). Returns ``self``."""
        scores = self.df['ElliPro Score']
        if isinstance(value, str):
            # Compare against the score column (not the Epitope column).
            ind = scores == value
        else:
            ind = scores.isin(list(value))
        self.df = self.df[ind]
        return self
    
    def hlavsep(self, hla_allel:str='Luminex Alleles'):
        """Build a table with one row per HLA and the set of epitopes found on it.

        Args:
            hla_allel: name of the column holding, per row, a collection of HLAs.

        Returns:
            DataFrame with columns ``HLA`` and ``Epitope`` (a set per row).
            The result is also stored on ``self._hlavsep``.
        """
        hlas = flatten2set(self.df[hla_allel].values)
        hla_column, epitope_column = [], []
        for hla in hlas:
            ind = self.df[hla_allel].apply(lambda x: hla in x)
            hla_column.append(hla)
            epitope_column.append(flatten2set(self.df.loc[ind, 'Epitope'].values))
        # Explicit columns keep the frame well-formed even when no HLA is present.
        self._hlavsep = pd.DataFrame({'HLA': hla_column, 'Epitope': epitope_column})
        return self._hlavsep

    def min_hlavsep(self, epitopes):
        """Greedily pick HLAs until the requested epitopes are covered.

        On every round the HLA covering the most still-uncovered epitopes is
        selected. Epitopes that no HLA in the table carries cannot be covered;
        the search stops when no further progress is possible and such
        epitopes are simply absent from the result.

        Args:
            epitopes: iterable of epitope names (not modified).

        Returns:
            dict mapping each chosen HLA to the set of epitopes it covers.
        """
        remaining = set(epitopes)
        hlavsep_df = self.hlavsep()
        hla_ep = {}
        while remaining and not hlavsep_df.empty:
            coverage = hlavsep_df.Epitope.apply(lambda x: len(x.intersection(remaining)))
            # sort_values().index yields index LABELS, so address rows with .loc.
            best = coverage.sort_values().index[-1]
            ep = hlavsep_df.loc[best, 'Epitope'].intersection(remaining)
            if not ep:
                # Nothing left that any HLA covers: stop instead of spinning forever.
                break
            hla_ep[hlavsep_df.loc[best, 'HLA']] = ep
            remaining.difference_update(ep)
        return hla_ep
    
#######################################################
# desa dataset
####################################################### 
class DESA_DB:
    """Chainable view over the DESA transplant table.

    The table is read from a pickle into ``self.df``. Every filter method
    narrows ``self.df`` IN PLACE and returns ``self`` so calls can be
    chained; create a new instance to start again from the full table.
    """
    
    def __init__(self, path:str='../data/desa_3d_view.pickle'):
        # NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
        self.df = pd.read_pickle(path)
        
    def __repr__(self):
        return f""" DESA_DB(records={len(self.df)}, columns={self.df.columns}) """
    
    def __str__(self):
        # The bare name `__repr__` is not defined at module level; go through self.
        return self.__repr__()
    
    def donor_type(self, donor_type:str='Deceased'):
        """Keep transplants of the given donor type ('Living' or 'Deceased').

        Raises:
            KeyError: if `donor_type` is not one of the accepted values.
        """
        if donor_type not in ['Living', 'Deceased']:
            raise KeyError(f'{donor_type} does not exist in the df values, accepted values: {self.df.Donor_Type.unique()}')
        # Vectorised comparison: always a boolean mask, also on an empty frame.
        self.df = self.df[self.df.Donor_Type == donor_type]
        return self
        
    def hla_class(self, hla_class):
        """Keep transplants whose ``Donor_HLA_Class`` is 'I', 'II' or 'I,II'.

        Raises:
            KeyError: if `hla_class` is not one of the accepted values.
        """
        if hla_class not in ['I', 'II', 'I,II']:
            raise KeyError(f'{hla_class} does not exist in the df values, accepted values: {self.df.Donor_HLA_Class.unique()}')
        self.df = self.df[self.df.Donor_HLA_Class == hla_class]
        return self
        
    def early_failed(self, threshold):
        """Keep rows with ``Survival[Y]`` below `threshold` and ``Failure == 1``."""
        ind_t = self.df['Survival[Y]'] < threshold
        ind_e = self.df.Failure == 1
        self.df = self.df[ind_t & ind_e]
        return self

    def late_failed(self, threshold:int):
        """Keep rows with ``Survival[Y]`` above `threshold` and ``Failure != 1``.

        NOTE(review): despite the name, rows with ``Failure == 1`` are
        excluded here -- presumably this selects long-term survivors; confirm.
        """
        ind_t = self.df['Survival[Y]'] > threshold
        ind_e = self.df.Failure != 1
        self.df = self.df[ind_t & ind_e]
        return self
    
    def desa_num(self, num:List[int]):
        """Keep rows whose ``#DESA`` count is one of the values in `num`."""
        self.df = self.df[self.df['#DESA'].isin(list(num))]
        return self
    


# Find relevant Epitopes to feed HLA 3D app

In [47]:
def _donor_epitopes(desa_df) -> set:
    """Collect the epitope keys of every EpvsHLA_Donor mapping in `desa_df`."""
    return {ep for ep_hla in flatten2list(desa_df.EpvsHLA_Donor.values) for ep in ep_hla.keys()}

# Each cohort needs its own DESA_DB: the filter methods narrow `df` in place.
# Early cohort: class I, one DESA, deceased donor, Survival[Y] < 1/6 and Failure == 1.
desa_db = DESA_DB()
desa_early = desa_db.hla_class('I').desa_num([1]).donor_type('Deceased').early_failed(1/6).df
ep_early = _donor_epitopes(desa_early)

# Late cohort: same selection, but Survival[Y] > 10 and Failure != 1.
desa_db = DESA_DB()
desa_late = desa_db.hla_class('I').desa_num([1]).donor_type('Deceased').late_failed(10).df
ep_late = _donor_epitopes(desa_late)

# Epitopes seen only in the early cohort, and only in the late cohort.
relevant_ep = ep_early - ep_late
irrelevant_ep = ep_late - ep_early

# Sorted so the printed output is stable between runs (set order is not).
print('Early Epitopes', sorted(ep_early))
print('Late Epitopes', sorted(ep_late))
print('Early - Late Epitopes', sorted(relevant_ep))
print('Late - Early Epitopes', sorted(irrelevant_ep))

Relavant Epitopes ['45KE', '66IF', '144QL', '73AS']
Irrelavant Epitopes ['97V', '76ANT', '44RT', '152RR', '62LQ', '163RG', '161D', '158T']


In [46]:
# Fresh instance, narrowed to HLA class II transplants with 1 or 2 DESA.
# The chained filters return the DESA_DB itself, so the cell shows its repr.
desa_db = DESA_DB()
desa_db.hla_class('II').desa_num([1, 2])

 DESA_DB(records=71, columns=Index(['TransplantID', 'Status', 'DESA_Epitope', '#DESA', 'EpvsHLA_Pos',
       'EpvsHLA_Donor', 'Failure', 'Survival[Y]', 'DESA->Donor_HLA',
       'Donor_HLA', 'Donor_HLA_Class', 'Donor_Type'],
      dtype='object')) 

Relavant Epitopes ['57DE', '45EV', '70GT', '37FL', '57V[DR]', '51R', '9F[DQ]']
Irrelavant Epitopes ['57DE', '98Q', '45EV', '74R', '185I', '45GV', '70GT', '130A', '37FL', '57V[DR]', '55PPD', '48Q', '55PP', '51R', '9F[DQ]', '55PPA']


In [4]:
# Early failures among deceased-donor transplants with Donor_HLA_Class 'I,II'.
# NOTE: this rebinds `desa_early`, which the "relevant epitopes" cell also assigns.
desa_db = DESA_DB()
desa_early = desa_db.hla_class('I,II').donor_type('Deceased').early_failed(1/6).df
# Show the DESA epitope set of the first matching row.
desa_early[0:1].DESA_Epitope.values

array([{'70QA', '9F[DQ]', '55R', '30D', '70DRA', '74EL', '46VY', '77T[DQ]', '70DA', '108T', '67VG', '96EV', '86A', '70GT', '144KR', '125G', '38L', '67F', '28H', '71A', '66EV', '161D', '70QT', '55RPD', '52PQ', '37S', '142M', '37YA', '56PD', '45GV', '97I', '87F', '67VT', '275EL', '74A'}],
      dtype=object)

In [5]:
# NOTE: from here on `desa_db` is the raw DataFrame, not a DESA_DB instance.
desa_db = DESA_DB().df
# This length is computed but not displayed (it is not the last expression of the cell).
len(desa_db[desa_db.TransplantID==1903].EpvsHLA_Donor.values.tolist()[0])
# Sorted DESA epitopes of transplant 1903.
a = sorted(desa_db[desa_db.TransplantID==1903].DESA_Epitope.values[0])
# Hand-written epitope set; `a` and `b` are not compared anywhere in this cell.
b = {'108T', '125G', '142M', '144KR', '161D', '28H', '30D', '37S', '37YA', '38L', '45GV', 
'46VY', '52PQ', '55R', '55RPD', '56PD', '66EV', '67F', '67VG', '67VT', '70DA', '70DRA', 
'70GT', '70QA', '70QT', '71A', '74A', '74EL', '77T[DQ]', '86A', '87F', '96EV', '97I', '9F[DQ]'}

In [36]:
# Greedy HLA cover for `relevant_ep`, which must already exist from the
# "Find relevant Epitopes" cell. Here `ep_db` is an Epitope_DB instance.
ep_db = Epitope_DB()
hlavsep = ep_db.min_hlavsep(relevant_ep)
hlavsep

{'B*45:01': {'45KE'},
 'B*53:01': {'66IF'},
 'C*12:03': {'73AS'},
 'B*13:01': {'144QL'}}

In [45]:
# Polymorphic residues of epitope 37FL; requires `ep_db` to be an Epitope_DB (it has `.df`).
ep_db.df[ep_db.df.Epitope == '37FL'].PolymorphicResidues.values[0]

[('37', 'F'), ('38', 'L')]

In [9]:
# print(desa_db.TransplantID.values.tolist())
# Requires `desa_db` to be the raw DataFrame (as bound by `desa_db = DESA_DB().df`);
# shows every row except the last 15.
desa_db[0:-15]

Unnamed: 0,TransplantID,Status,DESA_Epitope,#DESA,EpvsHLA_Pos,EpvsHLA_Donor,Failure,Survival[Y],DESA->Donor_HLA,Donor_HLA,Donor_HLA_Class,Donor_Type
0,327,DESA,"{94I, 62GRN, 71SA, 62GE, 44RMA, 97V, 74Y}",7,"{'62GRN': {'B*57:01', 'B*58:01'}, '97V': {'B*5...","{'44RMA': 'B*57:01', '62GE': 'B*57:01', '94I':...",1,0.287671,{'B*57:01': 7},{B*57},I,Deceased
1,369,DESA,"{37L, 85A, 38L, 57V[DR], 26L[DR]}",5,"{'85A': {'DRB1*12:01', 'DRB1*01:02', 'DRB5*02:...","{'26L[DR]': 'DRB1*12:01', '57V[DR]': 'DRB1*12:...",2,13.326027,{'DRB1*12:01': 5},{DRB1*12},II,Deceased
2,5580,DESA,"{45EV, 55PP, 55PPD}",3,"{'55PP': {'DQB1*03:02', 'DQB1*03:01', 'DQB1*03...","{'55PP': 'DQB1*03:01', '45EV': 'DQB1*03:01', '...",0,12.008219,{'DQB1*03:01': 3},{DQB1*03},II,Living
3,72,DESA,"{9F[DQ], 67VG, 55R, 30D, 86A, 70GT, 87F, 125G,...",11,"{'55RPD': {'DQB1*06:02', 'DQB1*05:03', 'DQB1*0...","{'70GT': 'DQB1*06:02', '86A': 'DQB1*06:02', '1...",2,5.539726,"{'DQB1*06:02': 9, 'DQB1*06:03': 1, 'DRB5*01:01...","{DQB1*06, DRB5*01}",II,Living
4,279,DESA,"{181T, 31FH, 149H, 4R, 70DA, 13SE, 70Q, 70QQ, ...",25,"{'32H': {'DRB1*14:04', 'DRB3*02:02', 'DRB1*10:...","{'32H': 'DRB1*13:02', '98Q': 'DRB3*03:01', '4R...",1,0.000000,"{'DRB1*13:02': 18, 'DRB3*03:01': 7}","{DRB1*13, DRB3*03}",II,Deceased
...,...,...,...,...,...,...,...,...,...,...,...,...
419,841,DESA,{71E},1,"{'71E': {'DRB1*04:02', 'DRB1*01:03', 'DRB1*13:...",{'71E': 'DRB1*13:02'},2,7.312329,{'DRB1*13:02': 1},{DRB1*13},II,Deceased
420,842,DESA,"{73AS, 193PL, 76VS, 270C, 76VRN, 267QE, 152RA}",7,"{'267QE': {'C*07:02', 'C*07:01', 'B*73:01'}, '...","{'193PL': 'C*07:01', '152RA': 'C*07:01', '76VS...",1,1.852055,"{'C*07:01': 5, 'C*17:01': 2}","{C*17, C*07}",I,Deceased
421,845,DESA,"{116F, 158T, 66IC, 71TTS, 69TNT}",5,"{'66IC': {'B*14:01', 'B*27:08', 'B*38:01', 'B*...","{'71TTS': 'B*39:06', '66IC': 'B*39:06', '158T'...",1,9.835616,{'B*39:06': 5},{B*39},I,Deceased
422,864,DESA,{97M},1,"{'97M': {'A*33:01', 'A*23:01', 'A*33:03', 'A*2...",{'97M': 'A*32:01'},2,9.038356,{'A*32:01': 1},{A*32},I,Deceased


# Cartesian Visualisation:

In [18]:
def load_epitope_db():
    """Read the pickled epitope-vs-HLA table and return the loaded object.

    The path is fixed: '../data/20201123_EpitopevsHLA.pickle', relative to
    the working directory.
    """
    return pd.read_pickle('../data/20201123_EpitopevsHLA.pickle')

def load_desa_db():
    """Read the pickled DESA table and return the loaded object.

    The path is fixed: '../data/desa_db.pickle', relative to the working
    directory.
    """
    return pd.read_pickle('../data/desa_db.pickle')

#######################################################
# get HLA locus
#######################################################
def get_hla_locus(hla:str) -> str:
    """Return the locus of an HLA name: the text before '*', cut to 3 characters.

    Examples: 'A*01:01' -> 'A', 'DRB1*12:01' -> 'DRB'.
    """
    gene, _, _ = hla.partition('*')
    # Slicing is a no-op for genes of up to three characters.
    return gene[:3]

#######################################################
# get HLA class
#######################################################
def get_hla_class(locus:str) -> str:
    """Map an HLA locus to 'Class_I' or 'Class_II'.

    Loci A, B, C are class I; DRB, DQA, DQB are class II. Any other locus
    yields the string 'Locus not found'.
    """
    class_one_loci = frozenset(('A', 'B', 'C'))
    class_two_loci = frozenset(('DRB', 'DQA', 'DQB'))
    if locus in class_one_loci:
        return 'Class_I'
    if locus in class_two_loci:
        return 'Class_II'
    return 'Locus not found'

    
#######################################################
# get HLA class from path
#######################################################
def get_hla_class_from_path(path:str) -> str:
    """Derive the HLA class from a '/'-separated file path.

    The file name's text before the first '_' is taken as the HLA gene,
    reduced to its locus with ``get_hla_locus`` and mapped to 'Class_I' or
    'Class_II'. An unknown locus raises ``KeyError``.
    """
    file_name = path.rsplit('/', 1)[-1]
    gene_prefix = file_name.split('_', 1)[0]
    class_by_locus = {'A':'Class_I', 'B':'Class_I', 'C':'Class_I', 
                      'DRB':'Class_II', 'DQA':'Class_II', 'DQB':'Class_II'}
    return class_by_locus[get_hla_locus(gene_prefix)]

#######################################################
# parse pdb file
#######################################################
def pdb_parser(pdb_path:str):
    """Read the atom coordinates of a PDB file, grouped by HLA class and chain.

    Args:
        pdb_path: path of the PDB file. The HLA class is derived from the
            file name via ``get_hla_class_from_path``.

    Returns:
        Nested defaultdict ``data[hla_class][chain]`` holding a list of
        ``np.array([x, y, z])``, one per ATOM/HETATM record. Missing keys
        yield an empty list.

    Raises:
        ValueError: if a coordinate field of an ATOM/HETATM line is not a number.
    """
    # Read PDB file to create atom/bond information
    with open(pdb_path, 'r') as infile:
        # store only non-empty lines
        lines = [l.strip() for l in infile if l.strip()]
    
    hla_class = get_hla_class_from_path(pdb_path)
    data = defaultdict(lambda: defaultdict(list))
    # PDB is a fixed-column format; these slices are the 0-based character
    # positions of the fields used below.
    recordpos = slice(0, 6)
    chainpos = slice(21, 22)
    xpos = slice(30, 38)
    ypos = slice(38, 46)
    zpos = slice(46, 54)

    for l in lines:
        # Take the record name from its columns rather than a whitespace split:
        # 'HETATM' touches a 5-digit serial number (e.g. 'HETATM10000').
        if l[recordpos].strip() in ("ATOM", "HETATM"):
            chain = l[chainpos].strip()
            x = float(l[xpos])
            y = float(l[ypos])
            z = float(l[zpos])
            data[hla_class][chain].append(np.array([x, y, z]))
    return data
        

#######################################################
# 3D to 2D Projection 
#######################################################
def loc_2_coord(atoms:List[np.array], dis:int=60):
    """Project 3D points onto the three axis-aligned planes at offset ``-dis``.

    Args:
        atoms: sequence of points, each indexable as ``[x, y, z]``.
        dis: distance of the projection planes from the origin.

    Returns:
        Three ``(x, y, z)`` tuples of arrays: the points flattened onto the
        plane z = -dis, the plane y = -dis and the plane x = -dis.
    """
    components = [[point[axis] for point in atoms] for axis in range(3)]

    def along(axis):
        # Fresh array per use, so the returned projections share no buffers.
        return np.array(components[axis])

    def wall():
        return -dis * np.ones(len(atoms))

    on_xy = (along(0), along(1), wall())
    on_xz = (along(0), wall(), along(2))
    on_yz = (wall(), along(1), along(2))
    return on_xy, on_xz, on_yz


#######################################################
# Visualise Projection 
#######################################################

def visualise_proj(data, hla_class, ep_coord):
    """Scatter-plot the planar projections of an HLA molecule and of epitopes.

    Chains 'A', 'B' and 'C' of ``data[hla_class]`` are drawn faintly in their
    own colour on the three projection planes computed by ``loc_2_coord``;
    the epitope coordinates are drawn on top in yellow.

    Args:
        data: mapping ``data[hla_class][chain] -> list of [x, y, z]`` as
            returned by ``pdb_parser``.
        hla_class: key into `data`; also used in the figure title.
        ep_coord: sequence of ``[x, y, z]`` epitope coordinates.
    """
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    # Importing Axes3D registers the '3d' projection; needed on older Matplotlib.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    chain_colors = {
            "A": "#65A5E2", # Blue
            "B": "#CA7FE5", # purple
            "C": "#65E2AB", # green
    }

    mpl.rcParams['legend.fontsize'] = 10

    fig = plt.figure()
    # Figure.gca() no longer accepts keyword arguments; request the 3D axes explicitly.
    ax = fig.add_subplot(111, projection='3d')
    alpha = 0.02

    # Molecule: one faint point cloud per chain on each of the three planes.
    for chain, color in chain_colors.items():
        for proj_x, proj_y, proj_z in loc_2_coord(data[hla_class][chain]):
            ax.scatter(proj_x, proj_y, proj_z, s=10, c=color, alpha=alpha)

    # Epitopes: opaque yellow markers on the same planes.
    for proj_x, proj_y, proj_z in loc_2_coord(ep_coord):
        ax.scatter(proj_x, proj_y, proj_z, s=10, c='y')

    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    
    ax.set_title(f'HLA {hla_class}')
    plt.show()


        

In [19]:
# Parse one PDB file; the HLA class is derived from the file name inside pdb_parser.
pdb_path = '../data/HLAMolecule/A/A_01_01_V1.pdb'
# get_hla_class_from_path(pdb_path)
data = pdb_parser(pdb_path)

In [20]:
# NOTE: `ep_db` is rebound here to the object returned by pd.read_pickle
# (indexed like a DataFrame below), not an Epitope_DB instance.
ep_db = load_epitope_db()
# desa_db = load_desa_db()
# Location entry of epitope 71TTS, shown as a dict keyed by HLA.
dict(ep_db[ep_db.Epitope == '71TTS'].Location.values[0])

{'B*15:10': [45.97, 87.81, 33.5],
 'B*14:02': [30.05, -6.31, 33.63],
 'B*48:01': [-4.68, -5.75, -33.05],
 'B*15:12': [29.95, 34.68, 33.67],
 'B*41:01': [4.6, 6.54, -33.68],
 'B*35:01': [21.05, 75.55, 75.86],
 'B*15:03': [29.86, 34.79, 33.55],
 'B*40:02': [-4.43, -6.38, -33.42],
 'B*78:01': [-4.73, 35.18, 22.02],
 'B*39:01': [-4.49, 5.55, 33.21],
 'B*50:01': [-4.44, -6.43, -33.42],
 'B*18:01': [4.45, 5.56, -33.02],
 'B*15:02': [29.91, 34.84, 33.4],
 'B*07:03': [4.73, 6.06, -33.27],
 'B*40:01': [4.74, 6.68, -33.81],
 'B*14:01': [30.21, -6.52, 33.85],
 'B*45:01': [4.45, 6.68, -33.59],
 'B*15:11': [46.0, 87.58, 32.99],
 'B*35:08': [30.11, 6.96, 21.63],
 'B*08:01': [-4.72, -6.16, -33.45],
 'B*15:18': [45.99, 87.65, 33.25],
 'B*40:06': [-4.52, -6.75, -33.62],
 'B*15:01': [29.95, 34.89, 33.3]}

In [21]:
desa_db = DESA_DB()
# # dir(desa_db.early_failed(1/4))
# early_failed filters in place and returns the same object, so `db is desa_db`.
db = desa_db.early_failed(1/4)
# # desa_db.late_failed(10)
# db.df

In [22]:
def get_desa_coord(desa_df, ep_df, hla_class: str = 'I'):
    """Collect the coordinates of donor epitopes for one HLA class.

    For every transplant in `desa_df` whose ``Donor_HLA_Class`` equals
    `hla_class`, each (epitope, hla) pair of its ``EpvsHLA_Donor`` mapping is
    looked up in the ``Location`` entry of that epitope in `ep_df`. Pairs
    without a usable location are reported on stdout and skipped.

    Args:
        desa_df: table with columns ``Donor_HLA_Class`` and ``EpvsHLA_Donor``
            (a dict epitope -> hla per row).
        ep_df: table with columns ``Epitope`` and ``Location`` (per row
            something ``dict()`` accepts, mapping hla -> coordinate).
        hla_class: value of ``Donor_HLA_Class`` to select; defaults to 'I'.

    Returns:
        ``np.array`` of the coordinates found, in encounter order.
    """
    # Build the epitope -> Location lookup once instead of filtering ep_df for
    # every pair; setdefault keeps the first row of a duplicated epitope.
    locations = {}
    for epitope, location in zip(ep_df.Epitope, ep_df.Location):
        locations.setdefault(epitope, location)

    coord = []
    donor_maps = desa_df.loc[desa_df.Donor_HLA_Class == hla_class, 'EpvsHLA_Donor']
    for ep_hla in donor_maps:
        for ep, hla in ep_hla.items():
            try:
                coord.append(dict(locations[ep])[hla])
            except (KeyError, TypeError, ValueError) as e:
                # Unknown epitope, unknown hla, or a Location that is not a mapping.
                print(f'{type(e).__name__}: epitope {ep} does not have location for hla {hla}')
    return np.array(coord)

# `db` is the early-failed DESA_DB and `ep_db` the epitope table from load_epitope_db().
ep_coord = get_desa_coord(db.df, ep_db)                

KeyError: epitope 71TTS does not have location for hla B*39:06
KeyError: epitope 158T does not have location for hla B*39:06
KeyError: epitope 76ESN does not have location for hla B*39:06
KeyError: epitope 63NI does not have location for hla B*39:06


In [12]:
# plt.close('all')
# visualise_proj(data, 'Class_I')
# Plot the parsed molecule (`data`) with the early-failure epitope coordinates on top.
visualise_proj(data, 'Class_I', ep_coord)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …