In [35]:
import json
import os
from collections import defaultdict
from typing import Union, List

import pandas as pd
################
def flatten2list(object):
    """Flatten an arbitrarily nested structure and return its items as a list.

    object: any iterable; every ``list`` or ``set`` found inside it (at any
            depth) is expanded in place, every other item (strings and tuples
            included) is kept as a single element.
    """
    def _walk(nested):
        # Depth-first traversal that yields the leaves in iteration order.
        for element in nested:
            if isinstance(element, (list, set)):
                yield from _walk(element)
            else:
                yield element

    return list(_walk(object))

def flatten2set(object) -> set:
    """Flatten a nested structure and return its distinct items as a set.

    object: any iterable accepted by ``flatten2list``; the flattened items
            must be hashable.
    """
    unique_items = set()
    unique_items.update(flatten2list(object))
    return unique_items

def flatten_dict_values(dictionary:dict) -> set:
    """Collect every item found in the (possibly nested) values of a dict.

    dictionary: mapping whose values are single items or nested lists/sets.
    Returns the set of all flattened values; the keys are not included.
    """
    all_values = dictionary.values()
    return flatten2set(all_values)

def get_inventory_hlas(path='inventory_hla.json') -> dict:
    """Load the HLA inventory from a JSON file and return its content.

    path: location of the JSON file. A relative path is resolved against the
          current working directory, so pass an absolute path when the
          working directory is not fixed.

    NOTE(review): the file is presumably a {locus: [hla, ...]} mapping
    (the caller flattens its values) -- confirm against the actual file.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(path, encoding='utf-8') as json_file:
        return json.load(json_file)

####################
class Epitope:
    """Epitope data base (a pandas DataFrame) together with its filter methods.

    The filter methods (``filter_mAb``, ``isotype``, ``get_epitopes``,
    ``ellipro``) narrow ``self.df`` in place and return ``self`` so that the
    calls can be chained.
    """

    def __init__(self, path:str='../data/EpitopevsHLA.pickle'):
        """Load the epitope DataFrame and the inventory of HLAs with pdb files.

        path: location of the pickled DataFrame (``~`` is expanded). The
              default relative path is consistent if dash_hla_3d/app.py is run.
        """
        self.path = os.path.expanduser(path)
        print(self.path)
        # HLAs listed in the HLA inventory (i.e. the ones with pdb files)
        self.pdb_inventory = flatten_dict_values(get_inventory_hlas())
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        self.df = pd.read_pickle(self.path) # pylint: disable=invalid-name
        self._hlavsep = None
        self._hlavsep_df = None

    def __repr__(self):
        return f""" Epitope_DB(records={len(self.df)}, columns={self.df.columns}) """

    def filter_mAb(self):
        """ Keep only the records whose ``mAb`` column equals 'Yes' """
        self.df = self.df[self.df.mAb == 'Yes']
        return self

    def is_IgG(self) -> bool:
        """ Keep only the IgG records and tell whether any record is left.

        NOTE: like the other filters, this narrows ``self.df`` in place. """
        self.isotype('IgG')
        return len(self.df) != 0

    def isotype(self, isotype:str='IgG'):
        """ Keep only the records whose ``isotype`` column equals `isotype` """
        # A vectorised comparison always yields a boolean mask, even when
        # the frame is empty.
        self.df = self.df[self.df['isotype'] == isotype]
        return self

    def get_epitopes(self, value: Union[str, List[str]]):
        """ Keep only the records of the requested epitope(s)

        value: one epitope name or a collection of epitope names """
        if isinstance(value, str):
            ind = self.df.Epitope == value
        else:
            ind = self.df.Epitope.isin(value)
        self.df = self.df[ind]
        return self

    def ellipro(self, value):
        """ Keep only the records with the desired ElliPro score(s)

        value: one score (str) or a collection of accepted scores """
        scores = self.df['ElliPro Score']
        if isinstance(value, str):
            ind = scores == value
        else:
            ind = scores.isin(value)
        self.df = self.df[ind]
        return self

    def hlavsep(self,
                hla_allel:str='Luminex Alleles',
                only_with_pdb:bool=False,
                ignore_hla:Union[set, None]=None) -> pd.DataFrame:
        """ Return a DataFrame of HLA vs epitopes

        hla_allel [default is 'Luminex Alleles']: column holding, per epitope,
            the collection of alleles
        only_with_pdb [default is False]: if True, include only the alleles
            for which a pdb file is available
        ignore_hla: alleles to leave out of the result

        The result has one row per allele with the columns 'HLA' and
        'Epitope' (the set of epitopes found on that allele); it is also
        stored on ``self._hlavsep_df``. """
        ignored = set() if ignore_hla is None else set(ignore_hla)
        hlas = flatten2set(self.df[hla_allel].values)
        if only_with_pdb:
            # Alleles with available pdb files
            hlas = hlas.intersection(self.pdb_inventory)
        hlas -= ignored

        alleles = self.df[hla_allel]
        # Both columns are named up front so that an empty result still has them.
        hlavsep_dict = {'HLA': [], 'Epitope': []}
        # Sorted so that the row order does not depend on set iteration order.
        for hla in sorted(hlas, key=str):
            ind = alleles.apply(lambda x, hla=hla: hla in x)
            hlavsep_dict['HLA'].append(hla)
            hlavsep_dict['Epitope'].append(
                flatten2set(self.df[ind]['Epitope'].values)
            )
        self._hlavsep_df = pd.DataFrame(hlavsep_dict)
        return self._hlavsep_df

    def min_hlavsep(self,
                    epitopes:set,
                    ignore_hla:Union[set, None]=None,
                    max_hla:int=10) -> dict:
        """ Return an HLA vs epitope dictionary that covers `epitopes`
            with as few HLAs as a greedy search finds

            epitopes: the epitopes to cover (left untouched)
            ignore_hla: alleles that must not be used
            max_hla [default is 10]: maximum number of HLAs to select
            format { 'HLA' : {'epitopes'} }; only alleles with a pdb file
            are considered
        """
        # Work on a copy, the covered epitopes are removed round by round
        remaining = set(epitopes)
        hlavsep_df = self.hlavsep(only_with_pdb=True, ignore_hla=ignore_hla)
        hla_ep = {}
        for _ in range(max_hla):
            if not remaining or hlavsep_df.empty:
                break
            coverage = hlavsep_df.Epitope.apply(
                lambda x: len(x.intersection(remaining))
            )
            if not coverage.max():
                # No allele covers any of the remaining epitopes
                break
            # idxmax returns the first row that reaches the maximum
            best = coverage.idxmax()
            covered = hlavsep_df.loc[best, 'Epitope'].intersection(remaining)
            hla_ep[hlavsep_df.loc[best, 'HLA']] = covered
            remaining -= covered
        return hla_ep

    def epvshla2hlavsep(self, epvshla:dict) -> dict:
        """ Transform an ep vs hla dict 2 hla vs ep dict

        epvshla: {epitope: hla}, where hla is one allele or a list/set of
                 alleles
        Returns a defaultdict(set) of the form {hla: {epitopes}} """
        hlavsep = defaultdict(set)
        for epitope, hla in epvshla.items():
            hlas = hla if isinstance(hla, (list, set)) else [hla]
            for single_hla in hlas:
                hlavsep[single_hla].add(epitope)
        return hlavsep


class DESA:
    """Data base (a pandas DataFrame) of all transplants with DESA, together
    with the methods that filter or update this DataFrame.

    The filter methods (``donor_type``, ``hla_class``, ``early_failed``,
    ``late_failed``) narrow ``self.df`` in place and return ``self`` so that
    the calls can be chained.
    """

    def __init__(self, path:str='../data/desa_3d_view.pickle'):
        """ Load the transplant DataFrame from the pickle file at `path` """
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        self.df = pd.read_pickle(path)

    def __repr__(self):
        return f""" DESA_DB(records={len(self.df)}, columns={self.df.columns}) """

    def __str__(self):
        return repr(self)

    def donor_type(self, donor_type:str='Deceased'):
        """ Keep only the transplants of the given donor type

        donor_type: 'Living' or 'Deceased'; any other value raises KeyError """
        if donor_type not in ('Living', 'Deceased'):
            raise KeyError(f'{donor_type} does not exist in the df values, accepted values: {self.df.Donor_Type.unique()}')
        self.df = self.df[self.df.Donor_Type == donor_type]
        return self

    def get_tx(self, TxID:int) -> pd.DataFrame:
        """ Return the rows of transplant `TxID` (``self.df`` is not modified)

        Raises ValueError if no row has this TransplantID """
        ind = self.df.TransplantID == TxID
        if not ind.any():
            raise ValueError(f'Transplant ID {TxID} does not exist in the datat set')
        return self.df[ind]

    def hla_class(self, hla_class):
        """ Keep only the transplants of the given donor HLA class

        hla_class: 'I', 'II' or 'I,II'; any other value raises KeyError """
        if hla_class not in ('I', 'II', 'I,II'):
            raise KeyError(f'{hla_class} does not exist in the df values, accepted values: {self.df.Donor_HLA_Class.unique()}')
        self.df = self.df[self.df.Donor_HLA_Class == hla_class]
        return self

    def early_failed(self, threshold):
        """ Keep only the transplants with Failure == 1 and
        'Survival[Y]' below `threshold` """
        ind_t = self.df['Survival[Y]'] < threshold
        ind_e = self.df.Failure == 1
        self.df = self.df[ind_t & ind_e]
        return self

    def late_failed(self, threshold):
        """ Keep only the transplants with Failure != 1 and
        'Survival[Y]' above `threshold` """
        ind_t = self.df['Survival[Y]'] > threshold
        # NOTE(review): despite the method name, this selects Failure != 1;
        # confirm that this is the intended definition.
        ind_e = self.df.Failure != 1
        self.df = self.df[ind_t & ind_e]
        return self


In [58]:
# Load both data bases from their default pickle locations.
epitope = Epitope()
desa = DESA()
# Set of the epitopes whose isotype is IgG (this narrows epitope.df in place).
epitope.isotype('IgG')
mAb = set(epitope.df.Epitope.values.tolist())
# Per transplant: the DESA epitopes that are also in mAb, and their count.
desa.df['mAb'] = desa.df.DESA_Epitope.apply(
    lambda desa_epitopes: desa_epitopes.intersection(mAb)
)
desa.df['#mAb'] = desa.df['mAb'].apply(len)
desa.df.head()

../data/EpitopevsHLA.pickle


Unnamed: 0,TransplantID,Status,DESA_Epitope,#DESA,EpvsHLA_Pos,EpvsHLA_Donor,Failure,Survival[Y],DESA->Donor_HLA,Donor_HLA,Donor_HLA_Class,Donor_Type,mAb,#mAb
0,327,DESA,"{94I, 62GE, 44RMA, 97V, 71SA, 74Y, 62GRN}",7,"{'62GRN': {'B*58:01', 'B*57:01'}, '97V': {'B*5...","{'44RMA': 'B*57:01', '62GE': 'B*57:01', '94I':...",1,0.287671,{'B*57:01': 7},{B*57},I,Deceased,{},0
1,369,DESA,"{85A, 37L, 38L, 26L[DR], 57V[DR]}",5,"{'85A': {'DRB1*12:01', 'DRB5*02:02', 'DRB1*01:...","{'26L[DR]': 'DRB1*12:01', '57V[DR]': 'DRB1*12:...",2,13.326027,{'DRB1*12:01': 5},{DRB1*12},II,Deceased,{},0
2,5580,DESA,"{45EV, 55PPD, 55PP}",3,"{'55PP': {'DQB1*03:03', 'DQB1*03:01', 'DQB1*03...","{'55PP': 'DQB1*03:01', '45EV': 'DQB1*03:01', '...",0,12.008219,{'DQB1*03:01': 3},{DQB1*03},II,Living,{},0
3,72,DESA,"{9F[DQ], 125G, 67VG, 86A, 30D, 30H[DQ], 55R, 7...",11,"{'55RPD': {'DQB1*06:02', 'DQB1*06:03', 'DQB1*0...","{'70GT': 'DQB1*06:02', '86A': 'DQB1*06:02', '1...",2,5.539726,"{'DQB1*06:02': 9, 'DQB1*06:03': 1, 'DRB5*01:01...","{DRB5*01, DQB1*06}",II,Living,{},0
4,279,DESA,"{149H, 13SE, 71K, 98Q, 70DA, 96HK, 31FH, 32H, ...",25,"{'32H': {'DRB1*13:01', 'DRB3*02:02', 'DRB1*10:...","{'32H': 'DRB1*13:02', '98Q': 'DRB3*03:01', '4R...",1,0.0,"{'DRB1*13:02': 18, 'DRB3*03:01': 7}","{DRB3*03, DRB1*13}",II,Deceased,{96HK},1


In [37]:
# Transplants whose DESA epitopes share at least one epitope with mAb,
# shown from the highest to the lowest '#mAb'.
ind_mAb = desa.df.DESA_Epitope.apply(
    lambda desa_epitopes: not desa_epitopes.isdisjoint(mAb)
)
desa.df[ind_mAb].sort_values(by='#mAb', ascending=False)

Unnamed: 0,TransplantID,Status,DESA_Epitope,#DESA,EpvsHLA_Pos,EpvsHLA_Donor,Failure,Survival[Y],DESA->Donor_HLA,Donor_HLA,Donor_HLA_Class,Donor_Type,mAb,#mAb
130,1657,DESA,"{149H, 82LR, 13SE, 81ALR, 70QQ, 98Q, 96HK, 31F...",14,"{'149H': {'DRB1*08:01', 'DRB1*13:01', 'DRB1*11...","{'98Q': 'DRB3*03:01', '77N[DR]': 'DRB3*03:01',...",0,11.197260,"{'DRB3*03:01': 3, 'DRB1*13:02': 6, 'A*24:02': 5}","{DRB3*03, A*24, DRB1*13}","I,II",Deceased,"{65GK, 82LR, 80I, 96HK}",4
191,268,DESA,"{82LR, 81ALR, 65GK, 76EN, 80I}",5,"{'80I': {'B*59:01', 'B*15:16', 'B*15:13', 'A*2...","{'65GK': 'A*24:02', '76EN': 'A*24:02', '80I': ...",0,14.008219,{'A*24:02': 5},{A*24},I,Deceased,"{65GK, 82LR, 80I}",3
127,1602,DESA,"{81ALR, 62GE, 95V, 65GK, 97T, 65RK, 62GK, 127K...",21,"{'151AHV': {'A*02:05', 'A*24:02', 'A*69:01', '...","{'151AHV': 'A*02:01', '65GK': 'A*24:02', '127K...",0,18.010959,"{'A*02:01': 12, 'A*24:02': 1, 'B*51:01': 7, 'D...","{B*51, A*24, DRB1*08, A*02}","I,II",Living,"{65GK, 82LR, 80I}",3
302,4467,DESA,"{82LR, 81ALR, 71TD, 71TN, 55PP, 45EV, 76ED, 9H...",15,"{'80I': {'B*59:01', 'B*15:16', 'B*15:13', 'A*2...","{'9H': 'B*37:01', '80TLR': 'B*37:01', '71TD': ...",1,0.000000,"{'B*37:01': 5, 'B*53:01': 6, 'DRB1*11:01': 1, ...","{B*53, B*37, DQB1*03, DRB1*11}","I,II",Living,"{82LR, 80I, 57DE}",3
228,334,DESA,"{166ES, 199V, 41T, 162GLS, 80TA, 45KE}",6,"{'45KE': {'B*41:01', 'B*44:03', 'B*45:01', 'B*...","{'45KE': 'B*44:03', '41T': 'B*44:03', '199V': ...",1,1.353425,{'B*44:03': 6},{B*44},I,Deceased,"{45KE, 41T}",2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,2040,DESA,"{156QA, 151AHV, 66KH, 65GK, 66KA, 127K}",6,"{'151AHV': {'A*02:05', 'A*24:02', 'A*69:01', '...","{'151AHV': 'A*24:02', '65GK': 'A*24:02', '127K...",0,14.008219,{'A*24:02': 6},{A*24},I,Deceased,{65GK},1
149,193,DESA,"{82LR, 81ALR, 71TN, 199V, 76EN, 76ET, 80TA, 80...",8,"{'80TA': {'B*44:02', 'B*13:02', 'B*44:03'}, '7...","{'76EN': 'B*44:02', '71TN': 'B*44:02', '80TLR'...",1,8.720548,{'B*44:02': 8},{B*44},I,Living,{82LR},1
147,1918,DESA,"{163RG, 71TN, 76EN, 66IF, 74Y}",5,"{'163RG': {'A*01:01'}, '76EN': {'B*59:01', 'B*...","{'76EN': 'B*38:01', '71TN': 'B*38:01', '74Y': ...",1,2.369863,"{'B*38:01': 3, 'B*08:01': 1, 'A*01:01': 1}","{A*01, B*08, B*38}",I,Deceased,{163RG},1
139,1818,DESA,"{156QA, 44RME, 151AHV, 66KH, 127K, 150AAH, 65G...",13,"{'151AHV': {'A*02:05', 'A*24:02', 'A*69:01', '...","{'151AHV': 'A*24:02', '65GK': 'A*24:02', '127K...",1,0.000000,{'A*24:02': 13},{A*24},I,Deceased,{65GK},1


In [60]:
# Keep the transplants with Failure == 1 and 'Survival[Y]' < 1/3
# (this narrows desa.df in place), then show the ones with at least one
# mAb epitope, highest '#mAb' first.
desa.early_failed(1/3)
dataframe = desa.df
dataframe.loc[dataframe['#mAb'].ne(0)].sort_values(by='#mAb', ascending=False)

Unnamed: 0,TransplantID,Status,DESA_Epitope,#DESA,EpvsHLA_Pos,EpvsHLA_Donor,Failure,Survival[Y],DESA->Donor_HLA,Donor_HLA,Donor_HLA_Class,Donor_Type,mAb,#mAb
302,4467,DESA,"{82LR, 81ALR, 71TD, 71TN, 55PP, 45EV, 76ED, 9H...",15,"{'80I': {'B*59:01', 'B*15:16', 'B*15:13', 'A*2...","{'9H': 'B*37:01', '80TLR': 'B*37:01', '71TD': ...",1,0.0,"{'B*37:01': 5, 'B*53:01': 6, 'DRB1*11:01': 1, ...","{B*53, B*37, DQB1*03, DRB1*11}","I,II",Living,"{82LR, 80I, 57DE}",3
330,5031,DESA,"{45KE, 41T}",2,"{'45KE': {'B*41:01', 'B*44:03', 'B*45:01', 'B*...","{'45KE': 'B*40:01', '41T': 'B*40:01'}",1,0.169863,{'B*40:01': 2},{B*40},I,Deceased,"{45KE, 41T}",2
4,279,DESA,"{149H, 13SE, 71K, 98Q, 70DA, 96HK, 31FH, 32H, ...",25,"{'32H': {'DRB1*13:01', 'DRB3*02:02', 'DRB1*10:...","{'32H': 'DRB1*13:02', '98Q': 'DRB3*03:01', '4R...",1,0.0,"{'DRB1*13:02': 18, 'DRB3*03:01': 7}","{DRB3*03, DRB1*13}",II,Deceased,{96HK},1
97,1214,DESA,"{45EV, 55PPD, 57DE}",3,"{'55PPD': {'DQB1*03:03', 'DQB1*03:01'}, '45EV'...","{'57DE': 'DRB1*11:01', '45EV': 'DQB1*03:01', '...",1,0.016438,"{'DRB1*11:01': 1, 'DQB1*03:01': 2}","{DQB1*03, DRB1*11}",II,Deceased,{57DE},1
98,1215,DESA,"{66IY, 70IAQ, 65QIA}",3,"{'65QIA': {'B*27:05', 'B*73:01', 'B*81:01', 'B...","{'65QIA': 'B*55:01', '70IAQ': 'B*55:01', '66IY...",1,0.0,{'B*55:01': 3},{B*55},I,Deceased,{65QIA},1
102,130,DESA,"{76ESN, 71TTS, 63NI, 158T, 44RT}",5,"{'44RT': {'B*35:01', 'B*78:01', 'B*53:01', 'B*...","{'44RT': 'B*37:01', '71TTS': 'B*39:06', '158T'...",1,0.021918,"{'B*37:01': 1, 'B*39:06': 4}","{B*39, B*37}",I,Deceased,{71TTS},1
113,1473,DESA,"{71ATD, 65QIA, 97N, 76ED, 71KA}",5,"{'76ED': {'B*27:05', 'B*37:01', 'B*27:03', 'B*...","{'65QIA': 'B*27:05', '71KA': 'B*27:05', '97N':...",1,0.0,{'B*27:05': 5},{B*27},I,Living,{65QIA},1
139,1818,DESA,"{156QA, 44RME, 151AHV, 66KH, 127K, 150AAH, 65G...",13,"{'151AHV': {'A*02:05', 'A*24:02', 'A*69:01', '...","{'151AHV': 'A*24:02', '65GK': 'A*24:02', '127K...",1,0.0,{'A*24:02': 13},{A*24},I,Deceased,{65GK},1
234,3447,DESA,"{163LG, 166ES, 96HK, 199V, 162GLS, 11STS}",6,"{'96HK': {'DRB1*08:01', 'DRB1*13:01', 'DRB1*11...","{'96HK': 'DRB1*13:01', '11STS': 'DRB1*13:01', ...",1,0.175342,"{'DRB1*13:01': 2, 'B*44:02': 4}","{B*44, DRB1*13}","I,II",Living,{96HK},1
293,4157,DESA,"{151AHV, 66KH, 65GK, 66KA, 127K}",5,"{'151AHV': {'A*02:05', 'A*24:02', 'A*69:01', '...","{'151AHV': 'A*24:02', '65GK': 'A*24:02', '127K...",1,0.00274,{'A*24:02': 5},{A*24},I,Deceased,{65GK},1


In [None]:
from lifelines import KaplanMeierFitter
import matplotlib.pyplot as plt
import warnings
# Suppress all warnings from this point on, to keep the notebook output clean.
warnings.filterwarnings('ignore')