In [15]:
import pandas as pd
import numpy as np
# !pip list

In [16]:
def flatten2list(object) -> list:
    """Return the leaf items of a nested structure as one flat list.

    Every element of ``object`` that is a ``list`` or ``set`` is descended
    into (to any depth); every other element -- including tuples and
    strings -- is kept as a single item. Items appear in iteration order.
    """
    def _leaves(container):
        # Depth-first walk that yields items in the order they are met.
        for element in container:
            if not isinstance(element, (list, set)):
                yield element
            else:
                yield from _leaves(element)

    return list(_leaves(object))

def flatten2set(object) -> set:
    """Return the distinct leaf items of a nested structure as a set.

    Flattening is delegated to ``flatten2list``; duplicates collapse when
    the resulting list is converted to a set.
    """
    leaves = flatten2list(object)
    return set(leaves)

#######################################################
# epitope class
####################################################### 

class Epitope_DB:
    """Thin wrapper around a pickled epitope table held in ``self.df``.

    The filter methods (``epitope`` and ``ellipro``) narrow ``self.df`` in
    place and return ``self`` so calls can be chained. The unfiltered table
    is not kept; build a new instance to start again from the full data.
    """

    def __init__(self, path:str='../data/20201123_EpitopevsHLA.pickle'):
        """Load the epitope table from the pickle file at ``path``."""
        # NOTE(review): unpickling can execute arbitrary code -- only load
        # pickle files from a trusted source.
        self.df = pd.read_pickle(path)

    def __repr__(self):
        return f""" Epitope_DB(records={len(self.df)}, columns={self.df.columns}) """

    def __str__(self):
        # The original called a bare `__repr__()`, which is not a name in
        # scope and raised NameError on str()/print().
        return self.__repr__()

    def epitope(self, value):
        """Keep only rows whose ``Epitope`` matches ``value``.

        Args:
            value: a single epitope name (``str``) for an exact match, or a
                container of names tested with ``in``.

        Returns:
            ``self``, with ``self.df`` replaced by the filtered rows.
        """
        if isinstance(value, str):
            ind = self.df['Epitope'] == value
        else:
            # astype(bool) keeps the mask boolean when the frame is already
            # empty; otherwise pandas would read the empty object-dtype
            # result as a list of column labels and drop every column.
            ind = self.df['Epitope'].apply(lambda x: x in value).astype(bool)
        self.df = self.df[ind]
        return self

    def hlavsep(self, hla_allel:str='Luminex Alleles'):
        """Map every HLA allele to the set of epitopes listed for it.

        Args:
            hla_allel: name of the column whose cells hold the alleles of
                each row.

        Returns:
            A ``defaultdict(set)`` keyed by allele. The same mapping is
            stored on the instance as ``self.hlavsep_dict``.
        """
        from collections import defaultdict
        hlas = flatten2set(self.df[hla_allel].values)
        hlavsep_dict = defaultdict(set)
        for hla in hlas:
            ind = self.df[hla_allel].apply(lambda x: hla in x)
            epitopes = flatten2set(self.df.loc[ind, 'Epitope'].values)
            hlavsep_dict[hla].update(epitopes)
        # The original stored the result as `self.hlavsep`, which replaced
        # this method with a dict on the instance and made any second call
        # fail with "object is not callable". A separate attribute name
        # keeps the method callable.
        self.hlavsep_dict = hlavsep_dict
        return hlavsep_dict

    def ellipro(self, value):
        """Keep only rows whose ``ElliPro Score`` matches ``value``.

        Args:
            value: a single score label (``str``) for an exact match, or a
                container of labels tested with ``in``.

        Returns:
            ``self``, with ``self.df`` replaced by the filtered rows.
        """
        if isinstance(value, str):
            # The original compared the `Epitope` column here (copy-paste
            # from `epitope`), so a string score never matched any row.
            ind = self.df['ElliPro Score'] == value
        else:
            # See `epitope` for why the mask is forced to bool.
            ind = self.df['ElliPro Score'].apply(lambda x: x in value).astype(bool)
        self.df = self.df[ind]
        return self


In [17]:
# Load the epitope table from the default pickle path declared in
# Epitope_DB.__init__.
epitope = Epitope_DB()

# Scratch calls kept for reference. NOTE(review): they refer to `ep_db`,
# which is not defined in the visible cells -- presumably an earlier name
# for `epitope`; confirm before re-enabling.
# len(flatten2set(ep_db.df['All Alleles'].values))
# hlavsep_dict = ep_db.hlavsep()

In [18]:
# pd.DataFrame.from_dict(hlavsep_dict)

In [47]:
# `epitopes` is the very frame held by the database object (an alias, not a
# copy), so the column added here also appears on `epitope.df`.
epitopes = epitope.df

# Start every row at 'unknown'. Re-running this cell overwrites any isotype
# values assigned by other cells.
epitopes['isotype'] = 'unknown'

# Show rows 25-34 (by position) of the epitopes flagged mAb == 'Yes'.
mab_verified = epitopes[epitopes['mAb'] == 'Yes']
mab_verified.iloc[25:35]

Unnamed: 0,Epitope,ElliPro Score,PolymorphicResidues,AntibodyReactivity,Luminex Alleles,All Alleles,distance [A],mean_distance [A],std_distance [A],mAb,Location,isotype
254,37YV[DR],High,"[(37, Y), (38, V)]",Yes,"{DRB1*13:03, DRB1*04:02, DRB1*04:01, DRB1*11:0...","{DRB1*08:14, DRB1*04:28, DRB1*04:62, DRB1*11:9...","{'DRB1*11:04': 65.001, 'DRB1*04:03': 64.691, '...",64.861091,0.154474,Yes,"{'DRB1*11:03': [10.64, -13.85, -0.28], 'DRB1*0...",unknown
262,48Q,High,"[(48, Q)]",Yes,"{DRB4*01:01, DRB4*01:03}","{DRB4*01:06, DRB1*01:31, DRB4*01:01, DRB4*01:0...","{'DRB4*01:03': 67.349, 'DRB4*01:01': 67.349}",67.349,0.0,Yes,"{'DRB4*01:03': [15.61, 18.96, 42.03], 'DRB4*01...",unknown
263,51R,High,"[(51, R)]",Yes,"{DRB3*02:01, DRB3*02:02}","{DRB3*02:01, DRB3*02:17, DRB3*02:16, DRB1*14:5...",{'DRB3*02:02': 63.637},63.637,0.0,Yes,"{'DRB3*02:02': [16.8, 28.01, 45.14]}",unknown
267,57DE,High,"[(57, D), (58, E)]",Yes,"{DRB1*11:03, DRB1*11:04, DRB1*11:01}","{DRB1*11:20, DRB1*11:128, DRB1*11:54, DRB1*11:...","{'DRB1*11:04': 70.793, 'DRB1*11:03': 70.793, '...",70.793,0.0,Yes,"{'DRB1*11:01': [5.32, -16.81, 3.0], 'DRB1*11:0...",unknown
298,74R,High,"[(70, Q), (73, G), (74, R)]",Yes,"{DRB1*03:01, DRB1*03:02, DRB3*01:01, DRB1*03:03}","{DRB1*03:60, DRB1*03:01, DRB3*01:05, DRB1*01:1...","{'DRB3*01:01': 67.762, 'DRB1*03:01': 65.819, '...",66.468,0.914998,Yes,"{'DRB1*03:01': [3.81, -25.76, -12.08], 'DRB3*0...",unknown
310,96HK,High,"[(96, H), (98, K), (120, S)]",Yes,"{DRB1*13:03, DRB1*13:02, DRB1*03:01, DRB1*14:5...","{DRB1*08:14, DRB1*03:01, DRB1*11:128, DRB1*08:...","{'DRB1*14:04': 30.583, 'DRB1*14:54': 30.583, '...",30.585882,0.002272,Yes,"{'DRB1*14:03': [36.73, -18.35, -33.72], 'DRB1*...",unknown
319,98Q,Intermediate,"[(96, H), (98, Q), (120, S)]",Yes,"{DRB3*01:01, DRB3*02:01, DRB3*03:01, DRB3*02:02}","{DRB3*03:03, DRB3*02:24, DRB3*02:01, DRB3*02:1...","{'DRB3*02:02': 30.018, 'DRB3*03:01': 30.468, '...",30.169,0.211428,Yes,"{'DRB3*02:02': [38.62, 14.27, 81.19], 'DRB3*01...",unknown
357,52PQ,High,"[(53, Q)]",Yes,"{DQB1*05:02, DQB1*05:03, DQB1*06:09, DQB1*06:0...","{DQB1*06:24, DQB1*06:52, DQB1*05:10, DQB1*06:1...","{'DQB1*05:02': 67.384, 'DQB1*05:03': 70.63, 'D...",67.864375,1.074072,Yes,"{'DQB1*05:02': [75.15, 51.49, -2.68], 'DQB1*06...",unknown
358,55PP,High,"[(55, P), (56, P)]",Yes,"{DQB1*03:02, DQB1*03:03, DQB1*03:19, DQB1*03:01}","{DQB1*03:38, DQB1*03:35, DQB1*03:05, DQB1*03:0...","{'DQB1*03:03': 75.615, 'DQB1*03:02': 77.048, '...",75.729,1.033567,Yes,"{'DQB1*03:02': [89.49, 16.05, 133.05], 'DQB1*0...",unknown


In [32]:
# Class I
# Manual annotations for class I epitopes, applied from lookup tables rather
# than one hand-written `.loc` line per epitope.

# Label for epitopes annotated with both IgG and IgM. One shared constant is
# used because the cell previously wrote this combination two ways
# ('IgG' + 'IgM' and 'IgM' + 'IgG'), giving two different labels for it.
IGG_IGM = 'IgG' + 'IgM'

# Epitope name -> value written to the 'isotype' column.
CLASS_I_ISOTYPE = {
    '41T': 'IgG',
    '45KE': 'IgG',
    '62GE': IGG_IGM,
    '62GRN': 'IgM',
    '62LQ': 'IgG',
    '62QE': 'IgM',
    '65GK': 'IgG',
    '65QIA': 'IgG',
    '65QKR': 'IgM',
    '65RNA': 'IgG',
    '71TTS': 'IgG',
    '80I': 'IgG',
    '80N': IGG_IGM,   # was 'IgM' + 'IgG'; normalised to the shared label
    '82LR': 'IgG',    # appeared twice in the original cell
    '107W': 'IgM',
    # '143S': 'IgM',  # disabled in the original cell
    '144KR': IGG_IGM,
    '144QL': 'IgM',
    '144TKH': IGG_IGM,
    '145KHA': 'unknown',
    '151AHA': 'IgM',
    '158T': 'IgM',
    '161D': 'IgM',
    '163LW': 'IgM',
    '163RG': 'IgG',
    '163RW': 'IgM',
    '163EW': 'IgG',
    '219W': 'IgM',
}

# Epitope name -> value written to the 'mAb' column.
CLASS_I_MAB = {
    '65QKR': 'Yes',
    '65RNA': 'Yes',
    '107W': 'Yes',
    # '143S': 'Yes',  # disabled in the original cell
    # NOTE(review): the next two write 'unknown' into 'mAb', not 'isotype'.
    # Kept exactly as in the original -- confirm this is intended.
    '152W': 'unknown',
    '166DG': 'unknown',
}

# Loop variables are prefixed so they cannot clobber `epitope` / `epitopes`.
for _name, _isotype in CLASS_I_ISOTYPE.items():
    epitopes.loc[epitopes.Epitope == _name, 'isotype'] = _isotype

for _name, _mab in CLASS_I_MAB.items():
    epitopes.loc[epitopes.Epitope == _name, 'mAb'] = _mab


In [None]:
# Class II human
# Manual annotations for class II epitopes, applied from lookup tables.
# NOTE(review): rows match on the exact `Epitope` string; a displayed row
# elsewhere in this notebook is named '37YV[DR]', which the key '37YV' would
# not match -- verify the names against the table.

# Epitope name -> value written to the 'isotype' column.
CLASS_II_ISOTYPE = {
    '16Y': 'IgM',
    '37YV': 'unknown',
    '48Q': 'unknown',
    '57DE': 'IgG',
    '57S': 'IgM',
    '74R': 'IgM',
    '96HK': 'IgG',
    '98Q': 'unknown',
}

# Epitope name -> value written to the 'mAb' column.
CLASS_II_MAB = {
    '51R': 'unknown',
}

# Loop variables are prefixed so they cannot clobber `epitope` / `epitopes`.
for _name, _isotype in CLASS_II_ISOTYPE.items():
    epitopes.loc[epitopes.Epitope == _name, 'isotype'] = _isotype

for _name, _mab in CLASS_II_MAB.items():
    epitopes.loc[epitopes.Epitope == _name, 'mAb'] = _mab



In [44]:
# Spot-check the row(s) for epitope 82LR.
is_82lr = epitopes['Epitope'] == '82LR'
epitopes.loc[is_82lr]

Unnamed: 0,Epitope,ElliPro Score,PolymorphicResidues,AntibodyReactivity,Luminex Alleles,All Alleles,distance [A],mean_distance [A],std_distance [A],mAb,Location,isotype
100,82LR,High,"[(82, L), (83, R)]",Yes,"{B*51:02, A*24:03, B*38:01, B*27:05, B*13:02, ...","{A*24:25, B*53:02, B*44:37, B*13:18, B*51:06, ...","{'A*24:03': 62.66, 'B*51:02': 61.038, 'A*25:01...",62.79772,1.43262,Yes,"{'A*23:01': [-19.88, 12.77, -32.88], 'B*44:03'...",unknown
