In [1]:
import pandas as pd
import numpy as np
# !pip list

In [2]:
def flatten2list(object) -> list:
    """Return the leaves of a nested structure as one flat list.

    Only ``list`` and ``set`` members are descended into; every other item
    (strings and tuples included) is kept as a single element. Leaves are
    emitted in iteration order of the containers they were found in.
    """
    def _leaves(container):
        # Depth-first walk that yields each non-list/non-set member as it is met.
        for member in container:
            if not isinstance(member, (list, set)):
                yield member
            else:
                yield from _leaves(member)

    return list(_leaves(object))

def flatten2set(object) -> set:
    """Flatten a nested structure with `flatten2list` and return its distinct leaves as a set."""
    flat_items = flatten2list(object)
    return {*flat_items}

#######################################################
# epitope class
####################################################### 

class Epitope_DB:
    """Wrapper around a pandas DataFrame of epitope records loaded from a pickle.

    The filter methods (`epitope`, `ellipro`) replace ``self.df`` with the
    filtered frame and return ``self`` so that calls can be chained. The
    unfiltered frame is not kept, so build a new instance to start over.
    """

    def __init__(self, path:str='../data/EpitopevsHLA.pickle'):
        """Load the table stored at `path` into ``self.df``."""
        # NOTE(review): unpickling can execute arbitrary code; only load files
        # from a trusted source.
        self.df = pd.read_pickle(path)

    def __repr__(self):
        return f""" Epitope_DB(records={len(self.df)}, columns={self.df.columns}) """

    def __str__(self):
        # Delegate to the bound __repr__; a bare `__repr__()` is an undefined name.
        return self.__repr__()

    def epitope(self, value):
        """Keep only the rows whose ``Epitope`` matches `value`.

        Args:
            value: a single epitope name (``str``) for an exact match, or a
                collection of names, any of which may match.

        Returns:
            ``self``, with ``self.df`` narrowed to the matching rows.
        """
        if isinstance(value, str):
            ind = self.df.Epitope == value
        else:
            # `isin` always yields a boolean mask, including on an empty frame.
            ind = self.df.Epitope.isin(value)
        self.df = self.df[ind]
        return self

    def hlavsep(self, hla_allel:str='Luminex Alleles'):
        """Map every HLA allele in column `hla_allel` to the set of epitopes listing it.

        Args:
            hla_allel: name of the column whose cells hold the alleles of a row.

        Returns:
            ``defaultdict(set)`` keyed by allele. The same mapping is also kept
            on ``self.hlavsep_dict``.
        """
        from collections import defaultdict
        hlas = flatten2set(self.df[hla_allel].values)
        hlavsep_dict = defaultdict(set)
        for hla in hlas:
            # Bind `hla` as a default so the lambda does not depend on the loop variable.
            ind = self.df[hla_allel].apply(lambda alleles, hla=hla: hla in alleles)
            epitopes = flatten2set(self.df[ind]['Epitope'].values)
            hlavsep_dict[hla].update(epitopes)
        # Stored under a separate attribute name: assigning to `self.hlavsep`
        # would replace this method on the instance and break a second call.
        self.hlavsep_dict = hlavsep_dict
        return hlavsep_dict

    def ellipro(self, value):
        """Keep only the rows whose ``ElliPro Score`` matches `value`.

        Args:
            value: a single score label (``str``) for an exact match, or a
                collection of labels, any of which may match.

        Returns:
            ``self``, with ``self.df`` narrowed to the matching rows.
        """
        scores = self.df['ElliPro Score']
        if isinstance(value, str):
            # Compare against the score column, not the epitope names.
            ind = scores == value
        else:
            ind = scores.isin(value)
        self.df = self.df[ind]
        return self


In [3]:
# Load the epitope table from the default pickle path of Epitope_DB.
epitope = Epitope_DB()

# Scratch checks, left disabled. NOTE(review): they refer to `ep_db`, which no
# visible cell defines; the instance created above is named `epitope`.
# len(flatten2set(ep_db.df['All Alleles'].values))
# hlavsep_dict = ep_db.hlavsep()

In [4]:
# pd.DataFrame.from_dict(hlavsep_dict)

In [5]:
# `epitopes` is a second name for the frame held by `epitope.df`, not a copy:
# assignments made through `epitopes.loc[...]` also change `epitope.df`.
epitopes = epitope.df
# Disabled: initialise the isotype column / inspect the mAb == 'Yes' rows.
# epitopes['isotype'] = 'unknown'
# epitopes[epitopes.mAb == 'Yes'][25:35]

In [48]:
# Class I
# Manual annotations as (epitope, column, value) rows, applied top to bottom.
# Each row sets `column` to `value` on every record whose Epitope equals the name.
# NOTE(review): combined isotypes are written both as 'IgG' + 'IgM' and as
# 'IgM' + 'IgG', which produce different strings -- confirm this is intended.
_class_i_annotations = [
    ('41T', 'isotype', 'IgG'),
    ('45KE', 'isotype', 'IgG'),
    ('62GE', 'isotype', 'IgG' + 'IgM'),
    ('62GRN', 'isotype', 'IgM'),
    ('62LQ', 'isotype', 'IgG'),
    ('62QE', 'isotype', 'IgM'),
    ('65GK', 'isotype', 'IgG'),
    ('65QIA', 'isotype', 'IgG'),
    ('65QKR', 'mAb', 'Yes'),
    ('65QKR', 'isotype', 'IgM'),
    ('65RNA', 'mAb', 'Yes'),
    ('65RNA', 'isotype', 'IgG'),
    ('71TTS', 'isotype', 'IgG'),
    ('80I', 'isotype', 'IgG'),
    ('80N', 'isotype', 'IgM' + 'IgG'),
    ('82LR', 'isotype', 'IgG'),
    ('107W', 'mAb', 'Yes'),
    ('107W', 'isotype', 'IgM'),
    # ('143S', 'mAb', 'Yes'),
    # ('143S', 'isotype', 'IgM'),
    ('144KR', 'isotype', 'IgG' + 'IgM'),
    ('144QL', 'isotype', 'IgM'),
    ('144TKH', 'isotype', 'IgG' + 'IgM'),
    ('145KHA', 'isotype', 'unknown'),
    ('151AHA', 'isotype', 'IgM'),
    ('152W', 'mAb', 'unknown'),
    ('158T', 'isotype', 'IgM'),
    ('161D', 'isotype', 'IgM'),
    ('163LW', 'isotype', 'IgM'),
    ('163RG', 'isotype', 'IgG'),
    ('163RW', 'isotype', 'IgM'),
    ('163EW', 'isotype', 'IgG'),
    ('166DG', 'mAb', 'unknown'),
    ('219W', 'isotype', 'IgM'),
]
for _ep_name, _column, _value in _class_i_annotations:
    epitopes.loc[epitopes.Epitope == _ep_name, _column] = _value


In [49]:
# Class II human
# Manual annotations as (epitope, column, value) rows, applied top to bottom.
# Each row sets `column` to `value` on every record whose Epitope equals the name.
_class_ii_annotations = [
    ('16Y', 'isotype', 'IgM'),
    ('37YV[DR]', 'isotype', 'unknown'),
    ('48Q', 'isotype', 'unknown'),
    ('51R', 'mAb', 'unknown'),
    ('57DE', 'isotype', 'IgG'),
    ('57S', 'isotype', 'IgM'),
    ('74R', 'isotype', 'IgM'),
    ('96HK', 'isotype', 'IgG'),
    ('98Q', 'isotype', 'unknown'),
]
for _ep_name, _column, _value in _class_ii_annotations:
    epitopes.loc[epitopes.Epitope == _ep_name, _column] = _value



In [60]:
# Disabled check: list the rows whose isotype string contains 'IgM'.
# ind = epitopes['isotype'].apply(lambda x: 'IgM' in x)
# epitopes[ind]
# Persist the annotated table. This writes to the same path that Epitope_DB
# reads by default, so the file the notebook loads from is overwritten.
epitopes.to_pickle('../data/EpitopevsHLA.pickle')

In [64]:
# epitopes[epitopes.mAb == 'Yes'].shape

In [65]:
# Display the column labels of the annotated table.
epitopes.columns

Index(['Epitope', 'ElliPro Score', 'PolymorphicResidues', 'AntibodyReactivity',
       'Luminex Alleles', 'All Alleles', 'distance [A]', 'mean_distance [A]',
       'std_distance [A]', 'mAb', 'Location', 'isotype'],
      dtype='object')

In [45]:
# Spot-check two annotated epitopes. A cell only renders its last expression,
# so the '37YV[DR]' selection is evaluated and discarded; only the '62LQ' row
# is shown. Wrap the first selection in display(...) to see both.
epitopes[epitopes.Epitope == '37YV[DR]']
epitopes[epitopes.Epitope == '62LQ']

Unnamed: 0,Epitope,ElliPro Score,PolymorphicResidues,AntibodyReactivity,Luminex Alleles,All Alleles,distance [A],mean_distance [A],std_distance [A],mAb,Location,isotype
34,62LQ,Intermediate,"[(62, L), (63, Q)]",Yes,"{A*29:01, A*43:01, A*29:02}","{A*11:11, A*29:17, A*29:13, A*29:16, A*29:09, ...","{'A*29:02': 63.992, 'A*29:01': 63.957, 'A*43:0...",64.005333,0.045886,Yes,"{'A*29:02': [8.63, 4.84, 39.21], 'A*29:01': [8...",IgG
