In [1]:
import pandas as pd
import numpy as np

In [2]:
def flatten2list(object) -> list:
    """Recursively unpack nested lists and sets into a single flat list.

    Only ``list`` and ``set`` members are descended into; every other kind of
    member (strings, tuples, dicts, numbers, ...) is kept as one item.

    Args:
        object: Any iterable. The top level is always iterated, whatever its
            type; members are flattened only if they are lists or sets.

    Returns:
        A new list holding the leaf items in iteration order.
    """
    flat = []
    for element in object:
        is_container = isinstance(element, (list, set))
        # Containers contribute their flattened content, leaves contribute themselves.
        flat += flatten2list(element) if is_container else [element]
    return flat

def flatten2set(object) -> set:
    """Flatten a nested structure and return its distinct leaf items as a set.

    Flattening is delegated to ``flatten2list``; the resulting items are then
    passed to ``set``, so they must be hashable.
    """
    flattened = flatten2list(object)
    return set(flattened)

#######################################################
# epitope class
####################################################### 

class Epitope_DB:
    """Chainable query wrapper around an epitope-vs-HLA DataFrame.

    The table is held in ``self.df``. The filter methods (``epitope`` and
    ``ellipro``) narrow ``self.df`` in place and return ``self`` so calls can
    be chained; create a fresh instance to get the unfiltered table back.
    """

    def __init__(self, path:str='../data/20201123_EpitopevsHLA.pickle'):
        """Load the table from the pickle file at ``path``."""
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        self.df = pd.read_pickle(path)

    def __repr__(self):
        return f""" Epitope_DB(records={len(self.df)}, columns={self.df.columns}) """

    def __str__(self):
        # Was a bare `__repr__()` call, which raised NameError.
        return self.__repr__()

    def _filter(self, column: str, value):
        """Keep only the rows of ``self.df`` whose ``column`` matches ``value``.

        A string ``value`` is compared for equality; anything else is treated
        as a container and rows are kept when the cell is ``in`` it.
        """
        cells = self.df[column]
        if isinstance(value, str):
            mask = cells == value
        else:
            # Cast so an already-empty frame still yields a boolean mask;
            # a non-boolean key would be read as column labels by df[...].
            mask = cells.apply(lambda x: x in value).astype(bool)
        self.df = self.df[mask]
        return self

    def epitope(self, value):
        """Filter rows by the 'Epitope' column.

        Args:
            value: One epitope name (str) or a container of names.

        Returns:
            self, with ``self.df`` narrowed to the matching rows.
        """
        return self._filter('Epitope', value)

    def hlavsep(self, hla_allel:str='Luminex Alleles'):
        """Build a mapping from each HLA allele to the epitopes listed with it.

        Args:
            hla_allel: Name of the column whose cells hold the alleles of each
                row (each cell is iterated, e.g. a set of allele names).

        Returns:
            A ``defaultdict(set)`` keyed by allele; each value is the set of
            'Epitope' entries of the rows whose ``hla_allel`` cell contains
            that allele. The same mapping is kept in ``self.hlavsep_dict``.
        """
        from collections import defaultdict
        hlavsep_dict = defaultdict(set)
        # One pass over the rows instead of one full-column scan per allele.
        for alleles, epitope in zip(self.df[hla_allel], self.df['Epitope']):
            for hla in alleles:
                hlavsep_dict[hla].add(epitope)
        # Stored under its own name: assigning to `self.hlavsep` shadowed this
        # method on the instance and made a second call raise TypeError.
        self.hlavsep_dict = hlavsep_dict
        return hlavsep_dict

    def ellipro(self, value):
        """Filter rows by the 'ElliPro Score' column.

        Args:
            value: One score label (str) or a container of labels.

        Returns:
            self, with ``self.df`` narrowed to the matching rows.
        """
        # The string case used to compare against the 'Epitope' column.
        return self._filter('ElliPro Score', value)


In [8]:
# Load the epitope table from the default pickle path.
epitope = Epitope_DB()

# Disabled scratch checks. NOTE(review): they reference `ep_db`, which no
# visible cell defines; the instance created above is named `epitope`.
# len(flatten2set(ep_db.df['All Alleles'].values))
# hlavsep_dict = ep_db.hlavsep()

In [6]:
# pd.DataFrame.from_dict(hlavsep_dict)

In [10]:
# Show the loaded table; the middle rows are elided in the rendering below.
epitope.df

Unnamed: 0,Epitope,ElliPro Score,PolymorphicResidues,AntibodyReactivity,Luminex Alleles,All Alleles,distance [A],mean_distance [A],std_distance [A],mAb,Location
0,1C,High,"[(1, C)]",unknown,"{C*05:01, C*07:02, C*15:02, C*08:02, C*01:02, ...","{C*06:23, C*08:07, C*07:44, C*07:53, C*14:08, ...","{'C*06:02': 48.755, 'C*15:02': 48.759, 'C*05:0...",48.545385,0.312178,unknown,"{'C*08:02': [17.51, 19.17, 13.44], 'C*12:02': ..."
1,9D,Very Low,"[(9, D)]",unknown,"{C*07:02, C*06:02, C*07:01, B*08:01, C*18:01, ...","{C*06:23, C*07:44, C*07:53, C*07:29, C*07:14, ...","{'B*08:01': 52.644, 'C*06:02': 52.18, 'C*07:02...",52.669500,0.389487,unknown,"{'C*07:02': [16.85, -7.29, 12.52], 'B*08:01': ..."
2,9F[A],Very Low,"[(9, F)]",unknown,"{A*80:01, C*01:02, A*02:03, A*32:01, A*36:01, ...","{A*02:131, A*02:31, A*02:12, A*02:36, A*01:23,...","{'A*02:03': 53.409, 'A*32:01': 52.566, 'A*80:0...",53.752900,0.530792,unknown,"{'A*32:01': [2.45, 4.33, 6.64], 'A*02:01': [20..."
3,9H,Very Low,"[(9, H)]",unknown,"{B*27:08, B*27:03, B*73:01, B*45:01, B*37:01, ...","{B*18:13, B*42:09, B*37:05, B*27:06, B*18:06, ...","{'B*40:01': 54.35, 'B*50:01': 54.077, 'B*27:05...",53.781462,0.471508,unknown,"{'B*40:01': [-3.53, 9.98, -29.58], 'B*37:01': ..."
4,9S,Very Low,"[(9, S)]",unknown,"{A*23:02, A*24:02, A*24:03, A*30:02, A*30:01, ...","{A*30:12, A*24:79, C*14:08, C*04:12, A*24:25, ...","{'A*30:02': 53.668, 'A*24:03': 51.839, 'A*24:0...",51.999286,1.152629,unknown,"{'A*23:01': [-24.06, 8.52, -14.82], 'C*04:01':..."
...,...,...,...,...,...,...,...,...,...,...,...
419,160A,High,"[(160, A)]",unknown,"{DQA1*02:01, DQA1*05:01, DQA1*04:01, DQA1*03:0...","{DQA1*05:01, DQA1*02:01, DQA1*04:01, DQA1*01:0...","{'DQA1*03:01': 29.99, 'DQA1*04:01': 33.55, 'DQ...",30.801500,1.755056,unknown,"{'DQA1*03:01': [73.51, 67.51, 103.62], 'DQA1*0..."
420,160AD,High,"[(160, A), (161, D)]",unknown,"{DQA1*02:01, DQA1*04:01, DQA1*03:01, DQA1*06:0...","{DQA1*02:01, DQA1*04:01, DQA1*03:01, DQA1*01:0...","{'DQA1*03:01': 31.735, 'DQA1*04:01': 35.48, 'D...",31.908000,2.145945,unknown,"{'DQA1*03:01': [74.45, 66.88, 102.11], 'DQA1*0..."
421,160D,High,"[(160, D)]",unknown,"{DQA1*03:03, DQA1*03:02}","{DQA1*03:03, DQA1*03:02}","{'DQA1*03:02': 31.978, 'DQA1*03:03': 34.829}",33.403500,1.425500,unknown,"{'DQA1*03:02': [72.38, 67.98, 103.95], 'DQA1*0..."
422,160S,High,"[(160, S)]",unknown,{DQA1*05:03},"{DQA1*05:06, DQA1*05:07, DQA1*05:03}",{'DQA1*05:03': 33.099},33.099000,0.000000,unknown,"{'DQA1*05:03': [25.66, -20.38, -26.74]}"
