In [3]:
import pandas as pd
import numpy as np

In [4]:
def flatten2list(object) -> list:
    """Return the leaf items of a nested structure as one flat list.

    Only ``list`` and ``set`` members are descended into (recursively);
    every other item, including tuples and strings, is kept as-is.
    """
    flat = []
    for element in object:
        if not isinstance(element, (list, set)):
            # Leaf item: keep it unchanged.
            flat.append(element)
            continue
        # Nested list/set: splice its flattened contents in place.
        flat += flatten2list(element)
    return flat

def flatten2set(object) -> set:
    """Return the unique leaf items of a nested structure as a set.

    Flattening is delegated to ``flatten2list``; duplicates are then dropped.
    """
    flattened = flatten2list(object)
    return set(flattened)

#######################################################
# epitope class
####################################################### 

class Epitope_DB:
    """In-memory view over the pickled epitope-vs-HLA table.

    The table lives in ``self.df`` (a pandas DataFrame). The filter methods
    (``epitope`` and ``ellipro``) narrow ``self.df`` in place and return
    ``self`` so that calls can be chained; create a fresh ``Epitope_DB`` to
    get the unfiltered table back.
    """

    def __init__(self, path:str='../data/20201123_EpitopevsHLA.pickle'):
        """Load the table from the pickle file at ``path``."""
        # NOTE(review): unpickling can execute arbitrary code embedded in the
        # file -- only point this at pickles from a trusted source.
        self.df = pd.read_pickle(path)

    def __repr__(self):
        """Summarise the current (possibly filtered) table."""
        return f""" Epitope_DB(records={len(self.df)}, columns={self.df.columns}) """

    def __str__(self):
        # Must go through self: a bare ``__repr__()`` is an undefined name
        # at this point and raises NameError.
        return self.__repr__()

    def _filter(self, column:str, value):
        """Keep only the rows whose ``column`` equals / is contained in ``value``.

        A string or any other scalar is matched by equality; a list-like
        (list, set, tuple, ...) is matched by membership.
        """
        if pd.api.types.is_list_like(value):
            # isin always yields a boolean mask, even on an empty frame.
            mask = self.df[column].isin(list(value))
        else:
            mask = self.df[column] == value
        self.df = self.df[mask]
        return self

    def epitope(self, value):
        """Restrict the table to the given epitope(s).

        Args:
            value: a single epitope name, or a collection of names.

        Returns:
            ``self``, with ``self.df`` narrowed to the matching rows.
        """
        return self._filter('Epitope', value)

    def hlavsep(self, hla_allel:str='Luminex Alleles'):
        """Map every HLA allele in column ``hla_allel`` to its epitopes.

        Each cell of ``hla_allel`` is assumed to be a collection of allele
        names (TODO confirm against the pickle schema).

        Args:
            hla_allel: name of the column holding the alleles of each row.

        Returns:
            ``defaultdict(set)`` keyed by allele; each value is the set of
            ``Epitope`` entries of the rows whose ``hla_allel`` cell contains
            that allele. The same mapping is kept on ``self.hlavsep_dict``.
        """
        from collections import defaultdict
        hlas = flatten2set(self.df[hla_allel].values)
        hlavsep_dict = defaultdict(set)
        for hla in hlas:
            ind = self.df[hla_allel].apply(lambda x: hla in x)
            epitopes = flatten2set(self.df[ind]['Epitope'].values)
            hlavsep_dict[hla].update(epitopes)
        # Do not assign to ``self.hlavsep``: that would shadow this method on
        # the instance and make a second call fail with TypeError.
        self.hlavsep_dict = hlavsep_dict
        return hlavsep_dict

    def ellipro(self, value):
        """Restrict the table to the given ElliPro score(s).

        Args:
            value: a single 'ElliPro Score' value, or a collection of them.

        Returns:
            ``self``, with ``self.df`` narrowed to the matching rows.
        """
        # Both the single-value and the collection case filter on the
        # 'ElliPro Score' column (not on 'Epitope').
        return self._filter('ElliPro Score', value)


In [5]:
# Load the epitope table from the default pickle path.
ep_db = Epitope_DB()

# len(flatten2set(ep_db.df['All Alleles'].values))
# Build the mapping from each HLA allele (default column 'Luminex Alleles')
# to the set of epitopes found on the rows that list that allele.
hlavsep_dict = ep_db.hlavsep()

In [6]:
# pd.DataFrame.from_dict(hlavsep_dict)

defaultdict(set,
            {'DRB1*03:03': {'108P',
              '112H',
              '11STS',
              '120S',
              '13SE',
              '140TV',
              '16H',
              '180V',
              '180VTP',
              '181T',
              '189R',
              '25R',
              '26F',
              '31F',
              '31FH',
              '32H',
              '33N',
              '37N',
              '38V',
              '40F',
              '47Y',
              '47YR',
              '57D',
              '57DA',
              '58A',
              '58AY',
              '60Y',
              '67LQ',
              '70Q',
              '70QK',
              '71K',
              '73G',
              '74R',
              '77N[DR]',
              '78Y',
              '85V',
              '85VV',
              '86V',
              '96H',
              '96HK',
              '98K',
              '98KS'},
             'C*12:02': {'103L',
              '113YD',
   