In [6]:
import pandas as pd
import numpy as np

In [3]:
def flatten2list(object) -> list:
    """Return every leaf of a nested structure as one flat list.

    Elements that are themselves a ``list`` or ``set`` are expanded
    recursively, depth first and in iteration order; any other element
    (strings, tuples, numbers, ...) is kept as a single leaf.
    """
    def _leaves(container):
        # Walk one nesting level, descending only into lists and sets.
        for element in container:
            if not isinstance(element, (list, set)):
                yield element
            else:
                yield from _leaves(element)

    return list(_leaves(object))

def flatten2set(object) -> set:
    """Return the distinct leaves of a nested structure as a set.

    The structure is first flattened with ``flatten2list``; duplicates are
    then dropped by collecting the leaves into a set.
    """
    leaves = flatten2list(object)
    return {*leaves}

#######################################################
# epitope class
####################################################### 

class Epitope_DB:
    """Wrapper around the epitope-vs-HLA table offering chainable row filters.

    The table lives in ``self.df``. The filter methods (``epitope`` and
    ``ellipro``) narrow ``self.df`` in place and return ``self`` so calls can
    be chained; create a fresh ``Epitope_DB`` to get the unfiltered table back.
    """

    def __init__(self, path:str='../data/20201123_EpitopevsHLA.pickle'):
        """Load the table from the pickle file at ``path`` into ``self.df``."""
        # NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
        self.df = pd.read_pickle(path)

    def __repr__(self):
        return f""" Epitope_DB(records={len(self.df)}, columns={self.df.columns}) """

    def __str__(self):
        # Must go through ``self``: a bare ``__repr__()`` is an undefined name here.
        return self.__repr__()

    def _keep_rows(self, column: str, value):
        """Keep only the rows whose ``column`` equals ``value`` or is in ``value``.

        A ``str`` is matched by equality; any other argument is treated as a
        collection of accepted values. Narrows ``self.df`` and returns ``self``.
        """
        series = self.df[column]
        if isinstance(value, str):
            mask = series == value
        else:
            # ``isin`` always yields a boolean mask, also for an empty frame.
            mask = series.isin(value)
        self.df = self.df[mask]
        return self

    def epitope(self, value):
        """Filter on the ``Epitope`` column.

        Args:
            value: a single epitope name (``str``) or a collection of names.

        Returns:
            ``self``, with ``self.df`` narrowed to the matching rows.
        """
        return self._keep_rows('Epitope', value)

    def hlavsep(self, hla_allel:str='Luminex Alleles'):
        """Map every HLA found in column ``hla_allel`` to its epitopes.

        HLAs are collected from all rows of the current table, so an empty
        table yields an empty mapping.

        Args:
            hla_allel: name of the column holding the HLA collection of each row.

        Returns:
            ``defaultdict(list)`` keyed by HLA; each value is a one-element
            list holding the set of epitopes of the rows that contain that HLA.
        """
        from collections import defaultdict
        hlas = set()
        for alleles in self.df[hla_allel]:
            hlas |= flatten2set(alleles)
        hlavsep_dict = defaultdict(list)
        for hla in hlas:
            ind = self.df[hla_allel].apply(lambda x: hla in x)
            epitopes = flatten2set(self.df.loc[ind, 'Epitope'].values)
            hlavsep_dict[hla].append(epitopes)
        return hlavsep_dict

    def ellipro(self, value):
        """Filter on the ``ElliPro Score`` column.

        Args:
            value: a single score (``str``) or a collection of scores.

        Returns:
            ``self``, with ``self.df`` narrowed to the matching rows.
        """
        return self._keep_rows('ElliPro Score', value)

In [5]:
# Build the database from the default pickle path; the bare name on the last
# line makes the notebook display the instance's repr.
ep_db = Epitope_DB()
ep_db

 Epitope_DB(records=424, columns=Index(['Epitope', 'ElliPro Score', 'PolymorphicResidues', 'AntibodyReactivity',
       'Luminex Alleles', 'All Alleles', 'distance [A]', 'mean_distance [A]',
       'std_distance [A]', 'mAb', 'Location'],
      dtype='object')) 