# Cluster Interface

## Data Resources

* [PISA](https://www.ebi.ac.uk/pdbe/pisa/)
* [ProtCID](http://dunbrack2.fccc.edu/ProtCiD/Default.aspx)
* [EPPIC](http://www.eppic-web.org/ewui/#)

In [121]:
import nglview
import pandas as pd

# Representation stack handed to the viewer: three layers over the "protein"
# selection (translucent spacefill, secondary-structure coloured lines and a
# faint surface).
rep1 = [
    dict(
        type="spacefill",
        params=dict(sele="protein", color="chainindex", opacity=0.3),
    ),
    dict(
        type="line",
        params=dict(sele="protein", color="sstruc"),
    ),
    dict(
        type="surface",
        params=dict(sele="protein", color="chainindex", opacity=0.1),
    ),
]

In [250]:
# Load the 1u7f mmCIF file into an NGL widget.
view = nglview.show_file("./pdb_files/1u7f.cif")
# Use a dark grey canvas instead of the widget default.
view.background = '#212121'
# Apply the representation stack defined in rep1.
view.representations = rep1
# Bare expression as the last statement of the cell: the notebook renders the widget.
view

NGLWidget(background='#212121')

In [253]:
# Start from an empty scene; every representation below is added explicitly.
view.clear_representations()

# Selection template: a residue expression restricted to a single chain.
chain_res_tem = "({res_str}) and :{chain_id}"

# Interface residues (ab_dict is built from rows with pisa_interface == 1).
a_ab = chain_res_tem.format(res_str=ab_dict['A'], chain_id='A')
b_ab = chain_res_tem.format(res_str=ab_dict['B'], chain_id='B')
# NOTE: the former B/C and A/C selections (b_bc, c_bc, a_ac, c_ac) were removed.
# They read bc_dict / ac_dict, which no active code in this notebook assigns,
# so a fresh run raised NameError, and nothing below ever used them.

# Surface residues (ab_s_dict is built from rows with pisa_surface == 1).
a_ab_s = chain_res_tem.format(res_str=ab_s_dict['A'], chain_id='A')
b_ab_s = chain_res_tem.format(res_str=ab_s_dict['B'], chain_id='B')

# OR the per-chain selections together, each wrapped in parentheses.
i_chains = ' or '.join(f"({i})" for i in (a_ab, b_ab))
s_chains = ' or '.join(f"({i})" for i in (a_ab_s, b_ab_s))
# NOTE(review): "%" and "/0" presumably restrict the selection to atoms without
# an alternate location and to the first model -- confirm against the NGL
# selection-language reference.
interface = f"({i_chains}) and % and /0 and protein"
surface = f"({s_chains}) and % and /0 and protein"

# Layers: chain cartoon, faint grey spacefill for both chains, translucent
# spacefill for the surface residues, opaque surface for the interface residues.
view.add_cartoon(selection="(:A or :B) and protein", color="chainindex", opacity=0.5)
view.add_spacefill(selection="(:A or :B) and protein", color="gray", opacity=0.1)
view.add_spacefill(selection=surface, color="chainindex", opacity=0.3)
view.add_surface(selection=interface, color="chainindex")
# view.add_surface(selection=interface, color="residueindex", opacity=0.05)


# 
# "352 or 355 and ^ and :B and % and /0"

In [159]:
# Per-column parsers shared by the TSV reads below, so identifier columns are
# read as str and numbering columns as int instead of being type-inferred.
converters = {
    'pdb_id': str,
    'chain_id': str,
    'struct_asym_id': str,
    'entity_id': int,
    'author_residue_number': int,
    'residue_number': int,
    'author_insertion_code': str}

eec_as_df = pd.read_csv("C:\\Download\\20200716\\biounit\\0725.tsv", sep="\t", converters=converters)

# PISA interface detail for 1u7f, with columns renamed to the names used by
# the other tables. A single-space cell is read as missing (na_values).
check = pd.read_csv(
    r"C:\Download\20200716\biounit\pisa%interfacedetail%+1u7f%1%1.tsv",
    sep="\t",
    usecols=['pdb_code', 'assemble_code', 'interface_number', 'chain_id', 'residue', 'sequence', 'insertion_code', 'buried_surface_area','solvent_accessible_area', 'hsdc'],
    na_values=[' ']
    ).rename(columns={"pdb_code":"pdb_id",
                      "sequence":"author_residue_number",
                      "insertion_code":"author_insertion_code",
                      "residue":"residue_name",
                      "chain_id": "struct_asym_id_in_assembly"})
# Assign the filled column back instead of calling fillna(inplace=True) on the
# attribute: the in-place form works on an intermediate Series, emits a
# chained-assignment warning in pandas 2.2 and leaves `check` unchanged under
# Copy-on-Write. The NaN values must become '' before the merges below.
check['author_insertion_code'] = check['author_insertion_code'].fillna('')

# Assembly 1 chains of 1u7f, and the per-residue listing for the entry.
chain_df_check = eec_as_df[eec_as_df.pdb_id.eq('1u7f') & eec_as_df.assembly_id.eq(1)]
residues_check = pd.read_csv("C:\\Download\\20200716\\biounit\\pdb%entry%residue_listing%+1u7f.tsv", sep="\t", converters=converters)
# No `on=` is given, so each merge joins on all column names the two frames share.
check = check.merge(chain_df_check, how="left")
check = check.merge(residues_check, how="left")
def annotate_pisa(df: pd.DataFrame) -> pd.DataFrame:
    """Add 0/1 surface and interface flags to a PISA residue table, in place.

    Two integer columns are written to ``df``:

    * ``pisa_surface``   -- 1 where ``solvent_accessible_area`` > 0 (surface
      residue), otherwise 0 (buried residue).
    * ``pisa_interface`` -- 1 where ``buried_surface_area`` > 0 (interface
      residue), otherwise 0.

    Missing (NaN) areas compare as not greater than zero and are flagged 0.

    Args:
        df: Table with numeric ``solvent_accessible_area`` and
            ``buried_surface_area`` columns.

    Returns:
        The same ``df`` object, mutated, so the call can be chained.
    """
    # Vectorised comparison instead of a per-row Python lambda; the explicit
    # int64 cast keeps the flag dtype stable regardless of platform or row count.
    df['pisa_surface'] = df['solvent_accessible_area'].gt(0).astype('int64')
    df['pisa_interface'] = df['buried_surface_area'].gt(0).astype('int64')
    return df

# Flag surface and interface residues; annotate_pisa mutates `check` in place.
annotate_pisa(check)
# check['pic'] = check.apply(lambda x: f"P1_{x['residue_number']}_{x['residue_name']}" if x['struct_asym_id'] == 'B' else f"P2_{x['residue_number']}_{x['residue_name']}", axis=1)
# Per-residue selection fragment: "<author residue number> and ^<insertion code>".
check['pic'] = (
    check['author_residue_number'].astype(str)
    + ' and ^'
    + check['author_insertion_code']
)
# Preview the first interface residues.
check.loc[check['pisa_interface'] == 1].head()

Unnamed: 0,buried_surface_area,struct_asym_id_in_assembly,hsdc,author_insertion_code,residue_name,author_residue_number,solvent_accessible_area,assemble_code,interface_number,pdb_id,...,chain_id,struct_asym_id,assembly_id,model_id,multiple_conformers,observed_ratio,residue_number,pisa_surface,pisa_interface,pic
23,70.2544,B,HS,,GLU,337,152.317993,1,1,1u7f,...,B,B,1,1,,1.0,24,1,1,337 and ^
38,10.0652,B,,,GLY,352,17.1028,1,1,1u7f,...,B,B,1,1,,1.0,39,1,1,352 and ^
39,81.331,B,,,TYR,353,97.243698,1,1,1u7f,...,B,B,1,1,,1.0,40,1,1,353 and ^
40,95.4516,B,H,,VAL,354,95.451599,1,1,1u7f,...,B,B,1,1,,1.0,41,1,1,354 and ^
41,40.3753,B,H,,ASP,355,57.167099,1,1,1u7f,...,B,B,1,1,,1.0,42,1,1,355 and ^


In [160]:
def str_int_join(iterable):
    """Wrap every item in parentheses and join the results with ``' or '``.

    An empty iterable gives an empty string.
    """
    wrapped = ["({})".format(item) for item in iterable]
    return " or ".join(wrapped)
# Map each chain label (struct_asym_id_in_assembly) to one OR-joined selection
# string covering that chain's interface residues (pisa_interface == 1).
ab_dict = (
    check.loc[check['pisa_interface'] == 1]
    .groupby(['struct_asym_id_in_assembly'])['pic']
    .apply(str_int_join)
    .to_dict()
)
# bc_dict = check[check.pisa_interface.eq(1)].groupby(['struct_asym_id_in_assembly']).pic.apply(str_int_join).to_dict()
# ac_dict = check[check.pisa_interface.eq(1)].groupby(['struct_asym_id_in_assembly']).pic.apply(str_int_join).to_dict()

In [212]:
# Map each chain label to one OR-joined selection string covering that chain's
# surface residues (pisa_surface == 1).
# NOTE(review): this groups by struct_asym_id, whereas ab_dict groups by
# struct_asym_id_in_assembly -- confirm the difference is intended.
ab_s_dict = (
    check.loc[check['pisa_surface'] == 1]
    .groupby(['struct_asym_id'])['pic']
    .apply(str_int_join)
    .to_dict()
)