# Get coordinates for network visualisation of interacting residues

This notebook explains how to generate a fixed set of coordinates for the interacting residues and save them to file, so that the same node positions can be reused in later network visualisations.

In [1]:
import pandas as pd
from IFPAggVis.ifpaggvis import aggregate
from IFPAggVis.ifpaggvis.calculate import calculate_distances
from IFPAggVis.ifpaggvis.calculate import calculate_where_diff_and_sim
from IFPAggVis.ifpaggvis.visualise import plot_similarity_between_ligands
import pickle


In [2]:
# Input and output locations, relative to this notebook.
file_path = "../../data/aggregated_files/aggregation_time/"
outpath = "../../data/aggregated_files/"

# Ligand pair to compare and the filter settings that appear in the file names.
ligand1, ligand2 = 1, 2
step_size = 1
filter_val_x2 = 0.2

# Base names (without extension) of the two time-based aggregation files.
name_file1 = f"ligand_{ligand1}_x1_filter_{step_size}_x2_filter_{filter_val_x2}_time_based_aggregation"
name_file2 = f"ligand_{ligand2}_x1_filter_{step_size}_x2_filter_{filter_val_x2}_time_based_aggregation"

# Output prefix combining both ligands and the filter settings.
outfile_name = f"{outpath}ligand_{ligand1}_ligand_{ligand2}_x1_{step_size}_x2_{filter_val_x2}_time"

In [3]:
# Full CSV paths for the two aggregated interaction files.
infile_temp1 = f"{file_path}{name_file1}.csv"
infile_temp2 = f"{file_path}{name_file2}.csv"


In [4]:
# Read both aggregated files, using the first CSV column as the index.
df1, df2 = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (infile_temp1, infile_temp2)
)


In [5]:
# Preview the first three rows of the first ligand's table.
df1.iloc[:3]

Unnamed: 0,HOH8_HBAcceptor,HIS66_Hydrophobic,ASP95_Hydrophobic,ARG96_Hydrophobic,ARG96_HBAcceptor,ARG96_Anionic,ASN124_Hydrophobic,ASN124_HBAcceptor,HIS125_Hydrophobic,HIS125_PiStacking,...,GLU275_HBDonor,GLU275_HBAcceptor,GLU275_Cationic,PHE276_Hydrophobic,PHE276_HBAcceptor,MN400_VdWContact,MN401_VdWContact,HOH402_HBAcceptor,occurence,diff_to_prev
0,1,0,0,0,1,1,0,0,1,0,...,0,1,0,1,0,0,0,1,50,[]
1,1,0,0,0,1,1,0,0,1,0,...,0,1,0,1,0,0,0,0,2,2
2,1,0,0,0,1,1,0,0,1,0,...,0,1,0,1,0,0,0,0,68,2


In [6]:
# Sum of the "occurence" column for the first ligand's table.
df1.loc[:, "occurence"].sum()

74254

In [7]:
# Sum of the "occurence" column for the second ligand's table.
df2.loc[:, "occurence"].sum()

74254

In [8]:
# Preview the first three rows of the second ligand's table.
df2.iloc[:3]

Unnamed: 0,HOH8_HBAcceptor,HIS66_Hydrophobic,ARG96_Hydrophobic,ARG96_HBAcceptor,ARG96_Anionic,ASN124_Hydrophobic,ASN124_HBAcceptor,HIS125_Hydrophobic,GLU126_Hydrophobic,CYS127_Hydrophobic,...,CYS273_Hydrophobic,CYS273_HBAcceptor,GLY274_HBAcceptor,GLU275_Hydrophobic,PHE276_Hydrophobic,MN400_VdWContact,MN401_VdWContact,HOH402_HBAcceptor,occurence,diff_to_prev
0,0,0,0,1,1,0,0,1,0,0,...,0,0,0,1,1,1,1,1,426,[]
1,0,0,0,1,1,0,0,1,0,0,...,0,0,0,1,1,1,1,1,330,2
2,0,0,0,1,1,0,0,1,0,0,...,0,0,0,1,1,1,1,1,26,2


In [9]:
# Combine both interaction tables, passing "LG<n>" as the label for each ligand.
df_merged = aggregate.summarise_two_interaction_dfs(
    df1,
    df2,
    f"LG{ligand1}",
    f"LG{ligand2}",
)

Number of columns df1:  88
Number of columns df2:  57
Number of columns after merge:  91


In [10]:
# Preview the first three rows of the merged table.
df_merged.iloc[:3]

Unnamed: 0,HOH8_HBAcceptor,HIS66_Hydrophobic,ASP95_Hydrophobic,ARG96_Hydrophobic,ARG96_HBAcceptor,ARG96_Anionic,ASN124_Hydrophobic,ASN124_HBAcceptor,HIS125_Hydrophobic,HIS125_PiStacking,...,GLU275_HBDonor,GLU275_HBAcceptor,GLU275_Cationic,PHE276_Hydrophobic,PHE276_HBAcceptor,MN400_VdWContact,MN401_VdWContact,HOH402_HBAcceptor,occurence,Lig
0,1,0,0.0,0,1,1,0,0,1,0.0,...,0.0,1.0,0.0,1,0.0,0,0,1,50,LG1
1,1,0,0.0,0,1,1,0,0,1,0.0,...,0.0,1.0,0.0,1,0.0,0,0,0,2,LG1
2,1,0,0.0,0,1,1,0,0,1,0.0,...,0.0,1.0,0.0,1,0.0,0,0,0,68,LG1


In [11]:
# First three rows without the last two columns
# ("occurence" and "Lig" in the merged table shown above).
df_merged.iloc[:3, :-2]

Unnamed: 0,HOH8_HBAcceptor,HIS66_Hydrophobic,ASP95_Hydrophobic,ARG96_Hydrophobic,ARG96_HBAcceptor,ARG96_Anionic,ASN124_Hydrophobic,ASN124_HBAcceptor,HIS125_Hydrophobic,HIS125_PiStacking,...,GLY274_HBAcceptor,GLU275_Hydrophobic,GLU275_HBDonor,GLU275_HBAcceptor,GLU275_Cationic,PHE276_Hydrophobic,PHE276_HBAcceptor,MN400_VdWContact,MN401_VdWContact,HOH402_HBAcceptor
0,1,0,0.0,0,1,1,0,0,1,0.0,...,1,1,0.0,1.0,0.0,1,0.0,0,0,1
1,1,0,0.0,0,1,1,0,0,1,0.0,...,1,1,0.0,1.0,0.0,1,0.0,0,0,0
2,1,0,0.0,0,1,1,0,0,1,0.0,...,1,1,0.0,1.0,0.0,1,0.0,0,0,0
3,1,0,0.0,0,1,1,0,0,1,0.0,...,1,1,0.0,1.0,0.0,1,0.0,0,0,0
4,1,0,0.0,0,1,1,0,0,0,0.0,...,1,1,0.0,1.0,0.0,1,0.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
982,0,0,0.0,1,1,1,0,0,0,0.0,...,0,1,0.0,0.0,0.0,1,0.0,0,0,0
983,0,0,0.0,1,1,1,0,0,0,0.0,...,0,1,0.0,0.0,0.0,0,0.0,0,0,0
984,0,0,0.0,1,1,1,0,0,0,0.0,...,0,1,0.0,0.0,0.0,1,0.0,0,0,0
985,0,0,0.0,1,1,1,0,0,0,0.0,...,0,1,0.0,0.0,0.0,1,0.0,0,0,0


In [12]:
# Column labels of the merged table, excluding the last two columns,
# as a NumPy array. Each label has the form "<residue>_<interaction type>".
prot_residues = df_merged.columns.values[:-2]

In [13]:
# Display the array of interaction column labels selected from df_merged.
prot_residues

array(['HOH8_HBAcceptor', 'HIS66_Hydrophobic', 'ASP95_Hydrophobic',
       'ARG96_Hydrophobic', 'ARG96_HBAcceptor', 'ARG96_Anionic',
       'ASN124_Hydrophobic', 'ASN124_HBAcceptor', 'HIS125_Hydrophobic',
       'HIS125_PiStacking', 'HIS125_EdgeToFace', 'GLU126_Hydrophobic',
       'CYS127_Hydrophobic', 'CYS127_HBAcceptor', 'SER129_HBAcceptor',
       'ILE130_Hydrophobic', 'ASN131_Hydrophobic', 'ASN131_HBAcceptor',
       'ARG132_Hydrophobic', 'ARG132_HBAcceptor', 'ARG132_PiCation',
       'ILE133_Hydrophobic', 'TYR134_Hydrophobic', 'TYR134_HBDonor',
       'TYR134_HBAcceptor', 'TYR134_PiStacking', 'TYR134_EdgeToFace',
       'VAL195_Hydrophobic', 'PRO196_Hydrophobic', 'ASP197_Hydrophobic',
       'LEU201_Hydrophobic', 'CYS202_Hydrophobic', 'CYS202_HBAcceptor',
       'LEU205_Hydrophobic', 'TRP206_Hydrophobic', 'TRP206_HBDonor',
       'TRP206_PiStacking', 'TRP206_FaceToFace', 'TRP206_EdgeToFace',
       'ASP208_Hydrophobic', 'ASP208_HBDonor', 'ASP208_Cationic',
       'ASP210_Cationic

### Network visualisation

In [14]:
from IFPAggVis.ifpaggvis.visualise import get_unique_residue_position

Generate a unique position for each interacting residue and save the positions to a file so that they can be reloaded later (e.g. for other IFP analyses).

In [15]:
# Compute one position per residue; the output prefix is "<outpath>LG<ligand1>_LG<ligand2>".
pos_nodes_unique = get_unique_residue_position(
    prot_residues,
    outfile=f"{outpath}LG{ligand1}_LG{ligand2}",
)

Example of how to reload the positions saved to file:

In [16]:
import pickle

In [17]:
# Reload the residue positions from the pickle file
# (presumably the one written by the get_unique_residue_position call above).
# NOTE: pickle.load can execute arbitrary code, so only load files you created yourself.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(outpath + "LG" + str(ligand1) + "_LG" + str(ligand2) + '_unique_residue_position.pkl', 'rb') as fp:
    pos_nodes_unique = pickle.load(fp)

In [18]:
# Show the path of the pickle file that holds the residue positions.
f"{outpath}LG{ligand1}_LG{ligand2}_unique_residue_position.pkl"

'../../data/aggregated_files/LG1_LG2_unique_residue_position.pkl'

In [19]:
# Display the reloaded mapping from node label to its (x, y) position.
pos_nodes_unique

{'LIG': (3453.6, 3343.8),
 'HIS248': (5910.5, 2358.1),
 'ASN219': (2896.4, 18.0),
 'ASP210': (1902.8, 6292.0),
 'SER224': (5115.7, 2554.5),
 'ASN131': (1693.9, 2825.3),
 'ARG96': (918.54, 1238.9),
 'ARG132': (5888.6, 1328.7),
 'TRP206': (2643.3, 613.76),
 'ASP208': (4816.3, 402.57),
 'ARG221': (3525.1, 699.52),
 'GLY222': (46.796, 3170.6),
 'CYS202': (6736.8, 2832.3),
 'ASP220': (6214.8, 4042.6),
 'GLY274': (3744.0, 6357.4),
 'GLN249': (4527.5, 1181.3),
 'TYR272': (5466.5, 798.88),
 'HOH402': (728.6, 5285.2),
 'GLU275': (298.95, 4408.4),
 'HIS125': (1262.7, 4692.3),
 'LEU205': (2468.1, 1958.3),
 'PHE276': (1042.4, 1880.0),
 'ASN124': (4855.5, 5688.0),
 'SER129': (5445.7, 6053.7),
 'LEU201': (3169.9, 6678.1),
 'GLU126': (4015.6, 88.263),
 'TYR134': (6099.4, 5353.1),
 'ASP197': (2426.5, 4157.2),
 'HIS66': (2032.5, 1010.4),
 'CYS127': (6458.5, 1910.2),
 'PHE225': (3796.7, 5442.6),
 'ILE130': (5017.7, 3626.5),
 'PHE267': (1510.8, 638.36),
 'CYS273': (2293.9, 5268.3),
 'VAL250': (5494.7, 48

In [20]:
# Count the entries in the reloaded position dictionary.
len(pos_nodes_unique)

45