Visualize the chemical space of BIOFACQUIM and other libraries.

In [1]:
"""Import Libraries"""
import pandas as pd
import numpy as np

from bokeh.io import  show, output_file
from bokeh.models import ColumnDataSource, LassoSelectTool, ZoomInTool, ZoomOutTool, SaveTool, HoverTool,PanTool, Legend
from bokeh.plotting import figure
from bokeh.core.enums import LegendLocation

In [2]:
# Read the t-SNE results table from disk (comma-separated, which is the
# read_csv default) and preview its first rows.
Data = pd.read_csv("Results_tSNE.csv")
Data.head()

Unnamed: 0.1,Unnamed: 0,PC 1,PC 2,Library,SMILES,Name
0,0,-47.93842,40.677586,Epidatabase,ONC(c(cc1)ccc1N[C@@H](CCN1c(cc2)cc(Cl)c2Cl)C1=...,SBSM341754
1,1,-19.640524,24.254223,Epidatabase,ONC(CCCCCNC(C=C1c(cccc2)c2-c2c1cccc2)=O)=O,SBSM462973
2,2,-15.006295,51.33025,Epidatabase,ONC(/C=C/c1cccc2ccccc12)=O,SBSM267041
3,3,-54.15976,32.496826,Epidatabase,ONC(c1cn(CCN(C2)C(c(cc3)ccc3-c3ccccc3)=O)c2n1)=O,SBSM330733
4,4,26.841022,-9.17039,Epidatabase,Clc(cc1)cnc1N/N=C(\c1ccccc1)/c1ncccc1,SBSM195263


In [3]:
# List the distinct labels found in the "Library" column.
Data["Library"].unique()

array(['Epidatabase', 'FDA', 'PPI', 'AFRODB', 'BIOFACQUIM'], dtype=object)

In [4]:
# The data has to be split by library so each one can be drawn separately.
def column_source( pca_result, L):
    """Build a ColumnDataSource holding the rows of a single library.

    Parameters
    ----------
    pca_result : DataFrame with the columns "Library", "PC 1", "PC 2" and "Name".
    L : value matched against the "Library" column to select rows.

    Returns
    -------
    ColumnDataSource with the fields "x" (from "PC 1"), "y" (from "PC 2")
    and "N" (from "Name"), each stored as a plain Python list.
    """
    in_library = pca_result["Library"] == L
    subset = pca_result.loc[in_library]
    # source field name -> DataFrame column it is filled from
    field_to_column = {"x": "PC 1", "y": "PC 2", "N": "Name"}
    fields = {
        field: subset[column].tolist()
        for field, column in field_to_column.items()
    }
    return ColumnDataSource(fields)

In [5]:
# Build one data source per library. The numbered names are kept because the
# plotting cell refers to source1..source5 directly.
source1, source2, source3, source4, source5 = (
    column_source(Data, library_name)
    for library_name in ("AFRODB", "Epidatabase", "BIOFACQUIM", "FDA", "PPI")
)

In [6]:
#Set the plot features
# Hover tooltip: cursor position ($x, $y) plus the "N" field of the hovered source.
hover = HoverTool(tooltips = [
    ("PCA1", "($x)"),
    ("PCA2", "($y)"),
    ("NAME", "(@N)"),
])
# No x_range / y_range is passed on purpose. The figure used to be pinned to
# (-7, 7) on both axes, but the coordinates in Results_tSNE.csv lie far outside
# that window (the previewed rows alone span about -54 to +51), so nearly every
# point was off-screen on first render. Leaving the ranges unset lets Bokeh fit
# the initial view to the plotted data.
p = figure(title = "Chemical Space by tSNE",
           x_axis_label = "PC 1 " + " ", y_axis_label = "PC 2 " + " ",
           tools = [hover], plot_width = 1000, plot_height = 800)

#plot libraries
# Glyphs are added in this order, so later libraries are drawn on top of earlier ones.
AFRO_plot = p.circle(x = "x", y = "y", source = source1, color = "gold", size = 5)
EPI_plot = p.circle(x = "x", y = "y", source = source2, color = "tomato", size = 5)
PPI_plot = p.circle(x = "x", y = "y", source = source5, color = "blueviolet", size = 5)
FDA_plot = p.circle(x = "x", y = "y", source = source4, color = "teal", size = 5)
BIOFACQUIM_plot = p.circle(x = "x", y = "y", source = source3, color = "yellowgreen", size = 5)
p.add_tools(LassoSelectTool(), ZoomInTool(), ZoomOutTool(), SaveTool(), PanTool())

#configure legend
# The legend is built by hand so it can sit outside the plot area (to the right);
# click_policy = "hide" toggles a library's points when its legend entry is clicked.
legend = Legend(
    items = [
        ("AFRODB", [AFRO_plot]),
        ("EPIDATABASE", [EPI_plot]),
        ("BIOFACQUIM", [BIOFACQUIM_plot]),
        ("FDA", [FDA_plot]),
        ("PPI", [PPI_plot]),
    ],
    location = "center", orientation = "vertical", click_policy = "hide")
p.add_layout(legend, place = 'right')

# Text styling for the axis titles, tick labels and plot title.
p.xaxis.axis_label_text_font_size = "20pt"
p.yaxis.axis_label_text_font_size = "20pt"
p.xaxis.axis_label_text_color = "black"
p.yaxis.axis_label_text_color = "black"
p.xaxis.major_label_text_font_size = "18pt"
p.yaxis.major_label_text_font_size = "18pt"
p.title.text_font_size = "22pt"

In [7]:
# Display the assembled figure. No output_file()/output_notebook() call appears
# in the cells shown here, so where it renders depends on Bokeh's default output
# settings — NOTE(review): confirm the intended output target.
show(p)