## Visualize Chemical Space

Objective: Visualize the chemical space of BIOFACQUIM and other libraries,
            based on the PCA results.

## Import Libraries

In [1]:
import pandas as pd
import numpy as np

from bokeh.io import  show, output_file
from bokeh.models import ColumnDataSource, LassoSelectTool, ZoomInTool, ZoomOutTool, SaveTool, HoverTool,PanTool, Legend
from bokeh.plotting import figure
from bokeh.core.enums import LegendLocation

## Open Database

In [2]:
# Load the PCA results table (comma-separated, which is the pandas default
# delimiter) and preview its first rows.
Data = pd.read_csv("Results_PCA.csv")
Data.head()

Unnamed: 0.1,Unnamed: 0,PC 1,PC 2,PC 3,PC 4,PC 5,PC 6,Library,SMILES,Name
0,0,7.489868,12.745201,-4.959822,2.575001,-1.385036,0.20667,AFRODB,CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC(OC...,NPR_00036
1,1,13.844666,-3.851776,2.574616,-0.270224,0.054491,-1.170585,AFRODB,C[C@H](CCC(O[C@H](C[C@@H]([C@@H]1CC2)[C@H]3[C@...,ABD_UD_004
2,2,13.293397,-3.420555,2.417961,-0.619019,0.065054,-1.4196,AFRODB,C[C@H](CC[C@@]([C@H]1C)(OC)O[C@H](C2)[C@H]1[C@...,ABD_UD_003
3,3,10.032845,10.458687,-5.273828,5.431767,-1.559375,1.096686,AFRODB,COc(cc(/C=C/C(OCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC...,WA_0086
4,4,7.198307,9.831694,-5.158237,4.113243,-1.741645,0.354613,AFRODB,COc(cc(/C=C/C(OCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC...,WA_0085


In [3]:
# List the distinct library labels found in the "Library" column
# (rows without a label appear as nan in the result).
Data["Library"].unique()

array(['AFRODB', 'BIOFACQUIM', nan, 'Epidatabase', 'FDA', 'PPI'],
      dtype=object)

## Plot

In [4]:
#Filter Data by Library

In [4]:
def column_source(pca_result, L, x_col="PC 1", y_col="PC 2", name_col="Name"):
    """Build a Bokeh ``ColumnDataSource`` holding the rows of one library.

    Parameters
    ----------
    pca_result : pandas.DataFrame
        Table with a ``"Library"`` column plus the columns named by
        ``x_col``, ``y_col`` and ``name_col``.
    L : str
        Library label to select; only rows whose ``"Library"`` value equals
        it are kept, so rows with a missing label are never selected.
    x_col, y_col : str, optional
        Columns exposed as the ``x`` and ``y`` fields. They default to
        ``"PC 1"`` and ``"PC 2"``; pass another component (e.g. ``"PC 3"``)
        to plot a different projection.
    name_col : str, optional
        Column exposed as the ``N`` field (default ``"Name"``).

    Returns
    -------
    ColumnDataSource
        Source with the fields ``x``, ``y`` and ``N``. The fields are empty
        lists when no row matches ``L``.
    """
    rows = pca_result[pca_result["Library"] == L]
    return ColumnDataSource(
        dict(
            x=rows[x_col].to_list(),
            y=rows[y_col].to_list(),
            N=rows[name_col].to_list(),
        )
    )

In [5]:
# Build one data source per library. The numbering follows the original
# assignment: 1=AFRODB, 2=Epidatabase, 3=BIOFACQUIM, 4=FDA, 5=PPI.
source1, source2, source3, source4, source5 = (
    column_source(Data, library)
    for library in ('AFRODB', 'Epidatabase', 'BIOFACQUIM', 'FDA', 'PPI')
)

In [6]:
#Set the plot features
# Tooltips use "@field" so they report the hovered point's own values from its
# data source ("$x"/"$y" would report the cursor position instead).
hover = HoverTool(tooltips = [          ("PCA1","(@x)"),
                                        ("PCA2","(@y)"),
                                        ("NAME","(@N)"),
                                        ])
# width/height replace plot_width/plot_height, which Bokeh 3.0 removed.
# x_range/y_range only set the initial viewport: the Data.head() preview shows
# PC values beyond +/-7, so such points start off-screen until the user pans
# or zooms out.
p = figure(title = "Chemical Space by PCA",
        x_axis_label = "PC 1 " + " ", y_axis_label="PC 2 " + " ",
        x_range = (-7,7), y_range = (-7,7), tools = [hover], width = 1000, height = 800)

#plot libraries
# scatter() draws circle markers by default; it is used instead of
# circle(size=...), which recent Bokeh releases deprecate.
# Renderers are added in drawing order, so BIOFACQUIM (added last) is drawn on top.
AFRO_plot = p.scatter(x = "x", y = "y", source = source1, color = "gold", size = 5)
EPI_plot = p.scatter(x = "x", y = "y", source = source2, color = "tomato", size = 5)
PPI_plot = p.scatter(x = "x", y = "y", source = source5, color ="blueviolet", size = 5)
FDA_plot = p.scatter(x = "x", y = "y", source = source4, color = "teal", size = 5)
BIOFACQUIM_plot = p.scatter(x = "x", y = "y", source = source3, color ="yellowgreen", size = 5)
p.add_tools(LassoSelectTool(), ZoomInTool(), ZoomOutTool(), SaveTool(), PanTool())

#configure legend
# click_policy="hide" lets a click on a legend entry toggle that library's points.
legend = Legend(items=[
            ("AFRODB",     [AFRO_plot]),
            ("EPIDATABASE",     [EPI_plot]),
            ("BIOFACQUIM",   [BIOFACQUIM_plot]),
            ("FDA",      [FDA_plot]),
            ("PPI", [PPI_plot])
            ],
        location = "center", orientation = "vertical", click_policy = "hide")
# Place the legend in the panel to the right of the plot area.
p.add_layout(legend, place = 'right')

# Text styling for axis labels, tick labels and title.
p.xaxis.axis_label_text_font_size = "20pt"
p.yaxis.axis_label_text_font_size = "20pt"
p.xaxis.axis_label_text_color = "black"
p.yaxis.axis_label_text_color = "black"
p.xaxis.major_label_text_font_size = "18pt"
p.yaxis.major_label_text_font_size = "18pt"
p.title.text_font_size = "22pt"

In [7]:
# Render the figure. NOTE(review): no output_notebook()/output_file() call is
# visible in this notebook, so Bokeh presumably falls back to its default
# output mode rather than displaying inline — confirm this is intended.
show(p)