In [2]:
#https://qiita.com/kimisyo/items/f3c9209a48ad08923986
import pandas as pd
from sklearn.decomposition import PCA
import umap
import bokeh
from bokeh.models import  ColumnDataSource
from bokeh.plotting import figure

import numpy as np
import pandas as pd
from rdkit import rdBase, Chem
from rdkit.Chem import AllChem, Descriptors, Draw
from Autodescriptor import AutoDescriptor

In [3]:
# Load the molecule table from CSV.
main_path="data/Molecule-2022-04-11.csv"
main_df=pd.read_csv(main_path)

# Keep only the rows that have a value in "converted_li_potential1".
# (Explicit notna() replaces the former `col == col` self-comparison, which
# relied on NaN != NaN to express the same filter.)
main_df=main_df[main_df["converted_li_potential1"].notna()]

# Renumber the rows 0..n-1 so later cells can align new columns by position.
# reset_index() without drop=True keeps the original row labels in an
# "index" column.
main_df=main_df.reset_index()

In [4]:
from rdkit.Avalon import pyAvalonTools
from rdkit import rdBase, Chem, DataStructs

smiles_list=list(main_df["SMILES"])

# Descriptor table computed by the project's AutoDescriptor helper.
# It used to be stored in `sm_df` and was then silently overwritten by the
# fingerprint table below; it now has its own name so the result is not lost.
# NOTE(review): nothing in the visible cells consumes it -- drop this call if
# the descriptors are not needed.
desc_calculator=AutoDescriptor()
descriptor_df=desc_calculator(smiles_list)

# Parse every SMILES and fail early, naming the offending strings, instead of
# letting a None molecule reach the fingerprint call.
mols=[Chem.MolFromSmiles(smiles) for smiles in smiles_list]
invalid_smiles=[smiles for smiles, mol in zip(smiles_list, mols) if mol is None]
if invalid_smiles:
    raise ValueError(
        f"RDKit could not parse {len(invalid_smiles)} SMILES string(s), "
        f"e.g. {invalid_smiles[:5]}"
    )

# MACCS keys fingerprint for each molecule. (An Avalon fingerprint was
# previously computed here as well, but it was overwritten immediately and
# never used, so that dead computation has been removed.)
fps=[AllChem.GetMACCSKeysFingerprint(mol) for mol in mols]

# Convert each bit vector into a numpy row; `fp` is filled by
# ConvertToNumpyArray and then collected.
fp_list=[]
for bit_fp in fps:
    fp = np.zeros((0,), dtype=int)
    DataStructs.ConvertToNumpyArray(bit_fp, fp)
    fp_list.append(fp)

# One row per molecule: fingerprint values in integer-named columns plus the
# source SMILES string.
sm_df=pd.DataFrame(fp_list)
sm_df["SMILES"]=smiles_list

In [5]:
# Autoscaling (standardise every fingerprint column) followed by PCA.
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Fixed seed so the projection is reproducible if scikit-learn selects its
# randomized SVD solver for this data size.
PCA_RANDOM_STATE = 0

ss = StandardScaler()
targets_scaling = ss.fit_transform(sm_df.drop("SMILES",axis=1))

# Two-component projection -> plotting columns "x" and "y".
pca_2d = PCA(n_components=2, random_state=PCA_RANDOM_STATE)
show_df = pd.DataFrame(pca_2d.fit_transform(targets_scaling), columns=["x","y"])

# Separate one-component projection -> column "z".
# NOTE(review): this is fitted on the same scaled matrix as above, so "z" is
# expected to track "x" closely -- confirm that a distinct axis is intended.
pca_model = PCA(n_components=1, random_state=PCA_RANDOM_STATE)
Z = pca_model.fit_transform(targets_scaling)
show_df["z"]=Z

# Attach labels and target values. These assignments align on the row index,
# so they rely on main_df and sm_df both being numbered 0..n-1.
show_df["SMILES"]=sm_df["SMILES"]
show_df["ID"]=main_df["title"]
show_df["converted_li_potential1"]=main_df["converted_li_potential1"]
show_df["converted_li_potential2"]=main_df["converted_li_potential2"]

In [6]:
# Render a 128x128 structure image for every SMILES string in the table.
images = [
    Draw.MolToImage(Chem.MolFromSmiles(smi), size=(128, 128))
    for smi in sm_df["SMILES"]
]


#show_df["images"]=images

In [8]:
from io import BytesIO
from PIL import Image
import base64

def to_png(arr):
    """Encode an array as a PNG and return the encoded bytes.

    The array is wrapped with ``Image.fromarray`` and written to an
    in-memory buffer; nothing is saved to disk.
    """
    buffer = BytesIO()
    Image.fromarray(arr).save(buffer, format='png')
    return buffer.getvalue()

def b64_image_files(images):
    """Convert image objects into base64-encoded PNG data URIs.

    Parameters
    ----------
    images : iterable
        Objects exposing ``save(fileobj, format=...)`` (the notebook passes
        the images produced by ``Draw.MolToImage``).

    Returns
    -------
    list of str
        One ``data:image/png;base64,...`` URI per input image, in input
        order. An empty input yields an empty list.
    """
    urls = []
    for im in images:
        out = BytesIO()
        im.save(out, format='png')
        encoded = base64.b64encode(out.getvalue()).decode('utf-8')
        # The media type must be "image/png"; the previous
        # "image_files/png" prefix was not a valid MIME type.
        urls.append('data:image/png;base64,' + encoded)
    return urls


#filenames = b64_image_files(show_df['images'])
# Encode every rendered structure as a data URI and store the URIs both in
# `filenames` and in the "image_files" column of the plotting table.
show_df['image_files'] = filenames = b64_image_files(images)

In [28]:
from bokeh.palettes import Turbo256
from bokeh.transform import linear_cmap
from bokeh.plotting import output_notebook, figure, show
from bokeh import plotting as bplot

# Data source backing every glyph: one row of show_df per molecule.
source = ColumnDataSource(show_df)

# Hover tooltip: molecule ID, both potential columns and the structure image.
TOOLTIPS = """
    <div>
        <table border="0">
            <tr><td>@ID</td></tr>
            <tr><td>@converted_li_potential1 V</td></tr>
            <tr><td>@converted_li_potential2 V</td></tr>
            <tr><td style="padding:5px;">
                <img
                src="@image_files" height="120" alt="image"
                style="float: left; margin: 0px 15px 15px 0px; image-rendering: pixelated;"
                border="2"
                ></img>
            </td></tr>
        </table>
    </div>
"""

# To write a standalone HTML file instead, uncomment:
#bplot.output_file('plot.html')

# Figure setup; width and height are left at Bokeh's defaults.
p = figure(
    tools="pan,box_zoom,lasso_select,box_select,poly_select,tap,wheel_zoom,reset,save,zoom_in",
    title="Analyze Result",
    tooltips=TOOLTIPS,
)

# Colour the markers by the first potential column, spanning its full range.
# Series.min()/max() are used rather than the builtins, which iterate the
# Series element by element.
field_name="converted_li_potential1"
vmax = show_df[field_name].max()
vmin = show_df[field_name].min()
mapper = linear_cmap(field_name=field_name, palette=Turbo256, low=vmin, high=vmax)

# scatter() draws circle markers by default; it replaces circle(size=...),
# which recent Bokeh releases deprecate.
p.scatter(x=field_name, y='z', source=source, size=12, color=mapper)

# Overlay a 30x30 screen-pixel, half-transparent structure image on each point.
p.image_url(x=field_name, y='z', source=source, url="image_files",
            w=30, h=30, h_units='screen', w_units='screen',
            alpha=0.5)

In [29]:
output_notebook()    # <- this line is required for the plot to render inside the notebook
show(p)


In [None]:
# Display the plotting table (projection coordinates, labels, potentials and
# image data URIs) for inspection.
show_df

In [None]:
# Display the filtered molecule table for inspection.
main_df