### Merge files
### For Linux
```sh
paste -d "," data.csv encodings_1_0.csv > merged01.csv
paste -d "," data.csv encodings_2_0.csv > merged02.csv

```

In [None]:
import pathlib
import sys
import os
from bokeh.io import push_notebook, show, output_notebook, push_notebook
from bokeh.plotting import figure
from bokeh.plotting import ColumnDataSource

import pandas as pd
import numpy as np
#from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.Draw import rdDepictor
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.info')


def mol2svg(mol, width=200, height=100):
    """
    Generate an SVG representation of an RDKit molecule.

    Args:
        mol (rdkit.Chem.Mol): RDKit molecule object, or None (which is what
            Chem.MolFromSmiles returns for a SMILES string it cannot parse).
        width (int): Width of the drawing canvas in pixels.
        height (int): Height of the drawing canvas in pixels.

    Returns:
        str: SVG string of the molecule drawing, or an empty string when
        ``mol`` is None.
    """
    # A single unparsable SMILES must not abort rendering of a whole table:
    # hand back an empty image for it instead of failing inside RDKit.
    if mol is None:
        return ""
    # Computes 2D coordinates and stores them on ``mol`` itself.
    rdDepictor.Compute2DCoords(mol)
    d2d = rdMolDraw2D.MolDraw2DSVG(width, height)
    d2d.DrawMolecule(mol)
    d2d.FinishDrawing()
    return d2d.GetDrawingText()

# Absolute path of the directory the notebook is executed from.
notedir = pathlib.Path(".").resolve()
print(notedir)

# Send Bokeh output to the notebook's output cells.
output_notebook()

In [None]:
# Load the first merged table (data.csv pasted together with encodings_1_0.csv).
df = pd.read_csv("merged01.csv")
print(df.shape)
# The preview must be the last expression of the cell: the notebook only
# displays the value of a trailing expression, so placed before print() its
# result was silently discarded.
df.head(2)

In [None]:
# Keep only the rows that have a value in the `fp_0` column.
df_clean = df.loc[df["fp_0"].notna()]
print(df_clean.shape)

In [None]:
# Keep every column whose name starts with "fp_".
selected_colmns = df_clean.loc[:, df_clean.columns.str.startswith("fp_")]

In [None]:
# Preview the first two rows of the selected `fp_` columns.
selected_colmns.head(2)

In [None]:
# (rows, columns) of the selected `fp_` columns.
selected_colmns.shape

In [None]:
# Fit a 2-component PCA on the `fp_` columns and project every row onto it.
pca = PCA(n_components=2)
# `res` holds one row per input row and one column per principal component.
res = pca.fit_transform(selected_colmns)

In [None]:
# Shape of the PCA projection computed above.
res.shape

In [None]:
# One SVG drawing per entry of the "MOL_smiles" column, in row order.
molsvgs = [mol2svg(Chem.MolFromSmiles(smiles)) for smiles in df_clean["MOL_smiles"]]

# Columns for the scatter plot: the two PCA coordinates plus the values
# referenced by the hover template (row identifier and molecule drawing).
data = {
    "x": res[:, 0],
    "y": res[:, 1],
    "ids": list(df_clean.UNIQUE_ID.values),
    "img": molsvgs,
}

# Hover template; `{safe}` makes Bokeh insert the SVG markup unescaped so it
# is rendered as an image instead of being shown as text.
TOOLTIPS = """
<div>
ROW-ID: @ids<br>
<div>@img{safe}</div>
</div>
"""

In [None]:
# Wrap the plot columns so glyphs and tooltips can refer to them by name.
source = ColumnDataSource(data)

# 800x400 figure whose hover tool uses the HTML template defined above.
p = figure(tooltips=TOOLTIPS, width=800, height=400)

# One marker per row, positioned by the two PCA coordinates.
c = p.scatter("x", "y", size=5, source=source, alpha=0.6)

In [None]:
# Display the figure in the notebook and keep a handle to the displayed output.
handle = show(p, notebook_handle=True)
# Push the current state of the plot to the output referenced by `handle`.
push_notebook(handle=handle)

In [None]:
# Load the second merged table (data.csv pasted together with encodings_2_0.csv).
df2 = pd.read_csv("merged02.csv")
print(df2.shape)
# The preview must be the last expression of the cell: the notebook only
# displays the value of a trailing expression, so placed before print() its
# result was silently discarded.
df2.head(2)

In [None]:
# Keep only the rows that have a value in the `fp_0` column.
df_clean2 = df2.loc[df2["fp_0"].notna()]
print(df_clean2.shape)

# Keep every column whose name starts with "fp_".
selected_colmns2 = df_clean2.loc[:, df_clean2.columns.str.startswith("fp_")]

In [None]:
# Fit a separate 2-component PCA on the second table's `fp_` columns and
# project every row onto it.
pca2 = PCA(n_components=2)
# `res2` holds one row per input row and one column per principal component.
res2 = pca2.fit_transform(selected_colmns2)

In [None]:
# One SVG drawing per entry of the "MOL_smiles" column, in row order.
molsvgs2 = [mol2svg(Chem.MolFromSmiles(smiles)) for smiles in df_clean2["MOL_smiles"]]

# Columns for the scatter plot: the two PCA coordinates plus the values
# referenced by the hover template (row identifier and molecule drawing).
data2 = {
    "x": res2[:, 0],
    "y": res2[:, 1],
    "ids": list(df_clean2.UNIQUE_ID.values),
    "img": molsvgs2,
}

# Hover template; `{safe}` makes Bokeh insert the SVG markup unescaped so it
# is rendered as an image instead of being shown as text.
TOOLTIPS = """
<div>
ROW-ID: @ids<br>
<div>@img{safe}</div>
</div>
"""

In [None]:
# Wrap the plot columns so glyphs and tooltips can refer to them by name.
source2 = ColumnDataSource(data2)

# 800x400 figure whose hover tool uses the TOOLTIPS HTML template.
p2 = figure(tooltips=TOOLTIPS, width=800, height=400)

# One marker per row, positioned by the two PCA coordinates.
c2 = p2.scatter("x", "y", size=5, source=source2, alpha=0.6)

In [None]:
# Display the second figure in the notebook and keep a handle to its output.
handle2 = show(p2, notebook_handle=True)
# Push the current state of the plot to the output referenced by `handle2`.
push_notebook(handle=handle2)