# Imports

In [1]:
# Python
import numpy as np
import pandas as pd
import time

# Machine Learning
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

# RDKit
from rdkit.Chem import Descriptors
from rdkit.ML.Descriptors import MoleculeDescriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import AllChem
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.ML.Descriptors import MoleculeDescriptors
from rdkit.Chem.rdMolDescriptors import GetHashedMorganFingerprint
from rdkit.Chem.Draw import rdMolDraw2D

In [2]:
from rdkit.Chem.Draw import IPythonConsole
from ipywidgets import interact,fixed,IntSlider
def renderFpBit(mol,bitIdx,bitInfo,fn):
    """Draw a single fingerprint bit and show it in the notebook output.

    Parameters
    ----------
    mol : molecule object forwarded unchanged to ``fn``.
    bitIdx : index of the bit to render, forwarded unchanged to ``fn``.
    bitInfo : bit-information mapping, forwarded unchanged to ``fn``.
    fn : callable invoked as ``fn(mol, bitIdx, bitInfo)``; its result is
        what gets displayed.

    Returns
    -------
    Whatever ``display`` returns for the rendered object.
    """
    # ``display`` is the notebook-provided global; it is not imported here.
    rendered = fn(mol, bitIdx, bitInfo)
    return display(rendered)
def get_index_of_array_that_contain_1_in_any_position(list1):
    """Return the positions of all strictly positive entries of ``list1``.

    Parameters
    ----------
    list1 : iterable of values comparable with ``0``.

    Returns
    -------
    list of int
        Zero-based positions, in ascending order, where the value is ``> 0``.
        Empty when no entry is positive.
    """
    return [position for position, value in enumerate(list1) if value > 0]

# Example 1: Explaining the Concept of a Fingerprint

In [47]:
# Two four-carbon SMILES that differ only in where the triple bond sits.
mol1, mol2 = (Chem.MolFromSmiles(smiles) for smiles in ('CCC#C', 'CC#CC'))

# Each dictionary is handed to RDKit as ``bitInfo`` and is read back in the
# next cell to count the entries recorded per bit.
bit_info1, bit_info2 = {}, {}

# Same settings for both molecules: second positional argument 3,
# 16 hashed bits, feature-based invariants.
fp1 = GetHashedMorganFingerprint(
    mol1, 3, nBits=16, bitInfo=bit_info1, useFeatures=True
)
fp2 = GetHashedMorganFingerprint(
    mol2, 3, nBits=16, bitInfo=bit_info2, useFeatures=True
)

In [48]:
def _count_entries_per_bit(bit_info):
    """Return ``[bit, number_of_entries]`` pairs for every key of ``bit_info``.

    ``bit_info`` is a mapping from bit index to a sized collection; the pairs
    come out in the mapping's iteration order.
    """
    return [[bit, len(entries)] for bit, entries in bit_info.items()]


def _bit_count_frame(bit_counts):
    """Build the 'Bit Index' -> 'Number in Bit' table from count pairs.

    Naming the columns up front (instead of indexing column ``0`` and renaming
    afterwards) also keeps an empty ``bit_counts`` from raising ``KeyError``:
    the result is then simply an empty frame with the same layout.
    """
    frame = pd.DataFrame(bit_counts, columns=['Bit Index', 'Number in Bit'])
    return frame.set_index('Bit Index')


# Raw [bit, count] pairs for each molecule's bitInfo dictionary.
count_bit_info_mol1 = _count_entries_per_bit(bit_info1)
count_bit_info_mol2 = _count_entries_per_bit(bit_info2)

# One table per molecule, built by the same helper so both share a layout.
df1 = _bit_count_frame(count_bit_info_mol1)
df2 = _bit_count_frame(count_bit_info_mol2)

In [49]:
# Show the per-bit entry counts collected for mol1 ('CCC#C').
df1

Unnamed: 0_level_0,Number in Bit
Bit Index,Unnamed: 1_level_1
0,4
1,2
2,1
8,1
13,1


In [50]:
# Show the per-bit entry counts collected for mol2 ('CC#CC').
df2

Unnamed: 0_level_0,Number in Bit
Bit Index,Unnamed: 1_level_1
0,4
1,3
13,2


# Example 2: Morgan Fingerprint Steps

In [3]:
# Molecule used throughout Example 2 (note: rebinds ``mol1`` from Example 1).
mol1 = Chem.MolFromSmiles('CCCC(=O)N')

# 250 x 200 Cairo canvas with atom indices switched on before drawing.
d = rdMolDraw2D.MolDraw2DCairo(250, 200)
atom_index_opts = d.drawOptions()
atom_index_opts.addAtomIndices = True

# Draw, finalise the canvas, then write the picture to disk.
d.DrawMolecule(mol1)
d.FinishDrawing()
d.WriteDrawingText('atom_annotation_1.png')

In [4]:
# Evaluating the drawer only echoes its object repr; the picture itself was
# written to 'atom_annotation_1.png' by the previous cell.
d

<rdkit.Chem.Draw.rdMolDraw2D.MolDraw2DCairo at 0x23c3e3d0f40>

In [5]:
# Alternative example kept for reference (not executed): an aromatic ring
# with two dummy atoms given R1 / R2 display labels.
#smi = 'c1nc(*)ccc1* |$;;;R1;;;;R2$|'
#mol.GetAtomWithIdx(3).SetProp("_displayLabel","R<sub>1</sub>")
#mol.GetAtomWithIdx(7).SetProp("_displayLabel","R<sub>2</sub>")

# CXSMILES: one carbon between two dummy atoms, each dummy labelled "A".
smi = '*C* |$A;;A;$|'
mol = Chem.MolFromSmiles(smi)

# 250 x 250 Cairo canvas; PrepareAndDrawMolecule prepares and draws in one call.
d = rdMolDraw2D.MolDraw2DCairo(250, 250)
rdMolDraw2D.PrepareAndDrawMolecule(d,mol)
# Finalise the canvas before exporting, matching the atom-index drawing cell
# that calls FinishDrawing() ahead of WriteDrawingText().
d.FinishDrawing()
d.WriteDrawingText("EX2-pic1.png")

In [8]:
# CXSMILES: a three-carbon chain whose first atom and the two substituents on
# the last carbon are dummy atoms, each labelled "A".
smi2 = '*CCC(=*)* |$A;;;;A;A;$|'
mol2 = Chem.MolFromSmiles(smi2)

# 250 x 250 Cairo canvas; PrepareAndDrawMolecule prepares and draws in one call.
d2 = rdMolDraw2D.MolDraw2DCairo(250, 250)
rdMolDraw2D.PrepareAndDrawMolecule(d2,mol2)
# Finalise the canvas before exporting, matching the atom-index drawing cell
# that calls FinishDrawing() ahead of WriteDrawingText().
d2.FinishDrawing()
d2.WriteDrawingText("EX2-pic2.png")

In [9]:
# Same 'CCCC(=O)N' molecule as the atom-index picture, drawn without labels.
# NOTE(review): the label block holds 8 separators for a 6-atom SMILES and
# every label is empty, so it looks equivalent to the plain SMILES - confirm.
smi2 = 'CCCC(=O)N |$;;;;;;;;$|'
mol2 = Chem.MolFromSmiles(smi2)

# 250 x 250 Cairo canvas; PrepareAndDrawMolecule prepares and draws in one call.
d2 = rdMolDraw2D.MolDraw2DCairo(250, 250)
rdMolDraw2D.PrepareAndDrawMolecule(d2,mol2)
# Finalise the canvas before exporting, matching the atom-index drawing cell
# that calls FinishDrawing() ahead of WriteDrawingText().
d2.FinishDrawing()
d2.WriteDrawingText("EX2-pic3.png")