In [17]:
from rdkit import Chem
from rdkit.Chem import ChemicalFeatures
from rdkit import RDConfig
from rdkit.Chem.Pharm2D.SigFactory import SigFactory
from rdkit.Chem.Pharm2D import Generate
from rdkit import DataStructs
import os 
import numpy as np


fdefName = os.path.join(RDConfig.RDDataDir,'BaseFeatures.fdef')
featFactory = ChemicalFeatures.BuildFeatureFactory(fdefName)

In [6]:
sigFactory = SigFactory(featFactory,minPointCount=2,maxPointCount=3)
sigFactory.SetBins([(0,2),(2,5),(5,8)])
sigFactory.Init()
sigFactory.GetSigSize()

2988

In [8]:
mol = Chem.MolFromSmiles('OCC(=O)CCCN')
fp = Generate.Gen2DFingerprint(mol,sigFactory)
print(fp)
print(len(fp))
print(fp.GetNumOnBits())

<rdkit.DataStructs.cDataStructs.SparseBitVect object at 0x0000023B33A9A2F0>
2988
23


In [9]:
list(fp.GetOnBits())

[1,
 7,
 8,
 10,
 20,
 47,
 49,
 59,
 63,
 73,
 171,
 191,
 267,
 488,
 503,
 511,
 579,
 622,
 698,
 1687,
 1798,
 1874,
 2223]

In [13]:
sigFactory.GetBitDescription(7)

'Acceptor Donor |0 1|1 0|'

In [14]:
# Generate fingerprints for other molecules
molecules = Chem.SmilesMolSupplier('./data/clique/molecules.smi',delimiter='\t',titleLine=False)
print(f"{len(molecules)} molecules")

fps = []
for mol in molecules:
    fp = Generate.Gen2DFingerprint(mol,sigFactory)
    fps.append(fp)
    
fps

5 molecules


[<rdkit.DataStructs.cDataStructs.SparseBitVect at 0x23b33ab1bb0>,
 <rdkit.DataStructs.cDataStructs.SparseBitVect at 0x23b33740b70>,
 <rdkit.DataStructs.cDataStructs.SparseBitVect at 0x23b33a895b0>,
 <rdkit.DataStructs.cDataStructs.SparseBitVect at 0x23b337394f0>,
 <rdkit.DataStructs.cDataStructs.SparseBitVect at 0x23b33a14bf0>]

In [19]:
output = []
for fp in fps:
    fp_arr = np.zeros((1,))
    DataStructs.ConvertToNumpyArray(fp, fp_arr)
    output.append(fp_arr)
x = np.asarray(output)

ArgumentError: Python argument types in
    rdkit.DataStructs.cDataStructs.ConvertToNumpyArray(SparseBitVect, numpy.ndarray)
did not match C++ signature:
    ConvertToNumpyArray(class RDKit::SparseIntVect<unsigned __int64> bv, class boost::python::api::object destArray)
    ConvertToNumpyArray(class RDKit::SparseIntVect<unsigned int> bv, class boost::python::api::object destArray)
    ConvertToNumpyArray(class RDKit::SparseIntVect<__int64> bv, class boost::python::api::object destArray)
    ConvertToNumpyArray(class RDKit::SparseIntVect<int> bv, class boost::python::api::object destArray)
    ConvertToNumpyArray(class RDKit::DiscreteValueVect bv, class boost::python::api::object destArray)
    ConvertToNumpyArray(class ExplicitBitVect bv, class boost::python::api::object destArray)