In [1]:
from skfp.fingerprints import AtomPairFingerprint

smiles_list = ["O=S(=O)(O)CCS(=O)(=O)O", "O=C(O)c1ccccc1O"]

atom_pair_fingerprint = AtomPairFingerprint()

X = atom_pair_fingerprint.transform(smiles_list)
print(X)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [2]:
from skfp.preprocessing import ConformerGenerator, MolFromSmilesTransformer
from skfp.fingerprints import WHIMFingerprint

smiles_list = ["O=S(=O)(O)CCS(=O)(=O)O", "O=C(O)c1ccccc1O"]

mol_from_smiles = MolFromSmilesTransformer()
conf_gen = ConformerGenerator()
fp = WHIMFingerprint()
print(fp.requires_conformers)  # True

mols_list = mol_from_smiles.transform(smiles_list)
mols_list = conf_gen.transform(mols_list)

X = fp.transform(mols_list)
print(X)


True
[[4.3190e+00 1.1930e+00 6.3200e-01 7.0300e-01 1.9400e-01 2.0000e-01
  2.0000e-01 2.0000e-01 5.8300e-01 5.5600e-01 3.3000e-01 5.2330e+00
  6.6900e-01 5.3100e-01 8.1300e-01 1.0400e-01 2.0000e-01 2.0000e-01
  2.0000e-01 9.2700e-01 1.2300e-01 2.2800e-01 4.4720e+00 8.5600e-01
  5.4000e-01 7.6200e-01 1.4600e-01 2.0000e-01 2.0000e-01 2.0000e-01
  6.5800e-01 2.3500e-01 2.5100e-01 4.6300e+00 1.1790e+00 6.7000e-01
  7.1500e-01 1.8200e-01 2.0000e-01 2.0000e-01 2.0500e-01 6.7900e-01
  5.3900e-01 3.7800e-01 3.9780e+00 8.2700e-01 4.2700e-01 7.6000e-01
  1.5800e-01 2.0000e-01 2.0000e-01 2.0000e-01 5.1100e-01 2.4600e-01
  1.5800e-01 4.4130e+00 1.2420e+00 6.6500e-01 6.9800e-01 1.9700e-01
  2.0000e-01 2.0000e-01 2.0000e-01 6.0700e-01 6.0000e-01 3.6400e-01
  6.0720e+00 1.1170e+00 8.9100e-01 7.5200e-01 1.3800e-01 2.0000e-01
  2.0000e-01 2.0000e-01 1.2410e+00 3.4500e-01 6.7200e-01 6.1450e+00
  6.4330e+00 5.8680e+00 6.4790e+00 5.2320e+00 6.3210e+00 8.0790e+00
  8.6400e+00 6.6380e+00 6.7050e+00 9.3490e+



In [5]:
from skfp.datasets.moleculenet import load_clintox
from skfp.metrics import multioutput_auroc_score
from skfp.model_selection import scaffold_train_test_split
from skfp.fingerprints import ECFPFingerprint, MACCSFingerprint
from skfp.preprocessing import MolFromSmilesTransformer

from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline, make_union

smiles, y = load_clintox()
smiles_train, smiles_test, y_train, y_test = scaffold_train_test_split(
    smiles, y, test_size=0.2
)

pipeline = make_pipeline(
    MolFromSmilesTransformer(),
    make_union(ECFPFingerprint(count=True), MACCSFingerprint()),
    RandomForestClassifier(random_state=0),
)
pipeline.fit(smiles_train, y_train)

y_pred_proba = pipeline.predict_proba(smiles_test)
auroc = multioutput_auroc_score(y_test, y_pred_proba)
print(f"AUROC: {auroc:.2%}")

AUROC: 49.03%
