# Description
### (April 26 2020)
* In this notebook, we use the [RDKit](https://www.rdkit.org/docs/) to calculate the similarity between two molecules from their SMILES representations.

In [1]:
# Imports
from rdkit import Chem,DataStructs
import time
import pandas as pd

In [2]:
# Example 1 - Using SMILES
mol1 = Chem.MolFromSmiles("CC(C)C=CCCCCC(=O)NCc1ccc(c(c1)OC)O")
mol2 = Chem.MolFromSmiles("COC1=C(C=CC(=C1)C=O)O")
fp1 = Chem.RDKFingerprint(mol1)
fp2 = Chem.RDKFingerprint(mol2)
start_time = time.time()
print("RDK fingerprint: ",DataStructs.TanimotoSimilarity(fp1,fp2))
print("Total elapsed time to make calculation: {} seconds".format(time.time() - start_time))

RDK fingerprint:  0.4268867924528302
Total elapsed time to make calculation: 0.002062082290649414 seconds


In [3]:
# Example 2 - Using InChI
mol3 = Chem.MolFromInchi("InChI=1S/C8H8O3/c1-11-8-3-2-6(5-9)4-7(8)10/h2-5,10H,1H3") # Isovanillin
mol4 = Chem.MolFromInchi("InChI=1S/C8H8O3/c1-11-8-4-6(5-9)2-3-7(8)10/h2-5,10H,1H3") # Vanillin
fp3 = Chem.RDKFingerprint(mol3)
fp4 = Chem.RDKFingerprint(mol4)
start_time = time.time()
print("RDK fingerprint: ",DataStructs.TanimotoSimilarity(fp3,fp4))
print("Total elapsed time to make calculation: {} seconds".format(time.time() - start_time))

RDK fingerprint:  0.8777292576419214
Total elapsed time to make calculation: 0.0013492107391357422 seconds


In [4]:
# Example 3 - Is Tanimoto Similarity the same as above if using SMILES instead?
mol5 = Chem.MolFromSmiles("COC1=C(C=C(C=C1)C=O)O") # Isovanillin
mol6 = Chem.MolFromSmiles("COC1=C(C=CC(=C1)C=O)O") # Vanillin
fp5 = Chem.RDKFingerprint(mol5)
fp6 = Chem.RDKFingerprint(mol6)
start_time = time.time()
print("RDK fingerprint: ",DataStructs.TanimotoSimilarity(fp5,fp6))
print("Total elapsed time to make calculation: {} seconds".format(time.time() - start_time))

RDK fingerprint:  0.8777292576419214
Total elapsed time to make calculation: 0.0014407634735107422 seconds
