In [11]:
from rdkit import Chem
from rdkit.Chem import Fragments
from rdkit.Chem import Draw
from rdkit.Chem import Descriptors
import pandas as pd

The descriptors should describe the molecule as well as possible. Because we are looking for the Rf value of a compound in a certain solvent ratio, it seems rational to find descriptors that are related to the solubility and polarity of a molecule. 
The overall polarity depends on the polarity of single functional groups but also on the symmetry of a molecule. As a start, the number of some polarity-influencing functional groups seems like a good descriptor.

In [3]:

def polar_groups(Dataframe: pd.DataFrame):
    """Add counts of strongly polar functional groups as new columns.

    For every row, the SMILES string in the 'productSmiles' column is parsed
    with RDKit and the number of matches of each fragment is stored in the
    columns 'Isocyanates', 'Sulfones', 'Nitriles', 'Nitro_groups', 'Halogens'
    and 'Amides'.

    Args:
        Dataframe: Table with a 'productSmiles' column of SMILES strings.

    Returns:
        The same DataFrame object, modified in place with the new columns.

    Note:
        SMILES strings that RDKit cannot parse are not handled here.
    """
    # Column name -> name of the counting function in rdkit.Chem.Fragments.
    fragment_counters = {
        'Isocyanates': 'fr_isocyan',
        'Sulfones': 'fr_sulfone',
        'Nitriles': 'fr_nitrile',
        # Nitro groups get their own column instead of overwriting 'Isocyanates'.
        'Nitro_groups': 'fr_nitro',
        'Halogens': 'fr_halogen',
        'Amides': 'fr_amide',
    }

    # Parse every SMILES only once and reuse the molecules for all counts.
    molecules = Dataframe['productSmiles'].apply(lambda smiles: Chem.MolFromSmiles(smiles))

    for column, counter_name in fragment_counters.items():
        counter = getattr(Chem.Fragments, counter_name)
        Dataframe[column] = molecules.apply(lambda mol: counter(mol))

    return Dataframe

In [4]:
def low_polar_groups(Dataframe: pd.DataFrame):
    """Add counts of further heteroatom functional groups as new columns.

    For every row, the SMILES string in the 'productSmiles' column is parsed
    with RDKit and fragment counts are stored in the columns
    'Hydroxyl_groups', 'Carboxylic_acids', 'Thiols', 'Aldehydes', 'Ketones',
    'Esters', 'Secondary_amines' and 'Primary_amines'.

    Args:
        Dataframe: Table with a 'productSmiles' column of SMILES strings.

    Returns:
        The same DataFrame object, modified in place with the new columns.

    Note:
        SMILES strings that RDKit cannot parse are not handled here.
    """
    # Column name -> names of the rdkit.Chem.Fragments counters that are summed.
    fragment_counters = {
        # RDKit has no single hydroxyl counter; aliphatic and aromatic OH are
        # counted separately and added up. The result goes into its own column
        # instead of overwriting 'Halogens'.
        'Hydroxyl_groups': ('fr_Al_OH', 'fr_Ar_OH'),
        'Carboxylic_acids': ('fr_COO2',),
        'Thiols': ('fr_SH',),
        'Aldehydes': ('fr_aldehyde',),
        'Ketones': ('fr_ketone',),
        'Esters': ('fr_ester',),
        # In RDKit fr_NH1 counts secondary amines and fr_NH2 primary amines.
        'Secondary_amines': ('fr_NH1',),
        'Primary_amines': ('fr_NH2',),
    }

    # Parse every SMILES only once and reuse the molecules for all counts.
    molecules = Dataframe['productSmiles'].apply(lambda smiles: Chem.MolFromSmiles(smiles))

    for column, counter_names in fragment_counters.items():
        counters = [getattr(Chem.Fragments, name) for name in counter_names]
        Dataframe[column] = molecules.apply(
            lambda mol: sum(counter(mol) for counter in counters)
        )

    return Dataframe

In [5]:
def unpolar_groups(Dataframe: pd.DataFrame):
    """Add counts of weakly polar / non-polar structural features as columns.

    For every row, the SMILES string in the 'productSmiles' column is parsed
    with RDKit and fragment counts are stored in the columns
    'Unbranched_alkanes', 'Benzenes', 'Ethers', 'Tertiary_amines' and
    'Imines'.

    Args:
        Dataframe: Table with a 'productSmiles' column of SMILES strings.

    Returns:
        The same DataFrame object, modified in place with the new columns.

    Note:
        SMILES strings that RDKit cannot parse are not handled here.
    """
    # Column name -> name of the counting function in rdkit.Chem.Fragments.
    fragment_counters = {
        'Unbranched_alkanes': 'fr_unbrch_alkane',
        'Benzenes': 'fr_benzene',
        'Ethers': 'fr_ether',
        'Tertiary_amines': 'fr_NH0',
        # The RDKit function is spelled with a capital I.
        'Imines': 'fr_Imine',
    }

    # Parse every SMILES only once and reuse the molecules for all counts.
    molecules = Dataframe['productSmiles'].apply(lambda smiles: Chem.MolFromSmiles(smiles))

    for column, counter_name in fragment_counters.items():
        counter = getattr(Chem.Fragments, counter_name)
        Dataframe[column] = molecules.apply(lambda mol: counter(mol))

    return Dataframe


Moreover, it is possible to use some predicted physical data such as logP or the topological polar surface area (TPSA).

In [6]:
def physcial_desc(Dataframe: pd.DataFrame):
    """Add predicted physical descriptors of each molecule as new columns.

    For every row, the SMILES string in the 'productSmiles' column is parsed
    with RDKit and the following columns are written:

    * 'Molecular_weight' - ``Descriptors.MolWt``
    * 'LogP' - ``Descriptors.MolLogP``
    * 'TPSA' - ``Descriptors.TPSA`` with default settings
    * 'TPSA_including_S_and_P' - ``Descriptors.TPSA`` with ``includeSandP=True``

    Args:
        Dataframe: Table with a 'productSmiles' column of SMILES strings.

    Returns:
        The same DataFrame object, modified in place with the new columns.

    Note:
        SMILES strings that RDKit cannot parse are not handled here.
    """
    # Parse every SMILES only once and reuse the molecules for all descriptors.
    molecules = Dataframe['productSmiles'].apply(lambda smiles: Chem.MolFromSmiles(smiles))

    # molecular weight
    Dataframe['Molecular_weight'] = molecules.apply(lambda mol: Chem.Descriptors.MolWt(mol))
    # logP
    Dataframe['LogP'] = molecules.apply(lambda mol: Chem.Descriptors.MolLogP(mol))
    # topological polar surface area
    Dataframe['TPSA'] = molecules.apply(lambda mol: Chem.Descriptors.TPSA(mol))
    # topological polar surface area (include S and P); the flag belongs to
    # the TPSA calculation, not to the SMILES parser
    Dataframe['TPSA_including_S_and_P'] = molecules.apply(
        lambda mol: Chem.Descriptors.TPSA(mol, includeSandP=True)
    )

    return Dataframe

Some solvents like water, methanol, ethanol, and carboxylic acids are able to form H-bonds with 
the compounds, which improves their solubility and changes their polarity. Therefore it would be interesting to know whether the molecule is able to form H-bonds. We can test this by checking whether it has any H-bond acceptors or H-bond donors.

In [8]:
def H_Bonds(Dataframe: pd.DataFrame):
    """Add hydrogen-bond donor and acceptor counts as new columns.

    Each SMILES string in the 'productSmiles' column is parsed with RDKit;
    the result of ``Descriptors.NumHDonors`` is written to 'HBD' and the
    result of ``Descriptors.NumHAcceptors`` to 'HBA'.

    Args:
        Dataframe: Table with a 'productSmiles' column of SMILES strings.

    Returns:
        The same DataFrame object, modified in place with the new columns.
    """

    def count_donors(smiles):
        # number of hydrogen bond donors
        return Chem.Descriptors.NumHDonors(Chem.MolFromSmiles(smiles))

    def count_acceptors(smiles):
        # number of hydrogen bond acceptors
        return Chem.Descriptors.NumHAcceptors(Chem.MolFromSmiles(smiles))

    Dataframe['HBD'] = Dataframe['productSmiles'].apply(count_donors)
    Dataframe['HBA'] = Dataframe['productSmiles'].apply(count_acceptors)

    return Dataframe

Or we just use every descriptor rdkit offers:

In [115]:

def all_descriptors(Dataframe: pd.DataFrame, smiles_column: str = 'Smiles') -> pd.DataFrame:
    """Append every descriptor RDKit can calculate as additional columns.

    For each row the SMILES string is parsed and passed to
    ``Descriptors.CalcMolDescriptors``; each key of the mapping it returns
    becomes one column.

    Args:
        Dataframe: Table holding the SMILES strings.
        smiles_column: Name of the column with the SMILES strings. Defaults
            to 'Smiles'.
            NOTE(review): the other helpers in this notebook read
            'productSmiles' - confirm which column callers provide.

    Returns:
        A new DataFrame with the original columns followed by the descriptor
        columns. The input DataFrame is left unchanged.

    Note:
        SMILES strings that RDKit cannot parse are not handled here.
    """
    # Collect one descriptor mapping per molecule and build the table once,
    # instead of growing a DataFrame with concat inside the loop.
    records = [
        Chem.Descriptors.CalcMolDescriptors(Chem.MolFromSmiles(smiles))
        for smiles in Dataframe[smiles_column]
    ]

    # Reuse the input index so rows line up even when it is not 0..n-1.
    descriptors = pd.DataFrame(records, index=Dataframe.index)

    return pd.concat([Dataframe, descriptors], axis=1)
