In [1]:
import pandas as pd
import rdkit
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors

In [2]:
# Read the "solvents" sheet of the workbook into a DataFrame and preview
# the first rows.
file_path = "solvents_manClean.xlsx"
df = pd.read_excel(
    file_path,
    sheet_name="solvents",
)
df.head()

Unnamed: 0,Solvent_A,SMILES_solvent,Name_in_other_dataset,Data_source_for_SMILES_name,Comment
0,hydrogenated tetrapropylene,CCCC(C)CC(C)CC(C)C,hydrogenated tetrapropene; TPH (hydrogenated t...,PubChem,"search for 2,4,6-Trimethylnonane in PubChem. ..."
1,chloroform,C(Cl)(Cl)Cl,Chloroform,PubChem,
2,1-dodecanol,CCCCCCCCCCCCO,,PubChem,
3,n-octane,CCCCCCCC,,PubChem,
4,toluene,CC1=CC=CC=C1,Toluene,PubChem,


The log P can be calculated with the RDKit package via `rdkit.Chem.rdMolDescriptors.CalcCrippenDescriptors(mol)[0]`. Documentation: https://www.rdkit.org/docs/source/rdkit.Chem.rdMolDescriptors.html#rdkit.Chem.rdMolDescriptors.CalcCrippenDescriptors . `CalcCrippenDescriptors` returns the Wildman-Crippen partition coefficient (log P) and the molar refractivity (MR); index `[0]` selects the log P. The method comes from a paper published in 1999 (https://doi.org/10.1021/ci990307l).

In [3]:
# Calculate average molecular weight of the molecule
def cal_MolWt(smile):
    """Return the average molecular weight of the molecule described by a SMILES.

    Parameters
    ----------
    smile : str
        SMILES string of a single molecule. Any non-string value (for
        example a missing value such as NaN or None) is treated as
        "no molecule".

    Returns
    -------
    float or None
        ``Descriptors.MolWt`` of the parsed molecule, or ``None`` when
        ``smile`` is not a string or RDKit cannot parse it.
    """
    # Missing cells are not strings; skip them explicitly instead of relying
    # on RDKit raising and a catch-all ``except`` hiding the error.
    if not isinstance(smile, str):
        return None

    mol = Chem.MolFromSmiles(smile)
    # MolFromSmiles reports a parse failure by returning None (and logging),
    # not by raising, so the failure has to be checked here.
    if mol is None:
        return None

    return Descriptors.MolWt(mol)

# Calculate LogP
def cal_logP(smile):
    """Return the Crippen log P of the molecule described by a SMILES.

    Parameters
    ----------
    smile : str
        SMILES string of a single molecule. Any non-string value (for
        example a missing value such as NaN or None) is treated as
        "no molecule".

    Returns
    -------
    float or None
        First element of ``rdMolDescriptors.CalcCrippenDescriptors`` for the
        parsed molecule, or ``None`` when ``smile`` is not a string or RDKit
        cannot parse it.
    """
    # Missing cells are not strings; skip them explicitly instead of relying
    # on RDKit raising and a catch-all ``except`` hiding the error.
    if not isinstance(smile, str):
        return None

    mol = Chem.MolFromSmiles(smile)
    # MolFromSmiles reports a parse failure by returning None (and logging),
    # not by raising, so the failure has to be checked here.
    if mol is None:
        return None

    # Index 0 of the returned pair is the log P.
    return rdMolDescriptors.CalcCrippenDescriptors(mol)[0]


# Derive both descriptor columns from the SMILES column, one row at a time.
# Entries RDKit cannot parse (it logs a "SMILES Parse Error" for them) come
# back from the helpers as None.
smiles_column = df['SMILES_solvent']
for column_name, descriptor in (('Molar_mass', cal_MolWt), ('log_P', cal_logP)):
    df[column_name] = smiles_column.apply(descriptor)
df.head()

[15:41:17] SMILES Parse Error: syntax error while parsing: CCCCCCCCC,
[15:41:17] SMILES Parse Error: Failed parsing SMILES 'CCCCCCCCC,' for input: 'CCCCCCCCC,'
[15:41:17] SMILES Parse Error: syntax error while parsing: CCCCCCCCCS(=O)(=O)[O-].[Na+],
[15:41:17] SMILES Parse Error: Failed parsing SMILES 'CCCCCCCCCS(=O)(=O)[O-].[Na+],' for input: 'CCCCCCCCCS(=O)(=O)[O-].[Na+],'
[15:41:17] SMILES Parse Error: syntax error while parsing: C1=CC=C2C=CC=CC2=C1,
[15:41:17] SMILES Parse Error: Failed parsing SMILES 'C1=CC=C2C=CC=CC2=C1,' for input: 'C1=CC=C2C=CC=CC2=C1,'
[15:41:17] SMILES Parse Error: syntax error while parsing: CCCCCCCCC(C)C,
[15:41:17] SMILES Parse Error: Failed parsing SMILES 'CCCCCCCCC(C)C,' for input: 'CCCCCCCCC(C)C,'
[15:41:17] SMILES Parse Error: syntax error while parsing: CCCCCCCCC,
[15:41:17] SMILES Parse Error: Failed parsing SMILES 'CCCCCCCCC,' for input: 'CCCCCCCCC,'
[15:41:17] SMILES Parse Error: syntax error while parsing: CCCCCCCCCS(=O)(=O)[O-].[Na+],
[15:41:17] S

Unnamed: 0,Solvent_A,SMILES_solvent,Name_in_other_dataset,Data_source_for_SMILES_name,Comment,Molar_mass,log_P
0,hydrogenated tetrapropylene,CCCC(C)CC(C)CC(C)C,hydrogenated tetrapropene; TPH (hydrogenated t...,PubChem,"search for 2,4,6-Trimethylnonane in PubChem. ...",170.34,4.4949
1,chloroform,C(Cl)(Cl)Cl,Chloroform,PubChem,,119.378,1.9864
2,1-dodecanol,CCCCCCCCCCCCO,,PubChem,,186.339,3.8996
3,n-octane,CCCCCCCC,,PubChem,,114.232,3.3668
4,toluene,CC1=CC=CC=C1,Toluene,PubChem,,92.141,1.99502


In [4]:
# Write the table to a new workbook; index=False leaves the DataFrame index
# out of the file.
output_path = 'output_solvents_rdkit.xlsx'
df.to_excel(output_path, index=False)

In [5]:
# For testing

# For Exxal 13
# Chem, Descriptors and rdMolDescriptors are already imported in the first
# cell of the notebook, so they are not imported again here.
mol = Chem.MolFromSmiles('CC(C)CCCCCCCCCCO')

# Calculate average molecular weight of the molecule
mol_weight = Descriptors.MolWt(mol)

# Calculate LogP (partition coefficient); index 0 of the Crippen pair is log P
logP = rdMolDescriptors.CalcCrippenDescriptors(mol)[0]

print(f"Molecular Weight: {mol_weight}")
print(f"LogP: {logP}")


Molecular Weight: 200.36599999999996
LogP: 4.145600000000004


In [6]:
# For kerosene
# Kerosene is represented by a list of straight-chain alkane SMILES; the
# reported values are the unweighted arithmetic means over those components.
kerosene_smiles = [
    'CCCCCCCCC',
    'CCCCCCCCCC',
    'CCCCCCCCCCC',
    'CCCCCCCCCCCC',
    'CCCCCCCCCCCCC',
    'CCCCCCCCCCCCCC',
    'CCCCCCCCCCCCCCC',
    'CCCCCCCCCCCCCCCC',
]

# Accumulate strictly left to right (rather than with sum()) so the
# floating-point result equals a plain a + b + c + ... chain.
mol_weight_total = 0.0
logP_total = 0.0
for component_smiles in kerosene_smiles:
    component = Chem.MolFromSmiles(component_smiles)
    # Average molecular weight of this component
    mol_weight_total += Descriptors.MolWt(component)
    # LogP (partition coefficient) of this component
    logP_total += rdMolDescriptors.CalcCrippenDescriptors(component)[0]

mol_weight = mol_weight_total / len(kerosene_smiles)
logP = logP_total / len(kerosene_smiles)

print(f"Molecular Weight: {mol_weight}")
print(f"LogP: {logP}")

Molecular Weight: 177.35349999999994
LogP: 5.122250000000005


In [7]:
# for sulfonated kerosene
# Sulfonated kerosene is represented by a list of sodium alkyl sulfonate
# SMILES; the reported values are the unweighted arithmetic means over those
# components.
sulfonated_kerosene_smiles = [
    'CCCCCCCCCS(=O)(=O)[O-].[Na+]',
    'CCCCCCCCCCS(=O)(=O)[O-].[Na+]',
    'CCCCCCCCCCCS(=O)(=O)[O-].[Na+]',
    'CCCCCCCCCCCCS(=O)(=O)[O-].[Na+]',
    'CCCCCCCCCCCCCS(=O)(=O)[O-].[Na+]',
    'CCCCCCCCCCCCCCS(=O)(=O)[O-].[Na+]',
    'CCCCCCCCCCCCCCCS(=O)(=O)[O-].[Na+]',
    'CCCCCCCCCCCCCCCCS(=O)(=O)[O-].[Na+]',
]

# Accumulate strictly left to right (rather than with sum()) so the
# floating-point result equals a plain a + b + c + ... chain.
mol_weight_total = 0.0
logP_total = 0.0
for component_smiles in sulfonated_kerosene_smiles:
    component = Chem.MolFromSmiles(component_smiles)
    # Average molecular weight of this component
    mol_weight_total += Descriptors.MolWt(component)
    # LogP (partition coefficient) of this component
    logP_total += rdMolDescriptors.CalcCrippenDescriptors(component)[0]

mol_weight = mol_weight_total / len(sulfonated_kerosene_smiles)
logP = logP_total / len(sulfonated_kerosene_smiles)

print(f"Molecular Weight: {mol_weight}")
print(f"LogP: {logP}")

Molecular Weight: 279.3995
LogP: 0.6515500000000007


In [8]:
# SSD70
# SSD70 is represented by one fused-ring aromatic SMILES plus four
# straight-chain alkane SMILES; the reported values are the unweighted
# arithmetic means over those five components.
ssd70_smiles = [
    'C1=CC=C2C=CC=CC2=C1',
    'CCCCCCCCCCC',
    'CCCCCCCCCCCC',
    'CCCCCCCCCCCCC',
    'CCCCCCCCCCCCCC',
]

# Accumulate strictly left to right (rather than with sum()) so the
# floating-point result equals a plain a + b + c + ... chain.
mol_weight_total = 0.0
logP_total = 0.0
for component_smiles in ssd70_smiles:
    component = Chem.MolFromSmiles(component_smiles)
    # Average molecular weight of this component
    mol_weight_total += Descriptors.MolWt(component)
    # LogP (partition coefficient) of this component
    logP_total += rdMolDescriptors.CalcCrippenDescriptors(component)[0]

mol_weight = mol_weight_total / len(ssd70_smiles)
logP = logP_total / len(ssd70_smiles)

print(f"Molecular Weight: {mol_weight}")
print(f"LogP: {logP}")

Molecular Weight: 167.5176
LogP: 4.665760000000004


In [9]:
# Isopar L
# Isopar L is represented by three methyl-branched alkane SMILES; the
# reported values are the unweighted arithmetic means over those components.
isopar_l_smiles = [
    'CCCCCCCCC(C)C',
    'CCCCCCCCCC(C)C',
    'CCCCCCCCCCC(C)C',
]

# Accumulate strictly left to right (rather than with sum()) so the
# floating-point result equals a plain a + b + c chain.
mol_weight_total = 0.0
logP_total = 0.0
for component_smiles in isopar_l_smiles:
    component = Chem.MolFromSmiles(component_smiles)
    # Average molecular weight of this component
    mol_weight_total += Descriptors.MolWt(component)
    # LogP (partition coefficient) of this component
    logP_total += rdMolDescriptors.CalcCrippenDescriptors(component)[0]

mol_weight = mol_weight_total / len(isopar_l_smiles)
logP = logP_total / len(isopar_l_smiles)

print(f"Molecular Weight: {mol_weight}")
print(f"LogP: {logP}")

Molecular Weight: 170.34
LogP: 4.783100000000005
