# Import the necessary libraries

In [1]:
from rdkit import Chem
from rdkit.Chem import PandasTools
import pandas as pd

# Define some utility functions

In [2]:
def extractMoleNames(sdf_file_path):
    """Collect the ``_Name`` property of every molecule in an SDF file.

    Parameters
    ----------
    sdf_file_path : str
        Path handed to ``Chem.SDMolSupplier``.

    Returns
    -------
    list
        The ``_Name`` values in the order the supplier yields them. Entries
        the supplier yields as ``None`` are skipped, so the list can be
        shorter than the number of records in the file.
    """
    return [
        record.GetProp('_Name')
        for record in Chem.SDMolSupplier(sdf_file_path)
        if record is not None
    ]

def extractBindingEnergyAndRmsd(sdf_file_path):
    """Format the second and third CSV columns as ``"<a> (<b>)"`` strings.

    Parameters
    ----------
    sdf_file_path : str
        Path read with ``pd.read_csv``. Despite the parameter name, the file
        is parsed as comma-separated text, not as an SDF.

    Returns
    -------
    list of str
        One entry per row, built as ``f"{a:.3f} ({b:.3f})"`` where ``a`` and
        ``b`` come from the columns at positions 1 and 2. Presumably these are
        the binding energy and the RMSD (going by the function name) — the
        columns are picked by position, not by header.
    """
    table = pd.read_csv(sdf_file_path)
    first_col = table[table.columns[1]]
    second_col = table[table.columns[2]]
    return [
        f"{first:.3f} ({second:.3f})"
        for first, second in zip(first_col, second_col)
    ]

# Reading data for pfLDH

In [3]:
# Load the pfLDH docking table, attach the formatted energy/RMSD label and the
# molecule names from the matching SDF, then keep only the columns needed later.
# NOTE(review): names are attached positionally — assumes the SDF records are in
# the same order as the rows of the .txt file; TODO confirm.
df_pfLDH = (
    pd.read_csv("best_test_molecules_docking_result (pfldh).txt")
    .assign(**{
        "Free Binding Energy": extractBindingEnergyAndRmsd("best_test_molecules_docking_result (pfldh).txt"),
        "mol": extractMoleNames("best_test_molecules_docking_result (pfldh).sdf"),
    })
    .rename(columns={"Free Binding Energy": "pfLDH Free Binding Energy, kcal/mol (RMSD, Å)"})
    .loc[:, ["mol", "pfLDH Free Binding Energy, kcal/mol (RMSD, Å)"]]
)

# Persist the trimmed table next to the inputs (the index is written as well).
df_pfLDH.to_csv("best_test_molecules_docking_result (pfldh).csv")

df_pfLDH

Unnamed: 0,mol,"pfLDH Free Binding Energy, kcal/mol (RMSD, Å)"
0,Emicymarin,-8.303 (1.841)
1,Di-n-octyl phthalate,-7.907 (2.293)
2,Strophanthin K,-7.700 (3.670)
3,"11-Eicosenoic acid, trimethylsilyl ester",-7.389 (1.992)
4,"Hexanoic acid, octadecyl ester",-7.258 (1.790)
5,Heneicosyl pentafluoropropionate,-7.160 (2.093)
6,"1,2-Benzenedicarboxylic acid, butyl 2-ethylhexy",-7.072 (0.689)
7,Octadecanoic acid,-6.914 (2.333)
8,"9-Octadecenoic acid (Z)-, methyl ester",-6.798 (1.403)
9,n-Hexadecanoic acid,-6.626 (1.735)


# Reading data for COX-2

In [4]:
# Load the COX-2 docking table, attach the formatted energy/RMSD label and the
# molecule names from the matching SDF, then keep only the columns needed later.
# NOTE(review): names are attached positionally — assumes the SDF records are in
# the same order as the rows of the .txt file; TODO confirm.
df_COX_2 = (
    pd.read_csv("best_test_molecules_docking_result (cox-2).txt")
    .assign(**{
        "Free Binding Energy": extractBindingEnergyAndRmsd("best_test_molecules_docking_result (cox-2).txt"),
        "mol": extractMoleNames("best_test_molecules_docking_result (cox-2).sdf"),
    })
    .rename(columns={"Free Binding Energy": "COX-2 Free Binding Energy, kcal/mol (RMSD, Å)"})
    .loc[:, ["mol", "COX-2 Free Binding Energy, kcal/mol (RMSD, Å)"]]
)

# Persist the trimmed table next to the inputs (the index is written as well).
df_COX_2.to_csv("best_test_molecules_docking_result (cox-2).csv")
df_COX_2

Unnamed: 0,mol,"COX-2 Free Binding Energy, kcal/mol (RMSD, Å)"
0,"1,2-Benzenedicarboxylic acid, butyl 2-ethylhexy",-8.393 (2.048)
1,Di-n-octyl phthalate,-8.299 (2.461)
2,"9-Octadecenoic acid (Z)-, methyl ester",-8.245 (1.110)
3,"11-Eicosenoic acid, trimethylsilyl ester",-8.182 (2.432)
4,Octadecanoic acid,-8.122 (2.740)
5,"2-[2-(2-butoxyethoxy)ethoxy]ethyl 2,2,3,3,3-pe...",-8.025 (1.527)
6,n-Hexadecanoic acid,-7.748 (1.677)
7,E-11-Tetradecenoic acid,-7.732 (1.199)
8,Heneicosyl pentafluoropropionate,-6.750 (1.497)
9,n-Decanoic acid,-6.666 (1.409)


# Reading data for PLA2

In [5]:
# Load the PLA2 docking table, attach the formatted energy/RMSD label and the
# molecule names from the matching SDF. Unlike the other two targets, this
# table also keeps the "lip_violation" column read from the .txt file.
# NOTE(review): names are attached positionally — assumes the SDF records are in
# the same order as the rows of the .txt file; TODO confirm.
df_PLA = (
    pd.read_csv("best_test_molecules_docking_result (pla2).txt")
    .assign(**{
        "Free Binding Energy": extractBindingEnergyAndRmsd("best_test_molecules_docking_result (pla2).txt"),
        "mol": extractMoleNames("best_test_molecules_docking_result (pla2).sdf"),
    })
    .rename(columns={"Free Binding Energy": "PLA2 Free Binding Energy, kcal/mol (RMSD, Å)"})
    .loc[:, ["mol", "PLA2 Free Binding Energy, kcal/mol (RMSD, Å)", "lip_violation"]]
)

# Persist the trimmed table next to the inputs (the index is written as well).
df_PLA.to_csv("best_test_molecules_docking_result (pla2).csv")
df_PLA

Unnamed: 0,mol,"PLA2 Free Binding Energy, kcal/mol (RMSD, Å)",lip_violation
0,"11-Eicosenoic acid, trimethylsilyl ester",-8.266 (2.657),1
1,"Hexanoic acid, octadecyl ester",-8.056 (1.443),1
2,Di-n-octyl phthalate,-7.727 (1.825),1
3,Heneicosyl pentafluoropropionate,-7.605 (1.580),1
4,Octadecanoic acid,-7.107 (2.445),0
5,Strophanthin K,-7.087 (3.932),3
6,"9-Octadecenoic acid (Z)-, methyl ester",-7.018 (1.043),1
7,n-Hexadecanoic acid,-7.013 (1.914),0
8,"2-[2-(2-butoxyethoxy)ethoxy]ethyl 2,2,3,3,3-pe...",-6.685 (1.777),0
9,E-11-Tetradecenoic acid,-6.648 (1.082),0


# Combining the data

In [6]:
# Inner-join the three per-target tables on the molecule name, so only
# molecules present in all three frames end up in the combined table.
# NOTE(review): the saved output of this cell lists molecules (e.g. Emicymarin)
# that do not appear in the saved COX-2 / PLA2 outputs earlier in the notebook —
# the outputs look like they come from different runs; re-run all cells to confirm.
final_table1 = pd.merge(df_pfLDH, df_COX_2, how='inner', on="mol")
final_table = pd.merge(final_table1, df_PLA, how='inner', on="mol")

# Write the combined result (the index is written as well).
final_table.to_csv("combined_docking_result.csv")

final_table

Unnamed: 0,mol,"pfLDH Free Binding Energy, kcal/mol (RMSD, Å)","COX-2 Free Binding Energy, kcal/mol (RMSD, Å)","PLA2 Free Binding Energy, kcal/mol (RMSD, Å)",lip_violation
0,Emicymarin,-8.303 (1.841),-2.593 (1.492),3.046 (2.104),1
1,Di-n-octyl phthalate,-7.907 (2.293),-8.299 (2.461),-7.727 (1.825),1
2,Strophanthin K,-7.700 (3.670),-6.332 (3.891),-7.087 (3.932),3
3,"11-Eicosenoic acid, trimethylsilyl ester",-7.389 (1.992),-8.182 (2.432),-8.266 (2.657),1
4,"Hexanoic acid, octadecyl ester",-7.258 (1.790),-4.633 (0.999),-8.056 (1.443),1
5,Heneicosyl pentafluoropropionate,-7.160 (2.093),-6.750 (1.497),-7.605 (1.580),1
6,"1,2-Benzenedicarboxylic acid, butyl 2-ethylhexy",-7.072 (0.689),-8.393 (2.048),-6.477 (0.940),1
7,Octadecanoic acid,-6.914 (2.333),-8.122 (2.740),-7.107 (2.445),0
8,"9-Octadecenoic acid (Z)-, methyl ester",-6.798 (1.403),-8.245 (1.110),-7.018 (1.043),1
9,n-Hexadecanoic acid,-6.626 (1.735),-7.748 (1.677),-7.013 (1.914),0
