In [1]:
import pandas as pd
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from tqdm import tqdm
from sklearn import metrics
from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from statistics import mean, stdev
from platform import python_version
print(python_version())

3.8.16


In [2]:
def read_data(file_name):
    df = pd.read_excel(file_name)
    df['SMILES'] = df['SMILES'].replace('>>', '.', regex=True)
    return df

In [3]:
def calculate_morgan_fingerprints(smiles):
    try:
        mol = Chem.MolFromSmiles(smiles)
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, 3, nBits=2048)
        return list(fp)
    except:
        return [None] * 2048

In [4]:
def random_forest_regression(x, y):
    r2_scores = []
    rmse_scores = []
    mae_scores = []

    for random_state in range(1, 6):
        cv = KFold(n_splits=5, shuffle=True, random_state=random_state)
        Y_pred, Y_true = [], []

        for train_index, test_index in cv.split(x):
            x_train = x.loc[train_index].values
            x_test = x.loc[test_index].values
            y_train = y.loc[train_index].values
            y_test = y.loc[test_index].values

            model = RandomForestRegressor(n_estimators=120, max_depth=25)
            model.fit(x_train, y_train)
            Y_pred.extend(model.predict(x_test))
            Y_true.extend(y_test)

        r2_scores.append(metrics.r2_score(Y_true, Y_pred))
        rmse_scores.append(metrics.mean_squared_error(Y_true, Y_pred, squared=False))
        mae_scores.append(metrics.mean_absolute_error(Y_true, Y_pred))

    print("\nAverage performance of 5CV")
    print("Metric\tavg\tstdev")
    print("R^2\t%.2f\t%.2f" % (mean(r2_scores), stdev(r2_scores)))
    print("RMSE\t%.2f\t%.2f" % (mean(rmse_scores), stdev(rmse_scores)))
    print("MAE\t%.2f\t%.2f" % (mean(mae_scores), stdev(mae_scores)))

In [5]:
if __name__ == "__main__":
    df = read_data("Condensation_reactions.xls")
    morgan_cols = ['Morgan_{}'.format(i) for i in range(2048)]
    df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
    data = df.dropna()
    data.reset_index(drop=True, inplace=True)
    x=data.drop(['SMILES', 'yield', "Ind"], axis=1)
    y=data['yield']

    # Total set
    print("Total:")
    random_forest_regression(x, y)

[11:05:30] Explicit valence for atom # 26 H, 2, is greater than permitted
[11:05:32] Explicit valence for atom # 26 H, 2, is greater than permitted
[11:05:33] Explicit valence for atom # 0 O, 3, is greater than permitted
[11:05:34] Explicit valence for atom # 10 O, 3, is greater than permitted
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

Total:

Average performance of 5CV
Metric	avg	stdev
R^2	0.63	0.01
RMSE	13.00	0.19
MAE	9.49	0.12


In [6]:
if __name__ == "__main__":
    df = read_data("BODIPYs.xlsx")
    morgan_cols = ['Morgan_{}'.format(i) for i in range(2048)]
    df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
    data = df.dropna()
    data.reset_index(drop=True, inplace=True)
    x=data.drop(['SMILES', 'yield', "Ind"], axis=1)
    y=data['yield']

    # BODIPYs set
    print("BODIPYs:")
    random_forest_regression(x, y)

[11:13:03] Explicit valence for atom # 0 O, 3, is greater than permitted
[11:13:04] Explicit valence for atom # 10 O, 3, is greater than permitted
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

BODIPYs:

Average performance of 5CV
Metric	avg	stdev
R^2	0.51	0.01
RMSE	15.20	0.18
MAE	12.16	0.12


In [7]:
if __name__ == "__main__":
    df = read_data("Porphyrins.xlsx")
    morgan_cols = ['Morgan_{}'.format(i) for i in range(2048)]
    df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
    data = df.dropna()
    data.reset_index(drop=True, inplace=True)
    x=data.drop(['SMILES', 'yield', "Ind"], axis=1)
    y=data['yield']

    # Porphyrins set
    print("Porphyrins:")
    random_forest_regression(x, y)

[11:17:09] Explicit valence for atom # 26 H, 2, is greater than permitted
[11:17:10] Explicit valence for atom # 26 H, 2, is greater than permitted
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

Porphyrins:

Average performance of 5CV
Metric	avg	stdev
R^2	0.39	0.01
RMSE	9.44	0.12
MAE	6.10	0.07


In [8]:
if __name__ == "__main__":
    df = read_data("Dipyrromethanes.xlsx")
    morgan_cols = ['Morgan_{}'.format(i) for i in range(2048)]
    df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
    data = df.dropna()
    data.reset_index(drop=True, inplace=True)
    x=data.drop(['SMILES', 'yield', "Ind"], axis=1)
    y=data['yield']

    # Dipyrromethanes set
    print("Dipyrromethanes:")
    random_forest_regression(x, y)

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] = pd.DataFrame(df['SMILES'].apply(calculate_morgan_fingerprints).tolist(), index=df.index)
  df[morgan_cols] =

Dipyrromethanes:

Average performance of 5CV
Metric	avg	stdev
R^2	0.80	0.03
RMSE	10.96	0.86
MAE	7.38	0.47
