# Imports and Functions

In [1]:
from collections import defaultdict
import json
import os
import glob
from typing import List, Callable, Union

import numpy as np
import pandas as pd
from sklearn.metrics import (average_precision_score, mean_absolute_error, root_mean_squared_error,
                             precision_recall_curve, r2_score, roc_auc_score, mean_absolute_percentage_error, auc)
%load_ext autoreload
%autoreload 2

from scipy.stats import ttest_ind
from scipy.stats import ttest_rel
from chemprop import models

In [None]:
# Paths for the markdown reports
CHEMELEON_MD_PATH = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/CheMeleon.md"
CHEMPROP_MD_PATH = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/ChemProp.md"

from collections import defaultdict
import os

def init_md_files():
    """Reset both markdown reports so that each contains only its title line.

    Overwrites ``CHEMELEON_MD_PATH`` and ``CHEMPROP_MD_PATH`` (creating their parent
    directory first if it does not exist) and then empties the module-level
    ``_md_written_datasets`` record of section headers written during this session,
    so that record cannot go stale relative to the freshly truncated files.
    """
    reports = (
        (CHEMELEON_MD_PATH, "# CheMeleon Benchmark Results\n\n"),
        (CHEMPROP_MD_PATH, "# ChemProp Benchmark Results\n\n"),
    )
    for md_path, title in reports:
        parent_dir = os.path.dirname(md_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(md_path, "w") as f:
            f.write(title)

    # The tracker is assigned after this function in the cell, so look it up at call
    # time and tolerate it not existing yet.
    written = globals().get("_md_written_datasets")
    if written is not None:
        written.clear()

    print(f"Initialized {CHEMELEON_MD_PATH} and {CHEMPROP_MD_PATH}")

# Datasets whose section header has been written to each markdown file in this session.
# Kept up to date for inspection; the file contents are what decide whether a header is needed.
_md_written_datasets = defaultdict(set)

def _metric_value(results, metric):
    """
    Reduce ``results`` to the single float that should be logged for ``metric``.

    * DataFrame: mean of the ``metric`` column (i.e. averaged over its rows).
    * Series indexed by metric name (e.g. the output of ``DataFrame.mean()``): the entry
      for ``metric``. A Series without that label is averaged as a whole.
    * Anything else exposing ``.mean()`` (NumPy scalars/arrays): its mean.
    * Plain numbers: converted with ``float``.

    :raises KeyError: if ``results`` is a DataFrame without a ``metric`` column.
    """
    if isinstance(results, pd.DataFrame):
        if metric not in results.columns:
            raise KeyError(f'Metric "{metric}" is not a column of the results DataFrame.')
        return float(results[metric].mean())
    if isinstance(results, pd.Series):
        if metric in results.index:
            # .loc[[...]] always yields a Series, even if the label is duplicated.
            return float(results.loc[[metric]].mean())
        return float(results.mean())
    if hasattr(results, "mean"):
        return float(results.mean())
    return float(results)

def log_result(dataset, metric, results, md_path):
    """
    Adds a ``| metric | value |`` row to the ``## dataset`` section of a markdown report.

    The section (sub-header plus table header) is created at the end of the file when it
    does not exist yet; otherwise the row is added at the end of the existing section.
    Nothing is written if the section already contains a row for ``metric``.

    :param dataset: Section title, written as ``## {dataset}``.
    :param metric: Metric name; also used to pick the value out of ``results``.
    :param results: DataFrame with a ``metric`` column, Series indexed by metric name,
                    array-like with ``.mean()``, or a plain number.
    :param md_path: Path of the markdown file to update.
    :raises KeyError: if ``results`` is a DataFrame without a ``metric`` column.
    """
    test_mean = _metric_value(results, metric)

    section_header = f"## {dataset}"
    row_prefix = f"| {metric} "

    lines = []
    if os.path.exists(md_path):
        with open(md_path, "r") as f:
            lines = f.readlines()

    # Locate the dataset section and look for an existing row for this metric.
    section_start = None
    section_end = len(lines)
    for i, line in enumerate(lines):
        stripped = line.strip()
        if stripped == section_header:
            if section_start is None:
                section_start = i
        elif section_start is None:
            continue
        elif line.startswith("## "):
            # A different dataset's section begins here.
            section_end = i
            break
        elif stripped.startswith(row_prefix):
            print(f"Skipping duplicate log for {dataset} - {metric} in {os.path.basename(md_path)}")
            return

    new_row = f"| {metric} | {test_mean:.4f} |\n"
    if section_start is None:
        # First entry for this dataset in the file: append a new section.
        with open(md_path, "a") as f:
            f.write(f"\n## {dataset}\n\n")
            f.write("| Metric | Value |\n|:--|--:|\n")
            f.write(new_row)
    else:
        # Add the row right after the last non-blank line of the existing section so it
        # stays under the correct header even when other sections follow.
        insert_at = section_end
        while insert_at > section_start + 1 and not lines[insert_at - 1].strip():
            insert_at -= 1
        if not lines[insert_at - 1].endswith("\n"):
            lines[insert_at - 1] += "\n"
        addition = [new_row]
        has_table = any(l.lstrip().startswith("|") for l in lines[section_start + 1:insert_at])
        if not has_table:
            addition = ["\n", "| Metric | Value |\n", "|:--|--:|\n", new_row]
        lines[insert_at:insert_at] = addition
        with open(md_path, "w") as f:
            f.writelines(lines)

    _md_written_datasets[md_path].add(dataset)
    print(f"Logged {dataset} - {metric} to {os.path.basename(md_path)}")

# Do not call init_md_files() automatically here. Only call it manually when you want to reset the markdown files.


Initialized /home/akshatz/fastprop_foundation/analysis/moleculenet_results/CheMeleon.md and /home/akshatz/fastprop_foundation/analysis/moleculenet_results/ChemProp.md


In [3]:
def parse_indices(idxs):
    """Expand a comma-separated index string into a list of ints, e.g. '0,1,2-4' -> [0, 1, 2, 3, 4].

    Tokens of the form 'a-b' denote the inclusive range a..b. Any non-string input is
    returned unchanged.
    """
    if not isinstance(idxs, str):
        return idxs

    expanded = []
    for token in idxs.split(","):
        if "-" not in token:
            expanded.append(int(token))
            continue
        first, last = (int(part) for part in token.split("-"))
        expanded += range(first, last + 1)
    return expanded

def prc_auc(targets: List[int], preds: List[float]) -> float:
    """
    Area under the precision-recall curve.

    :param targets: A list of binary targets.
    :param preds: A list of prediction probabilities.
    :return: The area obtained by integrating precision over recall.
    """
    curve = precision_recall_curve(targets, preds)
    precision_values, recall_values = curve[0], curve[1]
    return auc(recall_values, precision_values)

def get_metric_func(metric: str):
    r"""
    Looks up the scoring function registered under a metric name.

    Supported names:

    * :code:`roc-auc`: Area under the receiver operating characteristic curve
    * :code:`prc-auc`: Area under the precision recall curve
    * :code:`ap`: Average precision from prediction scores
    * :code:`rmse`: Root mean squared error
    * :code:`mae`: Mean absolute error
    * :code:`r2`: Coefficient of determination R\ :superscript:`2`

    :param metric: Metric name.
    :return: A function taking (targets, predictions) and returning the score.
    :raises ValueError: If the metric name is not one of the supported names.
    """
    supported = (
        ('roc-auc', roc_auc_score),
        ('prc-auc', prc_auc),
        ('ap', average_precision_score),
        ('rmse', root_mean_squared_error),
        ('mae', mean_absolute_error),
        ('r2', r2_score),
    )
    for name, func in supported:
        if metric == name:
            return func

    raise ValueError(f'Metric "{metric}" not supported.')
    

In [4]:
def evaluate_results(data_path, splits_path, result_dir, num_tasks, metrics, target_columns=None):
    """
    Scores the averaged predictions of five runs against the test split.

    :param data_path: CSV file holding the full dataset, including the target columns.
    :param splits_path: JSON file with a list of splits; the "test" entry of the first one is used.
    :param result_dir: Directory searched recursively for ``test_predictions.csv`` files.
    :param num_tasks: When ``target_columns`` is None, the last ``num_tasks`` columns of the data are the targets.
    :param metrics: Metric names understood by ``get_metric_func``.
    :param target_columns: Explicit list of target column names (optional).
    :return: DataFrame with one row per target column and one column per metric.
    """
    full_df = pd.read_csv(data_path)
    with open(splits_path, "rb") as split_file:
        all_splits = json.load(split_file)
    test_rows_per_split = [parse_indices(split["test"]) for split in all_splits]
    test_df = full_df.iloc[test_rows_per_split[0]]
    if target_columns is None:
        target_columns = test_df.keys()[-num_tasks:].tolist()

    pattern = os.path.join(result_dir, '**', "test_predictions.csv")
    prediction_files = glob.glob(pattern, recursive=True)
    assert len(prediction_files) == 5, f"There should be 5 files; {len(prediction_files)} found"

    # Stack the per-run predictions and average rows that share the same row label.
    per_run_preds = [pd.read_csv(path)[target_columns] for path in prediction_files]
    mean_preds = pd.concat(per_run_preds).groupby(level=0).mean()

    metric_funcs = {name: get_metric_func(name) for name in metrics}

    scores = defaultdict(list)
    for column in target_columns:
        for name, score in metric_funcs.items():
            column_preds = mean_preds[column].tolist()
            column_targets = test_df[column].tolist()
            scores[name].append(score(column_targets, column_preds))

    return pd.DataFrame(dict(scores), index=target_columns)

In [5]:
import pandas as pd
import numpy as np
import os
import glob
import json
from collections import defaultdict

def evaluate_results_err(data_path, splits_path, result_dir, num_tasks, metrics, target_columns=None):
    """
    Scores each of the five prediction files separately and reports the spread across them.

    Unlike ``evaluate_results``, predictions are not averaged before scoring: every
    ``test_predictions.csv`` is scored on its own, then the per-target mean and standard
    deviation (``np.std`` default, i.e. population std) are taken across files.

    :param data_path: CSV file holding the full dataset, including the target columns.
    :param splits_path: JSON file with a list of splits; the "test" entry of the first one is used.
    :param result_dir: Directory searched recursively for ``test_predictions.csv`` files.
    :param num_tasks: When ``target_columns`` is None, the last ``num_tasks`` columns of the data are the targets.
    :param metrics: Metric names understood by ``get_metric_func``.
    :param target_columns: Explicit list of target column names (optional).
    :return: DataFrame with one column per metric and rows "mean" and "std"; each cell holds
             an array with one value per target column.
    """
    # Load data and test split
    df = pd.read_csv(data_path)
    with open(splits_path, "rb") as json_file:
        split_idxss = json.load(json_file)
    test_indices = [parse_indices(d["test"]) for d in split_idxss]
    test_df = df.iloc[test_indices[0]]

    # Determine target columns
    if target_columns is None:
        target_columns = test_df.keys()[-num_tasks:].tolist()

    # Gather prediction files
    files = glob.glob(os.path.join(result_dir, '**', "test_predictions.csv"), recursive=True)
    assert len(files) == 5, f"There should be 5 files; {len(files)} found"

    # Map metric names to their respective functions
    metric_to_func = {metric: get_metric_func(metric) for metric in metrics}

    # The ground truth is the same for every prediction file, so extract it once.
    targets_by_column = {column: test_df[column].tolist() for column in target_columns}

    # per_file_scores[metric] holds one list per file with that metric for each target column.
    per_file_scores = {metric: [] for metric in metric_to_func}
    for file in files:
        df_pred = pd.read_csv(file)[target_columns]
        for metric, metric_func in metric_to_func.items():
            per_file_scores[metric].append([
                metric_func(targets_by_column[column], df_pred[column].tolist())
                for column in target_columns
            ])

    # Aggregate across files (axis 0 = files, axis 1 = target columns).
    aggregated_results = {}
    for metric in metrics:
        metric_values = np.array(per_file_scores[metric])
        aggregated_results[metric] = {
            "mean": np.mean(metric_values, axis=0),
            "std": np.std(metric_values, axis=0),
        }

    return pd.DataFrame(aggregated_results)

In [6]:
def evaluate_results_1(data_path, splits_path, result_dir, num_tasks, metrics, target_columns=None):
    """
    Variant of ``evaluate_results`` that accepts any non-zero number of prediction files.

    Predictions from every ``test_predictions.csv`` found under ``result_dir`` are averaged
    row-wise and the average is scored against the test split.

    :param data_path: CSV file holding the full dataset, including the target columns.
    :param splits_path: JSON file with a list of splits; the "test" entry of the first one is used.
    :param result_dir: Directory searched recursively for ``test_predictions.csv`` files.
    :param num_tasks: When ``target_columns`` is None, the last ``num_tasks`` columns of the data are the targets.
    :param metrics: Metric names understood by ``get_metric_func``.
    :param target_columns: Explicit list of target column names (optional).
    :return: DataFrame with one row per target column and one column per metric.
    :raises ValueError: If no prediction file is found under ``result_dir``.
    """
    df = pd.read_csv(data_path)
    with open(splits_path, "rb") as json_file:
        split_idxss = json.load(json_file)
    test_indices = [parse_indices(d["test"]) for d in split_idxss]
    test_df = df.iloc[test_indices[0]]
    if target_columns is None:
        target_columns = test_df.keys()[-num_tasks:].tolist()

    files = glob.glob(os.path.join(result_dir, '**', "test_predictions.csv"), recursive=True)
    # No fixed file count is enforced here, but an empty list would otherwise surface as
    # pandas' generic "No objects to concatenate" error.
    if not files:
        raise ValueError(f"No test_predictions.csv files found under {result_dir}")

    df_pred_list = [pd.read_csv(file)[target_columns] for file in files]
    df_pred = pd.concat(df_pred_list).groupby(level=0).mean()

    metric_to_func = {metric: get_metric_func(metric) for metric in metrics}

    results = defaultdict(list)
    for column in target_columns:
        for metric, metric_func in metric_to_func.items():
            preds = df_pred[column].tolist()
            targets = test_df[column].tolist()
            results[metric].append(metric_func(targets, preds))
    results = dict(results)

    results_df = pd.DataFrame(results, index=target_columns)
    return results_df

In [7]:
def evaluate_results_full(data_path, splits_path, result_dir, num_tasks, metrics, target_columns=None, model_path=None):
    """
    Scores the averaged predictions of five runs and also returns the true/predicted values.

    :param data_path: CSV file holding the full dataset, including the target columns.
    :param splits_path: JSON file with a list of splits; the "test" entry of the first one is used.
    :param result_dir: Directory searched recursively for ``test_predictions.csv`` files.
    :param num_tasks: When ``target_columns`` is None, the last ``num_tasks`` columns of the data are the targets.
    :param metrics: Metric names understood by ``get_metric_func``.
    :param target_columns: Explicit list of target column names (optional).
    :param model_path: Optional model file; when given, the model is loaded with
                       ``models.MPNN.load_from_file`` and its parameter count is added to
                       the results as a ``model_size`` column.
    :return: Tuple ``(results_df, test_preds)``. ``results_df`` has one row per target and one
             column per metric. ``test_preds`` is indexed like the test rows of the dataset and
             has columns ``true_{i}`` / ``predicted_{i}`` for the i-th target column.
    :raises ValueError: If the averaged predictions and the test split differ in length.
    """
    df = pd.read_csv(data_path)
    with open(splits_path, "rb") as json_file:
        split_idxss = json.load(json_file)
    test_indices = [parse_indices(d["test"]) for d in split_idxss]
    test_df = df.iloc[test_indices[0]]
    if target_columns is None:
        target_columns = test_df.keys()[-num_tasks:].tolist()

    files = glob.glob(os.path.join(result_dir, '**', "test_predictions.csv"), recursive=True)
    assert len(files) == 5, f"There should be 5 files; {len(files)} found"
    df_pred_list = [pd.read_csv(file)[target_columns] for file in files]
    df_pred = pd.concat(df_pred_list).groupby(level=0).mean()

    if len(df_pred) != len(test_df):
        raise ValueError(
            f"Predictions have {len(df_pred)} rows but the test split has {len(test_df)} rows"
        )

    metric_to_func = {metric: get_metric_func(metric) for metric in metrics}

    # test_df keeps the row labels of the full dataset while df_pred is labelled by the row
    # position in the prediction files. Assigning Series would align on those labels and
    # scramble/blank the predictions, so pair the two by position instead.
    test_preds = pd.DataFrame(index=test_df.index)
    for i, column in enumerate(target_columns):
        test_preds[f'true_{i}'] = test_df[column].to_numpy()
        test_preds[f'predicted_{i}'] = df_pred[column].to_numpy()

    results = defaultdict(list)
    for column in target_columns:
        for metric, metric_func in metric_to_func.items():
            preds = df_pred[column].tolist()
            targets = test_df[column].tolist()
            results[metric].append(metric_func(targets, preds))
    results = dict(results)

    results_df = pd.DataFrame(results, index=target_columns)

    if model_path is not None:
        model = models.MPNN.load_from_file(model_path)
        total_params = sum(p.numel() for p in model.parameters())
        results_df["model_size"] = total_params

    return results_df, test_preds

# QM 9 Gap

In [8]:
# Evaluate the single-task QM9 "gap" models and record each metric in the markdown reports.
data_path = "/home/akshatz/bond_order_free/qm9/dataset/qm9_data_filtered.csv"
splits_path = "/home/akshatz/bond_order_free/qm9/dataset/splits_filtered.json"
# num_tasks only selects columns when target_columns is omitted; it is passed explicitly below.
num_tasks = 1
metrics = ["mae","rmse", "r2"]

# CheMeleon
# Not wrapped in try/except: a failure here stops the cell.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemeleon/qm9_gap_results"
print("CheMeleon Results:")
results = evaluate_results(data_path, splits_path, result_dir, num_tasks, metrics, target_columns=["gap"])
display(results)
for m in metrics:
    log_result("QM9 Gap", m, results, CHEMELEON_MD_PATH)

# ChemProp
# Best-effort: any error is printed instead of raised so the cell still completes.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemprop/qm9_gap_results"
print("\nChemProp Results:")
try:
    results = evaluate_results(data_path, splits_path, result_dir, num_tasks, metrics, target_columns=["gap"])
    display(results)
    for m in metrics:
        log_result("QM9 Gap", m, results, CHEMPROP_MD_PATH)
except Exception as e:
    print(f"Could not evaluate ChemProp results: {e}")

CheMeleon Results:


Unnamed: 0,mae,rmse,r2
gap,0.003219,0.005877,0.98454


Logged QM9 Gap - mae to CheMeleon.md
Logged QM9 Gap - rmse to CheMeleon.md
Logged QM9 Gap - r2 to CheMeleon.md

ChemProp Results:


Unnamed: 0,mae,rmse,r2
gap,0.003904,0.006534,0.980891


Logged QM9 Gap - mae to ChemProp.md
Logged QM9 Gap - rmse to ChemProp.md
Logged QM9 Gap - r2 to ChemProp.md


# QM 9 U0

In [9]:
# Evaluate the single-task QM9 "u0_atom" models and record each metric in the markdown reports.
data_path = "/home/akshatz/bond_order_free/qm9/dataset/qm9_data_filtered.csv"
splits_path = "/home/akshatz/bond_order_free/qm9/dataset/splits_filtered.json"
# num_tasks only selects columns when target_columns is omitted; it is passed explicitly below.
num_tasks = 1
metrics = ["mae", "rmse", "r2"]

# CheMeleon
# Not wrapped in try/except: a failure here stops the cell.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemeleon/qm9_u0_results"
print("CheMeleon Results:")
results = evaluate_results(data_path, splits_path, result_dir, num_tasks, metrics, target_columns=["u0_atom"])
display(results)
for m in metrics:
    log_result("QM9 U0", m, results, CHEMELEON_MD_PATH)

# ChemProp
# Best-effort: any error is printed instead of raised so the cell still completes.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemprop/qm9_u0_results"
print("\nChemProp Results:")
try:
    results = evaluate_results(data_path, splits_path, result_dir, num_tasks, metrics, target_columns=["u0_atom"])
    display(results)
    for m in metrics:
        log_result("QM9 U0", m, results, CHEMPROP_MD_PATH)
except Exception as e:
    print(f"Could not evaluate ChemProp results: {e}")

CheMeleon Results:


Unnamed: 0,mae,rmse,r2
u0_atom,2.167752,14.132761,0.996425


Logged QM9 U0 - mae to CheMeleon.md
Logged QM9 U0 - rmse to CheMeleon.md
Logged QM9 U0 - r2 to CheMeleon.md

ChemProp Results:


Unnamed: 0,mae,rmse,r2
u0_atom,1.446406,2.901698,0.999849


Logged QM9 U0 - mae to ChemProp.md
Logged QM9 U0 - rmse to ChemProp.md
Logged QM9 U0 - r2 to ChemProp.md


# QM 9 Multitask

In [10]:
# Evaluate the multitask QM9 models; every target gets its own "QM9 Multitask - <target>"
# section in the markdown reports.
data_path = "/home/akshatz/bond_order_free/qm9/dataset/qm9_data_filtered.csv"
splits_path = "/home/akshatz/bond_order_free/qm9/dataset/splits_filtered.json"
# No target_columns are passed, so the last num_tasks columns of the CSV are the targets.
# NOTE(review): the saved output of this cell lists 10 targets while num_tasks is 12 -
# confirm the output was produced with this value.
num_tasks = 12
metrics = ["mae", "rmse", "r2"]

# CheMeleon
# Not wrapped in try/except: a failure here stops the cell.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemeleon/qm9_multitask_results"
print("CheMeleon Results:")
results = evaluate_results(data_path, splits_path, result_dir, num_tasks, metrics)
display(results)
for target in results.index:
    for m in metrics:
        # results.loc[target, m] is the single score for this target/metric pair.
        log_result(f"QM9 Multitask - {target}", m, results.loc[target, m], CHEMELEON_MD_PATH)

# ChemProp
# Best-effort: any error is printed instead of raised so the cell still completes.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemprop/qm9_multitask_results"
print("\nChemProp Results:")
try:
    results = evaluate_results(data_path, splits_path, result_dir, num_tasks, metrics)
    display(results)
    for target in results.index:
        for m in metrics:
            log_result(f"QM9 Multitask - {target}", m, results.loc[target, m], CHEMPROP_MD_PATH)
except Exception as e:
    print(f"Could not evaluate ChemProp results: {e}")


CheMeleon Results:


Unnamed: 0,mae,rmse,r2
mu,0.347369,0.607233,0.842723
alpha,0.294276,0.824264,0.989407
homo,0.002494,0.004263,0.962277
lumo,0.00247,0.004171,0.992052
gap,0.003377,0.005998,0.983895
r2,18.115125,36.033951,0.983356
zpve,0.000463,0.001533,0.997844
cv,0.134403,0.330809,0.993252
u0_atom,3.727032,15.641594,0.995621
u298_atom,3.754351,15.748834,0.995639


Logged QM9 Multitask - mu - mae to CheMeleon.md
Logged QM9 Multitask - mu - rmse to CheMeleon.md
Logged QM9 Multitask - mu - r2 to CheMeleon.md
Logged QM9 Multitask - alpha - mae to CheMeleon.md
Logged QM9 Multitask - alpha - rmse to CheMeleon.md
Logged QM9 Multitask - alpha - r2 to CheMeleon.md
Logged QM9 Multitask - homo - mae to CheMeleon.md
Logged QM9 Multitask - homo - rmse to CheMeleon.md
Logged QM9 Multitask - homo - r2 to CheMeleon.md
Logged QM9 Multitask - lumo - mae to CheMeleon.md
Logged QM9 Multitask - lumo - rmse to CheMeleon.md
Logged QM9 Multitask - lumo - r2 to CheMeleon.md
Logged QM9 Multitask - gap - mae to CheMeleon.md
Logged QM9 Multitask - gap - rmse to CheMeleon.md
Logged QM9 Multitask - gap - r2 to CheMeleon.md
Logged QM9 Multitask - r2 - mae to CheMeleon.md
Logged QM9 Multitask - r2 - rmse to CheMeleon.md
Logged QM9 Multitask - r2 - r2 to CheMeleon.md
Logged QM9 Multitask - zpve - mae to CheMeleon.md
Logged QM9 Multitask - zpve - rmse to CheMeleon.md
Logged QM9 

Unnamed: 0,mae,rmse,r2
mu,0.408434,0.655254,0.816864
alpha,0.371365,0.679839,0.992794
homo,0.003222,0.00499,0.948317
lumo,0.003499,0.00519,0.987691
gap,0.004539,0.007085,0.977534
r2,24.555212,40.258457,0.979224
zpve,0.000416,0.000551,0.999722
cv,0.180973,0.288448,0.99487
u0_atom,3.526757,5.369247,0.999484
u298_atom,3.546034,5.379368,0.999491


Logged QM9 Multitask - mu - mae to ChemProp.md
Logged QM9 Multitask - mu - rmse to ChemProp.md
Logged QM9 Multitask - mu - r2 to ChemProp.md
Logged QM9 Multitask - alpha - mae to ChemProp.md
Logged QM9 Multitask - alpha - rmse to ChemProp.md
Logged QM9 Multitask - alpha - r2 to ChemProp.md
Logged QM9 Multitask - homo - mae to ChemProp.md
Logged QM9 Multitask - homo - rmse to ChemProp.md
Logged QM9 Multitask - homo - r2 to ChemProp.md
Logged QM9 Multitask - lumo - mae to ChemProp.md
Logged QM9 Multitask - lumo - rmse to ChemProp.md
Logged QM9 Multitask - lumo - r2 to ChemProp.md
Logged QM9 Multitask - gap - mae to ChemProp.md
Logged QM9 Multitask - gap - rmse to ChemProp.md
Logged QM9 Multitask - gap - r2 to ChemProp.md
Logged QM9 Multitask - r2 - mae to ChemProp.md
Logged QM9 Multitask - r2 - rmse to ChemProp.md
Logged QM9 Multitask - r2 - r2 to ChemProp.md
Logged QM9 Multitask - zpve - mae to ChemProp.md
Logged QM9 Multitask - zpve - rmse to ChemProp.md
Logged QM9 Multitask - zpve - r

# Multi Molecule

In [11]:
# Evaluate the multi-molecule models (logged under "UV Vis") and record each metric
# in the markdown reports.
data_path = "/home/akshatz/bond_order_free/multi_molecule/dataset/mult_mol_data_filtered.csv"
splits_path = "/home/akshatz/bond_order_free/multi_molecule/dataset/splits_filtered.json"
# No target_columns are passed, so the last column of the CSV is the target.
num_tasks = 1
metrics = ["mae", "rmse", "r2"]

# CheMeleon
# Not wrapped in try/except: a failure here stops the cell.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemeleon/uv_vis_results"
print("CheMeleon Results:")
results = evaluate_results(data_path, splits_path, result_dir, num_tasks, metrics)
display(results)
for m in metrics:
    log_result("UV Vis", m, results, CHEMELEON_MD_PATH)

# ChemProp
# Best-effort: any error is printed instead of raised so the cell still completes.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemprop/uv_vis_results"
print("\nChemProp Results:")
try:
    results = evaluate_results(data_path, splits_path, result_dir, num_tasks, metrics)
    display(results)
    for m in metrics:
        log_result("UV Vis", m, results, CHEMPROP_MD_PATH)
except Exception as e:
    print(f"Could not evaluate ChemProp results: {e}")

CheMeleon Results:


Unnamed: 0,mae,rmse,r2
peakwavs_max,16.859979,31.363678,0.910984


Logged UV Vis - mae to CheMeleon.md
Logged UV Vis - rmse to CheMeleon.md
Logged UV Vis - r2 to CheMeleon.md

ChemProp Results:


Unnamed: 0,mae,rmse,r2
peakwavs_max,19.98766,34.170062,0.894341


Logged UV Vis - mae to ChemProp.md
Logged UV Vis - rmse to ChemProp.md
Logged UV Vis - r2 to ChemProp.md


# HIV

In [12]:
# Evaluate the HIV classification models and record each metric in the markdown reports.
data_path = "/home/akshatz/bond_order_free/hiv/dataset/hiv_data_filtered.csv"
splits_path = "/home/akshatz/bond_order_free/hiv/dataset/splits_filtered.json"
# num_tasks only selects columns when target_columns is omitted; it is passed explicitly below.
num_tasks = 1
metrics = ["roc-auc", "ap", "prc-auc"]

# CheMeleon
# Not wrapped in try/except: a failure here stops the cell.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemeleon/hiv_results"
print("CheMeleon Results:")
results = evaluate_results(data_path, splits_path, result_dir, num_tasks, metrics, target_columns=["HIV_active"])
display(results)
for m in metrics:
    log_result("HIV", m, results, CHEMELEON_MD_PATH)

# ChemProp
# Best-effort: any error is printed instead of raised so the cell still completes.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemprop/hiv_results"
print("\nChemProp Results:")
try:
    results = evaluate_results(data_path, splits_path, result_dir, num_tasks, metrics, target_columns=["HIV_active"])
    display(results)
    for m in metrics:
        log_result("HIV", m, results, CHEMPROP_MD_PATH)
except Exception as e:
    print(f"Could not evaluate ChemProp results: {e}")

CheMeleon Results:


Unnamed: 0,roc-auc,ap,prc-auc
HIV_active,0.768079,0.336912,0.331843


Logged HIV - roc-auc to CheMeleon.md
Logged HIV - ap to CheMeleon.md
Logged HIV - prc-auc to CheMeleon.md

ChemProp Results:


Unnamed: 0,roc-auc,ap,prc-auc
HIV_active,0.789523,0.307796,0.30574


Logged HIV - roc-auc to ChemProp.md
Logged HIV - ap to ChemProp.md
Logged HIV - prc-auc to ChemProp.md


# PCBA Random

In [13]:
def evaluate_results_with_nan_targets(data_path, splits_path, result_dir, num_tasks, metrics, target_columns=None):
    """
    Scores the averaged predictions of five runs, ignoring rows whose target is NaN.

    For every target column, (target, prediction) pairs with a NaN target are dropped
    before the metric is computed.

    :param data_path: CSV file holding the full dataset, including the target columns.
    :param splits_path: JSON file with a list of splits; the "test" entry of the first one is used.
    :param result_dir: Directory searched recursively for ``test_predictions.csv`` files.
    :param num_tasks: When ``target_columns`` is None, the last ``num_tasks`` columns of the data are the targets.
    :param metrics: Metric names understood by ``get_metric_func``.
    :param target_columns: Explicit list of target column names (optional).
    :return: DataFrame with one row per target column and one column per metric.
    """
    full_df = pd.read_csv(data_path)
    with open(splits_path, "rb") as split_file:
        all_splits = json.load(split_file)
    test_rows_per_split = [parse_indices(split["test"]) for split in all_splits]
    test_df = full_df.iloc[test_rows_per_split[0]]
    if target_columns is None:
        target_columns = test_df.keys()[-num_tasks:].tolist()

    pattern = os.path.join(result_dir, '**', "test_predictions.csv")
    prediction_files = glob.glob(pattern, recursive=True)
    assert len(prediction_files) == 5, f"There should be 5 files; {len(prediction_files)} found"

    # Stack the per-run predictions and average rows that share the same row label.
    per_run_preds = [pd.read_csv(path)[target_columns] for path in prediction_files]
    mean_preds = pd.concat(per_run_preds).groupby(level=0).mean()

    metric_funcs = {name: get_metric_func(name) for name in metrics}

    scores = defaultdict(list)
    for column in target_columns:
        for name, score in metric_funcs.items():
            raw_preds = mean_preds[column].tolist()
            raw_targets = test_df[column].tolist()

            # Keep only the positions where a target value is present.
            kept_targets, kept_preds = [], []
            for target, pred in zip(raw_targets, raw_preds):
                if np.isnan(target):
                    continue
                kept_targets.append(target)
                kept_preds.append(pred)

            scores[name].append(score(kept_targets, kept_preds))

    return pd.DataFrame(dict(scores), index=target_columns)

In [14]:
# Evaluate the PCBA (random split) models; each metric is averaged over all tasks before logging.
data_path = "/home/akshatz/bond_order_free/pcba_random/dataset/pcba_random_data_filtered.csv"
splits_path = "/home/akshatz/bond_order_free/pcba_random/dataset/splits_filtered.json"
# No target_columns are passed, so the last num_tasks columns of the CSV are the targets.
num_tasks = 128
metrics = ["prc-auc", "ap", "roc-auc"]

# CheMeleon
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemeleon/pcba_random_results"
print("CheMeleon Results:")
# .mean() collapses the per-task rows into a Series indexed by metric name.
results = evaluate_results_with_nan_targets(data_path, splits_path, result_dir, num_tasks, metrics).mean()
display(results)
for m in metrics:
    # Pass the scalar for this metric; handing over the whole Series would log the
    # average of all three metrics under every metric name.
    log_result("PCBA Random", m, results[m], CHEMELEON_MD_PATH)

# ChemProp
# Best-effort: any error is printed instead of raised so the cell still completes.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemprop/pcba_random_results"
print("\nChemProp Results:")
try:
    results = evaluate_results_with_nan_targets(data_path, splits_path, result_dir, num_tasks, metrics).mean()
    display(results)
    for m in metrics:
        log_result("PCBA Random", m, results[m], CHEMPROP_MD_PATH)
except Exception as e:
    print(f"Could not evaluate ChemProp results: {e}")

CheMeleon Results:


prc-auc    0.210400
ap         0.215105
roc-auc    0.909773
dtype: float64

Logged PCBA Random - prc-auc to CheMeleon.md
Logged PCBA Random - ap to CheMeleon.md
Logged PCBA Random - roc-auc to CheMeleon.md

ChemProp Results:


prc-auc    0.197051
ap         0.202194
roc-auc    0.904881
dtype: float64

Logged PCBA Random - prc-auc to ChemProp.md
Logged PCBA Random - ap to ChemProp.md
Logged PCBA Random - roc-auc to ChemProp.md


# PCBA Random NaN

In [15]:
# Evaluate the PCBA (random split, NaN variant) models; each metric is averaged over all
# tasks before logging.
data_path = "/home/akshatz/bond_order_free/pcba_random_nan/dataset/pcba_random_nan_data_filtered.csv"
splits_path = "/home/akshatz/bond_order_free/pcba_random_nan/dataset/splits_filtered.json"
# No target_columns are passed, so the last num_tasks columns of the CSV are the targets.
num_tasks = 128
metrics = ["prc-auc", "ap", "roc-auc"]

# CheMeleon
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemeleon/pcba_random_nan_results"
print("CheMeleon Results:")
# .mean() collapses the per-task rows into a Series indexed by metric name.
results = evaluate_results_with_nan_targets(data_path, splits_path, result_dir, num_tasks, metrics).mean()
display(results)
for m in metrics:
    # Pass the scalar for this metric; handing over the whole Series would log the
    # average of all three metrics under every metric name.
    log_result("PCBA Random NaN", m, results[m], CHEMELEON_MD_PATH)

# ChemProp
# Best-effort: any error is printed instead of raised so the cell still completes.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemprop/pcba_random_nan_results"
print("\nChemProp Results:")
try:
    results = evaluate_results_with_nan_targets(data_path, splits_path, result_dir, num_tasks, metrics).mean()
    display(results)
    for m in metrics:
        log_result("PCBA Random NaN", m, results[m], CHEMPROP_MD_PATH)
except Exception as e:
    print(f"Could not evaluate ChemProp results: {e}")

CheMeleon Results:


prc-auc    0.369269
ap         0.375512
roc-auc    0.905497
dtype: float64

Logged PCBA Random NaN - prc-auc to CheMeleon.md
Logged PCBA Random NaN - ap to CheMeleon.md
Logged PCBA Random NaN - roc-auc to CheMeleon.md

ChemProp Results:


prc-auc    0.356543
ap         0.361364
roc-auc    0.900398
dtype: float64

Logged PCBA Random NaN - prc-auc to ChemProp.md
Logged PCBA Random NaN - ap to ChemProp.md
Logged PCBA Random NaN - roc-auc to ChemProp.md


# PCBA Scaffold

In [16]:
# Evaluate the PCBA (scaffold split) models; each metric is averaged over all tasks before logging.
# NOTE(review): this line used to carry the comment "setting NaN targets to 0", but the
# evaluation helper called below drops NaN targets rather than zero-filling them - confirm
# whether the remark described how this CSV was prepared.
data_path = "/home/akshatz/bond_order_free/pcba_scaffold/dataset/pcba_scaffold_data_filtered.csv"
splits_path = "/home/akshatz/bond_order_free/pcba_scaffold/dataset/splits_filtered.json"
# No target_columns are passed, so the last num_tasks columns of the CSV are the targets.
num_tasks = 127
metrics = ["prc-auc", "ap", "roc-auc"]

# CheMeleon
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemeleon/pcba_scaffold_results"
print("CheMeleon Results:")
# .mean() collapses the per-task rows into a Series indexed by metric name.
results = evaluate_results_with_nan_targets(data_path, splits_path, result_dir, num_tasks, metrics).mean()
display(results)
for m in metrics:
    # Pass the scalar for this metric; handing over the whole Series would log the
    # average of all three metrics under every metric name.
    log_result("PCBA Scaffold", m, results[m], CHEMELEON_MD_PATH)

# ChemProp
# Best-effort: any error is printed instead of raised so the cell still completes.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemprop/pcba_scaffold_results"
print("\nChemProp Results:")
try:
    results = evaluate_results_with_nan_targets(data_path, splits_path, result_dir, num_tasks, metrics).mean()
    display(results)
    for m in metrics:
        log_result("PCBA Scaffold", m, results[m], CHEMPROP_MD_PATH)
except Exception as e:
    print(f"Could not evaluate ChemProp results: {e}")

CheMeleon Results:


prc-auc    0.287534
ap         0.291340
roc-auc    0.889468
dtype: float64

Logged PCBA Scaffold - prc-auc to CheMeleon.md
Logged PCBA Scaffold - ap to CheMeleon.md
Logged PCBA Scaffold - roc-auc to CheMeleon.md

ChemProp Results:


prc-auc    0.264985
ap         0.270518
roc-auc    0.881199
dtype: float64

Logged PCBA Scaffold - prc-auc to ChemProp.md
Logged PCBA Scaffold - ap to ChemProp.md
Logged PCBA Scaffold - roc-auc to ChemProp.md


# PCQM4MV2

In [None]:
# Evaluate the PCQM4MV2 "homolumogap" models and record each metric in the markdown reports.
data_path = "/home/akshatz/bond_order_free/pcqm4mv2/dataset/pcqm4mv2_data_filtered.csv"
splits_path = "/home/akshatz/bond_order_free/pcqm4mv2/dataset/splits_filtered.json"
# num_tasks only selects columns when target_columns is omitted; it is passed explicitly below.
num_tasks = 1
metrics = ["mae", "rmse", "r2"]

# CheMeleon
# Not wrapped in try/except: a failure here stops the cell.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemeleon/pcqm4mv2/results"
print("CheMeleon Results:")
results = evaluate_results(data_path, splits_path, result_dir, num_tasks, metrics, target_columns=["homolumogap"])
display(results)
for m in metrics:
    log_result("PCQM4MV2", m, results, CHEMELEON_MD_PATH)

# ChemProp
# Best-effort: any error is printed instead of raised so the cell still completes.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemprop/pcqm4mv2/results"
print("\nChemProp Results:")
try:
    results = evaluate_results(data_path, splits_path, result_dir, num_tasks, metrics, target_columns=["homolumogap"])
    display(results)
    for m in metrics:
        log_result("PCQM4MV2", m, results, CHEMPROP_MD_PATH)
except Exception as e:
    print(f"Could not evaluate ChemProp results: {e}")

Unnamed: 0,mae,rmse,r2
homolumogap,0.092909,0.154394,0.982413


# SAMPL

In [17]:
def evaluate_sampl(test_no, result_dir, metrics):
    """
    Scores the logP predictions stored for one SAMPL challenge.

    :param test_no: SAMPL challenge number; selects the file ``pred_SAMPL{test_no}.csv`` and,
                    for challenge 9, the reference column ``new_logPexp_reviewed`` instead of ``logP mean``.
    :param result_dir: Directory searched recursively for the prediction file.
    :param metrics: Metric names understood by ``get_metric_func``.
    :return: Single-row DataFrame (index ``logP - SAMPL{test_no}``) with one column per metric.
    """
    pattern = os.path.join(result_dir, '**', f"pred_SAMPL{test_no}.csv")
    files = glob.glob(pattern, recursive=True)
    assert len(files) == 1, f"There should be 1 file; {len(files)} found"

    reference_column = "new_logPexp_reviewed" if test_no == 9 else "logP mean"

    targets = None
    prediction_columns = []
    for path in files:
        frame = pd.read_csv(path)
        # Reference values are taken from the first file read.
        if targets is None:
            targets = frame[reference_column].tolist()
        prediction_columns.append(frame["logP"])
    preds = pd.concat(prediction_columns).groupby(level=0).mean().tolist()

    metric_funcs = {name: get_metric_func(name) for name in metrics}
    scores = {name: [score(targets, preds)] for name, score in metric_funcs.items()}

    return pd.DataFrame(scores, index=[f"logP - SAMPL{test_no}"])

In [18]:
# Score the SAMPL6 logP predictions of both models and record each metric in the markdown reports.
metrics = ["mae", "rmse", "r2"]

# CheMeleon
# Not wrapped in try/except: a failure here stops the cell.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemeleon/sampl_production_results"
print("CheMeleon Results:")
results = evaluate_sampl(6, result_dir, metrics)
display(results)
for m in metrics:
    log_result("SAMPL 6", m, results, CHEMELEON_MD_PATH)

# ChemProp
# Best-effort: any error is printed instead of raised so the cell still completes.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemprop/sampl_production_results"
print("\nChemProp Results:")
try:
    results = evaluate_sampl(6, result_dir, metrics)
    display(results)
    for m in metrics:
        log_result("SAMPL 6", m, results, CHEMPROP_MD_PATH)
except Exception as e:
    print(f"Could not evaluate ChemProp results: {e}")

CheMeleon Results:


Unnamed: 0,mae,rmse,r2
logP - SAMPL6,0.282115,0.310067,0.783596


Logged SAMPL 6 - mae to CheMeleon.md
Logged SAMPL 6 - rmse to CheMeleon.md
Logged SAMPL 6 - r2 to CheMeleon.md

ChemProp Results:


Unnamed: 0,mae,rmse,r2
logP - SAMPL6,0.303556,0.38742,0.662156


Logged SAMPL 6 - mae to ChemProp.md
Logged SAMPL 6 - rmse to ChemProp.md
Logged SAMPL 6 - r2 to ChemProp.md


In [19]:
# Score the SAMPL7 logP predictions of both models and record each metric in the markdown reports.
metrics = ["mae", "rmse", "r2"]

# CheMeleon
# Not wrapped in try/except: a failure here stops the cell.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemeleon/sampl_production_results"
print("CheMeleon Results:")
results = evaluate_sampl(7, result_dir, metrics)
display(results)
for m in metrics:
    log_result("SAMPL 7", m, results, CHEMELEON_MD_PATH)

# ChemProp
# Best-effort: any error is printed instead of raised so the cell still completes.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemprop/sampl_production_results"
print("\nChemProp Results:")
try:
    results = evaluate_sampl(7, result_dir, metrics)
    display(results)
    for m in metrics:
        log_result("SAMPL 7", m, results, CHEMPROP_MD_PATH)
except Exception as e:
    print(f"Could not evaluate ChemProp results: {e}")

CheMeleon Results:


Unnamed: 0,mae,rmse,r2
logP - SAMPL7,0.480964,0.697183,-0.10208


Logged SAMPL 7 - mae to CheMeleon.md
Logged SAMPL 7 - rmse to CheMeleon.md
Logged SAMPL 7 - r2 to CheMeleon.md

ChemProp Results:


Unnamed: 0,mae,rmse,r2
logP - SAMPL7,0.359081,0.552139,0.308778


Logged SAMPL 7 - mae to ChemProp.md
Logged SAMPL 7 - rmse to ChemProp.md
Logged SAMPL 7 - r2 to ChemProp.md


In [20]:
# Score the SAMPL9 logP predictions of both models and record each metric in the markdown reports.
# For challenge 9, evaluate_sampl reads the reference values from "new_logPexp_reviewed".
metrics = ["mae", "rmse", "r2"]

# CheMeleon
# Not wrapped in try/except: a failure here stops the cell.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemeleon/sampl_production_results"
print("CheMeleon Results:")
results = evaluate_sampl(9, result_dir, metrics)
display(results)
for m in metrics:
    log_result("SAMPL 9", m, results, CHEMELEON_MD_PATH)

# ChemProp
# Best-effort: any error is printed instead of raised so the cell still completes.
result_dir = "/home/akshatz/fastprop_foundation/analysis/moleculenet_results/scripts/chemprop/sampl_production_results"
print("\nChemProp Results:")
try:
    results = evaluate_sampl(9, result_dir, metrics)
    display(results)
    for m in metrics:
        log_result("SAMPL 9", m, results, CHEMPROP_MD_PATH)
except Exception as e:
    print(f"Could not evaluate ChemProp results: {e}")

CheMeleon Results:


Unnamed: 0,mae,rmse,r2
logP - SAMPL9,0.852672,1.03706,0.785748


Logged SAMPL 9 - mae to CheMeleon.md
Logged SAMPL 9 - rmse to CheMeleon.md
Logged SAMPL 9 - r2 to CheMeleon.md

ChemProp Results:


Unnamed: 0,mae,rmse,r2
logP - SAMPL9,0.941813,1.089913,0.763353


Logged SAMPL 9 - mae to ChemProp.md
Logged SAMPL 9 - rmse to ChemProp.md
Logged SAMPL 9 - r2 to ChemProp.md
