## Batch run for different models

In [None]:
import json
import os
import shlex
import subprocess

# Architectures to benchmark, one per line so additions/removals diff cleanly.
models = [
    'GCN',
    'GAT',
    'Weave',
    'MPNN',
    'AttentiveFP',
    'gin_supervised_contextpred',
    'gin_supervised_infomax',
    'gin_supervised_edgepred',
    'gin_supervised_masking',
    'NF',
]

# Command-line template for a single training run; the '{model}' placeholder
# is filled in with str.format(model=...) for each entry of `models`.
base_command = " ".join([
    "python regression_train.py",
    "-c ./C18RT.csv",
    "-sc 'CanonicalSMILES'",
    "-nw 6",
    "-me 'mae'",
    "-p './results'",
    "-mo '{model}'",
    "-t 'C18RT'",
    "-s random",
    "-sr 0.8,0.1,0.1",
])

In [None]:
# Train every entry of `models` by running `base_command`, then read the
# metrics for that run from ./results/<model>/eval.txt.
# `results` maps model name -> {metric name: float value}. A model is left out
# when its training command fails, its eval file is missing, or the file
# contains no parsable "name: value" line.
results = {}

for model in models:
    print(f"Training model: {model}")

    # Execute the command. The template uses POSIX-style quoting, so
    # shlex.split turns it into an argument list and no shell is involved.
    # check=True makes a non-zero exit status raise CalledProcessError;
    # OSError covers the case where the executable itself cannot be started.
    command = shlex.split(base_command.format(model=model))
    try:
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"Failed to train model {model}: {e}")
        # Skip loading so a stale eval file is not mistaken for this run.
        continue

    # Load the evaluation results
    eval_path = f"./results/{model}/eval.txt"
    if not os.path.exists(eval_path):
        print(f"No evaluation file found for model {model}: {eval_path}")
        continue

    with open(eval_path, 'r') as f:
        eval_results = f.readlines()

    # Extract metrics from the evaluation file ("<metric name>: <number>").
    metrics = {}
    for line in eval_results:
        # Split on the first colon only, so extra colons cannot break unpacking.
        key, separator, value = line.partition(":")
        if not separator:
            continue
        try:
            metrics[key.strip()] = float(value.strip())
        except ValueError:
            print(f"Skipping unparsable line in {eval_path}: {line.strip()!r}")

    if metrics:
        results[model] = metrics
    else:
        print(f"No metrics found for model {model} in {eval_path}")

Training model: GCN
Training model: GAT
Training model: Weave
Training model: MPNN
Training model: AttentiveFP
Training model: gin_supervised_contextpred
Training model: gin_supervised_infomax
Training model: gin_supervised_edgepred
Training model: gin_supervised_masking
Training model: NF


### Process the training results

In [7]:
# Report the metrics collected in `results`, pick the model with the lowest
# value of `metric_to_optimize`, and save everything to all_model_results.json.
metric_to_optimize = 'Test mae'

# Output the results
print("Results for all models:")
for model, metrics in results.items():
    print(f"{model}: {metrics}")

# Rank only the models that actually report the chosen metric; a model whose
# eval file lacked it would otherwise raise KeyError, and an empty `results`
# would make min() raise ValueError.
scores = {
    name: model_metrics[metric_to_optimize]
    for name, model_metrics in results.items()
    if metric_to_optimize in model_metrics
}

if scores:
    # Lower is better for an error metric such as MAE.
    best_model = min(scores, key=scores.get)
    print(f"\nBest model: {best_model} with {metric_to_optimize}: {results[best_model][metric_to_optimize]}")
else:
    best_model = None
    print(f"\nNo model reported '{metric_to_optimize}'; cannot determine the best model.")

# Save results to a JSON file
with open("all_model_results.json", "w") as f:
    json.dump(results, f, indent=2)

Results for all models:
GCN: {'Best val mae': 0.49150320887565613, 'Test mae': 0.45785361528396606}
GAT: {'Best val mae': 0.3472808003425598, 'Test mae': 0.3785816431045532}
Weave: {'Best val mae': 0.4747655391693115, 'Test mae': 0.48232242465019226}
MPNN: {'Best val mae': 0.3982100188732147, 'Test mae': 0.4415026307106018}
AttentiveFP: {'Best val mae': 0.3214002549648285, 'Test mae': 0.33426281809806824}
gin_supervised_contextpred: {'Best val mae': 0.4089275598526001, 'Test mae': 0.4010923206806183}
gin_supervised_infomax: {'Best val mae': 0.4564848244190216, 'Test mae': 0.4705674350261688}
gin_supervised_edgepred: {'Best val mae': 0.439164400100708, 'Test mae': 0.41336357593536377}
gin_supervised_masking: {'Best val mae': 0.45019441843032837, 'Test mae': 0.45540446043014526}
NF: {'Best val mae': 1.2017685174942017, 'Test mae': 1.2223578691482544}

Best model: AttentiveFP with Test mae: 0.33426281809806824
