<a href="https://colab.research.google.com/github/Gressling/notebooks/blob/main/Inverse_Design_Workflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# title: Inverse Design Workflow
# author: Gressling, T                               # license: MIT License
# code: github.com/gressling/notebooks               # activity: single example
# indices: -


In [None]:
!pip install scikit-learn rdkit numpy

In [None]:
import numpy as np
from rdkit import Chem
from rdkit.Chem import Descriptors
from sklearn.ensemble import RandomForestRegressor
from sklearn.exceptions import DataConversionWarning
import warnings

# Install a process-wide filter that hides scikit-learn's DataConversionWarning
# so it does not clutter the printed workflow log below.
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

In [None]:
def define_target_properties():
    """Build, announce and return the example design targets.

    Returns:
        dict: a fresh dictionary with the keys ``'melting_point'`` (300.0)
        and ``'solubility'`` (0.5). These are illustrative example values.
    """
    print("Defining target properties...")
    # Keyword form keeps the same key order as the printed log expects.
    target_properties = dict(melting_point=300.0, solubility=0.5)
    print(f"Target properties: {target_properties}")
    return target_properties

def generate_candidate_molecules(target_properties, smiles_list=None):
    """Turn SMILES strings into (molecule, target_properties) candidate pairs.

    Args:
        target_properties: the target dictionary; the same object is attached
            to every candidate so later stages can compare against it.
        smiles_list: optional iterable of SMILES strings. When omitted, the
            built-in example set ``['CCO', 'CCN', 'CCC', 'CCCl']`` is used.

    Returns:
        list[tuple]: one ``(rdkit molecule, target_properties)`` pair per
        SMILES string that RDKit could parse, in input order.
    """
    print("Generating candidate molecules...")
    if smiles_list is None:
        smiles_list = ['CCO', 'CCN', 'CCC', 'CCCl']

    candidates = []
    for smiles in smiles_list:
        molecule = Chem.MolFromSmiles(smiles)
        # MolFromSmiles signals a parse failure by returning None; passing
        # that on would only fail later inside Chem.MolToSmiles.
        if molecule is None:
            print(f"Skipping unparsable SMILES: {smiles}")
            continue
        candidates.append((molecule, target_properties))

    print(f"Generated {len(candidates)} candidate molecules.")
    return candidates

def predict_properties(data, random_state=None):
    """Predict melting point and solubility for one candidate (placeholder models).

    Two random forests are fitted on freshly drawn random numbers each call,
    so the predictions only demonstrate the workflow plumbing; they are not
    chemically meaningful.

    Args:
        data: a ``(molecule, target_properties)`` pair as produced by
            ``generate_candidate_molecules``.
        random_state: optional int seed or ``numpy.random.RandomState``.
            When given, the dummy training data and both forests draw from
            that stream, making the result reproducible. When ``None``
            (default) the global NumPy generator is used and the forests are
            left unseeded, as before.

    Returns:
        tuple: ``(molecule, predicted_properties, target_properties)`` where
        ``predicted_properties`` maps ``'melting_point'`` and ``'solubility'``
        to plain Python floats.
    """
    molecule, target_properties = data
    smiles = Chem.MolToSmiles(molecule)
    print(f"Predicting properties for molecule: {smiles}")

    # Single-row feature matrix built from RDKit's MolWt and MolLogP descriptors.
    features = np.array([Descriptors.MolWt(molecule), Descriptors.MolLogP(molecule)]).reshape(1, -1)

    if random_state is None:
        # Legacy path: global NumPy stream, unseeded forests.
        rng = np.random
        forest_seed = None
    else:
        if isinstance(random_state, np.random.RandomState):
            rng = random_state
        else:
            rng = np.random.RandomState(random_state)
        # Sharing one stream keeps the whole call reproducible from one seed.
        forest_seed = rng

    model_melting_point = RandomForestRegressor(random_state=forest_seed)
    model_solubility = RandomForestRegressor(random_state=forest_seed)

    # Dummy training for melting point
    model_melting_point.fit(rng.rand(10, 2), rng.rand(10) * 500)  # Melting points between 0 and 500

    # Dummy training for solubility
    model_solubility.fit(rng.rand(10, 2), rng.rand(10))  # Solubility between 0 and 1

    predicted_melting_point = model_melting_point.predict(features)
    predicted_solubility = model_solubility.predict(features)

    # Cast to built-in floats so the printed dict reads the same on NumPy 1.x
    # and 2.x (the latter shows scalars as ``np.float64(...)``).
    predicted_properties = {
        'melting_point': float(predicted_melting_point[0]),
        'solubility': float(predicted_solubility[0])
    }
    print(f"Predicted properties: {predicted_properties}")
    return molecule, predicted_properties, target_properties

def evaluate_candidates(data, melting_point_tolerance=50):
    """Accept or reject one candidate by comparing predicted and target melting point.

    Only ``'melting_point'`` is compared; the ``'solubility'`` entries are
    carried along but do not influence the decision.

    Args:
        data: a ``(molecule, predicted_properties, target_properties)`` triple
            as returned by ``predict_properties``.
        melting_point_tolerance: the candidate is accepted when the absolute
            difference between predicted and target melting point is strictly
            below this value. Defaults to 50.

    Returns:
        tuple | None: ``(molecule, predicted_properties)`` for an accepted
        candidate, otherwise ``None``.
    """
    molecule, predicted_properties, target_properties = data
    smiles = Chem.MolToSmiles(molecule)
    print(f"Evaluating candidate molecule: {smiles}")

    deviation = abs(predicted_properties['melting_point'] - target_properties['melting_point'])
    if deviation < melting_point_tolerance:
        print(f"Candidate {smiles} is valid.")
        # These are the model's predictions, passed on unchanged.
        return molecule, predicted_properties
    print(f"Candidate {smiles} is not valid.")
    return None

def output_valid_candidates(data):
    """Print a one-line report for an accepted candidate.

    Args:
        data: either a ``(molecule, properties)`` pair or a falsy value
            (such as ``None``), in which case nothing is printed.
    """
    # Rejected candidates arrive as a falsy value; there is nothing to report.
    if not data:
        return
    molecule, experimental_properties = data
    candidate_smiles = Chem.MolToSmiles(molecule)
    print(f"Valid candidate: {candidate_smiles} with experimental properties: {experimental_properties}")


In [None]:
# Driver cell: runs the whole inverse design workflow once and logs each step.
print("Starting inverse design workflow...")
# Build the targets first; every candidate carries a reference to them.
target_properties = define_target_properties()
candidates = generate_candidate_molecules(target_properties)

# Each candidate passes through predict -> evaluate -> report in turn.
for candidate in candidates:
    predicted_data = predict_properties(candidate)
    # evaluate_candidates returns None for a rejected candidate ...
    evaluated_candidate = evaluate_candidates(predicted_data)
    # ... which output_valid_candidates silently ignores.
    output_valid_candidates(evaluated_candidate)
print("Workflow completed.")

Starting inverse design workflow...
Defining target properties...
Target properties: {'melting_point': 300.0, 'solubility': 0.5}
Generating candidate molecules...
Generated 4 candidate molecules.
Predicting properties for molecule: CCO
Predicted properties: {'melting_point': 401.459906956573, 'solubility': 0.448700690544071}
Evaluating candidate molecule: CCO
Candidate CCO is not valid.
Predicting properties for molecule: CCN
Predicted properties: {'melting_point': 443.70455801218503, 'solubility': 0.535809508939771}
Evaluating candidate molecule: CCN
Candidate CCN is not valid.
Predicting properties for molecule: CCC
Predicted properties: {'melting_point': 296.39746790095705, 'solubility': 0.5060324991001187}
Evaluating candidate molecule: CCC
Candidate CCC is valid.
Valid candidate: CCC with experimental properties: {'melting_point': 296.39746790095705, 'solubility': 0.5060324991001187}
Predicting properties for molecule: CCCl
Predicted properties: {'melting_point': 185.9733097200104