<a href="https://colab.research.google.com/github/Ramprasad-Group/Sublimation_enthalpy_model/blob/main/predict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>



This notebook loads the model published in the article "Accelerated predictions of the sublimation enthalpy of organic materials with machine learning" and uses it to predict the sublimation enthalpy of new candidate molecules. For more detailed information, please refer to XXX.

In [3]:
#@title Predict the Sublimation Enthalpy of Organic Molecules
#@markdown *Please first enter your candidate's SMILES string, then press the run button on the left.*

#@markdown *This cell installs RDKit version 2022.9.05 and then gives a prediction for the input SMILES.*

#Clone the repository if not already present
import os
if not os.path.exists("/content/Sublimation_enthalpy_model"):
    !git clone https://github.com/Ramprasad_Group/Sublimation_enthalpy_model.git
else:
    print("Repository already cloned.")


#Load rdkit 2022.9.05 version
!pip install rdkit==2022.9.05

#Load necessary packages
import joblib
import os
import pickle
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors
from sklearn.preprocessing import StandardScaler
import warnings

# Silence warnings for the rest of the session. The original intent was to hide
# InconsistentVersionWarning (presumably emitted while unpickling the scaler and
# model below - confirm).
# NOTE(review): the filter matches the whole UserWarning category, so every
# other UserWarning is hidden as well, not just that one.
warnings.filterwarnings("ignore", category=UserWarning)

# Paths to the required files inside the cloned repository checkout:
# the fitted scaler, the trained model, and a text file of descriptor names
# that are dropped before scaling (one name per line).
scaler_path = "/content/Sublimation_enthalpy_model/845scaler.save"
model_path = "/content/Sublimation_enthalpy_model/845model.pkl"
zero_features = "/content/Sublimation_enthalpy_model/all_zero_features.txt"

# Load the scaler (a joblib dump) from an open binary file handle.
with open(scaler_path, 'rb') as f:
    scaler = joblib.load(f)

# Load the model.
# SECURITY NOTE: unpickling can execute arbitrary code contained in the file;
# only load model files that come from a trusted checkout.
with open(model_path, 'rb') as f:
    model = pickle.load(f)

# Colab form field: the value entered in the form is stored in `smiles`,
# which the driver code at the bottom of the cell passes to the predictor.
# @markdown 1. Enter a SMILES string:
smiles = "CCCC" # @param {type:"string"}

def compute_descriptors(smiles):
    """
    Compute RDKit molecular descriptors for a given SMILES string.
    Parameters:
        smiles (str): The SMILES string of the molecule. Leading and
            trailing whitespace is ignored.
    Returns:
        np.array: A NumPy array with one value per entry of
            Descriptors.descList, in the same order.
    Raises:
        ValueError: If smiles is not a string, is empty or whitespace-only,
            or cannot be parsed into a molecule.
    """
    # Reject blank or non-string input up front: it carries no structure, and
    # the parser is not relied on to flag it.
    if not isinstance(smiles, str) or not smiles.strip():
        raise ValueError(f"Invalid SMILES string: {smiles!r}")

    # Form fields easily pick up stray spaces, so parse the trimmed text.
    mol = Chem.MolFromSmiles(smiles.strip())
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles}")

    # Evaluate every descriptor function on the parsed molecule.
    descriptor_values = [func(mol) for _, func in Descriptors.descList]
    return np.array(descriptor_values)

def get_smiles_input():
    """Prompt on standard input and return the text the user types as a SMILES string."""
    typed_smiles = input("Enter a SMILES string: ")
    return typed_smiles

def predict_sublimation_enthalpy(smiles):
    """Predict the sublimation enthalpy for a given SMILES string.

    Parameters:
        smiles (str): The SMILES string of the molecule.
    Returns:
        The first element of the model's prediction, or None if any step
        fails (the error message is printed instead of being raised).
    """
    try:
        # Compute molecular descriptors
        descriptors = compute_descriptors(smiles)

        # Load the names of the descriptors to drop (one name per line).
        # A set gives constant-time membership tests in the scan below.
        with open(zero_features, 'r') as f:
            zero_feature_names = set(f.read().splitlines())

        # Positions of those descriptors within Descriptors.descList, found in
        # a single pass instead of a list.index() lookup per descriptor.
        zero_indices = [
            position
            for position, (name, _) in enumerate(Descriptors.descList)
            if name in zero_feature_names
        ]

        # Drop the zero features from the descriptors
        descriptors_filtered = np.delete(descriptors, zero_indices)

        # Wrap the vector as a single-row batch and normalize it with the scaler
        descriptors_normalized = scaler.transform([descriptors_filtered])

        # Use the model to predict the sublimation enthalpy
        prediction = model.predict(descriptors_normalized)
        return prediction[0]
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller
        # handle the None result.
        print(f"Error: {e}")
        return None


if __name__ == "__main__":

    # Run the prediction for the SMILES string entered in the form
    enthalpy = predict_sublimation_enthalpy(smiles)

    # Report failure first; otherwise print the value with two decimals
    if enthalpy is None:
        print("Failed to predict sublimation enthalpy.")
    else:
        print(f"Predicted Sublimation Enthalpy: {enthalpy:.2f} kJ/mol")

Repository already cloned.
Predicted Sublimation Enthalpy: 38.95 kJ/mol
