**NOTEBOOK PROJET PROG**


The functions in this notebook require the following imports:


In [None]:
import streamlit as st
import numpy as np
from rdkit import Chem
from rdkit.Chem import Draw, AllChem
from collections import defaultdict
import pulp
from rxnmapper import RXNMapper
from rxn_insight.reaction import Reaction
from rxn_insight.utils import draw_chemical_reaction, curate_smirks, get_similarity, get_fp
from IPython.display import SVG
import time
import requests
import base64
from io import BytesIO
from chemicals import CAS_from_any, Tb, Tm, Tc, Hfs, Hfl, Hfg, S0s, S0l, S0g

1. get_smiles_from_name(name):

Purpose:

Fetches the SMILES (Simplified Molecular Input Line Entry System) string of a molecule using its common name from the PubChem database.

How it works:

Sends a GET request to the PubChem API with the common name of the molecule.
Parses the JSON response to extract the SMILES string.
Returns the SMILES string or an error message if the molecule is not found.


In [None]:

def get_smiles_from_name(name):
    """
    Fetch the SMILES string of a molecule by its common name from PubChem.

    Parameters:
    name (str): The common name of the molecule.

    Returns:
    str: The SMILES string of the molecule, or the message
        "No data found or error occurred." if the lookup fails for any reason
        (empty name, network error, non-200 status, unexpected response body).
    """
    # Imported locally so the function does not rely on a new file-level import.
    from urllib.parse import quote

    error_message = "No data found or error occurred."

    if name is None or not str(name).strip():
        return error_message

    # Percent-encode the name: characters such as '/', '?' or '#' would
    # otherwise change the structure of the request URL.
    encoded_name = quote(str(name), safe='')
    url = (
        "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/"
        f"{encoded_name}/property/CanonicalSMILES/JSON"
    )

    try:
        # A timeout prevents the caller from hanging forever on a stalled connection.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        return error_message

    if response.status_code != 200:
        return error_message

    try:
        properties = response.json()['PropertyTable']['Properties'][0]
        # NOTE(review): newer PubChem responses appear to label this property
        # 'ConnectivitySMILES' instead of 'CanonicalSMILES' - confirm against
        # the PUG REST documentation.
        smiles = properties.get('CanonicalSMILES') or properties.get('ConnectivitySMILES')
    except (ValueError, KeyError, IndexError, TypeError, AttributeError):
        # Body was not JSON or did not have the expected layout.
        return error_message

    return smiles if smiles else error_message


2. count_atoms(smiles):

Purpose: 

Counts the number of atoms of each element in the molecule described by a SMILES string, including hydrogen atoms.

How it works:

Converts the SMILES string to a molecular representation using RDKit.
Iterates over the atoms in the molecule, counting occurrences of each atom type.
Returns a dictionary with atom symbols as keys and their counts as values.

In [None]:

def count_atoms(smiles):
    """
    Tally the atoms of each element in a molecule given as SMILES.

    Hydrogens are added to the parsed molecule before counting, so they are
    included in the tally.

    Parameters:
    smiles (str): The SMILES string of the molecule.

    Returns:
    dict: Maps each atom symbol to the number of times it occurs. Empty when
        RDKit yields no usable molecule for the input.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if not parsed:
        # Nothing to count when parsing did not produce a molecule.
        return {}

    tally = {}
    # AddHs makes the hydrogens explicit so they show up in GetAtoms().
    for atom in Chem.AddHs(parsed).GetAtoms():
        symbol = atom.GetSymbol()
        tally[symbol] = tally.get(symbol, 0) + 1
    return tally


3. solve_ilp(A):

Purpose:

Solves an integer linear programming problem to find stoichiometric coefficients that balance a chemical equation.

How it works:

Sets up an optimization problem to minimize the sum of the stoichiometric coefficients.
Adds constraints to ensure the equation is balanced for each element.
Uses the PuLP library to solve the optimization problem.
Returns the stoichiometric coefficients if an optimal solution is found.

In [None]:

def solve_ilp(A):
    """
    Solve the integer linear programming problem to find stoichiometric coefficients.

    One integer variable (lower bound 1) is created per column of ``A``; the
    sum of the variables is minimized subject to every row of ``A`` having a
    zero dot product with the variable vector.

    Parameters:
    A (numpy.ndarray): The stoichiometry matrix (rows are constraints,
        columns are compounds).

    Returns:
    list: A list of integer stoichiometric coefficients, one per column of ``A``.

    Raises:
    RuntimeError: If the solver does not report an optimal solution.
    """
    num_vars = A.shape[1]
    prob = pulp.LpProblem("Balancing_Chemical_Equation", pulp.LpMinimize)
    x_vars = [pulp.LpVariable(f'x{i}', lowBound=1, cat='Integer') for i in range(num_vars)]

    # Objective: the smallest coefficients that satisfy the balance constraints.
    prob += pulp.lpSum(x_vars)

    for i in range(A.shape[0]):
        # tolist() converts NumPy scalars to plain Python numbers so the
        # expression is built from native types only.
        row_coefficients = A[i, :].tolist()
        prob += pulp.lpDot(row_coefficients, x_vars) == 0

    solver = pulp.PULP_CBC_CMD(msg=False)
    prob.solve(solver)

    if pulp.LpStatus[prob.status] == 'Optimal':
        # The solver reports values as floats (e.g. 1.9999999); round before
        # converting, otherwise int() would truncate to the wrong integer.
        return [int(round(pulp.value(var))) for var in x_vars]
    else:
        raise RuntimeError("Failed to find a valid solution.")


4. get_molecular_formula(smiles):

Purpose: 

Retrieves the molecular formula of a molecule from its SMILES string.

How it works:

Converts the SMILES string to a molecular representation using RDKit.
Computes and returns the molecular formula.
Returns an error message if the SMILES string is invalid.


In [None]:

def get_molecular_formula(smiles):
    """
    Return the molecular formula for a molecule given as SMILES.

    Parameters:
    smiles (str): The SMILES string of the molecule.

    Returns:
    str: The formula computed by RDKit, or the text "Invalid SMILES string"
        when the input cannot be turned into a molecule.
    """
    parsed = Chem.MolFromSmiles(smiles)
    # Guard clause: bail out early when parsing produced nothing.
    if parsed is None:
        return "Invalid SMILES string"
    return Chem.rdMolDescriptors.CalcMolFormula(parsed)


5. balance_chemical_equation(reactant_smiles, product_smiles):

Purpose: 

Balances a chemical equation given reactants and products as SMILES strings.

How it works:

Counts the atoms in each reactant and product.
Verifies that the sets of elements in reactants and products are identical.
Constructs the stoichiometry matrix for reactants and products.
Solves for the stoichiometric coefficients using integer linear programming.
Returns the coefficients and molecular formulas for the balanced equation.

In [None]:

def balance_chemical_equation(reactant_smiles, product_smiles):
    """
    Balance a chemical equation given reactants and products as SMILES strings.

    Parameters:
    reactant_smiles (list): A list of SMILES strings for the reactants.
    product_smiles (list): A list of SMILES strings for the products.

    Returns:
    tuple: Two lists containing tuples of stoichiometric coefficients and molecular formulas for reactants and products.

    Raises:
    ValueError: If either side is empty, if a SMILES string yields no atoms
        (e.g. it could not be parsed), or if the sets of elements on the two
        sides differ.
    RuntimeError: Propagated from solve_ilp when no balanced solution exists.
    """
    if not reactant_smiles or not product_smiles:
        raise ValueError("Both reactants and products need at least one component.")

    reactant_counts = [count_atoms(smiles) for smiles in reactant_smiles]
    product_counts = [count_atoms(smiles) for smiles in product_smiles]

    # count_atoms returns an empty dict when a SMILES string cannot be parsed.
    # Such an entry would become an all-zero matrix column and silently end up
    # in the "balanced" equation, so reject it explicitly.
    all_smiles = list(reactant_smiles) + list(product_smiles)
    for smiles, counts in zip(all_smiles, reactant_counts + product_counts):
        if not counts:
            raise ValueError(f"Could not parse SMILES string: {smiles!r}")

    # Iterating a dict yields its keys, so union(*dicts) collects the symbols.
    reactant_elements = set().union(*reactant_counts)
    product_elements = set().union(*product_counts)

    if reactant_elements != product_elements:
        missing_in_products = reactant_elements - product_elements
        missing_in_reactants = product_elements - reactant_elements
        error_message = "Element mismatch found: "
        if missing_in_products:
            error_message += f"Elements {missing_in_products} are in reactants but not in products. "
        if missing_in_reactants:
            error_message += f"Elements {missing_in_reactants} are in products but not in reactants."
        raise ValueError(error_message)

    # Both sets are equal at this point, so either one gives the full element list.
    elements = sorted(reactant_elements)
    A_reactants = setup_matrix(elements, reactant_counts)
    A_products = setup_matrix(elements, product_counts)
    # Products enter with a negative sign so that A @ x == 0 expresses atom balance.
    A = np.concatenate([A_reactants, -A_products], axis=1)

    integer_coefficients = solve_ilp(A)
    n_reactants = len(reactant_smiles)
    reactant_coeffs = integer_coefficients[:n_reactants]
    product_coeffs = integer_coefficients[n_reactants:]

    reactant_data = [(coeff, get_molecular_formula(smiles)) for coeff, smiles in zip(reactant_coeffs, reactant_smiles)]
    product_data = [(coeff, get_molecular_formula(smiles)) for coeff, smiles in zip(product_coeffs, product_smiles)]

    return reactant_data, product_data


6. setup_matrix(elements, compounds)

Purpose:
    
Creates a stoichiometry matrix representing the number of each type of atom in each compound.

How it works:

Iterates over each element, constructing a row in the matrix for the element.
For each compound, retrieves the count of the current element, defaulting to zero if not present.
Constructs and returns the stoichiometry matrix as a NumPy array.

In [None]:
    
def setup_matrix(elements, compounds):
    """
    Build the element-by-compound count matrix.

    Parameters:
    elements (list): Element symbols; one matrix row per entry, in order.
    compounds (list): One mapping of element symbol -> atom count per
        compound; one matrix column per entry, in order.

    Returns:
    numpy.ndarray: Integer array whose entry [i][j] is the count of
        elements[i] in compounds[j] (0 when the compound lacks the element).
    """
    # Nested comprehension: outer loop walks rows (elements), inner loop
    # walks columns (compounds).
    rows = [
        [atom_counts.get(symbol, 0) for atom_counts in compounds]
        for symbol in elements
    ]
    return np.array(rows, dtype=int)



7. display_reaction(reactants, products)

Purpose:
    
Formats and displays the balanced chemical reaction as a string.

How it works:

Formats each reactant and product with its coefficient and molecular formula.
Joins the formatted reactants and products into a single reaction string.
Returns the formatted reaction string.

In [None]:

def display_reaction(reactants, products):
    """
    Format and display the chemical reaction.

    Parameters:
    reactants (list): A list of tuples for reactants with coefficients and molecular formulas.
    products (list): A list of tuples for products with coefficients and molecular formulas.

    Returns:
    str: The formatted chemical reaction as a string, or None if any
        component is not a (coefficient, molecule) pair (the problem is
        printed in that case).

    Raises:
    ValueError: If reactants or products is empty.
    """
    def format_component(component):
        # Render one (coefficient, molecule) pair; a coefficient of 1 is omitted.
        try:
            coefficient, molecule = component
        except (TypeError, ValueError) as err:
            # TypeError: component is not iterable; ValueError: wrong length.
            raise ValueError(
                f"Invalid component format: {component}. Expected a tuple of (coefficient, molecule)."
            ) from err
        return f"{coefficient} {molecule}" if coefficient != 1 else f"{molecule}"

    if not reactants or not products:
        raise ValueError("Both reactants and products need at least one component.")

    try:
        reactants_str = ' + '.join(format_component(r) for r in reactants)
        products_str = ' + '.join(format_component(p) for p in products)
    except ValueError as e:
        # Best-effort behaviour: report the malformed component and return None.
        print(e)
        return None
    return f"{reactants_str} → {products_str}"



8. create_reaction_string(reactants, products)

Purpose:
    
Creates a reaction string in the format required for visualizing chemical reactions.

How it works:

Joins the SMILES strings of reactants and products with dots.
Combines the reactant and product strings with the reaction separator `>>`.
Returns the formatted reaction string.

In [None]:

def create_reaction_string(reactants, products):
    """
    Create a reaction string for visualization purposes.

    Parameters:
    reactants (list): A list of SMILES strings for the reactants.
    products (list): A list of SMILES strings for the products.

    Returns:
    str: A string representing the chemical reaction in the format 'reactants>>products',
        with the individual SMILES on each side joined by '.'.
    """
    reactants_str = '.'.join(reactants)
    products_str = '.'.join(products)
    return f"{reactants_str}>>{products_str}"


9. display_svg(svg)

Purpose: 
    
Displays an SVG image in Streamlit using markdown with unsafe HTML.

How it works:

Encodes the SVG string in base64.
Embeds the base64-encoded SVG in an HTML image tag.
Uses Streamlit's markdown function to display the HTML.

In [None]:

def display_svg(svg):
    """
    Render an SVG string in Streamlit as an inline base64 image.

    Parameters:
    svg (str): The SVG content as a string.

    Returns:
    None
    """
    # Encode to UTF-8 bytes, then base64, then back to text for embedding.
    svg_bytes = svg.encode('utf-8')
    encoded = base64.b64encode(svg_bytes).decode("utf-8")
    # The image is embedded as a data URI, so raw HTML must be allowed.
    st.markdown(
        f"<img src='data:image/svg+xml;base64,{encoded}'/>",
        unsafe_allow_html=True,
    )

10. compound_state

Purpose: 
    
Determines the physical state of a compound at a given temperature.

How it works:

Converts the compound name to its CAS number.
Retrieves the boiling point and melting point of the compound.
Compares the temperature to the boiling and melting points to determine the state.

In [None]:
def compound_state(compound, temp):
    """
    Determine the physical state of a compound at a given temperature.

    The state is 'solid' at or below the melting point, 'gas' above the
    boiling point, and 'liquid' in between.

    Parameters:
    compound (str): The name or identifier of the compound.
    temp (float): The temperature in Kelvin.

    Returns:
    str: The physical state of the compound ('solid', 'liquid', or 'gas').

    Raises:
    ValueError: If the melting and/or boiling point needed to decide the
        state is not available for the compound.
    """
    CAS_compound = CAS_from_any(compound)
    boiling_p = Tb(CAS_compound)
    melting_p = Tm(CAS_compound)

    # NOTE(review): the chemicals lookups are expected to return None when no
    # data exists for a compound - confirm against the library documentation.
    # Comparing a number with None would raise an opaque TypeError, so each
    # comparison is guarded and a clear error is raised when undecidable.
    if melting_p is not None and temp <= melting_p:
        return 'solid'
    if boiling_p is not None and temp > boiling_p:
        return 'gas'
    if melting_p is None or boiling_p is None:
        raise ValueError(
            f"Cannot determine the state of {compound!r} at {temp} K: "
            "melting or boiling point data is unavailable."
        )
    return 'liquid'

11. enthalpy

Purpose: 

Calculates the enthalpy of a compound in a specific state.

How it works:

Determines the enthalpy based on the state.
Uses the stoichiometric coefficient to scale the enthalpy value.

In [None]:
def enthalpy(coeff, compound, state):
    """
    Return the coefficient-scaled enthalpy of a compound for a given state.

    Parameters:
    coeff (float): The stoichiometric coefficient of the compound.
    compound (str): The name or identifier of the compound.
    state (str): The physical state of the compound ('solid', 'liquid', or 'gas').
        Any value other than 'solid' or 'liquid' is handled as gas.

    Returns:
    float: The coefficient multiplied by the value looked up for the
        compound in the selected state.
    """
    # Select the lookup function for the requested state first ...
    if state == 'solid':
        lookup = Hfs
    elif state == 'liquid':
        lookup = Hfl
    else:
        lookup = Hfg
    # ... then resolve the compound to its CAS number and scale the result.
    return coeff * lookup(CAS_from_any(compound))


12. entropy

Purpose: 
    
Calculates the entropy of a compound in a specific state.


How it works:

Determines the entropy based on the state.
Uses the stoichiometric coefficient to scale the entropy value.

In [None]:
def entropy(coeff, compound, state):
    """
    Calculate the entropy of a compound in a specific state.

    Parameters:
    coeff (float): The stoichiometric coefficient of the compound.
    compound (str): The name or identifier of the compound.
    state (str): The physical state of the compound ('solid', 'liquid', or 'gas').

    Returns:
    float: The entropy of the compound.
    """
    if state == 'solid': 
        return coeff * S0s(CAS_from_any(compound))
    elif state == 'liquid':
        return coeff * S0l(CAS_from_any(compound))
    else: 