# 1- First part of the research

![Result](assets/generative_smiles.png)

In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem, Descriptors
import itertools
from typing import List, Optional

def calculate_rule_of_five_violations(smiles: str) -> Optional[int]:
    """
    Count how many of Lipinski's Rule of Five criteria a molecule breaks.

    The four criteria checked are molecular weight > 500, logP > 5,
    hydrogen-bond donors > 5 and hydrogen-bond acceptors > 10.

    Parameters:
        smiles (str): SMILES string of the molecule to evaluate.

    Returns:
        Optional[int]: Number of violated criteria (0 to 4), or None when
        RDKit cannot parse the SMILES string.
    """
    molecule = Chem.MolFromSmiles(smiles)
    if not molecule:
        # RDKit could not build a molecule from this string
        return None

    # One (descriptor function, upper limit) pair per criterion
    limits = (
        (Descriptors.MolWt, 500),
        (Descriptors.MolLogP, 5),
        (Descriptors.NumHDonors, 5),
        (Descriptors.NumHAcceptors, 10),
    )

    # Each comparison is a bool, so summing them counts the violations
    return sum(descriptor(molecule) > limit for descriptor, limit in limits)

def generate_cyclic_smiles(original_smiles: str, replacements: List[str]) -> List[str]:
    """
    Generate a list of SMILES by substituting every carbon 'C' with each of the given replacements.

    The substitution is purely textual: every uppercase 'C' that stands for a
    carbon atom is a substitution site, and every combination of replacements
    over those sites is emitted (len(replacements) ** number_of_sites strings).
    A 'C' that is only the first letter of a two-letter element symbol
    ("Cl" anywhere, or e.g. "[Cu]", "[Ca]" inside brackets) is left untouched,
    because rewriting it would produce a non-existent element such as "Nl".
    Lowercase aromatic 'c' is never substituted.

    Parameters:
        original_smiles (str): The original SMILES string containing 'C' atoms.
        replacements (List[str]): A list of replacement atom types (e.g., ["C", "N", "O"]).

    Returns:
        List[str]: A list of modified SMILES strings, each with "Br" appended
        to the end of the string. The strings are not validated here.
    """
    c_positions = []
    inside_brackets = False
    for i, char in enumerate(original_smiles):
        if char == "[":
            inside_brackets = True
        elif char == "]":
            inside_brackets = False
        elif char == "C":
            following = original_smiles[i + 1:i + 2]
            # Outside brackets the only two-letter symbol starting with C is
            # "Cl" ("Cn", "Co", "Cs" there mean carbon + aromatic atom).
            # Inside brackets any lowercase letter after C completes a
            # two-letter element symbol.
            if following == "l" or (inside_brackets and following.islower()):
                continue
            c_positions.append(i)

    # Every combination overwrites all substitution sites, so a single
    # character buffer can be reused instead of copying the string each time.
    buffer = list(original_smiles)
    cyclic_smiles_list_with_br = []
    for combo in itertools.product(replacements, repeat=len(c_positions)):
        for pos, replacement in zip(c_positions, combo):
            buffer[pos] = replacement
        cyclic_smiles_list_with_br.append("".join(buffer) + "Br")
    return cyclic_smiles_list_with_br

def perform_reaction(core_mol: Chem.Mol, group_smiles_list: List[str]) -> List[str]:
    """
    Couple the core molecule with each group by joining the atoms that carry a bromine.

    The reaction template matches one Br-bearing atom in each reactant, bonds
    those two atoms together and drops both bromines. For every group only the
    first product of the first product set is kept.

    Parameters:
        core_mol (Chem.Mol): RDKit molecule object of the core structure.
        group_smiles_list (List[str]): List of SMILES strings for the groups.

    Returns:
        List[str]: SMILES strings of the reaction products. Groups whose SMILES
        cannot be parsed, or that give no product, are skipped. The list is
        empty when core_mol is None.
    """
    product_smiles = []
    if core_mol is None:
        return product_smiles

    coupling = AllChem.ReactionFromSmarts("[*:1]Br.[Br][*:2]>>[*:1][*:2]")

    for candidate in group_smiles_list:
        fragment = Chem.MolFromSmiles(candidate)
        if fragment is None:
            # Unparseable group: nothing to react
            continue

        outcomes = coupling.RunReactants((core_mol, fragment))
        if not outcomes or not outcomes[0]:
            # The template did not match this pair of reactants
            continue

        first_product = outcomes[0][0]
        if not first_product:
            continue

        product_smiles.append(Chem.MolToSmiles(first_product))

    return product_smiles

def filter_smiles_by_rule_of_five(smiles_list: List[str], max_violations: int = 3) -> List[str]:
    """
    Keep only the SMILES strings whose Rule of Five violation count is within a limit.

    Parameters:
        smiles_list (List[str]): List of SMILES strings to filter.
        max_violations (int): Largest violation count that is still accepted
            (default is 3).

    Returns:
        List[str]: The accepted SMILES strings, in their original order.
        Strings for which no violation count can be computed are dropped.
    """
    def is_acceptable(candidate: str) -> bool:
        # None means the SMILES could not be evaluated at all
        count = calculate_rule_of_five_violations(candidate)
        return count is not None and count <= max_violations

    return [candidate for candidate in smiles_list if is_acceptable(candidate)]

def generate_SMILES(original_smiles: str, core_smiles: str) -> List[str]:
    """
    Enumerate C/N/O variants of a group, couple each to the core, filter and print the products.

    Parameters:
        original_smiles (str): The SMILES string of the group to vary.
        core_smiles (str): The SMILES string of the core molecule to react with.

    Returns:
        List[str]: Product SMILES that pass the Rule of Five filter with its
        default limit. Each one is also printed with its 1-based index and
        violation count.
    """
    # Every 'C' in the group is tried as carbon, nitrogen and oxygen
    group_smiles_list = generate_cyclic_smiles(original_smiles, ["C", "N", "O"])

    # Couple the variants to the core, then keep the drug-like products
    coupled = perform_reaction(Chem.MolFromSmiles(core_smiles), group_smiles_list)
    accepted = filter_smiles_by_rule_of_five(coupled)

    # Report each accepted product together with its violation count
    for i, smiles in enumerate(accepted, start=1):
        violations = calculate_rule_of_five_violations(smiles)
        print(f"{i}: {smiles} (Violations: {violations})")

    return accepted

# Group scaffolds whose 'C' positions are enumerated as C/N/O
inputs = [
    "CC1CCCC(C)C1",
    "CCC1CCCCC1C",
    "CC1=C(C)C=CC1",
    "CC1=CC(C)=CC1",
    "CC1=CC=C(C)C1",
    "CCC1CCCCC1",
    "CC1CCCCC1",
    "CC1CCCC1",
    "O1C=CC=C1",
    "CC1=COC=C1",
    "C1=CC=CC=C1",
    "CC1=CC=CC=C1",
    "C1CCC1",
    "CC1CCC1",
    "C1=CC=C1",
    "CC1=CC=C1",
]

# Brominated cores: the Br marks the attachment point used by the coupling.
# NOTE(review): "OC(O)" is written with two single-bonded oxygens; confirm
# that a carboxylic acid ("OC(=O)") was not intended.
core_smiles_list = [
    "OC(O)C1=CC=NC=C1NCC1CCCC2=C1C=CC(NBr)=C2",
    "OC(O)C1=CC=NC=C1NCC1CCCC2=C1C=CC(N(C)Br)=C2",
]

# Run the generation for every (core, group) pair; cores vary slowest
for core_smiles, input_smiles in itertools.product(core_smiles_list, inputs):
    generate_SMILES(input_smiles, core_smiles)

# 2- Second part of the research

![Result](assets/generative_smiles_2.png)

In [None]:
from itertools import product
from rdkit import Chem
from rdkit.Chem import MolToSmiles

# Ring scaffolds whose carbon atoms are enumerated over `replacement_atoms`.
# Each scaffold is listed once: a repeated entry would only redo the same
# enumeration, and the results are de-duplicated afterwards anyway.
original_smiles_list = [
    "C1CCCC2=C1C=CC=C2",
    "C1=CC=CC=C1",
    "C1CCCCC1",
    "C1CCCC1",
    "C1C=CC=C1",
    "C1=CC=CC2=C1C=CC=C2",
    "C1=CC=C(C=C1)C1=CC=CC=C1",
    "C1=CC(C=C1)C1=CC=CC=C1",
    "C1CCC(C1)C1=CC=CC=C1",
    "CC1=CC=CC=C1",
    "C1CCC1",
    "C1CCC(CC1)C1=CC=CC=C1",
    "C1CCC(C1)C1CCCC1",
    "C1CCC(C1)C1C=CC=C1",
    "C1=CC(C=C1)C1C=CC=C1",
    "C1CC2CCCCC2C1",
    "C1CCC2=CC=CC2C1",
    "C1CC2=C(C1)C=CC=C2",
    "C1CCC2(CC1)CCCCC2",
    "C1CCC2(C1)CCCCC2",
]

# Element symbols tried at every carbon position ("C" keeps the carbon)
replacement_atoms = ["C", "S", "O", "N"]

def generate_all_replacements(smiles):
    """
    Enumerate every variant of a molecule obtained by swapping its carbon atoms.

    Each carbon atom of the parsed molecule is replaced, in every possible
    combination, by one of the elements in the module-level
    `replacement_atoms` list. No sanitization step is run after the swap,
    so the emitted SMILES are not checked for chemical validity here.

    Parameters:
        smiles (str): SMILES string of the scaffold to vary.

    Returns:
        list: The distinct SMILES strings produced, in arbitrary order
        (they are collected in a set). Empty when the input cannot be
        parsed, in which case a message is printed.
    """
    mol = Chem.MolFromSmiles(smiles)
    if not mol:
        print(f"Invalid SMILES: {smiles}")
        return []

    carbon_indices = [atom.GetIdx() for atom in mol.GetAtoms() if atom.GetSymbol() == "C"]

    # Resolve each replacement symbol to its atomic number once, rather than
    # once per atom per combination inside the innermost loop.
    periodic_table = Chem.GetPeriodicTable()
    atomic_numbers = [periodic_table.GetAtomicNumber(symbol) for symbol in replacement_atoms]

    all_replaced_smiles = set()
    for assignment in product(atomic_numbers, repeat=len(carbon_indices)):
        # Work on a copy so the parsed scaffold stays untouched
        mol_copy = Chem.Mol(mol)
        for idx, atomic_number in zip(carbon_indices, assignment):
            mol_copy.GetAtomWithIdx(idx).SetAtomicNum(atomic_number)
        all_replaced_smiles.add(MolToSmiles(mol_copy, isomericSmiles=True))
    return list(all_replaced_smiles)

# Pool the enumerated variants of every scaffold into one list
all_replaced_smiles = []
for smiles in original_smiles_list:
    all_replaced_smiles += generate_all_replacements(smiles)

# Different scaffolds can yield the same SMILES, so collapse duplicates
unique_smiles = {*all_replaced_smiles}

# Report the count, then the SMILES in sorted order
print(f"Generated {len(unique_smiles)} unique SMILES:")
for smiles in sorted(unique_smiles):
    print(smiles)