In [37]:
from rdkit import Chem
from rdkit.Chem import AllChem

# Start with methane (CH4)
smiles = "C"
mol = Chem.MolFromSmiles(smiles)
mol = Chem.AddHs(mol)

# Convert to editable molecule
emol = Chem.EditableMol(mol)

# Remove one hydrogen (index 1)
emol.RemoveAtom(1)

# Add new carbon
emol.AddAtom(Chem.Atom(6))  # Carbon, index 4
emol.AddBond(0, 4, Chem.BondType.SINGLE)

# Add hydrogen below new carbon
emol.AddAtom(Chem.Atom(1))  # Hydrogen, index 5
emol.AddBond(4, 5, Chem.BondType.SINGLE)

# Add carbon above (no hydrogens, connects to two oxygens)
emol.AddAtom(Chem.Atom(6))  # Carbon, index 6
emol.AddBond(4, 6, Chem.BondType.SINGLE)

# Add oxygen to right of carbon (no hydrogens)
emol.AddAtom(Chem.Atom(8))  # Oxygen, index 7
emol.AddBond(6, 7, Chem.BondType.SINGLE)

# Add oxygen to left of carbon (no hydrogens)
emol.AddAtom(Chem.Atom(8))  # Oxygen, index 8
emol.AddBond(6, 8, Chem.BondType.SINGLE)

# Add carbon to left of oxygen
emol.AddAtom(Chem.Atom(6))  # Carbon, index 9
emol.AddBond(8, 9, Chem.BondType.SINGLE)

# Add 3 hydrogens to final carbon
for _ in range(3):
    emol.AddAtom(Chem.Atom(1))  # Hydrogen
    emol.AddBond(9, emol.GetMol().GetNumAtoms()-1, Chem.BondType.SINGLE)

# Get the modified molecule
mol = emol.GetMol()

# Copy the molecule
mol_copy = Chem.Mol(mol)

# Mirror the copy (invert x-coordinates for 2D)
AllChem.Compute2DCoords(mol_copy)
for atom in mol_copy.GetAtoms():
    conf = mol_copy.GetConformer()
    pos = conf.GetAtomPosition(atom.GetIdx())
    conf.SetAtomPosition(atom.GetIdx(), (-pos.x, pos.y, pos.z))

# Combine original and mirrored molecule
combined_mol = Chem.EditableMol(Chem.Mol(mol))

# Add atoms from mirrored molecule
atom_map = {}
for atom in mol_copy.GetAtoms():
    new_idx = combined_mol.AddAtom(atom)
    atom_map[atom.GetIdx()] = new_idx

# Add bonds from mirrored molecule
for bond in mol_copy.GetBonds():
    begin_idx = atom_map[bond.GetBeginAtomIdx()]
    end_idx = atom_map[bond.GetEndAtomIdx()]
    combined_mol.AddBond(begin_idx, end_idx, bond.GetBondType())

# Connect original and mirrored molecule at carbon with single hydrogen (index 4)
combined_mol.AddBond(4, atom_map[4], Chem.BondType.SINGLE)

# Get the final molecule
final_mol = combined_mol.GetMol()

# Sanitize molecule
Chem.SanitizeMol(final_mol)

# Generate 3D coordinates
AllChem.EmbedMolecule(final_mol, randomSeed=42)

# Generate XYZ format
n_atoms = final_mol.GetNumAtoms()
xyz = f"{n_atoms}\nMolecule generated by RDKit\n"
conf = final_mol.GetConformer()
for atom in final_mol.GetAtoms():
    pos = conf.GetAtomPosition(atom.GetIdx())
    symbol = atom.GetSymbol()
    xyz += f"{symbol} {pos.x:.6f} {pos.y:.6f} {pos.z:.6f}\n"
    
# Save to file (optional)
with open("molecule.xyz", "w") as f:
    f.write(xyz)

[16:36:35] Molecule does not have explicit Hs. Consider calling AddHs()


In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem

def create_terminal_group():
    """Create terminal group: carbon with 3 hydrogens."""
    mol = Chem.MolFromSmiles("C")
    emol = Chem.EditableMol(mol)
    for _ in range(3):
        emol.AddAtom(Chem.Atom(1))  # Hydrogen
        emol.AddBond(0, emol.GetMol().GetNumAtoms()-1, Chem.BondType.SINGLE)
    return emol.GetMol()

def create_methyl_acrylate_unit(single_bond=True):
    """Create methyl acrylate unit with single or double bond to carbon."""
    # Base structure: CH2=CH-C(=O)OCH3, modified for single bond if needed
    smiles = "C=C-C(=O)OC" if not single_bond else "CC-C(=O)OC"
    mol = Chem.MolFromSmiles(smiles)
    return mol

def connect_molecules(mol1, mol2, atom1_idx, atom2_idx, bond_type=Chem.BondType.SINGLE):
    """Connect two molecules at specified atom indices."""
    combined = Chem.EditableMol(Chem.Mol(mol1))
    atom_map = {i: i for i in range(mol1.GetNumAtoms())}
    
    # Add atoms from mol2
    for atom in mol2.GetAtoms():
        new_idx = combined.AddAtom(atom)
        atom_map[atom.GetIdx()] = new_idx
    
    # Add bonds from mol2
    for bond in mol2.GetBonds():
        begin_idx = atom_map[bond.GetBeginAtomIdx()]
        end_idx = atom_map[bond.GetEndAtomIdx()]
        combined.AddBond(begin_idx, end_idx, bond.GetBondType())
    
    # Connect mol1 and mol2
    combined.AddBond(atom1_idx, atom_map[atom2_idx], bond_type)
    
    return combined.GetMol()

def build_polymer(n_units=2):
    """Generalized function to build polymer with terminal groups and n methyl acrylate units."""
    # Create terminal group (CH3)
    polymer = create_terminal_group()
    
    # Add n methyl acrylate units
    for i in range(n_units):
        unit = create_methyl_acrylate_unit(single_bond=True)
        # Connect to previous unit or terminal group
        # Terminal group carbon (index 0) or last carbon of previous unit
        atom1_idx = 0 if i == 0 else polymer.GetNumAtoms() - 1  # Last atom of chain
        atom2_idx = 0  # First carbon of methyl acrylate unit
        polymer = connect_molecules(polymer, unit, atom1_idx, atom2_idx)
    
    # Add final terminal group
    final_terminal = create_terminal_group()
    atom1_idx = polymer.GetNumAtoms() - 1  # Last atom of last unit
    atom2_idx = 0  # Carbon of terminal group
    polymer = connect_molecules(polymer, final_terminal, atom1_idx, atom2_idx)
    
    # Sanitize
    Chem.SanitizeMol(polymer)
    
    # Generate 3D coordinates
    AllChem.EmbedMolecule(polymer, randomSeed=42)
    
    # Generate XYZ format
    n_atoms = polymer.GetNumAtoms()
    xyz = f"{n_atoms}\nPolymer generated by RDKit\n"
    conf = polymer.GetConformer()
    for atom in polymer.GetAtoms():
        pos = conf.GetAtomPosition(atom.GetIdx())
        symbol = atom.GetSymbol()
        xyz += f"{symbol} {pos.x:.6f} {pos.y:.6f} {pos.z:.6f}\n"
    
    return xyz

# Build polymer with 2 units
xyz_output = build_polymer(n_units=4)

[16:47:24] Molecule does not have explicit Hs. Consider calling AddHs()


In [2]:
from rdkit import Chem
from rdkit.Chem import AllChem

def create_terminal_group():
    """Create terminal group: carbon with 3 hydrogens."""
    mol = Chem.MolFromSmiles("C")
    emol = Chem.EditableMol(mol)
    for _ in range(3):
        emol.AddAtom(Chem.Atom(1))  # Hydrogen
        emol.AddBond(0, emol.GetMol().GetNumAtoms()-1, Chem.BondType.SINGLE)
    return emol.GetMol()

def create_methyl_acrylate_unit():
    """Create modified methyl acrylate unit with single bond to first carbon."""
    smiles = "CC-C(=O)OC"  # CH3-C-C(=O)OCH3
    mol = Chem.MolFromSmiles(smiles)
    return mol

def check_valences(mol, step=""):
    """Debug valences of all atoms in the molecule."""
    for atom in mol.GetAtoms():
        idx = atom.GetIdx()
        symbol = atom.GetSymbol()
        valence = atom.GetExplicitValence()
        expected = Chem.GetPeriodicTable().GetDefaultValence(atom.GetAtomicNum())
        if valence > expected:
            print(f"Valence error at {step}: Atom {idx} ({symbol}) has valence {valence}, expected <= {expected}")
            raise ValueError(f"Invalid valence for atom {idx}")
    print(f"Valences OK at {step}")

def connect_molecules(mol1, mol2, atom1_idx, atom2_idx, bond_type=Chem.BondType.SINGLE, remove_hydrogen=False):
    """Connect two molecules at specified atom indices, optionally removing a hydrogen."""
    combined = Chem.EditableMol(Chem.Mol(mol1))
    atom_map = {i: i for i in range(mol1.GetNumAtoms())}
    
    # Add atoms from mol2
    for atom in mol2.GetAtoms():
        new_idx = combined.AddAtom(atom)
        atom_map[atom.GetIdx()] = new_idx
    
    # Add bonds from mol2
    for bond in mol2.GetBonds():
        begin_idx = atom_map[bond.GetBeginAtomIdx()]
        end_idx = atom_map[bond.GetEndAtomIdx()]
        combined.AddBond(begin_idx, end_idx, bond.GetBondType())
    
    # Remove a hydrogen from atom1_idx if specified
    if remove_hydrogen:
        for neighbor in mol1.GetAtomWithIdx(atom1_idx).GetNeighbors():
            if neighbor.GetSymbol() == 'H':
                combined.RemoveAtom(neighbor.GetIdx())
                break
    
    # Add bond between mol1 and mol2
    combined.AddBond(atom1_idx, atom_map[atom2_idx], bond_type)
    
    mol = combined.GetMol()
    check_valences(mol, f"After connecting atoms {atom1_idx} and {atom2_idx}")
    return mol

def find_first_carbon(mol, start_idx):
    """Find the index of the first carbon in a methyl acrylate unit (C with one H)."""
    for atom in mol.GetAtoms():
        if atom.GetIdx() >= start_idx and atom.GetSymbol() == 'C':
            neighbors = [n.GetSymbol() for n in atom.GetNeighbors()]
            if neighbors.count('H') == 1 and len(neighbors) == 3:  # C with one H, two other bonds
                return atom.GetIdx()
    raise ValueError("First carbon not found")

def build_polymer():
    """Build polymer with 4 methyl acrylate units, symmetric structure."""
    # Forward chain: Terminal + 2 methyl acrylate units
    polymer = create_terminal_group()  # CH3
    check_valences(polymer, "Initial terminal")
    
    unit1 = create_methyl_acrylate_unit()  # First methyl acrylate
    polymer = connect_molecules(polymer, unit1, 0, 0, remove_hydrogen=True)  # Terminal C to first C
    
    # Connect second unit to first carbon of unit1
    unit2 = create_methyl_acrylate_unit()
    first_c_unit1 = find_first_carbon(polymer, 1)
    polymer = connect_molecules(polymer, unit2, first_c_unit1, 0, remove_hydrogen=True)  # First C of unit1 to first C of unit2
    
    # Add central carbon to first carbon of unit2
    emol = Chem.EditableMol(polymer)
    first_c_unit2 = find_first_carbon(polymer, polymer.GetNumAtoms() - unit2.GetNumAtoms())
    emol.AddAtom(Chem.Atom(6))  # New carbon
    emol.AddBond(first_c_unit2, polymer.GetNumAtoms(), Chem.BondType.SINGLE)
    polymer = emol.GetMol()
    check_valences(polymer, "After adding central carbon")
    
    # Backward chain: Mirror the structure
    unit3 = create_methyl_acrylate_unit()
    polymer = connect_molecules(polymer, unit3, polymer.GetNumAtoms()-1, 0, remove_hydrogen=True)  # Central C to first C of unit3
    
    # Connect unit4 to first carbon of unit3
    unit3_start_idx = polymer.GetNumAtoms() - unit3.GetNumAtoms()
    first_c_unit3 = find_first_carbon(polymer, unit3_start_idx)
    unit4 = create_methyl_acrylate_unit()
    polymer = connect_molecules(polymer, unit4, first_c_unit3, 0, remove_hydrogen=True)  # First C of unit3 to first C of unit4
    
    # Connect terminal group to first carbon of unit4
    unit4_start_idx = polymer.GetNumAtoms() - unit4.GetNumAtoms()
    first_c_unit4 = find_first_carbon(polymer, unit4_start_idx)
    final_terminal = create_terminal_group()
    polymer = connect_molecules(polymer, final_terminal, first_c_unit4, 0, remove_hydrogen=True)  # First C of unit4 to terminal
    
    # Sanitize
    Chem.SanitizeMol(polymer)
    check_valences(polymer, "After sanitization")
    
    # Generate 3D coordinates
    AllChem.EmbedMolecule(polymer, randomSeed=42)
    
    # Generate XYZ format
    n_atoms = polymer.GetNumAtoms()
    xyz = f"{n_atoms}\nPolymer generated by RDKit\n"
    conf = polymer.GetConformer()
    for atom in polymer.GetAtoms():
        pos = conf.GetAtomPosition(atom.GetIdx())
        symbol = atom.GetSymbol()
        xyz += f"{symbol} {pos.x:.6f} {pos.y:.6f} {pos.z:.6f}\n"
    
    return xyz

# Build polymer
xyz_output = build_polymer()

[17:15:17] 

****
Pre-condition Violation
getValence(ValenceType::EXPLICIT) called without call to calcExplicitValence()
Violation occurred on line 319 in file /project/build/temp.linux-x86_64-cpython-310/rdkit/Code/GraphMol/Atom.cpp
Failed Expression: (which == ValenceType::IMPLICIT || d_explicitValence > -1)
----------
Stacktrace:
----------
****



RuntimeError: Pre-condition Violation
	getValence(ValenceType::EXPLICIT) called without call to calcExplicitValence()
	Violation occurred on line 319 in file Code/GraphMol/Atom.cpp
	Failed Expression: (which == ValenceType::IMPLICIT || d_explicitValence > -1)
	RDKIT: 2025.03.3
	BOOST: 1_85


In [67]:
# Save to file (optional)
with open("molecule.xyz", "w") as f:
    f.write(xyz_output)