# Building Polymers with CGSmiles and PolymerBuilder

This notebook demonstrates building polymers with different topologies:
- **Linear chains**: `{[#EO2]|4[#PS]}`
- **Cyclic (ring) structures**: `{[#EO2]1[#PS][#EO2][#PS][#EO2]1}`
- **Branched structures**: `{[#PS][#EO3]([#PS])([#PS])}` (requires a 3-arm monomer)

## Step 1: Import Libraries

In [1]:
import numpy as np
import molpy as mp
from molpy.core.atomistic import Atomistic
from molpy.external import RDKitAdapter, Generate3D
from molpy.parser.smiles import bigsmilesir_to_polymerspec, parse_bigsmiles, parse_cgsmiles
from molpy.reacter import Reacter, select_hydroxyl_group, form_single_bond
from molpy.reacter.selectors import select_port_atom
from molpy.reacter.utils import find_neighbors
from molpy.builder.polymer.connectors import ReacterConnector
from molpy.builder.polymer.placer import CovalentSeparator, LinearOrienter, Placer
from molpy.builder.polymer import PolymerBuilder
from molpy.builder.polymer.port_utils import get_all_port_info
from molpy.typifier.atomistic import OplsAtomisticTypifier
from molpy.io.data.lammps import LammpsDataWriter
from pathlib import Path

## Step 2: Load Force Field

In [2]:
# Read the force-field XML once; the resulting typifier is shared by the
# monomer builder and the polymer builder further down.
FORCEFIELD_XML = "oplsaa.xml"

ff = mp.io.read_xml_forcefield(FORCEFIELD_XML)
# NOTE(review): strict_typing is switched off here; presumably this lets
# typing continue when an atom has no matching type - confirm in the
# typifier documentation.
typifier = OplsAtomisticTypifier(ff, strict_typing=False)
print("✅ Force field loaded")



✅ Force field loaded


## Step 3: Build Monomers

Build three types of monomers using symmetric `$` ports (all ports are equivalent):
1. **EO2** (linear): 2 ports using `[$]`
2. **PS** (linear): 2 ports using `[$]`
3. **EO3** (3-arm): 3 ports using `[$]`


In [3]:
def build_monomer_from_bigsmiles(bigsmiles: str, typifier):
    """Turn a single-monomer BigSMILES string into a typed monomer.

    The string is parsed, converted to a polymer spec and checked to contain
    exactly one monomer. That monomer is wrapped in an ``RDKitAdapter`` and
    run through ``Generate3D`` with ``add_hydrogens``, ``embed``, ``optimize``
    and ``update_internal`` all enabled. Topology is then requested with
    angle and dihedral generation switched on, atoms are numbered from 1 in
    their ``"id"`` field, and the typifier is applied.

    Args:
        bigsmiles: BigSMILES text describing exactly one monomer.
        typifier: Object exposing ``typify(structure)``; its return value is
            ignored here.

    Returns:
        The monomer structure read back from the adapter.

    Raises:
        ValueError: If the parsed spec does not hold exactly one monomer.
    """
    spec = bigsmilesir_to_polymerspec(parse_bigsmiles(bigsmiles))
    monomers = spec.all_monomers()
    if len(monomers) != 1:
        raise ValueError(f"Expected 1 monomer, got {len(monomers)}")

    # Round-trip through RDKit: wrap the monomer, run the 3D pipeline, then
    # read the structure back out of the adapter.
    rdkit_adapter = RDKitAdapter(internal=monomers[0])
    to_3d = Generate3D(
        add_hydrogens=True,
        embed=True,
        optimize=True,
        update_internal=True,
    )
    monomer = to_3d(rdkit_adapter).get_internal()
    monomer.get_topo(gen_angle=True, gen_dihe=True)

    # Atom ids are 1-based.
    for atom_id, atom in enumerate(monomer.atoms, start=1):
        atom["id"] = atom_id

    typifier.typify(monomer)
    return monomer

# Two-port (linear) monomers and the three-port branching monomer; every
# connection site uses the symmetric `[$]` descriptor.
eo2 = build_monomer_from_bigsmiles("{[$]OCCO[$]}", typifier)
ps = build_monomer_from_bigsmiles("{[$]OCC(c1ccccc1)CO[$]}", typifier)
eo3 = build_monomer_from_bigsmiles("{[$]OCC(CO[$])(CO[$])}", typifier)

# Summary: atom count and port names per monomer.
print("✅ Monomers built:")
print(f"   EO2: {len(eo2.atoms)} atoms, ports: {list(get_all_port_info(eo2).keys())}")
print(f"   PS: {len(ps.atoms)} atoms, ports: {list(get_all_port_info(ps).keys())}")
print(f"   EO3: {len(eo3.atoms)} atoms, ports: {list(get_all_port_info(eo3).keys())}")

# Sanity check for branching: EO3 is expected to expose three ports.
# NOTE(review): this counts the *keys* returned by get_all_port_info. The
# saved output lists ['$'] for EO2 and PS as well, although their BigSMILES
# contain two `[$]` descriptors, so same-named ports appear to collapse into
# one key and this warning may fire even when three sites exist - confirm how
# get_all_port_info reports same-named ports.
eo3_ports = list(get_all_port_info(eo3).keys())
if len(eo3_ports) < 3:
    print(
        f"⚠️  WARNING: EO3 only has {len(eo3_ports)} port(s): {eo3_ports}",
        "   BigSMILES format may not support multiple descriptors in parentheses.",
        "   Consider using atom class notation or manually marking ports.",
        sep="\n",
    )


✅ Monomers built:
   EO2: 10 atoms, ports: ['$']
   PS: 29 atoms, ports: ['$']
   EO3: 17 atoms, ports: ['$']
   BigSMILES format may not support multiple descriptors in parentheses.
   Consider using atom class notation or manually marking ports.


## Step 4: Configure Polymer Builder

In [4]:
# Monomer library handed to PolymerBuilder; the keys are the labels written
# as `[#LABEL]` in the CGSmiles strings built later in this notebook.
library = dict(EO2=eo2, PS=ps, EO3=eo3)

# Reaction selectors
def select_carbon_from_oh(assembly: Atomistic, port_name: str):
    """Return the first carbon bonded to the atom at port ``port_name``.

    The port atom is looked up with ``select_port_atom`` and is expected to
    be the hydroxyl oxygen (its element is not checked here).

    Args:
        assembly: Structure that carries the port.
        port_name: Name of the port to resolve.

    Returns:
        The first entry of the port atom's carbon neighbours.

    Raises:
        ValueError: If the port atom has no carbon neighbour.
    """
    oxygen = select_port_atom(assembly, port_name)
    carbons = find_neighbors(assembly, oxygen, element="C")
    if carbons:
        return carbons[0]
    raise ValueError(f"No C neighbor for -OH at port {port_name}")

def select_h_from_oh(assembly: Atomistic, port_atom):
    """Pick the hydrogen to remove from a hydroxyl oxygen.

    Args:
        assembly: Structure containing ``port_atom``.
        port_atom: Atom whose ``"symbol"`` must be ``"O"``.

    Returns:
        A one-element list holding the first hydrogen neighbour of
        ``port_atom``.

    Raises:
        ValueError: If ``port_atom`` is not an oxygen, or if it has no
            hydrogen neighbour.
    """
    is_oxygen = port_atom["symbol"] == "O"
    if not is_oxygen:
        raise ValueError(f"Expected O, got {port_atom['symbol']}")

    hydrogens = find_neighbors(assembly, port_atom, element="H")
    if hydrogens:
        # Only one hydrogen is returned, wrapped in a list.
        return [hydrogens[0]]
    raise ValueError("No H found bonded to -OH")

# Reaction used for every monomer-monomer connection.
# Left side: anchor on the carbon next to the port oxygen and remove what
# select_hydroxyl_group returns. Right side: anchor on the port atom itself
# and remove one hydrogen from it.
# NOTE(review): presumably this amounts to losing one water per new C-O
# bond - confirm against the Reacter documentation.
dehydration_reacter = Reacter(
    name="dehydration_ether_formation",
    port_selector_left=select_carbon_from_oh,
    leaving_selector_left=select_hydroxyl_group,
    port_selector_right=select_port_atom,
    leaving_selector_right=select_h_from_oh,
    bond_former=form_single_bond,
)

# Every ordered pair of library labels connects through the symmetric "$" port.
port_map = {
    (left_label, right_label): ("$", "$")
    for left_label in library
    for right_label in library
}

connector = ReacterConnector(default=dehydration_reacter, port_map=port_map)
placer = Placer(separator=CovalentSeparator(), orienter=LinearOrienter())
builder = PolymerBuilder(
    library=library,
    connector=connector,
    placer=placer,
    typifier=typifier,
)

print("✅ Polymer builder configured")
print(f"   Library: {list(library.keys())}")

✅ Polymer builder configured
   Library: ['EO2', 'PS', 'EO3']


## Step 5: Helper Functions

In [5]:
# Folder that receives the LAMMPS data files exported by the example cells;
# it is created on demand, and re-running the cell is harmless.
output_dir = Path("case1_output")
output_dir.mkdir(exist_ok=True, parents=True)

def export_to_lammps(structure: Atomistic, filepath: Path):
    """Write ``structure`` to ``filepath`` with ``LammpsDataWriter``.

    The structure is converted to a frame first. If the frame has an
    ``"atoms"`` table, missing ``"mol"`` and ``"q"`` columns are filled with
    ones (int) and zeros (float) respectively, one value per atom row.

    Args:
        structure: Structure exposing ``to_frame()``.
        filepath: Destination passed to ``LammpsDataWriter``.
    """
    frame = structure.to_frame()

    if "atoms" in frame:
        atoms = frame["atoms"]
        n_atoms = atoms.nrows
        # (column, array factory, dtype) for columns that get a default when absent.
        # NOTE(review): presumably molecule id and charge - confirm against
        # the LammpsDataWriter documentation.
        fallbacks = (
            ("mol", np.ones, int),
            ("q", np.zeros, float),
        )
        for column, make_fill, dtype in fallbacks:
            if column not in atoms:
                atoms[column] = make_fill(n_atoms, dtype=dtype)

    LammpsDataWriter(filepath).write(frame)

## Example 1: Linear Polymer

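Build a linear chain: `{[#EO2]|4[#PS]}` (the `|4` repeat operator expands `[#EO2]` into four consecutive EO2 units, followed by one PS unit)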

In [6]:
# Linear topology.
# NOTE(review): the saved output of this cell shows builder.build raising
# "ValueError: No hydrogen found bonded to hydroxyl oxygen"; the cell does
# not currently run to completion.
cgsmiles_linear = "{[#EO2]|4[#PS]}"
print(f"Building: {cgsmiles_linear}")

build_result = builder.build(cgsmiles_linear)
chain = build_result.polymer

# Size summary of the assembled chain.
print(
    f"✅ Built successfully:",
    f"   Atoms: {len(chain.atoms)}",
    f"   Bonds: {len(chain.bonds)}",
    f"   Connection steps: {build_result.total_steps}",
    sep="\n",
)

export_to_lammps(chain, output_dir / "linear.data")
print(f"   Exported to {output_dir}/linear.data")

Building: {[#EO2]|4[#PS]}


ValueError: No hydrogen found bonded to hydroxyl oxygen

## Example 2: Cyclic (Ring) Polymer

Build a cyclic polymer: `{[#EO2]1[#PS][#EO2][#PS][#EO2]1}`

In [None]:
# Ring topology: the two `1` markers on the first and last EO2 close the ring.
# NOTE(review): this cell has no execution count, so the saved output below
# it may come from an earlier session - re-run after a kernel restart.
cgsmiles_ring = "{[#EO2]1[#PS][#EO2][#PS][#EO2]1}"
print(f"Building: {cgsmiles_ring}")

build_result = builder.build(cgsmiles_ring)
ring_chain = build_result.polymer

# Size summary of the assembled ring.
print(
    f"✅ Ring polymer built:",
    f"   Atoms: {len(ring_chain.atoms)}",
    f"   Bonds: {len(ring_chain.bonds)}",
    f"   Connection steps: {build_result.total_steps}",
    sep="\n",
)

export_to_lammps(ring_chain, output_dir / "ring.data")
print(f"   Exported to {output_dir}/ring.data")

Building: {[#EO2]1[#PS][#EO2][#PS][#EO2]1}
✅ Ring polymer built:
   Atoms: 73
   Bonds: 75
   Connection steps: 5
   Exported to case1_output/ring.data


## Example 3: Branched Polymer

Build a branched structure: `{[#PS][#EO3]([#PS])([#PS])}`

This requires the 3-arm monomer (EO3), which must expose 3 ports so that the branch point can connect to 3 neighbors.

In [None]:
# Branched topology: EO3 is the branch point and is bonded to three PS units.
# NOTE(review): the saved output of this cell shows builder.build raising
# "NoCompatiblePortsError: Node 47 (label 'EO3') has no available ports".
cgsmiles_branch = "{[#PS][#EO3]([#PS])([#PS])}"
print(f"Building: {cgsmiles_branch}")

# The CGSmiles string is parsed on its own here.
# NOTE(review): ir_branch is not used anywhere in this cell; the "Graph:" and
# "Branch point:" lines in the saved output are not printed by this code.
ir_branch = parse_cgsmiles(cgsmiles_branch)

build_result = builder.build(cgsmiles_branch)
branch_chain = build_result.polymer

# Size summary of the assembled branched polymer.
print(
    f"\n✅ Branch polymer built:",
    f"   Atoms: {len(branch_chain.atoms)}",
    f"   Bonds: {len(branch_chain.bonds)}",
    f"   Connection steps: {build_result.total_steps}",
    sep="\n",
)

export_to_lammps(branch_chain, output_dir / "branch.data")
print(f"   Exported to {output_dir}/branch.data")

Building: {[#PS][#EO3]([#PS])([#PS])}
   Graph: 4 nodes, 3 bonds
   Branch point: Node [EO3] has 3 connections


NoCompatiblePortsError: Node 47 (label 'EO3') has no available ports