PSMILES → LAMMPS data file generator: a fully automated pipeline
=======FRAMEWORK=======
1. Polymer PSMILES -> completed polymer chain using pysoftk -> pdb file
2. Build the Li+ and TFSI- ions and write each to a pdb file using RDKit
3. Pack polymer chains and ions into one pdb file
4. Convert pdb file into mol file using RDKit
5. Convert packed mol file into .car and .mdf file
6. Finally, generate the LAMMPS input data from the .car and .mdf files using insight2lammps.pl from EMC (Enhanced Monte Carlo)

In [None]:
'''
IMPORT FILES
'''
import subprocess, math, datetime
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Descriptors import ExactMolWt
from pysoftk.linear_polymer.linear_polymer import Lp
from pathlib import Path
from rdkit.Chem.rdchem import HybridizationType as Hyb
import numpy as np

# Avogadro constant (entities per mole); calc_chain_count() uses it to turn
# a mole amount into a number of chains.
AVOGADRO = 6.02214076e23

In [None]:
'''PARAMETERS'''
# Repeat unit as PSMILES; build_polymer() replaces every "*" with "Br".
star_smi      = "*OCC(NC(*)=O)N"
# Passed to pysoftk's Lp as its third positional argument
# (presumably the number of repeat units -- confirm against pysoftk).
dp            = 22
# Passed to Lp as `shift`.
shift         = 1.5
# Passed to Lp.linear_polymer as `relax_iterations`.
relax_iters   = 3000
# Target density; the chain-count step prints it in g/cm³.
target_density= 0.1
# NOTE(review): not referenced by any other code visible in this notebook.
unit_per_li   = 6.0

# File names shared between the pipeline steps.
polymer_pdb = "polymer.pdb"
li_pdb      = "li.pdb"
tfsi_pdb    = "tfsi.pdb"
# Edge length of the cubic box (calc_chain_count uses box_size**3).
box_size    = 40.0  # Å
packed_pdb = "packed_manual.pdb"
output_mol = "output.mol"

1. Building Linear Polymers

In [None]:
def build_polymer(psmiles, dp, shift, relax_iters, mol_out, pdb_out):
    """
    Build a linear polymer from a PSMILES repeat unit and save it to disk.

    Every "*" in `psmiles` is replaced with "Br", the resulting monomer is
    embedded in 3D and UFF-optimized with RDKit, and pysoftk's `Lp` assembles
    and relaxes the chain.

    Args:
      psmiles: repeat-unit SMILES with "*" marking the connection points
      dp: forwarded to `Lp` as its third positional argument
      shift: forwarded to `Lp` as `shift`
      relax_iters: forwarded to `linear_polymer` as `relax_iterations`
      mol_out: path of the V3000 mol file to write
      pdb_out: path of the PDB file to write
    Returns:
      (poly_rd, pdb_out): the polymer as an RDKit Mol and the PDB path
    Raises:
      ValueError: if RDKit cannot parse the Br-capped monomer SMILES
      RuntimeError: if 3D embedding fails or the polymer mol block cannot be
        read back by RDKit
    """
    print("Building Linear Polmer...")
    capped_smiles = psmiles.replace("*", "Br")
    mono = Chem.MolFromSmiles(capped_smiles)
    if mono is None:
        # MolFromSmiles signals a parse failure by returning None.
        raise ValueError(f"RDKit could not parse monomer SMILES: {capped_smiles!r}")
    mono = Chem.AddHs(mono)
    # EmbedMolecule returns -1 when no conformer could be generated; without
    # a conformer the force-field steps below cannot work.
    if AllChem.EmbedMolecule(mono, AllChem.ETKDGv3()) < 0:
        raise RuntimeError(f"3D embedding failed for monomer: {capped_smiles!r}")
    AllChem.UFFOptimizeMolecule(mono, maxIters=1500)
    linear = Lp(mono, "Br", dp, shift=shift)
    poly = linear.linear_polymer(force_field="UFF", relax_iterations=relax_iters)
    mol_block = poly.write("mol")
    poly_rd = Chem.MolFromMolBlock(mol_block, removeHs=False)
    if poly_rd is None:
        raise RuntimeError("RDKit could not read the polymer mol block produced by pysoftk")
    Chem.MolToMolFile(poly_rd, mol_out, forceV3000=True)
    Chem.MolToPDBFile(poly_rd, pdb_out)
    return poly_rd, pdb_out

def build_ion_pdb(smiles, pdb_out, embed=True):
    """
    Write a single ion (or small molecule) given as SMILES to a PDB file.

    Args:
      smiles: SMILES string of the species
      pdb_out: path of the PDB file to write
      embed: when True, generate 3D coordinates and UFF-optimize them before
        writing; when False, the molecule is written without embedding
    Returns:
      pdb_out, unchanged
    Raises:
      ValueError: if RDKit cannot parse `smiles`
      RuntimeError: if `embed` is True and 3D embedding fails
    """
    m = Chem.MolFromSmiles(smiles)
    if m is None:
        # MolFromSmiles signals a parse failure by returning None.
        raise ValueError(f"RDKit could not parse SMILES: {smiles!r}")
    m = Chem.AddHs(m)
    if embed:
        # EmbedMolecule returns -1 when no conformer could be generated.
        if AllChem.EmbedMolecule(m, AllChem.ETKDGv3()) < 0:
            raise RuntimeError(f"3D embedding failed for SMILES: {smiles!r}")
        AllChem.UFFOptimizeMolecule(m, maxIters=1000)
    Chem.MolToPDBFile(m, pdb_out)
    return pdb_out

def calc_chain_count(poly_mol, box_size, target_density):
    """
    Estimate how many chains fill a cubic box at a target density.

    Args:
      poly_mol: RDKit Mol of one polymer chain
      box_size: box edge length; multiplied by 1e-24 after cubing, i.e.
        treated as Å when converting the volume to cm³
      target_density: desired density (mass per cm³ in the units of the
        molecular weight, i.e. g/cm³)
    Returns:
      The rounded chain count, never less than 1.

    NOTE(review): ExactMolWt is RDKit's exact-mass descriptor; confirm that
    it, rather than the average molecular weight, is intended here.
    """
    chain_mass = ExactMolWt(poly_mol)
    box_volume_cm3 = (box_size**3)*1e-24
    n_moles = target_density*box_volume_cm3/chain_mass
    n_chains = int(round(n_moles*AVOGADRO))
    # Always place at least one chain.
    if n_chains < 1:
        return 1
    return n_chains

In [None]:
# Build the chain and write it to the path configured in the parameters cell,
# so the packing step (which reads `polymer_pdb`) always picks up this file.
poly_mol, poly_pdb = build_polymer(
    star_smi, dp, shift, relax_iters,
    mol_out="polymer_v3000.mol", pdb_out=polymer_pdb
)
print("Sucessfully build linear polymer")

Building Linear Polmer...


2. Li+, TFSI- -> .pdb

In [18]:
# 2) count chains
n_poly = calc_chain_count(poly_mol, box_size, target_density)
print(f"→ Using {n_poly} chains for {target_density} g/cm³")

# 3) ions -- written to the paths configured in the parameters cell
li_pdb   = build_ion_pdb("[Li+]", li_pdb, embed=False)
# TFSI- is bis(trifluoromethanesulfonyl)imide: the negative charge sits on the
# nitrogen bridging the two CF3-SO2 groups, with no oxygen bonded to nitrogen.
tfsi_smi = "[N-](S(=O)(=O)C(F)(F)F)S(=O)(=O)C(F)(F)F"
tfsi_pdb = build_ion_pdb(tfsi_smi, tfsi_pdb, embed=True)

# 4) ion counts: molality (mol salt per kg polymer) times the polymer mass in
#    the box (n_poly * MW, converted from g to kg) gives ion pairs per box.
MW = ExactMolWt(poly_mol)
salt_molality = 1.5
li_count   = int(round(salt_molality * n_poly * MW / 1000))
tfsi_count = li_count
print(f"→ Li⁺: {li_count}, TFSI⁻: {tfsi_count}")

→ Using 2 chains for 0.1 g/cm³
→ Li⁺: 7, TFSI⁻: 7


3. Pack into one pdb file

In [None]:
def parse_pdb(pdb_file):
    """
    Parse PDB file to extract ATOM/HETATM lines and their coordinates.

    Coordinates are read from the fixed columns 31-38, 39-46 and 47-54 of
    each record; all other record types are ignored.

    Returns:
      atom_lines: list of original PDB lines (str), trailing newline removed
      coords: numpy array of shape (N, 3); (0, 3) when the file holds no
        ATOM/HETATM records
    Raises:
      ValueError: if a coordinate field cannot be parsed as a float
    """
    atom_lines = []
    coords = []
    # Context manager so the handle is closed even if a float() below raises.
    with open(pdb_file) as handle:
        for line in handle:
            if line.startswith(('ATOM  ', 'HETATM')):
                atom_lines.append(line.rstrip('\n'))
                coords.append([
                    float(line[30:38]),
                    float(line[38:46]),
                    float(line[46:54]),
                ])
    # reshape keeps the documented (N, 3) shape when no atoms were found.
    return atom_lines, np.array(coords, dtype=float).reshape(-1, 3)

def write_pdb(output_file, lines):
    """
    Save PDB records to `output_file`, one per line, followed by a final
    END record. An existing file is overwritten.
    """
    with open(output_file, 'w') as out:
        out.writelines(record + '\n' for record in lines)
        out.write('END\n')

def _min_pair_distance(coords_a, coords_b):
    """Return the smallest distance between any row of `coords_a` and any row
    of `coords_b`, or infinity when either set is empty."""
    if len(coords_a) == 0 or len(coords_b) == 0:
        return np.inf
    deltas = coords_a[:, None, :] - coords_b[None, :, :]
    return np.min(np.linalg.norm(deltas, axis=2))


def random_insert_ions(poly_pdb, ion_pdb, num_ions, box_size,
                       tol_poly_ion=3.0, tol_ion_ion=3.0, max_attempts=10000):
    """
    Insert `num_ions` copies of the ion defined in ion_pdb into the polymer box
    defined by poly_pdb, avoiding overlaps.

    Each copy is the ion template translated so that its centroid lands on a
    uniformly random point in [0, box_size)^3; the template is not rotated.
    A trial position is rejected when any of its atoms is closer than
    `tol_poly_ion` to an atom of poly_pdb, or closer than `tol_ion_ion` to an
    atom of an ion placed earlier in this call.

    Args:
      poly_pdb: PDB file whose atoms are kept as-is and must be avoided
      ion_pdb: PDB file holding one copy of the ion to insert
      num_ions: number of copies to place
      box_size: edge length of the cubic region sampled for ion centroids
      tol_poly_ion: minimum allowed ion-to-poly_pdb atom distance
      tol_ion_ion: minimum allowed distance between atoms of different ions
      max_attempts: trial positions per ion before giving up
    Returns:
      combined_lines: list of PDB lines for polymer + inserted ions. The
        poly_pdb lines are included, so results of two calls on the same
        poly_pdb must not simply be concatenated.
    Raises:
      ValueError: if ions are requested but ion_pdb has no ATOM/HETATM records
      RuntimeError: if an ion cannot be placed within `max_attempts` trials
    """
    # Parse polymer and ion templates; reshape so empty inputs are (0, 3).
    poly_lines, poly_coords = parse_pdb(poly_pdb)
    ion_lines, ion_coords = parse_pdb(ion_pdb)
    poly_coords = np.asarray(poly_coords, dtype=float).reshape(-1, 3)
    ion_coords = np.asarray(ion_coords, dtype=float).reshape(-1, 3)

    if len(ion_coords) == 0:
        if num_ions > 0:
            raise ValueError(f"No ATOM/HETATM records found in ion template {ion_pdb!r}")
        return list(poly_lines)

    # Center ion template at origin
    ion_coords_centered = ion_coords - ion_coords.mean(axis=0)

    placed_lines = []
    # Coordinates of all accepted ions, extended only when a trial is accepted
    # instead of being re-stacked on every attempt.
    placed_coords = np.empty((0, 3))

    # Place ions one by one
    for i in range(num_ions):
        for _ in range(max_attempts):
            # propose random translation
            tx, ty, tz = np.random.rand(3) * box_size
            new_coords = ion_coords_centered + np.array([tx, ty, tz])
            # check overlap with polymer
            if _min_pair_distance(poly_coords, new_coords) < tol_poly_ion:
                continue
            # check overlap with already placed ions
            if _min_pair_distance(placed_coords, new_coords) < tol_ion_ion:
                continue
            # accept this position
            placed_coords = np.vstack([placed_coords, new_coords])
            # Rewrite only the coordinate columns (31-54) of each template
            # line; every other field is copied from the template unchanged.
            for line, coord in zip(ion_lines, new_coords):
                placed_lines.append(
                    line[:30]
                    + f"{coord[0]:8.3f}{coord[1]:8.3f}{coord[2]:8.3f}"
                    + line[54:]
                )
            break
        else:
            raise RuntimeError(f"Failed to place ion #{i+1} without overlaps after {max_attempts} attempts")

    # Combine polymer and ion lines
    return poly_lines + placed_lines

num_li      = li_count
num_tfsi    = tfsi_count

# NOTE(review): only the single chain stored in polymer_pdb is packed here;
# the chain count computed earlier (n_poly) is not used in this cell --
# confirm whether the chain should be replicated.

# Insert Li+ around the polymer and save the intermediate structure.
combined = random_insert_ions(polymer_pdb, li_pdb, num_li, box_size,
                              tol_poly_ion=3.0, tol_ion_ion=3.0)
write_pdb(packed_pdb, combined)

# Insert TFSI- into the polymer + Li+ structure just written.
# random_insert_ions returns its input structure plus the new ions, so
# chaining through the intermediate file keeps a single copy of the polymer
# in the output and also keeps TFSI- away from the already placed Li+.
combined = random_insert_ions(packed_pdb, tfsi_pdb, num_tfsi, box_size,
                              tol_poly_ion=3.0, tol_ion_ion=3.0)

# Write out final packed PDB
write_pdb(packed_pdb, combined)

print(f"Manual packing completed: {packed_pdb}")


Manual packing completed: packed_manual.pdb


4. Convert .pdb file into .mol file

In [19]:
# 1) PDB → Mol (sanitize=False, proximityBonding=True)
#    Read the file written by the packing step (`packed_pdb`) rather than a
#    repeated literal, so renaming it in the parameters cell is enough.
mol = Chem.MolFromPDBFile(
    packed_pdb,
    sanitize=False,
    removeHs=False,
    proximityBonding=True
)
if mol is None:
    raise RuntimeError("PDB 변환 실패")

# 2) Sanitize, but skip the valence check.
flags = (Chem.SanitizeFlags.SANITIZE_ALL
         & ~Chem.SanitizeFlags.SANITIZE_PROPERTIES)
# SANITIZE_PROPERTIES includes the valence check.
Chem.SanitizeMol(mol, flags)

# 3) Save as a Mol file (forceV3000=False → V2000)
Chem.MolToMolFile(mol, output_mol, forceV3000=False)
print(f"✅ RDKit 우회 저장 완료: {output_mol}")

✅ RDKit 우회 저장 완료: output.mol


5. Convert .mol into .car and .mdf

In [None]:
# ─────────────────────── Helpers ────────────────────────

def safe_charge(atom):
    """
    Return the atom's `_GasteigerCharge` property as a float.

    Falls back to 0.0 when the property is missing or its value is NaN.
    """
    if not atom.HasProp('_GasteigerCharge'):
        return 0.0
    charge = atom.GetDoubleProp('_GasteigerCharge')
    if math.isnan(charge):
        return 0.0
    return charge

def atom_label(at, idx):
    """Return the element symbol of `at` immediately followed by `idx`."""
    return "{}{}".format(at.GetSymbol(), idx)

def guess_pcff(atom):
    """
    Guess an atom-type label from element, hybridization, aromaticity and
    3-membered-ring membership.

    The labels are meant as PCFF atom types (per the function name); they are
    a heuristic and are not checked against a force-field file here.

    Args:
      atom: an RDKit atom. Hybridization and aromaticity are read as stored
        on the atom, so they must have been perceived beforehand for the
        sp/sp2/aromatic branches to apply.
    Returns:
      The type label. Elements other than C, O, N, S and H fall through to
      the lower-cased element symbol.
    """
    sym = atom.GetSymbol()
    hyb = atom.GetHybridization()
    aro = atom.GetIsAromatic()
    ring3 = atom.IsInRingSize(3)

    if sym == "C":
        if aro:
            return "cp"
        if hyb == Hyb.SP:
            return "ct"
        if hyb == Hyb.SP2:
            return "c="
        return "c3"
    if sym == "O":
        return "o_2" if hyb == Hyb.SP2 else "o"
    if sym == "N":
        if aro:
            return "nb"
        if hyb == Hyb.SP2:
            return "n2"
        if ring3:
            return "n3m"
        return "na"
    if sym == "S":
        return "sp" if aro else "s"
    if sym == "H":
        bonds = atom.GetBonds()
        # A hydrogen with no bonds has no neighbour to inspect; use the
        # generic hydrogen label instead of failing on bonds[0].
        if len(bonds) == 0:
            return "h"
        heavy = bonds[0].GetOtherAtom(atom).GetSymbol()
        return "h*" if heavy in ("N", "O") else "h"
    return sym.lower()

# ────────────────────────────── Writer ───────────────────

def write_car(mol: Chem.Mol, fname: Path,
              cell=(40.0, 40.0, 40.0, 90.0, 90.0, 90.0)):
    """
    Write the atoms of `mol` to a .car text file with a periodic cell.

    One line is written per atom: label, coordinates of the first conformer,
    the fixed residue fields "SYS_1: 1", the guessed atom type, the element
    symbol and the charge returned by safe_charge().

    Args:
      mol: RDKit molecule with at least one conformer. Charges are read from
        the atoms as they are; this function does not compute them.
      fname: output path (str or Path)
      cell: (a, b, c, alpha, beta, gamma) written on the PBC line; the
        default is the 40 x 40 x 40 orthogonal cell this writer always used
    """
    a, b, c, alpha, beta, gamma = cell
    now = datetime.datetime.now().strftime("%a %b %d %H:%M:%S %Y")
    conf = mol.GetConformer()
    lines = [
        "!BIOSYM archive 3",
        "PBC=ON",
        "Materials Studio Generated CAR File",
        f"!DATE: {now}",
        f"PBC{a:10.4f}{b:10.4f}{c:10.4f}{alpha:8.2f}{beta:8.2f}{gamma:8.2f}",
        ""
    ]
    # Labels are 1-based, conformer positions are 0-based.
    for i, atom in enumerate(mol.GetAtoms(), start=1):
        x, y, z = conf.GetAtomPosition(i-1)
        lines.append(
            f"{atom_label(atom,i):<8}"
            f"{x:10.4f}{y:10.4f}{z:10.4f} "
            f"SYS_1: 1 {guess_pcff(atom):<4} {atom.GetSymbol():<2} "
            f"{safe_charge(atom):6.4f}"
        )
    lines += ["end", "end"]
    # Path(...) lets callers pass a plain string as well as a Path.
    Path(fname).write_text("\n".join(lines))

def write_mdf(mol: Chem.Mol, fname: Path):
    """
    Write the topology of `mol` to a .mdf text file.

    One line is written per atom: label, element, guessed atom type, fixed
    placeholder columns, the charge returned by safe_charge(), and the labels
    of all bonded neighbours.

    Gasteiger charges are computed on `mol` (in place) when its first atom
    does not carry a `_GasteigerCharge` property yet.

    Args:
      mol: RDKit molecule
      fname: output path (str or Path)
    """
    # Guard on the atom count so an atom-less molecule does not fail on
    # GetAtomWithIdx(0).
    if mol.GetNumAtoms() and not mol.GetAtomWithIdx(0).HasProp('_GasteigerCharge'):
        AllChem.ComputeGasteigerCharges(mol)
    now = datetime.datetime.now().strftime("%a %b %d %H:%M:%S %Y")
    hdr = [
        "!BIOSYM molecular_data 4", "",
        f"!Date: {now}", "",
        "#topology", "",
        "@column 1 element", "@column 2 atom_type",
        "@column 3 charge_group", "@column 4 isotope",
        "@column 5 formal_charge", "@column 6 charge",
        "@column 7 switching_atom", "@column 8 oop_flag",
        "@column 9 chirality_flag", "@column 10 occupancy",
        "@column 11 xray_temp_factor", "@column 12 connections", "",
        "@molecule poly", ""
    ]
    body = []
    for i, atom in enumerate(mol.GetAtoms(), start=1):
        # An explicit space separates element and atom type. Padding the
        # element to two characters fused the columns for two-letter
        # elements ("Li" + "li" became "Lili"); one-letter elements produce
        # exactly the same text as before.
        base = (
            f"{atom_label(atom,i):<15}"
            f"{atom.GetSymbol()} {guess_pcff(atom):<4}"
            f" 1 0 0 {safe_charge(atom):8.4f} 0 0 8 1.0000  0.0000 "
        )
        # Neighbour labels use the same 1-based numbering as the atom labels.
        conns = " ".join(atom_label(nbr, nbr.GetIdx() + 1)
                         for nbr in atom.GetNeighbors())
        body.append(base + conns)
    tail = ["", "!", "#symmetry", "@periodicity 3 xyz", "@group (P1)", "", "#end"]
    # Path(...) lets callers pass a plain string as well as a Path.
    Path(fname).write_text("\n".join(hdr + body + tail))

# ─────────────────────── Main ─────────────────────────────

# Specify your input file here:
input_path = Path(output_mol)  # or .mol2
stem = input_path.stem

# Load the molecule
if input_path.suffix.lower() == ".mol2":
    mol = Chem.MolFromMol2File(str(input_path), sanitize=False, removeHs=False)
else:
    mol = Chem.MolFromMolFile(str(input_path), sanitize=False, removeHs=False)
if mol is None:
    raise RuntimeError(f"RDKit failed to parse {input_path}")

# The file was read with sanitize=False, so RDKit has not perceived
# hybridization or aromaticity yet, and guess_pcff() reads both. Run the same
# sanitization as the PDB → Mol step (everything except the valence check in
# SANITIZE_PROPERTIES) so atom typing sees sp/sp2/aromatic atoms.
mol.UpdatePropertyCache(strict=False)
Chem.SanitizeMol(
    mol,
    Chem.SanitizeFlags.SANITIZE_ALL & ~Chem.SanitizeFlags.SANITIZE_PROPERTIES
)

# Ensure 3D conformation & charges
if not mol.GetConformers():
    AllChem.EmbedMolecule(mol, randomSeed=1)
AllChem.ComputeGasteigerCharges(mol)

# Write .car and .mdf
car_file = Path(f"{stem}.car")
mdf_file = Path(f"{stem}.mdf")
write_car(mol, car_file)
write_mdf(mol, mdf_file)
print(f"[OK] wrote {car_file} and {mdf_file}")


[OK] wrote output.car and output.mdf


6. convert .mdf and .car into LAMMPS input data

In [22]:
# Path to the insight2lammps.pl script
script = Path.home() / "emc" / "scripts" / "insight2lammps.pl"
# Base name of the .car/.mdf pair (no extension). Derived from output_mol,
# the same way the previous step names the files it writes, so renaming
# output_mol cannot leave this step pointing at stale files.
input_base = Path(output_mol).stem

# Arguments are passed as a list, so no shell is involved.
cmd = [
    "perl",
    str(script),
    "-forcefield=pcff",
    "-class=class2",
    input_base
]

result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
    print("▶ insight2lammps.pl 실행 오류:\n", result.stderr)
else:
    print("▶ insight2lammps.pl 실행 성공:\n", result.stdout)

▶ insight2lammps.pl 실행 성공:
 insight2lammps v2.12.1 (August 20, 2018) (c) 2008-2018 Pieter J. in 't Veld

Info: setting up force field
Info: creating connectivity
Info: creating types
Info: writing lammps data file output.data
Info: 0 automatic parameters


