<a href="https://colab.research.google.com/github/DelWow/RippenApril2025/blob/main/Fragmentstein.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Set Up Process**

In [None]:
# Setup process
# Restart runtime after running this
!pip install "numpy==1.26.4"
!pip install fragmenstein
!pip install rdkit
!pip install mmpdb


**View your fragmented data as a CSV to choose which molecules you want to stitch**

In [None]:
# Upload your .smi file and turn it into an indexed CSV so you can pick
# which fragments to stitch later on.

from google.colab import files
import pandas as pd

uploaded = files.upload()      # choose yourfile.smi

# Fail with a clear message if the upload dialog was cancelled; otherwise
# next(iter(...)) below would raise a bare StopIteration.
if not uploaded:
    raise RuntimeError("No file was uploaded; re-run this cell and choose a .smi file.")

SMI_PATH = next(iter(uploaded))              # first uploaded file (any others are ignored)

# Each useful line is "<SMILES> [<name>]" separated by whitespace.
records = []
with open(SMI_PATH, "r") as fh:
    for line in fh:
        line = line.strip()
        if not line or line.startswith("#"):    # skip blanks / comments
            continue
        parts = line.split()
        smiles = parts[0]
        name   = parts[1] if len(parts) > 1 else ""   # name column is optional
        records.append({"smiles": smiles, "name": name})

# Passing `columns` keeps the "smiles"/"name" columns in place even when the
# file contained no usable lines, so the CSV always has the same header.
df = pd.DataFrame(records, columns=["smiles", "name"])

# Add index column starting at 0 (position among the kept lines, i.e. after
# blank and comment lines have been dropped)
df.insert(0, "index", range(len(df)))

print(df.head())   # Meant for troubleshooting

# Save as CSV next to the uploaded file, swapping the extension for .csv
CSV_PATH = SMI_PATH.rsplit(".", 1)[0] + ".csv"
df.to_csv(CSV_PATH, index=False)
print(f" Saved {CSV_PATH} with {len(df)} rows")



**Generate 3D coordinates for the fragment database**

In [None]:
# Imports
from rdkit import Chem
from rdkit.Chem import AllChem, SDWriter

infile  = '/content/fragments.smi'     # Path of your fragmented .smi file
outfile = '/content/fragments_3d.sdf'  # Where the SDF with 3D coordinates is written (or leave as is)

EMBED_SEED = 42  # fixed seed so the embedding produces the same coordinates on every run

# This section adds 3D coordinates to your SMILES so that they can be stitched
# together in the next cell.
# NOTE(review): the supplier splits on tabs only, while the CSV preview cell
# splits on any whitespace — confirm your .smi file is tab-delimited.
suppl = Chem.SmilesMolSupplier(
            infile,
            delimiter='\t',
            titleLine=False,
            nameColumn=1
)

embed_params = AllChem.ETKDGv3()
embed_params.randomSeed = EMBED_SEED

writer  = SDWriter(outfile)
written = 0   # molecules actually written to the SDF
fail    = 0   # molecules skipped (unparsable SMILES or failed embedding)

# try/finally guarantees the SDF is flushed and closed even if a molecule
# raises midway through the loop.
try:
    for mol in suppl:
        if mol is None:                    # bad SMILES or parse error
            fail += 1
            continue
        mol = Chem.AddHs(mol)              # add explicit hydrogens before embedding
        if AllChem.EmbedMolecule(mol, embed_params) != 0:   # returns 0 on success
            fail += 1
            continue
        # The optimiser's return code is not checked: the embedded geometry is
        # written out whether or not the minimisation succeeded.
        AllChem.MMFFOptimizeMolecule(mol)
        writer.write(mol)
        written += 1
finally:
    writer.close()

print(f"✅  wrote {written} molecules to {outfile}, "
      f"{fail} failures")


**Stitch Molecules Together Using Monster**

In [None]:
# Imports
from rdkit import Chem
from fragmenstein import Monster

# Choose the two fragments to stitch.
# Use the CSV you made earlier to pick them, then set the two indices below.
# NOTE: the indices refer to positions in the 3D SDF written by the previous
# cell. Molecules that failed to parse or embed are not in that file, so if the
# previous cell reported any failures the positions can be shifted relative to
# the CSV — check the fragment names printed below.
FRAG1_INDEX = 0
FRAG2_INDEX = 1

# Read the hits from the SDF (`outfile`, set in the previous cell) rather than
# from the SMILES supplier: molecules parsed straight from SMILES carry no
# conformer, and the 3D coordinates generated for stitching exist only in the SDF.
hits_3d = Chem.SDMolSupplier(outfile)
n_hits = len(hits_3d)
for idx in (FRAG1_INDEX, FRAG2_INDEX):
    if not 0 <= idx < n_hits:
        raise IndexError(f"Fragment index {idx} is out of range; {outfile} holds {n_hits} molecules")

mol1 = hits_3d[FRAG1_INDEX]
mol2 = hits_3d[FRAG2_INDEX]
if mol1 is None or mol2 is None:
    raise ValueError(f"Could not read the selected fragments from {outfile}")

# Show which records were picked before their names are overwritten
print("Selected fragments:", mol1.GetProp('_Name'), "+", mol2.GetProp('_Name'))

# Set names
mol1.SetProp('_Name', 'frag1')
mol2.SetProp('_Name', 'frag2')


# Initialize Fragmenstein Monster
monster = Monster(hits =[mol1, mol2])

merged = monster.combine()
merged_mol = monster.positioned_mol


# Will save the generated molecule to linked_output1.mol and will also print
Chem.MolToMolFile(merged_mol, 'linked_output1.mol', includeStereo=True)
print("Linked Molecule SMILES:", Chem.MolToSmiles(merged_mol))