<a href="https://colab.research.google.com/github/Mohaammed-Fouad/Ligand-Based-Virtual-Screening/blob/main/mols_or_sdf_to_smi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install rdkit -q

import os
from rdkit import Chem
from rdkit.Chem.MolStandardize import rdMolStandardize
from google.colab import files

def get_largest_fragment(mol):
    """Return the largest fragment of ``mol``, dropping smaller components.

    Intended to strip salts, solvents, and counter-ions so that only the
    main molecule remains. The selection is delegated to RDKit's
    ``rdMolStandardize.LargestFragmentChooser``.

    Args:
        mol: The RDKit molecule to clean.

    Returns:
        The molecule returned by ``LargestFragmentChooser.choose``. If the
        chooser raises for any reason, ``mol`` is returned unchanged so the
        caller can still attempt to export it.
    """
    try:
        chooser = rdMolStandardize.LargestFragmentChooser()
        return chooser.choose(mol)
    except Exception:
        # Best-effort cleanup: fall back to the input molecule on failure.
        # Catching Exception (not a bare except) keeps KeyboardInterrupt and
        # SystemExit working, so a long conversion can still be interrupted.
        return mol

def mol2_supplier(file_path):
    """Yield one molecule per ``@<TRIPOS>MOLECULE`` record of a Mol2 file.

    The whole file is read into memory and split on the record tag; each
    record is parsed with ``Chem.MolFromMol2Block(..., sanitize=False)`` and
    then sanitized on a best-effort basis with every sanitization step except
    ``SANITIZE_PROPERTIES``.

    Args:
        file_path: Path of the Mol2 file to read.

    Yields:
        The parsed molecule for each record, or ``None`` when the record
        could not be parsed. A value is yielded for every record, so the
        position in the stream matches the record's position in the file.
    """
    record_tag = '@<TRIPOS>MOLECULE'
    with open(file_path, 'r') as f:
        data = f.read()

    # Anything before the first tag (comments, blank lines) is not a record.
    for block in data.split(record_tag)[1:]:
        # split() consumed the tag, so put it back before parsing.
        mol = Chem.MolFromMol2Block(record_tag + block, sanitize=False)
        if mol is None:
            yield None
            continue

        try:
            mol.UpdatePropertyCache(strict=False)
            Chem.SanitizeMol(mol, Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_PROPERTIES)
        except Exception:
            # Deliberate best-effort: a molecule that fails sanitization is
            # still handed to the caller. Catching Exception rather than
            # using a bare except lets KeyboardInterrupt/SystemExit through.
            pass
        yield mol

def convert_compounds():
    """Interactively convert an uploaded .sdf or .mol2 file to a .smi file.

    Workflow:
      1. Ask the user to upload a file through ``files.upload()``.
      2. Pick a molecule supplier from the file extension (``.sdf`` uses
         ``Chem.SDMolSupplier``, ``.mol2`` uses ``mol2_supplier``).
      3. For every molecule that parsed, keep its largest fragment and write
         ``<SMILES><TAB><name>`` as one line of ``cleaned_compounds.smi``.
      4. Offer the result through ``files.download()`` if at least one
         molecule was written.

    Only the first uploaded file is processed. The upload key is used
    directly as a path, which assumes the upload was saved into the current
    working directory under that name (TODO confirm against Colab docs).

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    print("üöÄ Upload your .sdf or .mol2 file:")
    uploaded = files.upload()

    if not uploaded:
        print("‚ùå No file selected.")
        return

    input_file = next(iter(uploaded))
    if len(uploaded) > 1:
        # Make the single-file limitation visible instead of silently
        # dropping the remaining uploads.
        print(f"Note: {len(uploaded)} files uploaded; only {input_file} will be processed.")

    ext = os.path.splitext(input_file)[1].lower()

    # Select supplier
    if ext == '.sdf':
        supplier = Chem.SDMolSupplier(input_file)
    elif ext == '.mol2':
        supplier = mol2_supplier(input_file)
    else:
        print(f"‚ùå Unsupported format: {ext}")
        return

    print(f"üîÑ Processing {input_file} and removing salts...")

    output_filename = "cleaned_compounds.smi"
    count = 0

    with open(output_filename, 'w') as f:
        for i, mol in enumerate(supplier):
            if mol is None:
                print(f"‚ö†Ô∏è Could not parse molecule {i}")
                continue

            try:
                # 1. Strip salts/fragments
                clean_mol = get_largest_fragment(mol)
                # 2. Generate SMILES
                smiles = Chem.MolToSmiles(clean_mol)
                # 3. Get Name. A record can carry a '_Name' property that is
                #    blank; fall back to the positional name in that case too,
                #    otherwise the name column of the output would be empty.
                raw_name = mol.GetProp('_Name').strip() if mol.HasProp('_Name') else ""
                name = raw_name or f"Compound_{i}"

                f.write(f"{smiles}\t{name}\n")
                count += 1
            except Exception as e:
                # One bad molecule must not abort the whole batch.
                print(f"‚ö†Ô∏è Error on molecule {i}: {e}")

    if count > 0:
        print(f"\n‚úÖ Success! {count} molecules cleaned and converted.")
        files.download(output_filename)
    else:
        print("\n‚ùå Conversion failed. Check your file format.")

# Run the interactive upload -> clean -> convert -> download workflow
# as soon as this cell is executed.
convert_compounds()