
# FrankPEPstein: Incremental Debug Notebook

This notebook tests the pipeline using a consolidated workflow:
1.  **Setup**: Installs dependencies in a dedicated environment.
2.  **Workflow**: Uploads Receptor -> Detects/Uploads Pockets -> Selects Pocket -> Calculates Box.


In [None]:

#@title 0. Install CondaColab & Setup Tools
import sys
import os
import subprocess
from IPython.display import clear_output

# Helper to suppress output
class SuppressStdout:
    """Context manager that discards everything written to ``sys.stdout``.

    On entry ``sys.stdout`` is replaced by a handle on ``os.devnull``; on exit
    the stream that was active on entry is restored and the devnull handle is
    closed. Exceptions raised inside the block propagate unchanged.

    NOTE(review): only writes that go through ``sys.stdout`` are silenced;
    output written directly to the process file descriptor (for example by a
    child process) is presumably not affected -- confirm if that matters.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own reference so __exit__ closes exactly the handle opened
        # here, even if code inside the block rebinds sys.stdout.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first so a failure while closing cannot leave stdout muted.
        sys.stdout = self._original_stdout
        self._devnull.close()

def run_setup():
    """Provision the runtime for FrankPEPstein behind a single progress bar.

    Steps, in order:
      1. Import ``condacolab`` and run ``condacolab.check()``; if the import
         fails, pip-install it and call ``condacolab.install()``.
      2. Clone the FrankPEPstein repository unless a ``FrankPEPstein``
         directory already exists in the working directory.
      3. Create the ``FrankPEPstein`` conda environment with mamba (skipped
         when ``/usr/local/envs/FrankPEPstein`` exists), then expose its
         site-packages and ``bin`` directory to this process.
      4. Overwrite ``FrankPEPstein/scripts/notebook_utils.py`` with the helper
         module embedded below.
      5. Import that module and run ``setup_external_tools`` with the
         hard-coded Google Drive file ids.
      6. Run ``configure_modeller`` with its default arguments.

    Afterwards the cell output is cleared and a ready message is printed.

    Returns:
        None.

    Raises:
        subprocess.CalledProcessError: if a command started with
            ``check=True`` (pip, git, mamba) exits with a non-zero status.
    """
    # Install tqdm first if missing (fast)
    try:
        from tqdm.notebook import tqdm
    except ImportError:
        subprocess.run("pip install -q tqdm", shell=True)
        from tqdm.notebook import tqdm

    print("Setting up FrankPEPstein environment...")

    # (progress-bar label, short tag) for each phase, in execution order.
    steps = [
        ("Installing CondaColab", "condacolab"),
        ("Cloning Repository", "git"),
        ("Creating Conda Environment (Slow)", "env"),
        ("Configuring Notebook Utils", "patch"),
        ("Setting up External Tools", "tools"),
        ("Configuring Modeller", "modeller")
    ]

    with tqdm(total=len(steps)) as pbar:
        # 1. CondaColab
        pbar.set_description(steps[0][0])
        try:
            with SuppressStdout():
                import condacolab
                condacolab.check()
        except ImportError:
            with SuppressStdout():
                subprocess.run("pip install -q condacolab", shell=True, check=True)
                import condacolab
                condacolab.install()
        pbar.update(1)

        # 2. Git Clone
        pbar.set_description(steps[1][0])
        with SuppressStdout():
            if not os.path.exists("FrankPEPstein"):
                subprocess.run("git clone https://github.com/Joacaldog/FrankPEPstein.git", shell=True, check=True)
        pbar.update(1)

        # 3. Create Environment
        pbar.set_description(steps[2][0])
        env_path = "/usr/local/envs/FrankPEPstein"
        if not os.path.exists(env_path):
            # Redirect mamba output to devnull
            subprocess.run("mamba create -n FrankPEPstein -q -y -c conda-forge -c salilab openbabel biopython fpocket joblib tqdm py3dmol vina python=3.10 salilab::modeller > /dev/null 2>&1", shell=True, check=True)

        # Configure Path
        site_packages = f"{env_path}/lib/python3.10/site-packages"
        if site_packages not in sys.path:
            sys.path.append(site_packages)
        # Put the env's bin directory first on PATH exactly once: drop any
        # earlier occurrence before prepending so that re-running this cell
        # does not keep growing PATH with duplicate entries.
        env_bin = f"{env_path}/bin"
        other_entries = [
            entry for entry in os.environ['PATH'].split(os.pathsep)
            if entry != env_bin
        ]
        os.environ['PATH'] = os.pathsep.join([env_bin, *other_entries])
        pbar.update(1)

        # 4. Patch Utils
        pbar.set_description(steps[3][0])
        # The module source below is written to disk verbatim.
        # NOTE(review): inside the embedded setup_external_tools, the bundle
        # download branch tests `not os.path.exists(info["extract_to"])` right
        # after os.makedirs() has created those same directories, so it looks
        # unreachable -- confirm before relying on bundle ids.
        patched_utils_content = r'''import os
import sys
import glob
import shutil

def configure_modeller(license_key='MODELIRANJE', repo_dir='FrankPEPstein'):
    """
    Configures Modeller by locating the config.py file in the installation
    and replacing the license key placeholder with the provided key.
    """
    # Template location in the repo
    template_config = os.path.join(repo_dir, "utilities/config.py")
    
    # Try using python import to find the location
    dest_config = None
    try:
        import modeller
        modeller_path = os.path.dirname(modeller.__file__)
        candidate = os.path.join(modeller_path, "config.py")
        if os.path.exists(candidate):
            dest_config = candidate
    except Exception:
        # Modeller raises an error on import if not configured, which is expected.
        # We just want to find where it is installed.
        pass

    # Fallback to search if import finding failed
    if not dest_config:
        possible_paths = [
            f"{sys.prefix}/lib/modeller-*/modlib/modeller/config.py", # Standard standalone install
            f"{sys.prefix}/lib/python*/site-packages/modeller/config.py", # Site-packages install
            f"{sys.prefix}/pkgs/modeller-*/lib/modeller-*/modlib/modeller/config.py", # Conda pkgs cache structure
            "/usr/local/envs/FrankPEPstein/lib/modeller-*/modlib/modeller/config.py" # Targeted Conda Environment
        ]
        
        dest_config_paths = []
        for pattern in possible_paths:
            found = glob.glob(pattern)
            dest_config_paths.extend(found)
        
        if dest_config_paths:
            dest_config = dest_config_paths[0]

    
    if dest_config and os.path.exists(template_config):
        print(f"Found modeller config at: {dest_config}")
        print(f"Using template {template_config} to update {dest_config}")
        
        with open(template_config, 'r') as f:
            content = f.read()
        
        # Replace placeholder 'MODELIRANJE' with actual key
        new_content = content.replace("'MODELIRANJE'", f"'{license_key}'")
        
        with open(dest_config, 'w') as f:
            f.write(new_content)
        print("Modeller configured successfully.")
        return True
    else:
        print(f"Error: Modeller config destination ({dest_config}) or template ({template_config}) not found.")
        return False

def get_pocket_box(pdb_file):
    """
    Calculates the center and size of a box surrounding the atoms in the given PDB file.
    Adds a buffer of 5.0 units to the size.
    """
    import Bio.PDB
    parser = Bio.PDB.PDBParser(QUIET=True)
    structure = parser.get_structure("pocket", pdb_file)
    coords = []
    for atom in structure.get_atoms():
        coords.append(atom.get_coord())
    
    if not coords:
        return None, None

    min_coord = [min([c[i] for c in coords]) for i in range(3)]
    max_coord = [max([c[i] for c in coords]) for i in range(3)]
    
    center = [(min_coord[i] + max_coord[i]) / 2 for i in range(3)]
    size = [(max_coord[i] - min_coord[i]) + 5.0 for i in range(3)] # Add buffer
    return center, size

def patch_scripts(scripts_dir, path_replacements):
    """
    Iterates through .py files in scripts_dir and applies string replacements.
    """
    print("Patching scripts...")
    count = 0
    for script_name in os.listdir(scripts_dir):
        if script_name.endswith(".py"):
            full_path = os.path.join(scripts_dir, script_name)
            with open(full_path, 'r') as f:
                content = f.read()
            
            original_content = content
            for old, new in path_replacements.items():
                content = content.replace(old, new)
            
            # Additional patches for command calls
            content = content.replace("vina", "vina") 
            
            if content != original_content:
                with open(full_path, 'w') as f:
                    f.write(content)
                print(f"Patched {script_name}")
                count += 1
    return count

def setup_external_tools(drive_ids=None):
    """
    Sets up external tools (ADFR, Click, DB).
    If drive_ids is provided, downloads missing files from Google Drive.
    """
    import subprocess
    
    # Ensure gdown is installed
    try:
        import gdown
    except ImportError:
        print("Installing gdown...")
        subprocess.run([sys.executable, "-m", "pip", "install", "gdown"], check=True)
        import gdown

    base_dir = "FrankPEPstein"
    # Adjust base_dir if we are running from root vs inside scripts?
    # The utils assume repo_dir='FrankPEPstein' usually implies subfolder.
    # But if looking for "utilities", it usually expects to find them relative to CWD?
    # Let's check config.
    # In notebook setup: repo_path = os.path.abspath("FrankPEPstein").
    # If we run cell_01_setup.py from FrankPEPstein root, base_dir "FrankPEPstein" might be wrong if we are IN it?
    # notebook_utils.py:
    #   configure_modeller default repo_dir='FrankPEPstein'.
    #   BUT when running locally in the repo, 'FrankPEPstein' folder DOES NOT EXIST inside 'FrankPEPstein'.
    #   The repo IS the cwd.
    #   When cloning in Colab: cwd is /content/, repo is /content/FrankPEPstein.
    #   So 'FrankPEPstein/utilities' is correct there.
    #   But LOCALLY, if I am in ~/FrankPEPstein/, 'FrankPEPstein/utilities' does not exist. 'utilities' exists.
    
    # I need to handle this path difference!
    
    if os.path.exists("utilities"):
        # We are likely INSIDE the repo root (Local execution)
        base_dir = "."
    elif os.path.exists("FrankPEPstein/utilities"):
        # We are likely in parent dir (Colab default)
        base_dir = "FrankPEPstein"
    else:
        # Fallback or create?
        base_dir = "FrankPEPstein" # Default to colab behavior for safety, or create it.

    utilities_dir = os.path.join(base_dir, "utilities")
    db_dir = os.path.join(base_dir, "DB")
    
    os.makedirs(utilities_dir, exist_ok=True)
    os.makedirs(db_dir, exist_ok=True)

    # File definitions
    # --- Bundle Download Logic ---
    # Downloads everything in two main packages if IDs are provided
    bundles = {
        "utilities_pkg": {
            "path": os.path.join(base_dir, "utilities.tar.gz"),
            "id_key": "utilities_pkg_id",
            "extract_to": os.path.join(base_dir, "utilities"),
            "desc": "Utilities Bundle"
        },
        "db_pkg": {
            "path": os.path.join(base_dir, "DB.tar.gz"),
            "id_key": "db_pkg_id",
            "extract_to": os.path.join(base_dir, "DB"),
            "desc": "Database Bundle"
        }
    }

    if drive_ids is None:
        drive_ids = {}

    # Download and extract bundles first
    for name, info in bundles.items():
        bundle_id = drive_ids.get(info["id_key"])
        if bundle_id and not os.path.exists(info["extract_to"]): # Only if dir doesn't exist? Or check manifest?
             # Actually, we should check if the CONTENT exists, but downloading bundle is safer if unsure.
             # Simple check: if tarball doesn't exist, download.
             if not os.path.exists(info["path"]):
                 print(f"Downloading {info['desc']}...")
                 url = f'https://drive.google.com/uc?id={bundle_id}'
                 gdown.download(url, info["path"], quiet=False)
             
             # Extract
             if os.path.exists(info["path"]):
                 print(f"Extracting {info['desc']}...")
                 os.makedirs(info["extract_to"], exist_ok=True)
                 # strip-components=0 because we tarred content of utilies into utilities.tar.gz?
                 # I tarred with -C utilities, so it contains "ADFR..." at root.
                 subprocess.run(f"tar -xzf {info['path']} -C {info['extract_to']}", shell=True, check=True)

    # --- Individual Tool Verification ---
    # Even after bundle extraction, we run this to ensure paths are set and bins are executable.
    # It also handles legacy cases (individual IDs provided).
    files = {
        "adfr": {
            "path": os.path.join(utilities_dir, "ADFRsuite_x86_64Linux_1.0.tar.gz"),
            "id_key": "adfr_id", # Fallback key
            "extract_cmd": f"tar -xzf {{}} -C {utilities_dir}",
            "bin_path": os.path.join(os.path.abspath(utilities_dir), "ADFRsuite_x86_64Linux_1.0/bin") 
        },
        "click": {
            "path": os.path.join(utilities_dir, "Click.tar.gz"),
            "id_key": "click_id",
            "extract_cmd": f"tar -xzf {{}} -C {utilities_dir}",
            "bin_path": os.path.join(os.path.abspath(utilities_dir), "Click/bin")
        },
        "db": {
            "path": os.path.join(db_dir, "minipockets_surface80_winsize3_size3_curated-db.tar.gz"),
            "id_key": "db_id",
            "extract_cmd": f"tar -xzf {{}} -C {db_dir}"
        },
        "dict": {
            "path": os.path.join(db_dir, "reduce_wwPDB_het_dict.tar.gz"), 
            "id_key": "dict_id",
            "extract_cmd": f"tar -xzf {{}} -C {db_dir}"
        }
    }

    for name, info in files.items():
        if not os.path.exists(info["path"]):
            # Check if we have an ID to download (fallback)
            file_id = drive_ids.get(info["id_key"])
            if file_id:
                print(f"Downloading {name} (Fallback)...")
                url = f'https://drive.google.com/uc?id={file_id}'
                gdown.download(url, info["path"], quiet=False)
        
        # Extract if exists
        should_extract = False
        if os.path.exists(info["path"]):
             should_extract = True
             if "bin_path" in info and os.path.exists(info["bin_path"]):
                 should_extract = False
             # For DB files that don't have bin_path, we might re-extract unnecessarily?
             # Check if destination exists
             if name == "db" and os.path.exists(os.path.join(db_dir, "minipockets_surface80_winsize3_size3_curated-db")):
                 should_extract = False
             if name == "dict" and os.path.exists(os.path.join(db_dir, "reduce_wwPDB_het_dict.txt")):
                 should_extract = False
        
        if should_extract:
            print(f"Extracting {name}...")
            subprocess.run(info["extract_cmd"].format(info["path"]), shell=True, check=True)
            
        # Add to PATH if needed
        if "bin_path" in info and os.path.exists(info["bin_path"]):
            if info['bin_path'] not in os.environ['PATH']:
                os.environ['PATH'] += f":{info['bin_path']}"
                print(f"Added {name} to PATH: {info['bin_path']}")
            if name == "click":
                    subprocess.run(f"chmod +x {info['bin_path']}/click", shell=True)
    
    # Handle dictionary txt
    dict_txt = os.path.join(db_dir, "reduce_wwPDB_het_dict.txt")
    if os.path.exists(dict_txt):
        print("Dictionary txt found.")
    else:
        print("WARNING: reduce_wwPDB_het_dict.txt not found (maybe inside another folder after extraction?)")

'''
        os.makedirs("FrankPEPstein/scripts", exist_ok=True)
        with open("FrankPEPstein/scripts/notebook_utils.py", "w") as f:
            f.write(patched_utils_content)
        pbar.update(1)

        # 5. External Tools Setup
        pbar.set_description(steps[4][0])
        # Make the cloned repository importable so `scripts` resolves to it.
        repo_path = os.path.abspath("FrankPEPstein")
        if repo_path not in sys.path:
            sys.path.append(repo_path)
        from scripts import notebook_utils

        # DRIVE CONFIGURATION: Enter your File IDs here
        drive_ids = {
            "adfr_id": "1gmRj8mva84-JB7UXUcQfB3Ziw_nwwdox",
            "db_id": "1a4GoZ1ZT-DNYMyvVtKJukNdF6TAaLJU5",
            "dict_id": "1nrwSUof0lox9fp8Ow5EICIN9u0lglu7U"
        }

        # The user asked for "a progress bar and nothing else", so the
        # helpers' own print output is captured and discarded here.
        with SuppressStdout():
            notebook_utils.setup_external_tools(drive_ids)
        pbar.update(1)

        # 6. Configure Modeller
        pbar.set_description(steps[5][0])
        with SuppressStdout():
            notebook_utils.configure_modeller()
        pbar.update(1)

    clear_output()
    print("✅ Setup Ready!")

# Run the setup only when this code is executed as the main module (not when
# it is imported from elsewhere).
if __name__ == "__main__":
    run_setup()


In [None]:
#@title 1. Input & Pocket Selection
#@markdown **Instructions:**
#@markdown 1. Upload your Receptor PDB.
#@markdown 2. Choose Mode: **Auto Detect** (runs fpocket) or **Manual Upload** (upload your specific pocket PDB).
#@markdown 3. Select the pocket from the dropdown to visualize.

import os
import sys
import subprocess
# Import py3Dmol with two fallbacks: first make the FrankPEPstein conda
# environment visible to this interpreter and retry, then pip-install it.
try:
    import py3Dmol
except ImportError:
    # Try adding FrankPEPstein env to path
    env_path = "/usr/local/envs/FrankPEPstein"
    site_packages = f"{env_path}/lib/python3.10/site-packages"
    if os.path.exists(site_packages):
        if site_packages not in sys.path:
            sys.path.append(site_packages)
        # Add binary path too (prepended so the env's executables win)
        # NOTE(review): this is a substring test on the whole PATH string,
        # not a comparison against individual PATH entries.
        if f"{env_path}/bin" not in os.environ['PATH']:
            os.environ['PATH'] = f"{env_path}/bin:" + os.environ['PATH']
    
    # Retry import now that the env's site-packages may be on sys.path
    try:
        import py3Dmol
    except ImportError:
        # Last resort: install into the current environment; check=True makes
        # a failed install raise instead of falling through to the import.
        print("py3Dmol not found. Installing...")
        subprocess.run("pip install -q py3dmol", shell=True, check=True)
        import py3Dmol

import ipywidgets as widgets
from google.colab import files
from IPython.display import display

# --- configuration ---
detection_mode = "Auto Detect" #@param ["Auto Detect", "Manual Upload"]

import re

# Global variables for next steps
receptor_filename = None
pockets_dir = "pockets_upload" # Default for manual
final_pockets_list = []

# Colab stores a re-uploaded file as "name(1).ext" or "name (1).ext"; this
# pattern captures the stem and the optional extension around that marker.
_COLAB_DUPLICATE_RE = re.compile(r'^(.*?)\s?\(\d+\)(\.[^.]*)?$')


def _strip_colab_duplicate_suffix(filename):
    """Return *filename* without a trailing "(N)" duplicate-upload marker.

    Names that carry no such marker are returned unchanged, and so are names
    that would be left with an empty stem once the marker is removed.
    """
    match = _COLAB_DUPLICATE_RE.search(filename)
    if not match or not match.group(1):
        return filename
    return match.group(1) + (match.group(2) or "")


# --- 1. Upload Receptor ---
print(f"--- Upload Receptor PDB ({detection_mode}) ---")
uploaded_r = files.upload()

if not uploaded_r:
    print("No receptor file uploaded.")
else:
    # Only the first uploaded file is used as the receptor.
    raw_filename = list(uploaded_r.keys())[0]

    # Check for Colab duplicate naming (e.g. receptor(1).pdb)
    clean_name = _strip_colab_duplicate_suffix(raw_filename)
    if clean_name != raw_filename:
        print(f"Detected duplicate upload: {raw_filename} -> overwriting {clean_name}")

        if os.path.exists(clean_name):
            os.remove(clean_name)
        os.rename(raw_filename, clean_name)

    receptor_filename = os.path.abspath(clean_name)
    print(f"Receptor: {receptor_filename}")

    # --- 2. Pocket Handling ---
    if detection_mode == "Auto Detect":
        print(f"\nRunning fpocket on {receptor_filename}...")
        try:
            # Pass the arguments as a list (no shell): the uploaded filename
            # is user-controlled, and spaces or quotes in it must not be
            # interpreted by a shell.
            subprocess.run(["fpocket", "-f", receptor_filename], check=True)
        except subprocess.CalledProcessError:
            print("Error running fpocket.")
        except OSError as err:
            # Without a shell, a missing/unstartable executable surfaces as
            # OSError instead of a non-zero exit status.
            print(f"Error running fpocket: could not start the executable ({err}).")
        else:
            # The output folder is expected next to the receptor, named after
            # the file either with or without its extension.
            base_name_no_ext = os.path.splitext(os.path.basename(receptor_filename))[0]
            base_dir = os.path.dirname(receptor_filename)

            # Possible output folder names
            folder_name_1 = f"{os.path.basename(receptor_filename)}_out"
            folder_name_2 = f"{base_name_no_ext}_out"

            # Check in the same directory as the receptor
            possible_folders = [
                os.path.join(base_dir, folder_name_1),
                os.path.join(base_dir, folder_name_2)
            ]

            output_folder = next((f for f in possible_folders if os.path.exists(f)), None)

            if output_folder:
                pockets_dir = os.path.join(output_folder, "pockets")
                if os.path.exists(pockets_dir):
                    final_pockets_list = [f for f in os.listdir(pockets_dir) if f.endswith(".pdb")]
                    print(f"Auto-detection finished. Found {len(final_pockets_list)} pockets.")
                    pockets_dir = os.path.abspath(pockets_dir) # Ensure absolute
                else:
                    print(f"Warning: pockets subdirectory not found in {output_folder}")
            else:
                print("Error: fpocket output not found. Checked:", possible_folders)

    elif detection_mode == "Manual Upload":
        print(f"\n--- Upload Manual Pocket PDB ---")
        os.makedirs(pockets_dir, exist_ok=True)
        uploaded_p = files.upload()
        if uploaded_p:
            for p_file in uploaded_p.keys():
                # Colab renames duplicate uploads to filename(1).ext;
                # the user wants to overwrite the earlier file instead.
                clean_name = _strip_colab_duplicate_suffix(p_file)
                if clean_name != p_file:
                    print(f"Detected duplicate upload: {p_file} -> overwriting {clean_name}")

                target_path = os.path.join(pockets_dir, clean_name)

                # If target exists, log that we are replacing it
                if os.path.exists(target_path):
                    print(f"Replacing existing file: {clean_name}")
                    os.remove(target_path)

                # Move (rename) the uploaded file to the target path
                # Note: 'p_file' is in CWD (content/), target is in pockets_dir
                os.rename(p_file, target_path)

                if clean_name not in final_pockets_list:
                    final_pockets_list.append(clean_name)

            print(f"Manual upload finished. Available pockets: {len(final_pockets_list)}")

    # --- 3. Visualization & Selection ---
    if final_pockets_list:
        print("\n--- Pocket Selection & Visualization ---")
        print("Displaying all detected pockets. Select one below for extraction.")

        pocket_dropdown = widgets.Dropdown(
            options=sorted(final_pockets_list),
            description='Select Pocket:',
            disabled=False,
        )

        def view_pockets(selected_pocket_file):
            """Render the receptor surface plus every pocket, highlighting the selected one.

            Args:
                selected_pocket_file: file name (within ``pockets_dir``) of the
                    pocket currently chosen in the dropdown.
            """
            view = py3Dmol.view(width=800, height=600)

            # 1. Receptor Surface (White, Transparent)
            with open(receptor_filename, 'r') as f:
                view.addModel(f.read(), "pdb")
            view.setStyle({})
            view.addSurface(py3Dmol.SES, {'opacity': 0.3, 'color': 'white'})

            # 2. Add ALL pockets with distinct colors
            colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF', '#FFA500', '#800080', '#008000', '#800000']

            for i, p_file in enumerate(sorted(final_pockets_list)):
                full_path = os.path.join(pockets_dir, p_file)
                if not os.path.exists(full_path):
                    continue

                with open(full_path, 'r') as f:
                    view.addModel(f.read(), "pdb")

                # Try to extract a short label (number)
                # Common patterns: "pocket5_atm.pdb", "pocket5.pdb", "5.pdb"
                number_match = re.search(r'(\d+)', p_file)
                label_text = number_match.group(1) if number_match else p_file

                # Determine styling
                if p_file == selected_pocket_file:
                    color = 'red'
                    opacity = 1.0
                    label_style = {'fontSize': 18, 'fontColor': 'red', 'backgroundColor': 'white', 'backgroundOpacity': 0.8, 'border': '2px solid red'}
                else:
                    # Cycle through the palette when there are more pockets than colors.
                    color = colors[i % len(colors)]
                    opacity = 0.6
                    label_style = {'fontSize': 12, 'fontColor': 'black', 'backgroundColor': 'white', 'backgroundOpacity': 0.5}

                # {'model': -1} targets the model that was added last.
                view.setStyle({'model': -1}, {'sphere': {'color': color, 'opacity': opacity}})

                # Add 3D Label (Number only)
                view.addLabel(label_text, label_style, {'model': -1})

            view.zoomTo()
            view.show()

        display(widgets.interactive(view_pockets, selected_pocket_file=pocket_dropdown))
    else:
        print("No pockets available to select.")
#@title 4. Pocket Extraction & Box Generation
#@markdown This step extracts the selected pocket and calculates the grid box center and size.

import os
# --- Helper Functions (Subprocess) ---
def run_box_calculation_isolated(pdb_file, buffer=10.0):
    """
    Compute the bounding box of a PDB file with the FrankPEPstein interpreter.

    A small helper script is written to ``calculate_box_isolated.py`` in the
    current directory and executed with the environment's own Python, so that
    Bio.PDB is loaded there instead of in the notebook kernel.

    Args:
        pdb_file: path of the PDB file handed to the helper script.
        buffer: value added to each box dimension (passed on as a string).

    Returns:
        ``(center, size)`` as two lists of floats parsed from the helper's
        output, or ``(None, None)`` when the interpreter is missing or the
        helper exits with an error status. Either element stays ``None`` if
        its line is absent from the output.
    """
    helper_source = """
import sys
import os
from Bio.PDB import PDBParser

def get_box_center_size(pdb_file, buffer):
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("pocket", pdb_file)
    coords = []
    
    for model in structure:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    coords.append(atom.get_coord())
    
    if not coords:
        return None, None

    min_coord = [min([c[i] for c in coords]) for i in range(3)]
    max_coord = [max([c[i] for c in coords]) for i in range(3)]
    
    center = [(min_coord[i] + max_coord[i]) / 2 for i in range(3)]
    size = [(max_coord[i] - min_coord[i]) + float(buffer) for i in range(3)]
    
    return center, size

if __name__ == "__main__":
    pdb_path = sys.argv[1]
    buf = sys.argv[2]
    
    try:
        center, size = get_box_center_size(pdb_path, buf)
        if center:
            print(f"CENTER:{center[0]},{center[1]},{center[2]}")
            print(f"SIZE:{size[0]},{size[1]},{size[2]}")
        else:
            print("ERROR:No coordinates")
    except Exception as e:
        print(f"ERROR:{e}")
"""

    # Step 1: materialise the helper on disk.
    helper_path = "calculate_box_isolated.py"
    with open(helper_path, "w") as handle:
        handle.write(helper_source)

    # Step 2: locate the interpreter of the dedicated environment.
    env_python = "/usr/local/envs/FrankPEPstein/bin/python"
    if not os.path.exists(env_python):
        print(f"Error: Python executable not found at {env_python}. Is the environment created?")
        return None, None

    command = [env_python, helper_path, pdb_file, str(buffer)]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error executing isolated script: {e}")
        print(f"Stderr: {e.stderr}")
        return None, None

    # Step 3: pick the tagged lines out of the helper's stdout.
    parsed = {"CENTER:": None, "SIZE:": None}
    for raw_line in completed.stdout.splitlines():
        if raw_line.startswith("ERROR:"):
            print(f"Script Error: {raw_line}")
            continue
        for tag in parsed:
            if raw_line.startswith(tag):
                values = raw_line.strip().split(":")[1].split(",")
                parsed[tag] = [float(v) for v in values]
                break

    return parsed["CENTER:"], parsed["SIZE:"]

# --- GUI & Interaction Logic ---

# Output widget that the button callback clears and prints into; it is shown
# together with the button when the GUI is displayed.
output_log = widgets.Output()

def extract_and_calculate_box(b):
    """
    Callback for the 'Extract Box' button.

    Reads the pocket currently selected in the global ``pocket_dropdown``,
    resolves it inside the global ``pockets_dir`` and runs the isolated box
    calculation with a buffer of 10.0. On success the result is printed and
    stored in the globals ``box_center`` and ``box_size`` for later cells.
    All messages go to ``output_log``; any unexpected exception is reported
    there instead of being raised.

    Args:
        b: the clicked button as passed by ``on_click``; not used.
    """
    global box_center, box_size

    output_log.clear_output()
    with output_log:
        try:
            # Check if dropdown exists and has a value
            if 'pocket_dropdown' not in globals() or not pocket_dropdown.value:
                print("Error: No pocket selected. Please upload a receptor and detect pockets first.")
                return

            selected_pocket_file = pocket_dropdown.value
            print(f"Selected Pocket: {selected_pocket_file}")

            # pockets_dir must be defined globally from the upload step
            if 'pockets_dir' not in globals():
                print("Error: pockets_dir not defined.")
                return

            pocket_path = os.path.join(pockets_dir, selected_pocket_file)
            if not os.path.exists(pocket_path):
                print(f"❌ Error: File not found {pocket_path}")
                return

            print(f"Path: {pocket_path}")
            print("Calculating box parameters...")

            # Run isolated calculation
            center, size = run_box_calculation_isolated(pocket_path, buffer=10.0)

            # Both values are needed; a missing size would otherwise only
            # surface as a formatting error further down.
            if not center or not size:
                print("❌ Error: Could not calculate coordinates.")
                return

            center_str = f"{center[0]:.3f} {center[1]:.3f} {center[2]:.3f}"
            size_str = f"{size[0]:.3f} {size[1]:.3f} {size[2]:.3f}"

            print("-" * 30)
            print(f"Box Center: {center_str}")
            print(f"Box Size:   {size_str}")
            print("-" * 30)

            # Store these in global namespace so other cells can access them if needed
            box_center = center
            box_size = size

            print("✅ Pocket parameters ready for FrankPEPstein!")
        except Exception as e:
            # Top-level UI boundary: report instead of raising into the widget machinery.
            print(f"An unexpected error occurred: {e}")

# Build the trigger button from an explicit option mapping.
extract_btn = widgets.Button(**{
    'description': 'Extract Pocket & Calculate Box',
    'disabled': False,
    # accepted values: 'success', 'info', 'warning', 'danger' or ''
    'button_style': 'success',
    'tooltip': 'Click to extract the selected pocket',
    'icon': 'box',
})

# Wire the click handler before the button becomes visible.
extract_btn.on_click(extract_and_calculate_box)

# Show the button with its log area directly below a section banner.
print("\n--- 4. Extraction Control ---")
display(extract_btn, output_log)



In [None]:
#@title 5. FrankPEPstein Pipeline
#@markdown **Instructions:**
#@markdown 1. Configure the pipeline parameters (Length, Number of Peptides).
#@markdown 2. Click **Run Pipeline** to start the search and generation process.

import os
import sys
import subprocess
import shutil
import ipywidgets as widgets
from IPython.display import display

# --- Configuration Widgets ---
style = {'description_width': 'initial'}


def _make_int_slider(description, value, lower, upper):
    """Return a half-width, step-1 IntSlider that uses the shared ``style``."""
    return widgets.IntSlider(
        value=value,
        min=lower,
        max=upper,
        step=1,
        description=description,
        style=style,
        # A fresh Layout per slider, as in the individual constructor calls.
        layout=widgets.Layout(width='50%'),
    )


# Peptide length: 3..30, default 10.
length_slider = _make_int_slider('Target Peptide Length:', 10, 3, 30)

# Number of peptides to output: 1..100, default 10.
num_peptides_slider = _make_int_slider('Number of Output Peptides:', 10, 1, 100)

# CPU threads: 1..8, default 2.
threads_slider = _make_int_slider('CPU Threads:', 2, 1, 8)

# Output area
run_output = widgets.Output()

# --- Execution Logic ---
def run_frankpepstein_pipeline(b):
    """Run the four-stage FrankPEPstein pipeline, logging into ``run_output``.

    Registered as the click handler of the run button. It reads the notebook
    globals ``box_center``, ``box_size``, ``receptor_filename`` and
    ``extracted_pocket_path`` (expected to be set by earlier steps) together
    with the current slider values, prepares a clean run directory, and then
    launches, in order: Superposer, FrankVINA I, patch clustering and
    FrankVINA II. Each stage runs as a subprocess under the FrankPEPstein
    conda interpreter.

    Every failed precondition or failed stage prints an error message and
    returns early.

    Args:
        b: Object passed in by the click callback; not used.

    Returns:
        None.
    """
    run_output.clear_output()
    with run_output:
        # 1. Validate prerequisites
        if 'box_center' not in globals() or 'box_size' not in globals():
            print("‚ùå Error: Box parameters (center/size) not defined. Please run Step 4 first.")
            return
        if 'receptor_filename' not in globals() or not receptor_filename:
            print("‚ùå Error: Receptor filename not defined. Please run Step 1 first.")
            return

        # Interpreter of the dedicated conda environment
        frank_python = "/usr/local/envs/FrankPEPstein/bin/python"

        # Prefer the Colab clone location; otherwise fall back to the CWD
        if os.path.exists("/content/FrankPEPstein"):
            base_dir = "/content/FrankPEPstein"
        else:
            base_dir = os.path.abspath(os.getcwd())

        print(f"Base Directory: {base_dir}")

        scripts_dir = os.path.join(base_dir, "scripts")
        db_path = os.path.join(base_dir, "DB", "minipockets_surface80_winsize3_size3_curated-db")

        # 2. Set up the run directory (absolute path), always starting clean
        run_dir = os.path.join(base_dir, "FrankPEPstein_Run")

        if os.path.exists(run_dir):
            shutil.rmtree(run_dir)
        os.makedirs(run_dir)

        # Name under which the full receptor is staged in every working
        # directory. A single name is used for all stages; previously the
        # later stages referenced an undefined `target_receptor`.
        target_receptor = "receptor.pdb"
        try:
            # Resolve the uploaded receptor: absolute path, repo dir, /content,
            # then finally relative to the current working directory.
            if os.path.isabs(receptor_filename) and os.path.exists(receptor_filename):
                src_receptor = receptor_filename
            elif os.path.exists(os.path.join(base_dir, receptor_filename)):
                src_receptor = os.path.join(base_dir, receptor_filename)
            elif os.path.exists(os.path.join("/content", receptor_filename)):
                src_receptor = os.path.join("/content", receptor_filename)
            else:
                src_receptor = os.path.abspath(receptor_filename)

            if not os.path.exists(src_receptor):
                print(f"‚ùå Error: Could not find receptor file '{receptor_filename}'.")
                return

            shutil.copy(src_receptor, os.path.join(run_dir, target_receptor))
        except OSError:
            # OSError covers FileNotFoundError as well as permission and
            # same-file errors raised by shutil.copy.
            print("‚ùå Error copying receptor.")
            return

        # Prepare the pocket for Superposer (chain 'p')
        target_pocket_file = "target_pocket.pdb"
        if 'extracted_pocket_path' not in globals() or not extracted_pocket_path:
            print("‚ùå Error: Pocket path not defined. Please run extraction (Step 3/4) first.")
            return

        try:
            print(f"Preparing pocket from: {extracted_pocket_path}")
            from Bio import PDB
            parser = PDB.PDBParser(QUIET=True)
            struct = parser.get_structure("pocket", extracted_pocket_path)

            # Rename all chains to 'p' to satisfy the Superposer requirement
            for model in struct:
                for chain in model:
                    chain.id = 'p'

            io = PDB.PDBIO()
            io.set_structure(struct)
            io.save(os.path.join(run_dir, target_pocket_file))
            print(f"‚úÖ Created {target_pocket_file} with chain 'p' for Superposer.")

        except Exception as e:
            print(f"‚ùå Error preparing pocket PDB: {e}")
            return

        # 3. Read parameters from the widgets
        pep_length = length_slider.value
        n_peps = num_peptides_slider.value
        n_threads = threads_slider.value

        if not os.path.exists(db_path):
            print(f"‚ùå Error: Database not found at {db_path}")
            return

        print("\n--- Pipeline Configuration ---")
        print(f"Target Length: {pep_length}")
        print(f"Output Count : {n_peps}")
        print(f"Threads      : {n_threads}")

        # --- A. SUPERPOSER (Fragment Scanning) ---
        print("\nüöÄ Starting Step 1: Fragment Scanning (Superposer)...")

        superposer_script = os.path.join(scripts_dir, "superposerV5.2_leave1out.py")

        # The chain-'p' pocket file is passed as -T; it is given as a bare
        # file name because the subprocess runs with cwd=run_dir.
        cmd_superposer = [
            frank_python, superposer_script,
            "-T", target_pocket_file,
            "-d", db_path,
            "-a", "3",
            "-r", "0.1",
            "-x_center", str(box_center[0]),
            "-y_center", str(box_center[1]),
            "-z_center", str(box_center[2]),
            "-x_size", str(box_size[0]),
            "-y_size", str(box_size[1]),
            "-z_size", str(box_size[2]),
            "-t", str(n_threads),
            "-fm", db_path
        ]

        try:
            # Run inside run_dir to keep outputs contained
            subprocess.run(cmd_superposer, cwd=run_dir, check=True)
            print("‚úÖ Superposer finished.")
        except subprocess.CalledProcessError as e:
            print(f"‚ùå Error running Superposer: {e}")
            return

        # --- B. FrankVINA Part 1 (Filtering Patches) ---
        print("\nüöÄ Starting Step 2: Patch Filtering (FrankVINA I)...")

        # Superposer output is expected in this sub-folder of run_dir
        super_out_dir = os.path.join(run_dir, "superpockets_residuesAligned3_RMSD0.1")

        if not os.path.exists(super_out_dir):
            print("‚ùå Error: Superposer output directory not found. Did it find any fragments?")
            return

        vina_script_1 = os.path.join(scripts_dir, "frankVINA_FNKPSTN.py")

        # Replicates the original run_superposer driver: enter the superpockets
        # folder, copy the receptor next to the patches, then run
        # frankVINA_FNKPSTN.py with the receptor name and thread count.
        try:
            shutil.copy(
                os.path.join(run_dir, target_receptor),
                os.path.join(super_out_dir, target_receptor),
            )

            cmd_vina1 = [
                frank_python, vina_script_1,
                target_receptor,
                str(n_threads)
            ]

            subprocess.run(cmd_vina1, cwd=super_out_dir, check=True)
            print("‚úÖ Patch Filtering finished.")

        except Exception as e:
            print(f"‚ùå Error in Vina Step 1: {e}")
            return

        # --- C. Patch Clustering (Assembly) ---
        print(f"\nüöÄ Starting Step 3: Peptide Assembly (PatchClustering) for {pep_length}-mers...")

        # Input for this step is `top_10_patches`, created by frankVINA_FNKPSTN
        patches_dir = os.path.join(super_out_dir, "top_10_patches")

        if not os.path.exists(patches_dir):
            print("‚ùå Error: 'top_10_patches' folder not found. No patches passed filtering?")
            return

        # The script expects to run inside the patches folder
        patch_clust_script = os.path.join(scripts_dir, "patch_clustering_V8.7.py")

        cmd_clust = [
            frank_python, patch_clust_script,
            "-w", str(pep_length),
            "-t", str(n_threads)
        ]

        try:
            subprocess.run(cmd_clust, cwd=patches_dir, check=True)
            print("‚úÖ Patch Clustering finished.")
        except subprocess.CalledProcessError as e:
            print(f"‚ùå Error in Patch Clustering: {e}")
            return

        # --- D. FrankVINA Part 2 (Final Scoring) ---
        print("\nüöÄ Starting Step 4: Final Refinement & Scoring (FrankVINA II)...")

        # Clustering output is expected in `frankPEPstein_{winsize}` inside patches_dir
        final_dir = os.path.join(patches_dir, f"frankPEPstein_{pep_length}")

        if not os.path.exists(final_dir):
            print(f"‚ùå Error: Output directory '{final_dir}' not found. No peptides assembled?")
            return

        vina_script_2 = os.path.join(scripts_dir, "frankVINA_V3.py")

        try:
            # Stage the receptor next to the assembled peptides
            shutil.copy(
                os.path.join(super_out_dir, target_receptor),
                os.path.join(final_dir, target_receptor),
            )

            cmd_vina2 = [
                frank_python, vina_script_2,
                target_receptor,
                str(n_threads),
                str(n_peps)  # Number of top peptides to keep
            ]

            subprocess.run(cmd_vina2, cwd=final_dir, check=True)
            print("‚úÖ Final Scoring finished.")

            # --- Results ---
            results_tsv = os.path.join(final_dir, f"top_{n_peps}_peps", f"top{n_peps}_peps.tsv")
            if os.path.exists(results_tsv):
                print(f"\nüéâ Success! Top peptides saved in: {results_tsv}")
                # Optional: specific display code for results
            else:
                print("‚ö†Ô∏è Warning: Pipeline finished but results TSV not found.")

        except Exception as e:
            print(f"‚ùå Error in Vina Step 2: {e}")
            return


# Draw UI
# Button that launches the whole pipeline
run_btn = widgets.Button(
    icon='rocket',
    tooltip='Start the magic',
    button_style='danger',  # accepted: 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
    description='Run FrankPEPstein Pipeline',
)
run_btn.on_click(run_frankpepstein_pipeline)

# Vertical stack: a row with the two peptide sliders, then the thread slider,
# the run button and the output area.
display(
    widgets.VBox(
        [
            widgets.HBox([length_slider, num_peptides_slider]),
            threads_slider,
            run_btn,
            run_output,
        ]
    )
)
