In [None]:

#@title 0. Install CondaColab & Setup Tools (~3 min)
import sys
import os
import subprocess
from IPython.display import clear_output

# Helper to suppress output
class SuppressStdout:
    """Context manager that temporarily points ``sys.stdout`` at ``os.devnull``.

    Only writes that go through the Python-level ``sys.stdout`` object are
    redirected. On exit the stream that was active on entry is restored and
    the devnull handle opened by this manager is closed.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own reference to the sink so __exit__ closes exactly this
        # handle, even if the body reassigns sys.stdout to something else.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first so stdout is usable again even if close() fails.
        sys.stdout = self._original_stdout
        self._devnull.close()
        # Falsy return: exceptions raised inside the block propagate.
        return False

def _verify_executable(label, path):
    """Report whether ``path`` exists and is executable, fixing the mode if needed.

    Args:
        label: Human-readable tool name used in the printed messages.
        path: Location of the binary to check.

    Returns:
        True when the file exists and is (or was made) executable, else False.
    """
    if not os.path.exists(path):
        print(f"❌ {label} binary not found at {path}")
        return False
    if os.access(path, os.X_OK):
        print(f"✅ {label} is executable.")
        return True

    print(f"⚠️ {label} found but NOT executable. Fixing...")
    try:
        # Add the execute bits without going through a shell.
        os.chmod(path, os.stat(path).st_mode | 0o111)
    except OSError:
        pass  # the failure is reported by the access check below
    if os.access(path, os.X_OK):
        print(f"✅ {label} fixed.")
        return True
    print(f"❌ Failed to fix {label} permissions.")
    return False


def run_setup():
    """Install and configure everything the FrankPEPstein notebook needs.

    Steps, tracked on a tqdm progress bar:
      1. Ensure condacolab is installed (installing it restarts the kernel).
      2. Clone the FrankPEPstein repository if it is not present.
      3. Create the ``FrankPEPstein`` conda environment and expose it through
         ``sys.path`` / ``PATH``.
      4. Write ``FrankPEPstein/scripts/notebook_utils.py``.
      5. Download and unpack the external tools archive.
      6. Configure Modeller.

    Afterwards the Click and Vina binaries are verified. The cell output is
    cleared and "Setup Ready!" is printed only when both checks pass, so a
    failure report stays visible instead of being wiped.

    Raises:
        subprocess.CalledProcessError: if one of the ``check=True`` install,
            clone or environment-creation commands fails.
    """
    # Install tqdm first if missing (fast)
    try:
        from tqdm.notebook import tqdm
    except ImportError:
        subprocess.run("pip install -q tqdm", shell=True, check=True)
        from tqdm.notebook import tqdm

    print("Setting up FrankPEPstein environment...")

    steps = [
        ("Installing CondaColab", "condacolab"),
        ("Cloning Repository", "git"),
        ("Creating Conda Environment (Slow)", "env"),
        ("Configuring Notebook Utils", "patch"),
        ("Setting up External Tools (DB Download)", "tools"),
        ("Configuring Modeller", "modeller")
    ]

    with tqdm(total=len(steps)) as pbar:
        # 1. CondaColab
        pbar.set_description(steps[0][0])
        try:
            with SuppressStdout():
                import condacolab
                condacolab.check()
        except (ImportError, AssertionError):
            print("Installing CondaColab... (Kernel will restart and colab will say it crashes, you dont need to do anything)")
            print("Installing python dependencies...")
            subprocess.run("pip install -q py3dmol logomaker", shell=True, check=True)
            subprocess.run("pip install -q biopython", shell=True, check=True) # Ensure biopython is there too
            with SuppressStdout():
                subprocess.run("pip install -q condacolab", shell=True, check=True)
                import condacolab
                condacolab.install()

        pbar.update(1)

        # 2. Git Clone
        pbar.set_description(steps[1][0])
        with SuppressStdout():
            if not os.path.exists("FrankPEPstein"):
                subprocess.run("git clone https://github.com/Joacaldog/FrankPEPstein.git", shell=True, check=True)
        pbar.update(1)

        # 3. Create Environment
        pbar.set_description(steps[2][0])
        env_path = "/usr/local/envs/FrankPEPstein"
        if not os.path.exists(env_path):
            # Added 'pigz' to the package list
            subprocess.run("mamba create -n FrankPEPstein -q -y -c conda-forge -c salilab openbabel biopython fpocket joblib tqdm py3dmol vina pigz python=3.10 salilab::modeller > /dev/null 2>&1", shell=True, check=True)

        # Configure Path
        site_packages = f"{env_path}/lib/python3.10/site-packages"
        if site_packages not in sys.path:
            sys.path.append(site_packages)
        # Prepend the env's bin directory only once so re-running the cell
        # does not keep growing PATH.
        env_bin = f"{env_path}/bin"
        if env_bin not in os.environ['PATH'].split(os.pathsep):
            os.environ['PATH'] = f"{env_bin}:" + os.environ['PATH']
        pbar.update(1)

        # 4. Patch Utils
        pbar.set_description(steps[3][0])
        # We write a clean utils file
        patched_utils_content = r'''import os
import sys
import glob
import shutil
import subprocess

def configure_modeller(license_key='MODELIRANJE', repo_dir='FrankPEPstein'):
    template_config = os.path.join(repo_dir, "utilities/config.py")
    dest_config = None
    try:
        import modeller
        modeller_path = os.path.dirname(modeller.__file__)
        candidate = os.path.join(modeller_path, "config.py")
        if os.path.exists(candidate):
            dest_config = candidate
    except Exception:
        pass

    if not dest_config:
        possible_paths = [
            f"{sys.prefix}/lib/modeller-*/modlib/modeller/config.py",
            f"{sys.prefix}/lib/python*/site-packages/modeller/config.py",
            "/usr/local/envs/FrankPEPstein/lib/modeller-*/modlib/modeller/config.py"
        ]
        for pattern in possible_paths:
            found = glob.glob(pattern)
            if found:
                dest_config = found[0]
                break

    if dest_config and os.path.exists(template_config):
        with open(template_config, 'r') as f:
            content = f.read()
        new_content = content.replace("'MODELIRANJE'", f"'{license_key}'")
        with open(dest_config, 'w') as f:
            f.write(new_content)
        return True
    return False



def setup_external_tools(files_id=None):
    # Install gdown if needed
    try: import gdown
    except ImportError: subprocess.run([sys.executable, "-m", "pip", "install", "gdown"], check=True); import gdown

    # Determine Base Dir
    base_dir = "." # Always download to current directory (root of colab)
    
    # 1. Download Single Archive
    if files_id:
        archive_path = os.path.join(base_dir, "files.tar.gz")
        # Check if already extracted (simple check for DB and utilities)
        if not (os.path.exists(os.path.join(base_dir, "DB")) and os.path.exists(os.path.join(base_dir, "utilities"))):
            if not os.path.exists(archive_path):
                 print(f"Downloading files.tar.gz (ID: {files_id})...")
                 gdown.download(f'https://drive.google.com/uc?id={files_id}', archive_path, quiet=True)
            
            if os.path.exists(archive_path):
                 print("Extracting files.tar.gz (Parallel with pigz)...")
                 # pigz -d -c files.tar.gz | tar xf -
                 # or tar -I pigz -xf ...
                 subprocess.run(f"tar -I pigz -xf {archive_path} -C {base_dir}", shell=True, check=True)
                 # os.remove(archive_path) # Optional clean up
    
    # 2. Permissions & Final Setup
    utilities_dir = os.path.join(base_dir, "utilities")
    if os.path.exists(utilities_dir):
        # [ADDED] Rename extracted ADFR folder to Suite name required by tools
        adfr_extracted = os.path.join(utilities_dir, "ADFR")
        adfr_target = os.path.join(utilities_dir, "ADFRsuite_x86_64Linux_1.0")
        if os.path.exists(adfr_extracted) and not os.path.exists(adfr_target):
             print(f"Renaming {adfr_extracted} to {adfr_target}...")
             os.rename(adfr_extracted, adfr_target)
             
             
        # if os.path.exists(utilities_dir): # REMOVED redundant line causing indent error
        print("Fixing permissions...")
        subprocess.run(f"chmod -R +x {utilities_dir}", shell=True)
        
        # Add paths: the ADFR folder may have been renamed to adfr_target above,
        # so look there first and fall back to the original folder name.
        for adfr_root in (adfr_target, adfr_extracted):
            adfr_bin = os.path.abspath(os.path.join(adfr_root, "bin"))
            if os.path.exists(adfr_bin):
                if adfr_bin not in os.environ['PATH'].split(os.pathsep):
                    os.environ['PATH'] += os.pathsep + adfr_bin
                break
             
        click_bin = os.path.join(utilities_dir, "click") # assuming it extracts directly or in bin
        # Adjust check if structure is different (e.g. utilities/Click/bin)
        # User said "utilities que quedaran en ~/. Ahora ADFR al extraerse queda listo... lo mismo con click"
        # We assume standard structure.
        

        '''
        os.makedirs("FrankPEPstein/scripts", exist_ok=True)
        with open("FrankPEPstein/scripts/notebook_utils.py", "w") as f:
            f.write(patched_utils_content)
        pbar.update(1)

        # 5. External Tools Setup
        pbar.set_description(steps[4][0])
        repo_path = os.path.abspath("FrankPEPstein")
        if repo_path not in sys.path:
            sys.path.append(repo_path)
        from scripts import notebook_utils

        # Correct Drive IDs
        files_id = "1M30wmaf6vaXJl1kmj-0cD5yhBYDCx_xw"

        with SuppressStdout():
            notebook_utils.setup_external_tools(files_id)
        pbar.update(1)

        # 6. Configure Modeller
        pbar.set_description(steps[5][0])
        with SuppressStdout():
            notebook_utils.configure_modeller()
        pbar.update(1)

    # 7. Verify Executables
    print(f"\n{'='*20}")
    print("Verifying Executables...")

    # NOTE(review): the tools archive is extracted into "./utilities", while
    # this check looks under the cloned repo whenever it exists — confirm
    # which location is expected to hold Click and Vina.
    base_dir = "FrankPEPstein" if os.path.exists("FrankPEPstein") else "."
    utilities_dir = os.path.join(base_dir, "utilities")

    # Check Click (two known layouts)
    click_bin = os.path.join(utilities_dir, "Click", "bin", "click")
    if not os.path.exists(click_bin):
        # Try logic from superposer logic/config
        click_bin = os.path.join(utilities_dir, "Click", "click")
    click_ok = _verify_executable("Click", click_bin)

    # Check Vina
    vina_bin = os.path.join(utilities_dir, "vina_1.2.4_linux_x86_64")
    vina_ok = _verify_executable("Vina", vina_bin)
    print(f"{'='*20}\n")

    if click_ok and vina_ok:
        # Everything verified: replace the verbose log with a single line.
        clear_output()
        print("✅ Setup Ready!")
    else:
        # Keep the report on screen so the user can see what is missing.
        print("⚠️ Setup finished, but some executables could not be verified (see report above).")

# Entry point: run the whole setup when this code executes as the main module.
if __name__ == "__main__":
    run_setup()



In [None]:
#@title 1. Input & Pocket Selection
#@markdown **Instructions:**
#@markdown 1. Upload your Receptor PDB.
#@markdown 2. Choose Mode: **Auto Detect** (runs fpocket) or **Manual Upload** (upload your specific pocket PDB).
#@markdown 3. Select the pocket from the dropdown to visualize.

import os
import sys
import subprocess
# Import py3Dmol with two fallbacks: first expose the FrankPEPstein conda
# env to this kernel and retry, then install the package with pip.
try:
    import py3Dmol
except ImportError:
    # Add the FrankPEPstein env's site-packages to sys.path (if the env exists)
    env_path = "/usr/local/envs/FrankPEPstein"
    site_packages = f"{env_path}/lib/python3.10/site-packages"
    if os.path.exists(site_packages):
        if site_packages not in sys.path:
            sys.path.append(site_packages)
        # Also prepend the env's bin directory to PATH, unless already present
        if f"{env_path}/bin" not in os.environ['PATH']:
            os.environ['PATH'] = f"{env_path}/bin:" + os.environ['PATH']
    
    # Retry the import now that the env may be visible
    try:
        import py3Dmol
    except ImportError:
        # Last resort: install into the kernel's environment (raises on failure)
        print("py3Dmol not found. Installing...")
        subprocess.run("pip install -q py3dmol", shell=True, check=True)
        import py3Dmol

import ipywidgets as widgets
from google.colab import files
from IPython.display import display
import shutil
import json
import re

# --- configuration ---
detection_mode = "Auto Detect" #@param ["Auto Detect", "Manual Upload"]
# NOTE(review): min_alpha_spheres is only echoed in messages below; it is not
# passed to the fpocket command in this cell.
min_alpha_spheres = 35 #@param {type:"integer"}

# Global variables shared with the processing callbacks defined later
receptor_filename = None  # absolute path of the uploaded receptor, set after upload
initial_path = os.getcwd() # Main Directory
# Refactor: Use FrankPEPstein_run as centralized storage for execution
pockets_dir = os.path.join(initial_path, "FrankPEPstein_run") 
final_pockets_list = []  # file names (inside pockets_dir) offered in the dropdown

# Ensure pockets dir exists
if not os.path.exists(pockets_dir):
    os.makedirs(pockets_dir)

# --- Persistence Logic ---
def save_pipeline_state(updates):
    """Merge ``updates`` into ``pipeline_state.json`` in the current directory.

    The existing state is loaded first so keys written by other steps are
    preserved. A state file that cannot be read, is not valid JSON, or does
    not contain a JSON object is treated as empty instead of aborting.

    Args:
        updates: Mapping of JSON-serialisable keys/values to store.
    """
    state_file = "pipeline_state.json"
    current_state = {}
    if os.path.exists(state_file):
        try:
            with open(state_file, "r") as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and decoding errors;
            # a corrupt file is simply overwritten below.
            loaded = None
        # Only a JSON object can be merged; anything else is discarded.
        if isinstance(loaded, dict):
            current_state = loaded
    current_state.update(updates)
    with open(state_file, "w") as f:
        json.dump(current_state, f, indent=4)
    print(f"State saved to {state_file}")

# --- 1. Upload Receptor ---
# Flow of this section: upload the receptor PDB, obtain candidate pockets
# (fpocket in "Auto Detect" mode, user files in "Manual Upload" mode), copy
# them into pockets_dir and show an interactive viewer with a dropdown.
print(f"--- Upload Receptor PDB ({detection_mode}) ---")
uploaded_r = files.upload()

if not uploaded_r:
    print("No receptor file uploaded.")
else:
    raw_filename = list(uploaded_r.keys())[0]

    # Check for Colab duplicate naming (e.g. receptor(1).pdb): strip the
    # "(N)" suffix and overwrite the earlier upload.
    match = re.search(r'^(.*?)\s?\(\d+\)(\.[^.]*)?$', raw_filename)
    if match:
        clean_name = match.group(1) + (match.group(2) if match.group(2) else "")
        print(f"Detected duplicate upload: {raw_filename} -> overwriting {clean_name}")

        if os.path.exists(clean_name):
            os.remove(clean_name)
        os.rename(raw_filename, clean_name)
        receptor_filename = os.path.abspath(clean_name)
    else:
        receptor_filename = os.path.abspath(raw_filename)

    print(f"Receptor: {receptor_filename}")
    save_pipeline_state({"receptor_filename": receptor_filename})

    # --- 2. Pocket Handling ---

    # Determine fpocket path
    fpocket_bin = "fpocket"
    if shutil.which(fpocket_bin) is None:
        # Try specific env path
        env_fpocket = "/usr/local/envs/FrankPEPstein/bin/fpocket"
        if os.path.exists(env_fpocket):
            fpocket_bin = env_fpocket
        else:
            print("⚠️ fpocket executable not found in PATH or FrankPEPstein env.")
            # We let it fail in subprocess if still not found, but this warning helps.

    if detection_mode == "Auto Detect":
        try:
            print(f"Running fpocket on {receptor_filename} using {fpocket_bin} with min alpha spheres={min_alpha_spheres}...")
            # Capture output for debugging
            # Using -m to filter small pockets as requested - REMOVED due to user report of bugs
            # Pass an argument list instead of a shell string so an uploaded
            # file name containing quotes or shell metacharacters cannot
            # break (or inject into) the command. A missing executable now
            # raises and is reported by the except clause below.
            result = subprocess.run(
                [fpocket_bin, "-f", receptor_filename],
                capture_output=True, text=True
            )

            if result.returncode != 0:
                print("❌ Error running fpocket.")
                print(f"Exit Code: {result.returncode}")
                print(f"STDERR:\n{result.stderr}")
                print(f"STDOUT:\n{result.stdout}")

            # Look for the output folder even after a failure, to help diagnose.
            # Two folder names are tried: "<file name>_out" and "<stem>_out".
            base_name_no_ext = os.path.splitext(os.path.basename(receptor_filename))[0]
            base_dir = os.path.dirname(receptor_filename)

            folder_name_1 = f"{os.path.basename(receptor_filename)}_out"
            folder_name_2 = f"{base_name_no_ext}_out"

            possible_folders = [
                os.path.join(base_dir, folder_name_1),
                os.path.join(base_dir, folder_name_2)
            ]

            output_folder = next((f for f in possible_folders if os.path.exists(f)), None)

            if output_folder:
                fpocket_pockets_dir = os.path.join(output_folder, "pockets")
                if os.path.exists(fpocket_pockets_dir):
                    # Copy relevant pockets to our centralized dir
                    found_pockets = [f for f in os.listdir(fpocket_pockets_dir) if f.endswith(".pdb")]
                    for p in found_pockets:
                        src = os.path.join(fpocket_pockets_dir, p)
                        dst = os.path.join(pockets_dir, p)
                        shutil.copy(src, dst)
                        final_pockets_list.append(p)

                    print(f"Auto-detection finished. Found {len(final_pockets_list)} pockets.")
                    if not final_pockets_list:
                        # NOTE(review): min_alpha_spheres is not passed to
                        # fpocket above, so this hint currently has no effect.
                        print(f"⚠️ No pockets found! Try lowering min_alpha_spheres (current: {min_alpha_spheres})")
                else:
                    print(f"Warning: pockets subdirectory not found in {output_folder}")
            else:
                if result.returncode == 0:
                    print("Error: fpocket finished but output folder not found.")

        except Exception as e:
            print(f"Unexpected error running fpocket: {e}")

    elif detection_mode == "Manual Upload":
        print(f"\n--- Upload Manual Pocket PDB ---")
        uploaded_p = files.upload()
        if uploaded_p:
            for p_file in uploaded_p.keys():
                # Same duplicate-name handling as for the receptor
                match = re.search(r'^(.*?)\s?\(\d+\)(\.[^.]*)?$', p_file)
                if match:
                    clean_name = match.group(1) + (match.group(2) if match.group(2) else "")
                    print(f"Detected duplicate upload: {p_file} -> overwriting {clean_name}")
                else:
                    clean_name = p_file

                # Move the upload into pockets_dir, replacing any older copy
                target_path = os.path.join(pockets_dir, clean_name)
                if os.path.exists(target_path):
                    os.remove(target_path)

                os.rename(p_file, target_path)

                if clean_name not in final_pockets_list:
                    final_pockets_list.append(clean_name)

            print(f"Manual upload finished. Available pockets: {len(final_pockets_list)}")

    # --- 3. Visualization & Selection ---
    if final_pockets_list:
        print("\n--- Pocket Selection & Visualization ---")

        pocket_dropdown = widgets.Dropdown(
            options=sorted(final_pockets_list),
            description='Select Pocket:',
            disabled=False,
        )

        def view_pockets(selected_pocket_file):
            """Render the receptor surface with every pocket; highlight the selected one.

            Args:
                selected_pocket_file: File name (inside ``pockets_dir``) of the
                    pocket currently chosen in the dropdown.
            """
            view = py3Dmol.view(width=800, height=600)

            # Receptor
            with open(receptor_filename, 'r') as f:
                view.addModel(f.read(), "pdb")
            view.setStyle({})
            view.addSurface(py3Dmol.SES, {'opacity': 0.3, 'color': 'white'})

            # Palette cycled over the non-selected pockets
            colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF', '#FFA500', '#800080', '#008000', '#800000']

            for i, p_file in enumerate(sorted(final_pockets_list)):
                full_path = os.path.join(pockets_dir, p_file)
                if os.path.exists(full_path):
                    with open(full_path, 'r') as f:
                        view.addModel(f.read(), "pdb")

                    # Label with the first number in the file name, else the name itself
                    match = re.search(r'(\d+)', p_file)
                    label_text = match.group(1) if match else p_file

                    is_selected = (p_file == selected_pocket_file)

                    if is_selected:
                        color = 'red'
                        opacity = 1.0
                        label_style = {'fontSize': 18, 'fontColor': 'red', 'backgroundColor': 'white', 'backgroundOpacity': 0.8, 'border': '2px solid red'}
                    else:
                        color = colors[i % len(colors)]
                        opacity = 0.6
                        label_style = {'fontSize': 12, 'fontColor': 'black', 'backgroundColor': 'white', 'backgroundOpacity': 0.5}

                    # {'model': -1} targets the model that was just added
                    view.setStyle({'model': -1}, {'sphere': {'color': color, 'opacity': opacity}})
                    view.addLabel(label_text, label_style, {'model': -1})

            view.zoomTo()
            view.show()

        display(widgets.interactive(view_pockets, selected_pocket_file=pocket_dropdown))
    else:
        print("No pockets available to select.")

#@title 4. Pocket Extraction & Box Generation
#@markdown This step extracts the selected pocket (if Auto) or processes it (if Manual) and calculates the grid box.

import os
import subprocess
import sys

# --- Helper Functions (Subprocess) ---
def run_processing_isolated(receptor_path, pocket_path, output_pocket_path, mode="extract", buffer=3.0):
    """Run pocket extraction/box calculation in a separate Python process.

    A helper script is written to ``process_box_isolated.py`` in the current
    directory and executed with the FrankPEPstein env interpreter when it
    exists, otherwise with the current interpreter. The helper reports its
    result on stdout as ``CENTER:x,y,z``, ``SIZE:x,y,z``, ``SUCCESS`` or
    ``ERROR...`` lines, which are parsed here.

    Args:
        receptor_path: Receptor PDB path (read by the helper in "extract" mode).
        pocket_path: Pocket PDB path.
        output_pocket_path: Where the helper writes the final pocket PDB.
        mode: "extract" (residues within 5 A of the pocket atoms are taken
            from the receptor) or "direct" (pocket file used as-is).
        buffer: Value added to each box dimension.

    Returns:
        ``(center, size, success)`` — two lists of three floats (or None when
        not reported) and a flag set by the helper's SUCCESS line. When the
        helper exits with a non-zero status the result is ``(None, None, False)``.

    NOTE(review): the helper renames every chain to 'p'; presumably Biopython
    refuses an id already used by a sibling chain, in which case inputs with
    more than one chain would end in an ERROR line — TODO confirm.
    """

    script_content = f"""
import sys
import os
from Bio import PDB
from Bio.PDB import PDBParser, PDBIO, Select, NeighborSearch

def process_and_box(receptor_file, pocket_file, output_file, mode, buffer_val):
    try:
        parser = PDBParser(QUIET=True)
        
        # 1. Load Pocket
        pocket_struct = parser.get_structure("pocket", pocket_file)
        
        # Determine atoms for Box Calculation
        atoms_for_box = []
        residues_for_saving = [] # (chain, res_id)
        
        # 2. Logic based on Mode
        if mode == 'extract':
             # Fpocket mode: Load Receptor, Find Neighbors (5A)
             receptor_struct = parser.get_structure("receptor", receptor_file)
             
             pocket_atoms = [atom for atom in pocket_struct.get_atoms()]
             if not pocket_atoms:
                 print("ERROR: No atoms in pocket file")
                 return
                 
             receptor_atoms = list(receptor_struct.get_atoms())
             ns = NeighborSearch(receptor_atoms)
             
             selected_residues = set()
             for p_atom in pocket_atoms:
                 nearby = ns.search(p_atom.get_coord(), 5.0, level='R')
                 for res in nearby:
                     selected_residues.add((res.parent.id, res.id))
            
             # Save Logic for Extraction
             class PocketSelect(Select):
                 def accept_residue(self, residue):
                     return (residue.parent.id, residue.id) in selected_residues
                     
             # We save to temp then reload to standardize chain 'p'
             io = PDBIO()
             io.set_structure(receptor_struct)
             temp_out = output_file + ".tmp"
             io.save(temp_out, PocketSelect())
             
             # Reload temp to get atoms for box
             saved_struct = parser.get_structure("saved", temp_out)
             
             # Prepare for Final Save (Rename chain to 'p')
             for model in saved_struct:
                 for chain in model:
                     chain.id = 'p'
                     for residue in chain:
                         for atom in residue:
                             atoms_for_box.append(atom)
             
             io.set_structure(saved_struct)
             io.save(output_file)
             os.remove(temp_out)
             
        elif mode == 'direct':
             # Manual mode: Use pocket file directly, just rename chain to 'p'
             
             # Collect atoms and rename chain
             for model in pocket_struct:
                 for chain in model:
                     chain.id = 'p'
                     for residue in chain:
                         for atom in residue:
                             atoms_for_box.append(atom)
             
             io = PDBIO()
             io.set_structure(pocket_struct)
             io.save(output_file)
             
        # 3. Box Calculation
        if not atoms_for_box:
            print("ERROR: No atoms/residues for box calculation")
            return

        coords = [a.get_coord() for a in atoms_for_box]
        min_coord = [min([c[i] for c in coords]) for i in range(3)]
        max_coord = [max([c[i] for c in coords]) for i in range(3)]
        
        # Center
        center = [(min_coord[i] + max_coord[i]) / 2 for i in range(3)]
        
        # Size + Buffer (3.0 A as requested)
        size = [(max_coord[i] - min_coord[i]) + float(buffer_val) for i in range(3)]
        
        print(f"CENTER:{{center[0]}},{{center[1]}},{{center[2]}}")
        print(f"SIZE:{{size[0]}},{{size[1]}},{{size[2]}}")
        print("SUCCESS")

    except Exception as e:
        print(f"ERROR:{{e}}")

if __name__ == "__main__":
    process_and_box(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], float(sys.argv[5]))
"""
    # Materialise the helper next to the notebook
    script_name = "process_box_isolated.py"
    with open(script_name, "w") as script_handle:
        script_handle.write(script_content)

    # Prefer the conda env interpreter; fall back to the running one
    env_python = "/usr/local/envs/FrankPEPstein/bin/python"
    interpreter = env_python if os.path.exists(env_python) else sys.executable
    command = [interpreter, script_name, receptor_path, pocket_path, output_pocket_path, mode, str(buffer)]

    try:
        completed = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Execution Error: {e.stderr}")
        return None, None, False

    def _triplet(text):
        # "TAG:x,y,z" -> [x, y, z]
        return [float(x) for x in text.split(":")[1].split(",")]

    center = None
    size = None
    success = False
    for line in completed.stdout.splitlines():
        if line.startswith("CENTER:"):
            center = _triplet(line)
            continue
        if line.startswith("SIZE:"):
            size = _triplet(line)
            continue
        if line.startswith("SUCCESS"):
            success = True
            continue
        if line.startswith("ERROR"):
            print(f"Script Error: {line}")

    return center, size, success

# --- GUI ---
# Output area that receives everything printed by the processing callback
output_log = widgets.Output()

def process_logic(b):
    """Button callback: process the pocket chosen in the dropdown.

    Runs ``run_processing_isolated`` on the selected pocket, writing the
    result to ``pocket.pdb`` inside ``pockets_dir``. On success the box
    center/size and the pocket path are stored in module globals and in the
    pipeline state file, and the pocket is also copied to ``initial_path``.
    All messages go to ``output_log``.

    Args:
        b: The clicked button (unused).
    """
    global box_center, box_size, extracted_pocket_path

    output_log.clear_output()
    with output_log:
        # The dropdown only exists once pockets have been found/uploaded
        has_selection = 'pocket_dropdown' in globals() and pocket_dropdown.value
        if not has_selection:
            print("No pocket selected.")
            return

        selected_pocket = pocket_dropdown.value
        src_pocket_path = os.path.join(pockets_dir, selected_pocket)

        # The processed pocket always lands in pockets_dir (working folder in
        # the main directory, not inside the cloned repo) under a fixed name.
        final_pocket_path = os.path.join(pockets_dir, "pocket.pdb")

        print(f"Processing {selected_pocket}...")

        # fpocket output must be mapped back onto receptor residues;
        # manually uploaded pockets are used directly.
        if detection_mode == "Auto Detect":
            mode = "extract"
        else:
            mode = "direct"
        print(f"Mode: {mode}")

        center, size, success = run_processing_isolated(
            receptor_filename, src_pocket_path, final_pocket_path, mode=mode, buffer=3.0
        )

        if not success or not center:
            print("Processing Failed.")
            return

        separator = "-" * 30
        print(separator)
        print(f"Box Center: {center[0]:.3f}, {center[1]:.3f}, {center[2]:.3f}")
        print(f"Box Size:   {size[0]:.3f}, {size[1]:.3f}, {size[2]:.3f}")
        print(separator)
        print(f"✅ Created Final Pocket: {final_pocket_path}")

        # Publish the result for later cells (globals + state file)
        box_center = center
        box_size = size
        extracted_pocket_path = os.path.abspath(final_pocket_path)

        save_pipeline_state({
            "box_center": box_center,
            "box_size": box_size,
            "extracted_pocket_path": extracted_pocket_path
        })

        # Copy to Root for user visibility (matches run_local behavior)
        root_pocket = os.path.join(initial_path, "pocket.pdb")
        try:
            shutil.copy(final_pocket_path, root_pocket)
        except Exception as e:
            print(f"Warning: Could not copy to root: {e}")
        else:
            print(f"✅ Copied to Root: {root_pocket}")

# Button that triggers process_logic for the pocket selected in the dropdown
process_btn = widgets.Button(
    description='Confirm & Process Pocket',
    button_style='success',
    icon='check',
    layout=widgets.Layout(width='50%')
)
process_btn.on_click(process_logic)

# Show the button followed by the area where the callback prints its log
print("\n--- 4. Pocket Processing & Box Calculation ---")
display(process_btn, output_log)


In [None]:
#@title 2. Structure-Guided Peptide Generation
#@markdown **Instructions:**
#@markdown 1. Adjust the Gridbox coordinates if needed.
#@markdown 2. Click **Update View** to see the new box.
#@markdown 3. Click **Run Generation** to start.

import os
import sys
import json
import time
import glob
import subprocess
import threading
import multiprocessing
import ipywidgets as widgets
from IPython.display import display

# --- Dependency Check ---
try:
    import py3Dmol
except ImportError:
    # check=True makes a failed install raise here with the pip error,
    # matching the fallback used in the pocket-selection cell.
    subprocess.run("pip install -q py3dmol", shell=True, check=True)
    import py3Dmol

# --- Parameters (Defaults) ---
peptide_size = 8 #@param {type:"slider", min:5, max:15, step:1}
threads = 0 #@param {type:"integer"}
# A non-positive thread count means "use every CPU reported by the system"
if threads <= 0:
    threads = multiprocessing.cpu_count()
candidates = 10 #@param {type:"integer"}

# --- Configuration & State ---
initial_path = os.getcwd()
repo_folder = os.path.join(initial_path, "FrankPEPstein")
state_file = "pipeline_state.json"

# Make the bundled binaries executable
def fix_permissions():
    """Set mode 0o755 on the Vina and Click binaries that are present.

    Paths that do not exist are skipped silently.
    """
    candidates = (
        repo_folder + "/utilities/vina_1.2.4_linux_x86_64",
        initial_path + "/utilities/click/click",
    )
    for binary in filter(os.path.exists, candidates):
        os.chmod(binary, 0o755)

fix_permissions()

# Load State
# A missing, unreadable or malformed state file leaves pipeline_state empty,
# so every lookup below falls back to its default.
pipeline_state = {}
if os.path.exists(state_file):
    try:
        with open(state_file, "r") as f:
            _loaded_state = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and text decoding errors
        _loaded_state = None
    # Only a JSON object supports the .get() lookups used below
    if isinstance(_loaded_state, dict):
        pipeline_state = _loaded_state

receptor_path = pipeline_state.get("receptor_filename", None)
# NOTE(review): the pocket-selection cell stores pockets under
# "FrankPEPstein_run", not "pockets"; this reassignment also changes the
# pockets_dir global its callbacks read. The saved-state fallback below still
# finds the extracted pocket — confirm which folder is intended.
pockets_dir = os.path.join(initial_path, "pockets")
standard_pocket_path = os.path.join(pockets_dir, "pocket.pdb")

if os.path.exists(standard_pocket_path):
    extracted_pocket_path = standard_pocket_path
else:
    extracted_pocket_path = pipeline_state.get("extracted_pocket_path", None)

box_center_init = pipeline_state.get("box_center", [0.0, 0.0, 0.0])
box_size_init = pipeline_state.get("box_size", [20.0, 20.0, 20.0])

# Stored null/empty values also fall back to the defaults
if not box_center_init: box_center_init = [0.0, 0.0, 0.0]
if not box_size_init: box_size_init = [20.0, 20.0, 20.0]

# --- Widgets ---
style = {'description_width': 'initial'}
layout_half = widgets.Layout(width='48%')

# Box center inputs, one per axis, pre-filled from the saved state
w_xc, w_yc, w_zc = [
    widgets.FloatText(value=box_center_init[i], description=f'Center {axis}:', style=style, layout=layout_half)
    for i, axis in enumerate("XYZ")
]

# Box size inputs, one per axis
w_xs, w_ys, w_zs = [
    widgets.FloatText(value=box_size_init[i], description=f'Size {axis}:', style=style, layout=layout_half)
    for i, axis in enumerate("XYZ")
]

# Action buttons
btn_update = widgets.Button(description='Update View', button_style='info', icon='refresh')
btn_run = widgets.Button(description='Run Generation', button_style='success', icon='play')
btn_stop = widgets.Button(description='Stop & Reset', button_style='danger', icon='stop')

# The viewer is an HTML widget whose value is replaced on each redraw;
# the log is a scrollable Output area.
out_vis = widgets.HTML(layout={'border': '1px solid #ddd', 'height': '600px', 'width': '100%'})
out_log = widgets.Output(layout={'border': '1px solid #ccc', 'height': '300px', 'overflow_y': 'scroll'})

# Gridbox panel: one row per axis (center next to size), then the update button
box_ui = widgets.VBox(
    [widgets.Label("Gridbox Parameters:")]
    + [widgets.HBox(list(row)) for row in ((w_xc, w_xs), (w_yc, w_ys), (w_zc, w_zs))]
    + [btn_update]
)

# Full interface: gridbox panel, run/stop buttons, viewer, log
main_ui = widgets.VBox([
    box_ui,
    widgets.HBox([btn_run, btn_stop]),
    out_vis,
    out_log
])

# --- Logic ---

def draw_view(extra_pdbs=None):
    """Render the receptor, pocket, gridbox and optional fragments into ``out_vis``.

    A fresh py3Dmol view is built on every call and its HTML is assigned to
    ``out_vis.value``. Nothing is printed, so the function can be called from
    the monitor thread without disturbing other notebook output.

    Args:
        extra_pdbs: Optional iterable of PDB file paths drawn as green-carbon
            sticks on top of the receptor and pocket. Paths that cannot be
            read (for example because they were removed after being listed)
            are skipped.

    The function never raises: if rendering fails, a short escaped error
    message is shown in ``out_vis`` instead of the view.
    """
    import html  # local import: only needed to escape the error message

    def _add_model(view, path, model_style):
        """Load one PDB file as a new model and style that model."""
        with open(path, 'r') as handle:
            view.addModel(handle.read(), "pdb")
        view.setStyle({'model': -1}, model_style)

    try:
        view = py3Dmol.view(width=800, height=600)

        # 1. Receptor: no atom style, shown only through its surface.
        if receptor_path and os.path.exists(receptor_path):
            _add_model(view, receptor_path, {})
            view.addSurface(py3Dmol.SES, {'opacity': 0.9, 'color': 'white'})

        # 2. Pocket
        if extracted_pocket_path and os.path.exists(extracted_pocket_path):
            _add_model(view, extracted_pocket_path, {'sphere': {'color': 'orange', 'opacity': 0.6}})

        # 3. Gridbox (from the widgets)
        try:
            cx, cy, cz = float(w_xc.value), float(w_yc.value), float(w_zc.value)
            sx, sy, sz = float(w_xs.value), float(w_ys.value), float(w_zs.value)
        except (TypeError, ValueError):
            # Transient empty/None widget values: draw the scene without a box.
            pass
        else:
            view.addBox({
                'center': {'x': cx, 'y': cy, 'z': cz},
                'dimensions': {'w': sx, 'h': sy, 'd': sz},
                'color': 'red',
                'opacity': 0.5
            })

        # 4. Extra fragments (live updates)
        for pdb_file in extra_pdbs or ():
            try:
                _add_model(view, pdb_file, {'stick': {'colorscheme': 'greenCarbon', 'radius': 0.15}})
            except OSError:
                # The file may disappear between being listed and being read;
                # one unreadable fragment must not abort the whole redraw.
                continue

        view.zoomTo()

        # Replace the widget's HTML in one assignment.
        out_vis.value = view._make_html()

    except Exception as e:
        # Stay silent on stdout (it would interfere with tqdm/log output), but
        # surface the failure in the viewer instead of leaving it blank.
        try:
            out_vis.value = (
                "<pre style='color:#b00'>Visualization error: "
                + html.escape(f"{type(e).__name__}: {e}")
                + "</pre>"
            )
        except Exception:
            pass  # the widget itself is unusable; nothing else is safe to do here

def on_update_click(b):
    """Click handler for the 'Update View' button: redraw the scene.

    ``b`` is the argument supplied by the button's click callback; it is not
    used. The redraw picks up whatever the gridbox widgets currently hold.
    """
    draw_view()

# --- Visualization Monitor Thread ---
# Shared flag between the UI callbacks and monitor_fragments(): it is cleared
# by on_run_click() before a run and set when the run ends or is stopped, which
# makes the monitor's polling loop exit.
stop_event = threading.Event()

def monitor_fragments():
    """Poll the fragment output folder and redraw when new patch files appear.

    Looks for ``patch_file_*.pdb`` under
    ``<initial_path>/FrankPEPstein_run/superpockets_residuesAligned3_RMSD0.1``
    and calls ``draw_view`` with the 50 most recently modified files whenever
    the number of files has grown since the last redraw.

    The folder is always checked at least once, even if ``stop_event`` is
    already set, so a direct call after the pipeline has finished performs one
    final refresh. After each check the function waits up to two seconds on
    ``stop_event`` and returns as soon as it is set.
    """
    run_folder_name = "FrankPEPstein_run"
    fragments_dir = os.path.join(initial_path, run_folder_name, "superpockets_residuesAligned3_RMSD0.1")

    def _mtime_or_zero(path):
        """Modification time, or 0.0 if the file vanished after being listed."""
        try:
            return os.path.getmtime(path)
        except OSError:
            return 0.0

    last_count = 0

    while True:
        if os.path.exists(fragments_dir):
            # Look for patch files
            files = glob.glob(os.path.join(fragments_dir, "patch_file_*.pdb"))
            current_count = len(files)

            if current_count > last_count:
                # Newest first, limited to 50 models to keep the viewer responsive.
                files.sort(key=_mtime_or_zero, reverse=True)
                draw_view(files[:50])
                last_count = current_count

        # Sleeps for the polling interval but wakes immediately on stop.
        if stop_event.wait(2):
            break


def on_run_click(b):
    """Click handler for the 'Run Generation' button: run the pipeline and stream its log.

    Steps:
      1. Clear the log and check that the receptor and pocket paths are set.
         On failure an error is logged and nothing else is touched (buttons
         stay enabled, no monitor thread is started).
      2. Disable the Run/Update buttons, clear ``stop_event`` and start
         ``monitor_fragments`` in a daemon thread.
      3. Launch ``scripts/run_FrankPEPstein.py`` with the current gridbox
         widget values and print its combined stdout/stderr line by line into
         ``out_log``.
      4. On success, redraw once more with the newest fragment files.

    The launched ``Popen`` object is stored in the module-level ``process``
    so that ``on_stop_click`` can terminate it. ``stop_event`` is always set
    and the buttons are always re-enabled when the run ends.

    Args:
        b: Argument supplied by the button's click callback; unused.
    """
    global process

    out_log.clear_output()

    # Validate *before* changing any UI state or starting the monitor thread,
    # so a failed check leaves the buttons usable and no poller behind.
    if not receptor_path or not extracted_pocket_path:
        with out_log:
            print("❌ Error: Receptor or Pocket not found. Please run Step 1 successfully.")
        return

    btn_run.disabled = True
    btn_update.disabled = True
    stop_event.clear()

    # Start Monitor Thread
    threading.Thread(target=monitor_fragments, daemon=True).start()

    with out_log:
        box_center = [w_xc.value, w_yc.value, w_zc.value]
        box_size = [w_xs.value, w_ys.value, w_zs.value]

        print("--- Starting FrankPEPstein Generation ---")
        print(f"Peptide Size: {peptide_size}")
        print(f"Threads: {threads}")
        print(f"Candidates: {candidates}")
        print(f"Gridbox Center: {box_center}")
        print(f"Gridbox Size: {box_size}")

        script_path = os.path.join(repo_folder, "scripts/run_FrankPEPstein.py")

        cmd_list = [
            sys.executable, "-u", script_path,
            "-w", str(peptide_size),
            "-t", str(threads),
            "-c", str(candidates),
            "-xc", str(box_center[0]),
            "-yc", str(box_center[1]),
            "-zc", str(box_center[2]),
            "-xs", str(box_size[0]),
            "-ys", str(box_size[1]),
            "-zs", str(box_size[2])
        ]

        proc = None
        try:
            proc = process = subprocess.Popen(
                cmd_list,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                bufsize=1,  # line-buffered so the log updates as lines arrive
            )

            # Stream the child's output; the pipe is closed when the loop ends.
            with proc.stdout:
                for line in proc.stdout:
                    print(line, end='')

            proc.wait()

            # Stop monitoring
            stop_event.set()

            if proc.returncode == 0:
                print("\n✅ Pipeline Finished Successfully.")
                # Final refresh, done directly: the monitor loop must not be
                # relied on here because stop_event is already set.
                fragments_dir = os.path.join(
                    initial_path, "FrankPEPstein_run", "superpockets_residuesAligned3_RMSD0.1"
                )
                final_files = glob.glob(os.path.join(fragments_dir, "patch_file_*.pdb"))
                if final_files:
                    final_files.sort(key=os.path.getmtime, reverse=True)
                    draw_view(final_files[:50])
            else:
                print(f"\n❌ Pipeline failed with exit code {proc.returncode}")

        except KeyboardInterrupt:
            # Do not leave the pipeline running in the background.
            if proc is not None and proc.poll() is None:
                proc.terminate()
            print("\n🛑 Pipeline interrupted.")
        except Exception as e:
            print(f"\n❌ Execution Error: {e}")
        finally:
            stop_event.set()
            btn_run.disabled = False
            btn_update.disabled = False

def on_stop_click(b):
    """Click handler for the 'Stop & Reset' button: stop the run and clean up.

    Sets ``stop_event`` (ending the monitor loop), terminates the pipeline
    process stored in the module-level ``process`` if it is still running,
    waits for it to exit, then removes the intermediate folders via
    ``cleanup()`` and re-enables the Run/Update buttons.

    Args:
        b: Argument supplied by the button's click callback; unused.
    """
    stop_event.set()

    # `process` only exists once a run has been started.
    proc = globals().get('process')
    if proc is not None and proc.poll() is None:
        proc.terminate()
        # terminate() only sends the signal. Wait for the child to exit so it
        # cannot keep writing into the folders while they are being removed.
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()

    with out_log:
        print("\n🛑 Stopped by user. Cleaning up...")
        cleanup()

    btn_run.disabled = False
    btn_update.disabled = False

def cleanup():
    """Delete the intermediate folders of the current run, if they exist.

    Removes ``superpockets_residuesAligned3_RMSD0.1`` and
    ``temp_folder_residuesAligned3_RMSD0.1`` under
    ``<initial_path>/FrankPEPstein_run``. Missing folders are ignored, so the
    function can safely be called more than once.
    """
    import shutil  # local import keeps this block self-contained

    run_folder_name = "FrankPEPstein_run"
    folders_to_remove = (
        "superpockets_residuesAligned3_RMSD0.1",
        "temp_folder_residuesAligned3_RMSD0.1",
    )

    for folder_name in folders_to_remove:
        folder_path = os.path.join(initial_path, run_folder_name, folder_name)
        if os.path.exists(folder_path):
            # rmtree takes the path as-is; an unquoted `rm -rf <path>` through
            # the shell mis-parses paths containing spaces or metacharacters.
            shutil.rmtree(folder_path, ignore_errors=True)

# Connect each button to its click handler.
for _button, _handler in (
    (btn_update, on_update_click),
    (btn_run, on_run_click),
    (btn_stop, on_stop_click),
):
    _button.on_click(_handler)

# Show the interface, then render the initial scene into the viewer.
display(main_ui)
draw_view()


In [None]:
#@title 3. Sequence Alignment & Logo Analysis
#@markdown **Instructions:** 
#@markdown This step analyzes the finalized peptide candidates from Step 2.
#@markdown 1. Extracts amino acid sequences from the best PDB candidates.
#@markdown 2. Writes them to a Multifasta file (no alignment tool is run; sequences of unequal length are right-padded with gaps for the logo).
#@markdown 3. Visualizes conserved motifs using a Sequence Logo.

import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import logomaker
from IPython.display import display, Image
import re

AMINO_ACIDS = "ACDEFGHIKLMNPQRSTVWY"


def extract_sequence_from_filename(filename, min_length=5):
    """Deduce a peptide sequence from a candidate PDB file name.

    Two strategies are tried in order:

    1. The longest run of at least ``min_length`` one-letter amino-acid codes
       anywhere in the name (the first such run wins on ties).
    2. If the name without its extension consists only of amino-acid codes
       and underscores, the part before the first underscore.

    Args:
        filename: Base name of the file (no directory part).
        min_length: Minimum run length accepted by the first strategy.

    Returns:
        The sequence string, or ``None`` when no non-empty sequence can be
        deduced.
    """
    runs = re.findall(r'[%s]{%d,}' % (AMINO_ACIDS, min_length), filename)
    if runs:
        return max(runs, key=len)

    base = os.path.splitext(filename)[0]
    if all(c in AMINO_ACIDS + "_" for c in base):
        # A name starting with "_" would yield an empty string: treat as no match.
        return base.split('_')[0] or None
    return None


# Identify the latest run folder
initial_path = os.getcwd()
run_base = os.path.join(initial_path, "FrankPEPstein_run")

# Expected layout: FrankPEPstein_run/frankPEPstein_{pep_size}/top_{candidates}_peps/
candidate_folders = glob.glob(os.path.join(run_base, "frankPEPstein_*", "top_*_peps"))

if not candidate_folders:
    print("❌ No candidate results found from Step 2.")
else:
    # If several result folders exist, use the most recently modified one.
    target_folder = max(candidate_folders, key=os.path.getmtime)
    print(f"Analyzing results from: {target_folder}")

    # 1. Extract sequences. Sorting makes the candidate numbering in the FASTA
    #    reproducible (glob order is filesystem-dependent).
    pdb_files = sorted(glob.glob(os.path.join(target_folder, "*.pdb")))

    sequences = []

    if not pdb_files:
        print("⚠️ No PDB files found in target folder.")
    else:
        for pdb_path in pdb_files:
            filename = os.path.basename(pdb_path)
            seq = extract_sequence_from_filename(filename)
            if seq:
                sequences.append(seq)
            else:
                print(f"Skipping {filename}: Could not deduce sequence from name.")

        if sequences:
            print(f"Extracted {len(sequences)} sequences.")

            # 2. Write the sequences to a multi-FASTA file
            fasta_path = os.path.join(target_folder, "candidates.fasta")
            with open(fasta_path, "w") as f:
                for i, seq in enumerate(sequences):
                    f.write(f">candidate_{i+1}\n{seq}\n")
            print(f"✅ Generated Multifasta: {fasta_path}")

            # 3. Sequence logo. The matrix needs equal-length rows, so shorter
            #    sequences are right-padded with gap characters.
            lengths = [len(s) for s in sequences]
            if len(set(lengths)) > 1:
                print("⚠️ Sequences have varying lengths, alignment needed for proper Logo. Using simple left-alignment.")
                max_len = max(lengths)
                seq_list = [s.ljust(max_len, '-') for s in sequences]
            else:
                seq_list = sequences

            try:
                # With default arguments this is a per-position *counts* matrix.
                logo_matrix = logomaker.alignment_to_matrix(seq_list)

                fig, ax = plt.subplots(figsize=(10, 4))
                logo = logomaker.Logo(logo_matrix, ax=ax)

                ax.set_title("Conserved Peptide Motifs", fontsize=14)
                ax.set_xlabel("Position", fontsize=12)
                # Label matches what is plotted (counts, not probability/bits).
                ax.set_ylabel("Count", fontsize=12)

                logo_path = os.path.join(target_folder, "logo.png")
                plt.savefig(logo_path, bbox_inches='tight', dpi=300)
                plt.show()  # Display in notebook
                print(f"✅ Generated Logo Plot: {logo_path}")

            except Exception as e:
                print(f"Error creating logo: {e}")

        else:
            print("No valid sequences found to align.")


In [None]:
#@title 4. Download Results
#@markdown **Instructions:**
#@markdown Click the button below to download a ZIP archive containing:
#@markdown - Candidate Peptide PDBs
#@markdown - Sequence Alignment (Fasta)
#@markdown - Sequence Motif Plot (Logo)

import os
import shutil
import glob
from google.colab import files
import ipywidgets as widgets
from IPython.display import display
from datetime import datetime

# Locate the newest results folder using the same layout Step 3 searches:
# FrankPEPstein_run/frankPEPstein_*/top_*_peps
initial_path = os.getcwd()
run_base = os.path.join(initial_path, "FrankPEPstein_run")
candidate_folders = glob.glob(os.path.join(run_base, "frankPEPstein_*", "top_*_peps"))

# The most recently modified folder wins; None when nothing was produced yet.
target_folder = max(candidate_folders, key=os.path.getmtime) if candidate_folders else None

out_log = widgets.Output()

def download_results(b):
    """Click handler: zip the selected results folder and send it to the browser.

    Messages are written to ``out_log``. The archive is created in
    ``initial_path`` with a timestamped name and handed to ``files.download``.
    Any error raised while archiving or downloading is reported in the log
    rather than propagated.

    Args:
        b: Argument supplied by the button's click callback; unused.
    """
    out_log.clear_output()
    with out_log:
        have_results = bool(target_folder) and os.path.exists(target_folder)
        if not have_results:
            print("❌ No results found to download.")
            return

        print(f"Compressing results from: {target_folder}")

        # make_archive appends ".zip" itself, so the base path carries no extension.
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        archive_base = os.path.join(initial_path, f"FrankPEPstein_Results_{stamp}")

        try:
            shutil.make_archive(archive_base, 'zip', target_folder)
            final_zip = f"{archive_base}.zip"

            print(f"✅ Created archive: {final_zip}")
            print("Downloading...")

            files.download(final_zip)

        except Exception as err:
            print(f"Error during download: {err}")

# Download button (half the output width), wired to the handler above.
_download_layout = widgets.Layout(width='50%')
btn_download = widgets.Button(
    description='Download Results (ZIP)',
    icon='download',
    button_style='info',
    layout=_download_layout,
)
btn_download.on_click(download_results)

# Show the button with its log area underneath.
display(btn_download, out_log)
