In [None]:

#@title 0. Install CondaColab & Setup Tools (~3 min)
import sys
import os
import subprocess
from IPython.display import clear_output

# Helper to suppress output
class SuppressStdout:
    """Context manager that silences Python-level writes to ``sys.stdout``.

    Only output produced through ``sys.stdout`` of this interpreter (``print``
    and friends) is muted. Commands started with ``subprocess`` write to the
    inherited file descriptor and are not affected by rebinding ``sys.stdout``.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own handle so __exit__ closes exactly the stream opened
        # here, even if the body rebinds sys.stdout to something else.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first so a failing close() can never leave stdout muted.
        sys.stdout = self._original_stdout
        self._devnull.close()
        # Implicitly returns None, so exceptions from the body propagate.

def run_setup():
    """Provision the Colab runtime for FrankPEPstein behind one progress bar.

    The six tracked steps are: install condacolab, clone the repository,
    create the ``FrankPEPstein`` conda environment, write
    ``FrankPEPstein/scripts/notebook_utils.py``, download/unpack the external
    tools archive and configure Modeller. Afterwards the Click and Vina
    binaries are checked for execute permission; anything that failed is
    repeated after the output is cleared so it stays visible.

    Raises:
        subprocess.CalledProcessError: if a command run with ``check=True``
            (pip, git, mamba, tar) exits with a non-zero status.
    """
    # Install tqdm first if missing (fast)
    try:
        from tqdm.notebook import tqdm
    except ImportError:
        subprocess.run("pip install -q tqdm", shell=True)
        from tqdm.notebook import tqdm

    print("Setting up FrankPEPstein environment...")

    steps = [
        ("Installing CondaColab", "condacolab"),
        ("Cloning Repository", "git"),
        ("Creating Conda Environment (Slow)", "env"),
        ("Configuring Notebook Utils", "patch"),
        ("Setting up External Tools (DB Download)", "tools"),
        ("Configuring Modeller", "modeller")
    ]

    # Messages that must survive the final clear_output().
    problems = []

    with tqdm(total=len(steps)) as pbar:
        # 1. CondaColab
        pbar.set_description(steps[0][0])
        try:
            with SuppressStdout():
                import condacolab
                condacolab.check()
        except (ImportError, AssertionError):
            print("Installing CondaColab... (The kernel will restart. Please ignore any crash warnings.)")
            subprocess.run("pip install -q py3dmol logomaker", shell=True, check=True)
            subprocess.run("pip install -q biopython", shell=True, check=True) # Ensure biopython is there too
            with SuppressStdout():
                subprocess.run("pip install -q condacolab", shell=True, check=True)
                import condacolab
                condacolab.install()

        pbar.update(1)

        # 2. Git Clone (skipped when the checkout already exists)
        pbar.set_description(steps[1][0])
        with SuppressStdout():
            if not os.path.exists("FrankPEPstein"):
                subprocess.run("git clone https://github.com/Joacaldog/FrankPEPstein.git", shell=True, check=True)
        pbar.update(1)

        # 3. Create Environment (skipped when the env directory already exists)
        pbar.set_description(steps[2][0])
        env_path = "/usr/local/envs/FrankPEPstein"
        if not os.path.exists(env_path):
            # 'pigz' is needed later for the parallel archive extraction
            subprocess.run("mamba create -n FrankPEPstein -q -y -c conda-forge -c salilab openbabel biopython fpocket joblib tqdm py3dmol vina pigz scipy scikit-learn matplotlib python=3.10 salilab::modeller > /dev/null 2>&1", shell=True, check=True)

        # Make the environment's packages and executables reachable from this kernel
        site_packages = f"{env_path}/lib/python3.10/site-packages"
        if site_packages not in sys.path:
            sys.path.append(site_packages)
        env_bin = f"{env_path}/bin"
        # Prepend only once so re-running the cell does not keep growing PATH.
        if env_bin not in os.environ['PATH'].split(os.pathsep):
            os.environ['PATH'] = f"{env_bin}:" + os.environ['PATH']
        pbar.update(1)

        # 4. Patch Utils
        pbar.set_description(steps[3][0])
        # The helper module is (re)written from this literal on every run.
        patched_utils_content = r'''import os
import re
import sys
import glob
import shutil
import subprocess

def configure_modeller(license_key='MODELIRANJE', repo_dir='FrankPEPstein'):
    template_config = os.path.join(repo_dir, "utilities/config.py")
    dest_config = None
    try:
        import modeller
        modeller_path = os.path.dirname(modeller.__file__)
        candidate = os.path.join(modeller_path, "config.py")
        if os.path.exists(candidate):
            dest_config = candidate
    except Exception:
        pass

    if not dest_config:
        possible_paths = [
            f"{sys.prefix}/lib/modeller-*/modlib/modeller/config.py",
            f"{sys.prefix}/lib/python*/site-packages/modeller/config.py",
            "/usr/local/envs/FrankPEPstein/lib/modeller-*/modlib/modeller/config.py"
        ]
        for pattern in possible_paths:
            found = glob.glob(pattern)
            if found:
                dest_config = found[0]
                break

    if dest_config and os.path.exists(template_config):
        with open(template_config, 'r') as f:
            content = f.read()
        new_content = content.replace("'MODELIRANJE'", f"'{license_key}'")
        with open(dest_config, 'w') as f:
            f.write(new_content)
        return True
    return False


def setup_external_tools(files_id=None):
    # Install gdown if needed
    try:
        import gdown
    except ImportError:
        subprocess.run([sys.executable, "-m", "pip", "install", "gdown"], check=True)
        import gdown

    # Always download to current directory (root of colab)
    base_dir = "."

    # 1. Download Single Archive
    if files_id:
        archive_path = os.path.join(base_dir, "files.tar.gz")
        # Simple "already extracted" check: both DB and utilities are present
        already_extracted = (
            os.path.exists(os.path.join(base_dir, "DB"))
            and os.path.exists(os.path.join(base_dir, "utilities"))
        )
        if not already_extracted:
            if not os.path.exists(archive_path):
                print(f"Downloading files.tar.gz (ID: {files_id})...")
                gdown.download(f'https://drive.google.com/uc?id={files_id}', archive_path, quiet=True)

            if os.path.exists(archive_path):
                print("Extracting files.tar.gz (Parallel with pigz)...")
                subprocess.run(["tar", "-I", "pigz", "-xf", archive_path, "-C", base_dir], check=True)

    # 2. Permissions & Final Setup
    utilities_dir = os.path.join(base_dir, "utilities")
    if not os.path.exists(utilities_dir):
        return

    # Rename extracted ADFR folder to the suite name required by the tools
    adfr_extracted = os.path.join(utilities_dir, "ADFR")
    adfr_target = os.path.join(utilities_dir, "ADFRsuite_x86_64Linux_1.0")
    if os.path.exists(adfr_extracted) and not os.path.exists(adfr_target):
        print(f"Renaming {adfr_extracted} to {adfr_target}...")
        os.rename(adfr_extracted, adfr_target)

    # Point the ADS_ROOT entries of the ADFR launcher scripts at this install
    bin_dir = os.path.join(adfr_target, "bin")
    if os.path.isdir(bin_dir):
        print("Patching ADFR scripts paths...")
        abs_adfr_path = os.path.abspath(adfr_target)
        for fname in os.listdir(bin_dir):
            fpath = os.path.join(bin_dir, fname)
            if not os.path.isfile(fpath) or os.path.islink(fpath):
                continue
            try:
                with open(fpath, 'rb') as f:
                    content = f.read().decode('utf-8')
            except (OSError, UnicodeDecodeError):
                continue  # unreadable or binary file: nothing to patch
            if "ADS_ROOT=" not in content:
                continue
            # A callable replacement keeps the path from being read as a regex template
            new_content = re.sub(r'ADS_ROOT="[^"]+"', lambda _m: f'ADS_ROOT="{abs_adfr_path}"', content)
            try:
                with open(fpath, 'w', encoding='utf-8') as f:
                    f.write(new_content)
            except OSError as err:
                print(f"Could not patch {fpath}: {err}")

    print("Fixing permissions...")
    subprocess.run(["chmod", "-R", "+x", utilities_dir])

    # Add the ADFR bin directory to PATH. After the rename above it lives under
    # the suite directory, so look there first and fall back to the old name.
    for adfr_root in (adfr_target, adfr_extracted):
        adfr_bin = os.path.abspath(os.path.join(adfr_root, "bin"))
        if os.path.isdir(adfr_bin):
            if adfr_bin not in os.environ['PATH'].split(os.pathsep):
                os.environ['PATH'] += f":{adfr_bin}"
            break
'''
        os.makedirs("FrankPEPstein/scripts", exist_ok=True)
        with open("FrankPEPstein/scripts/notebook_utils.py", "w") as f:
            f.write(patched_utils_content)
        pbar.update(1)

        # 5. External Tools Setup
        pbar.set_description(steps[4][0])
        repo_path = os.path.abspath("FrankPEPstein")
        if repo_path not in sys.path:
            sys.path.append(repo_path)
        from scripts import notebook_utils

        # Google Drive file id of the tools/database archive
        files_id = "1M30wmaf6vaXJl1kmj-0cD5yhBYDCx_xw"

        with SuppressStdout():
            notebook_utils.setup_external_tools(files_id)
        pbar.update(1)

        # 6. Configure Modeller
        pbar.set_description(steps[5][0])
        with SuppressStdout():
            modeller_configured = notebook_utils.configure_modeller()
        if not modeller_configured:
            # configure_modeller() returns False when it found no config.py to
            # overwrite or the repository template is missing.
            problems.append("⚠️ Modeller config.py was not updated.")
        pbar.update(1)

    def _verify_executable(label, candidates):
        """Check the first existing candidate; return a problem message or None."""
        found = next((path for path in candidates if os.path.exists(path)), None)
        if found is None:
            message = f"❌ {label} binary not found at {', '.join(candidates)}"
            print(message)
            return message
        if os.access(found, os.X_OK):
            print(f"✅ {label} is executable.")
            return None
        print(f"⚠️ {label} found but NOT executable. Fixing...")
        try:
            os.chmod(found, os.stat(found).st_mode | 0o111)
        except OSError:
            pass  # the access check below reports the failure
        if os.access(found, os.X_OK):
            print(f"✅ {label} fixed.")
            return None
        message = f"❌ Failed to fix {label} permissions."
        print(message)
        return message

    # Verify Executables
    print(f"\n{'='*20}")
    print("Verifying Executables...")

    # setup_external_tools() unpacks into ./utilities, so search there in
    # addition to the repository checkout that was checked originally.
    repo_or_cwd = "FrankPEPstein" if os.path.exists("FrankPEPstein") else "."
    utility_dirs = []
    for root in (repo_or_cwd, "."):
        utilities_dir = os.path.join(root, "utilities")
        if utilities_dir not in utility_dirs:
            utility_dirs.append(utilities_dir)

    click_layouts = (("Click", "bin", "click"), ("Click", "click"), ("click", "click"))
    click_candidates = [
        os.path.join(utilities_dir, *parts)
        for utilities_dir in utility_dirs
        for parts in click_layouts
    ]
    vina_candidates = [
        os.path.join(utilities_dir, "vina_1.2.4_linux_x86_64")
        for utilities_dir in utility_dirs
    ]

    for label, candidates in (("Click", click_candidates), ("Vina", vina_candidates)):
        issue = _verify_executable(label, candidates)
        if issue:
            problems.append(issue)
    print(f"{'='*20}\n")

    clear_output()
    # clear_output() wipes the report above, so repeat whatever went wrong.
    for issue in problems:
        print(issue)
    print("✅ Setup Ready!")

# Run the whole setup when this cell/script is executed directly
# (i.e. when it is not imported as a module).
if __name__ == "__main__":
    run_setup()



In [None]:
#@title 1. Pocket Selection & Gridbox Generation
#@markdown **Instructions:**
#@markdown 1. Select **Detection Mode**.
#@markdown 2. If **Auto Detect**, run fpocket and select a predicted pocket.
#@markdown 3. If **Manual Upload**, upload your pre-defined pocket PDB.
#@markdown 4. Use the controls to adjust the gridbox (Cyan Box) if needed.
#@markdown 5. Click **Confirm & Extract**.

import os
import shutil
import subprocess
import sys
import glob
import re
import ipywidgets as widgets
from IPython.display import display, clear_output
import py3Dmol

# --- Configuration ---
# Working directories are created under the directory the cell is run from.
initial_path = os.getcwd()
# pockets/ receives the processed pocket PDBs (temp_calc.pdb, pocket.pdb).
pockets_dir = os.path.join(initial_path, "pockets")
# fpocket_pockets/ stores the raw pocket files (fpocket copies or manual uploads).
fpocket_storage_dir = os.path.join(initial_path, "fpocket_pockets")
os.makedirs(pockets_dir, exist_ok=True)
os.makedirs(fpocket_storage_dir, exist_ok=True)

# State Variables
# Relative name, so the receptor is resolved against the current working directory.
receptor_filename = "receptor.pdb"
if not os.path.exists(receptor_filename):
    # Placeholder: no fallback lookup is implemented, so this branch does nothing.
    pass 

pipeline_state_file = "pipeline_state.json"
import json
def save_pipeline_state(data):
    """Merge *data* into the JSON state file shared between notebook steps.

    Existing keys not present in *data* are kept. A state file that cannot be
    read, is not valid JSON, or does not hold a JSON object is discarded and
    replaced (best effort).

    Args:
        data: JSON-serialisable mapping of values to store.
    """
    current = {}
    if os.path.exists(pipeline_state_file):
        try:
            with open(pipeline_state_file, 'r') as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and decoding problems.
            loaded = None
        # Only a JSON object can be merged; anything else would break update().
        if isinstance(loaded, dict):
            current = loaded
    current.update(data)
    with open(pipeline_state_file, 'w') as f:
        json.dump(current, f)

# --- Part 1: Receptor Input ---
# Single-file upload button limited to .pdb, plus an Output area for its status messages.
receptor_upload_widget = widgets.FileUpload(description="Upload Receptor (pdb)", accept=".pdb", multiple=False, layout=widgets.Layout(width='300px'))
receptor_status = widgets.Output()

def handle_receptor_upload(change):
    """Write the uploaded receptor to ``receptor_filename`` and confirm it.

    Args:
        change: Trait-change notification from the upload widget (unused).
    """
    receptor_status.clear_output()
    with receptor_status:
        if not receptor_upload_widget.value:
            return
        # The widget value is either a dict of files or an indexable sequence,
        # depending on the ipywidgets version; take the first file either way.
        if isinstance(receptor_upload_widget.value, dict):
            first_file = list(receptor_upload_widget.value.values())[0]
        else:
            first_file = receptor_upload_widget.value[0]
        payload = first_file['content']
        with open(receptor_filename, "wb") as out_file:
            out_file.write(payload)
        print("✅ Receptor uploaded successfully.")

# Call the handler whenever the upload widget's `value` trait changes.
receptor_upload_widget.observe(handle_receptor_upload, names='value')

print("Step 1.1: Load Receptor")
display(widgets.VBox([receptor_upload_widget, receptor_status]))

# Initial status line: report whether a receptor file is already on disk.
with receptor_status:
    if os.path.exists(receptor_filename):
        print(f"✅ Receptor file present: {receptor_filename}")
    else:
        print("Waiting for receptor upload...")

# --- Part 2: Detection & Upload Logic ---

# Chooses between running fpocket ('Auto Detect') and uploading a pocket file
# ('Manual Upload'); the exact option strings are compared elsewhere in this cell.
mode_selector = widgets.ToggleButtons(
    options=['Auto Detect', 'Manual Upload'],
    description='Mode:',
    disabled=False,
    button_style='',
)

# Auto Detect Widgets
detect_btn = widgets.Button(description="Run fpocket", button_style='info')
# Starts empty and disabled; filled by run_fpocket() / handle_upload().
pocket_dropdown = widgets.Dropdown(description="Select Pocket:", options=[], disabled=True)

# Manual Upload Widgets
upload_btn = widgets.FileUpload(description="Upload Pocket PDB", accept=".pdb", multiple=False, layout=widgets.Layout(width='300px'))

# Containers
# Output area for the messages printed by the detection/upload callbacks.
log_output_1 = widgets.Output()

def run_fpocket(b):
    """Run fpocket on the receptor and load the detected pockets into the dropdown.

    Previous results are removed first (both the stored copies and fpocket's
    own ``*_out`` directory) so a failed run cannot be mistaken for a
    successful one. Detected ``pocket*_atm.pdb`` files are copied to
    ``fpocket_storage_dir`` and listed in numeric order.

    Args:
        b: The clicked button (unused).
    """
    log_output_1.clear_output()
    with log_output_1:
        if not os.path.exists(receptor_filename):
            print("❌ Receptor not found! Please run setup/upload first.")
            return

        print("Running fpocket... (this may take a minute)")
        # Clean previous copies (non-hidden entries, like the former `rm -rf dir/*`)
        for stale in glob.glob(os.path.join(fpocket_storage_dir, "*")):
            if os.path.isdir(stale) and not os.path.islink(stale):
                shutil.rmtree(stale, ignore_errors=True)
            else:
                try:
                    os.remove(stale)
                except OSError:
                    pass  # best effort, matching the unchecked rm

        # fpocket output: receptor_out/pockets/pocketX_atm.pdb
        out_dir = receptor_filename.replace(".pdb", "_out")
        if out_dir != receptor_filename and os.path.isdir(out_dir):
            # Results of an earlier run must not be picked up if this run fails.
            shutil.rmtree(out_dir, ignore_errors=True)

        try:
            completed = subprocess.run(["fpocket", "-f", receptor_filename])
        except OSError as err:
            print(f"❌ Could not start fpocket: {err}")
            return
        if completed.returncode != 0:
            print(f"⚠️ fpocket exited with status {completed.returncode}.")

        # Check results
        pockets_found = glob.glob(os.path.join(out_dir, "pockets", "pocket*_atm.pdb"))
        if not pockets_found:
            print("❌ No pockets found.")
            return

        print(f"✅ Found {len(pockets_found)} pockets.")
        # Copy to storage
        options = []
        for pocket_file in pockets_found:
            basename = os.path.basename(pocket_file)
            shutil.copy(pocket_file, os.path.join(fpocket_storage_dir, basename))
            options.append(basename)

        def _pocket_number(filename):
            """Return the first integer in *filename*, or 0 when there is none."""
            match = re.search(r'\d+', filename)
            return int(match.group()) if match else 0

        # Sort naturally (pocket2 before pocket10)
        options.sort(key=_pocket_number)

        pocket_dropdown.options = options
        pocket_dropdown.disabled = False
        if options: pocket_dropdown.value = options[0]

        # Trigger Box Init
        initialize_ui(None)

def handle_upload(change):
    """Store a manually uploaded pocket PDB and make it the active selection.

    The file is written to ``fpocket_storage_dir`` under its uploaded name,
    becomes the only dropdown option, and the gridbox UI is rebuilt for it.

    Args:
        change: Trait-change notification from the upload widget (unused).
    """
    log_output_1.clear_output()
    with log_output_1:
        if not upload_btn.value:
            return

        # The widget value is either a dict of files or an indexable sequence,
        # depending on the ipywidgets version; take the first file either way.
        if isinstance(upload_btn.value, dict):
            first_file = list(upload_btn.value.values())[0]
        else:
            first_file = upload_btn.value[0]

        payload = first_file['content']
        name = first_file['name']

        target_path = os.path.join(fpocket_storage_dir, name)
        with open(target_path, "wb") as out_file:
            out_file.write(payload)

        print(f"✅ Uploaded {name}")

        pocket_dropdown.options = [name]
        pocket_dropdown.value = name
        pocket_dropdown.disabled = False

        # Trigger Box Init
        initialize_ui(None)

# Wire the callbacks: button click runs fpocket, a new upload value stores the file.
detect_btn.on_click(run_fpocket)
upload_btn.observe(handle_upload, names='value')

def on_pocket_change(change):
    """Rebuild the gridbox UI when the dropdown's ``value`` trait changes.

    Args:
        change: Trait notification; only ``type == 'change'`` events for the
            ``value`` trait are acted on, everything else is ignored.
    """
    is_value_change = change['type'] == 'change' and change['name'] == 'value'
    if not is_value_change:
        return
    initialize_ui(None)

# Registered without a `names` filter: the handler itself filters for 'value'.
pocket_dropdown.observe(on_pocket_change)

# Selection UI
# Vertical stack: mode toggle, fpocket button + dropdown, upload button, log area.
selection_ui = widgets.VBox([
    mode_selector,
    widgets.HBox([detect_btn, pocket_dropdown]),
    upload_btn,
    log_output_1
])

# Visibility logic
def update_mode_ui(change):
    """Show only the widgets that belong to the selected detection mode.

    Args:
        change: Trait notification (unused); ``None`` for the initial call.
    """
    auto_mode = mode_selector.value == 'Auto Detect'
    detect_btn.layout.display = 'block' if auto_mode else 'none'
    if auto_mode:
        # Left untouched in manual mode so the uploaded file stays listed.
        pocket_dropdown.layout.display = 'block'
    upload_btn.layout.display = 'none' if auto_mode else 'block'

# Re-evaluate widget visibility whenever the mode toggle changes.
mode_selector.observe(update_mode_ui, names='value')
update_mode_ui(None) # Init

print("Select Detection Mode:")
display(selection_ui)


# --- Part 3: Box Calculation & Extraction (Isolated) ---
# This matches the previous updated logic

# Helper Functions (Subprocess)
def run_processing_isolated(receptor_path, pocket_path, output_pocket_path, mode="extract", buffer=0.0):
    """
    Build the pocket PDB and its bounding box in a separate Python process.

    A helper script is written to ``process_box_isolated.py`` in the current
    directory and run with the FrankPEPstein environment's interpreter (or the
    running interpreter when that one does not exist). Failures reported by
    the helper are printed instead of being dropped.

    Args:
        receptor_path: Receptor PDB; only read in ``"extract"`` mode.
        pocket_path: Pocket PDB (fpocket output or manual upload).
        output_pocket_path: Where the processed pocket PDB is written.
        mode: ``"extract"`` saves every receptor residue found within 5.0 of a
            pocket atom; ``"direct"`` re-saves the pocket file itself. In both
            modes chains are renamed to ``p``.
        buffer: Value added to each box dimension.

    Returns:
        ``(center, size, success)`` where ``center`` and ``size`` are lists of
        three floats (``None`` when the helper did not report them) and
        ``success`` is ``True`` only if the helper printed ``SUCCESS``.
    """
    # Plain (non-f) string: nothing is interpolated on the parent side.
    script_content = """
import sys
import os
from Bio.PDB import PDBParser, PDBIO, Select, NeighborSearch


def collect_atoms_as_chain_p(structure):
    # Rename every chain to 'p' and return all atoms of the structure.
    # NOTE(review): Bio.PDB appears to reject an id already used by a sibling
    # chain, so structures with several chains per model presumably end on the
    # ERROR path below - confirm how multi-chain pockets should be handled.
    atoms = []
    for model in structure:
        for chain in model:
            chain.id = 'p'
            for residue in chain:
                atoms.extend(residue)
    return atoms


def process_and_box(receptor_file, pocket_file, output_file, mode, buffer_val):
    try:
        parser = PDBParser(QUIET=True)

        # 1. Load Pocket
        pocket_struct = parser.get_structure("pocket", pocket_file)
        atoms_for_box = []

        if mode == 'extract':
            # Fpocket mode: load receptor, keep residues near any pocket atom
            receptor_struct = parser.get_structure("receptor", receptor_file)
            pocket_atoms = list(pocket_struct.get_atoms())
            if not pocket_atoms:
                print("ERROR: No atoms in pocket file")
                return

            ns = NeighborSearch(list(receptor_struct.get_atoms()))
            selected_residues = set()
            for p_atom in pocket_atoms:
                for res in ns.search(p_atom.get_coord(), 5.0, level='R'):
                    selected_residues.add((res.parent.id, res.id))

            class PocketSelect(Select):
                def accept_residue(self, residue):
                    return (residue.parent.id, residue.id) in selected_residues

            io = PDBIO()
            io.set_structure(receptor_struct)
            temp_out = output_file + ".tmp"
            try:
                io.save(temp_out, PocketSelect())
                saved_struct = parser.get_structure("saved", temp_out)
            finally:
                # Never leave the intermediate file behind, even on failure
                if os.path.exists(temp_out):
                    os.remove(temp_out)

            atoms_for_box = collect_atoms_as_chain_p(saved_struct)
            io.set_structure(saved_struct)
            io.save(output_file)

        elif mode == 'direct':
            atoms_for_box = collect_atoms_as_chain_p(pocket_struct)
            io = PDBIO()
            io.set_structure(pocket_struct)
            io.save(output_file)

        if not atoms_for_box:
            print("ERROR: No atoms/residues for box calculation")
            return

        coords = [a.get_coord() for a in atoms_for_box]
        min_coord = [min([c[i] for c in coords]) for i in range(3)]
        max_coord = [max([c[i] for c in coords]) for i in range(3)]

        center = [(min_coord[i] + max_coord[i]) / 2 for i in range(3)]
        size = [(max_coord[i] - min_coord[i]) + float(buffer_val) for i in range(3)]

        print(f"CENTER:{center[0]},{center[1]},{center[2]}")
        print(f"SIZE:{size[0]},{size[1]},{size[2]}")
        print("SUCCESS")

    except Exception as e:
        print(f"ERROR:{e}")


if __name__ == "__main__":
    process_and_box(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], float(sys.argv[5]))
"""
    script_name = "process_box_isolated.py"
    with open(script_name, "w") as f:
        f.write(script_content)

    # Prefer the conda environment's interpreter; fall back to the running one.
    python_exe = "/usr/local/envs/FrankPEPstein/bin/python"
    if not os.path.exists(python_exe): python_exe = sys.executable

    try:
        result = subprocess.run(
            [python_exe, script_name, receptor_path, pocket_path, output_pocket_path, mode, str(buffer)],
            capture_output=True, text=True, check=True
        )
    except subprocess.CalledProcessError as e:
        # The helper died before reaching its own error handling (e.g. a
        # failing import); show the last stderr line as the reason.
        stderr_lines = (e.stderr or "").strip().splitlines()
        reason = stderr_lines[-1] if stderr_lines else str(e)
        print(f"⚠️ Pocket processing failed: {reason}")
        return None, None, False
    except OSError as e:
        print(f"⚠️ Pocket processing failed: could not run {python_exe} ({e})")
        return None, None, False

    center, size = None, None
    success = False

    # The helper reports through prefixed stdout lines.
    for line in result.stdout.splitlines():
        if line.startswith("CENTER:"):
            center = [float(x) for x in line.split(":", 1)[1].split(",")]
        elif line.startswith("SIZE:"):
            size = [float(x) for x in line.split(":", 1)[1].split(",")]
        elif line.startswith("SUCCESS"):
            success = True
        elif line.startswith("ERROR"):
            print(f"⚠️ Pocket processing failed: {line}")

    return center, size, success

# Volumes already computed, keyed by (absolute path, mtime_ns, size) so an
# unchanged file is not re-measured in a new interpreter on every redraw.
_POCKET_VOLUME_CACHE = {}


def get_pocket_volume_hull(pocket_pdb_path):
    """Return the convex-hull volume spanned by all atoms of a PDB file.

    The computation runs in a separate Python process (the FrankPEPstein
    environment's interpreter when available) through a helper script written
    to ``calc_volume_isolated.py`` in the current directory.

    Args:
        pocket_pdb_path: Path of the PDB file to measure.

    Returns:
        The hull volume as a float, or ``0.0`` when it could not be computed
        (fewer than four atoms, parsing problems, helper failure).
    """
    try:
        file_stat = os.stat(pocket_pdb_path)
        cache_key = (os.path.abspath(pocket_pdb_path), file_stat.st_mtime_ns, file_stat.st_size)
    except OSError:
        cache_key = None  # missing file: let the helper report VOL:0.0
    if cache_key in _POCKET_VOLUME_CACHE:
        return _POCKET_VOLUME_CACHE[cache_key]

    # Plain (non-f) string: nothing is interpolated on the parent side.
    script_content = """
import sys
from Bio.PDB import PDBParser
from scipy.spatial import ConvexHull

def calc_vol(pdb_file):
    try:
        parser = PDBParser(QUIET=True)
        structure = parser.get_structure("struct", pdb_file)
        # Use all atoms
        points = [atom.get_coord() for atom in structure.get_atoms()]

        if len(points) < 4:
            print("VOL:0.0")
            return

        hull = ConvexHull(points)
        print(f"VOL:{hull.volume}")
    except Exception as e:
        print("VOL:0.0")

if __name__ == "__main__":
    calc_vol(sys.argv[1])
"""
    script_name = "calc_volume_isolated.py"
    with open(script_name, "w") as f:
        f.write(script_content)

    python_exe = "/usr/local/envs/FrankPEPstein/bin/python"
    if not os.path.exists(python_exe): python_exe = sys.executable

    try:
        result = subprocess.run(
            [python_exe, script_name, pocket_pdb_path],
            capture_output=True, text=True, check=True
        )
        for line in result.stdout.splitlines():
            if line.startswith("VOL:"):
                volume = float(line.split(":", 1)[1])
                # Only real results are cached; 0.0 may be a transient failure.
                if cache_key is not None and volume > 0.0:
                    _POCKET_VOLUME_CACHE[cache_key] = volume
                return volume
    except (subprocess.CalledProcessError, OSError, ValueError):
        # Best effort: the volume is informational, so failures map to 0.0.
        pass
    return 0.0

# --- Custom Widget Helpers (Box Adjustment) ---

def create_control_group(label, initial_val, step, color_hex):
    """Build one labelled "◀ [value] ▶" row for a gridbox parameter.

    Args:
        label: Text shown in front of the controls.
        initial_val: Starting value of the numeric field.
        step: Amount added/subtracted by the arrow buttons (also the field's step).
        color_hex: CSS colour used for the label.

    Returns:
        ``(val_widget, box)``: the ``FloatText`` holding the value and the
        ``HBox`` containing label, buttons and field.
    """
    # Style buttons
    btn_layout = widgets.Layout(width='40px')

    minus_button = widgets.Button(description='\u25c0', layout=btn_layout)
    plus_button = widgets.Button(description='\u25b6', layout=btn_layout)

    val_widget = widgets.FloatText(value=initial_val, step=step, description='', layout=widgets.Layout(width='80px'))

    # The buttons only change the value; the observer below performs the
    # redraw. Redrawing here as well rendered the scene twice per click.
    def on_minus(b):
        val_widget.value -= step

    def on_plus(b):
        val_widget.value += step

    minus_button.on_click(on_minus)
    plus_button.on_click(on_plus)
    # Single redraw path for typed edits and button clicks alike.
    val_widget.observe(lambda change: update_visual(None), names='value')

    label_html = widgets.HTML(f"<b style='color:{color_hex}; font-size:14px; margin-right:5px;'>{label}</b>")

    box = widgets.HBox([label_html, minus_button, val_widget, plus_button], layout=widgets.Layout(align_items='center'))
    return val_widget, box

# Maps 'cx'/'cy'/'cz' (centre) and 'sx'/'sy'/'sz' (size) to their FloatText
# widgets; filled by initialize_ui() and read by update_visual()/finalize_process().
controls = {}
viz_output = widgets.Output() # Holds the 3D widget
output_log = widgets.Output() # Holds text logs

def initialize_ui(b):
    """(Re)build the gridbox controls for the currently selected pocket.

    The pocket is processed into ``pockets/temp_calc.pdb`` to obtain a default
    box; when that fails a 20 x 20 x 20 box at the origin is used and a notice
    is shown inside the control panel. Finally the 3D view is redrawn.

    Args:
        b: Widget/event that triggered the call (unused).
    """
    output_log.clear_output()
    viz_output.clear_output()

    if not pocket_dropdown.value:
        # Nothing selected yet: stay silent and wait for a selection
        return

    # 1. Calc Initial
    selected_pocket = pocket_dropdown.value
    src_pocket_path = os.path.join(fpocket_storage_dir, selected_pocket)
    temp_pocket_path = os.path.join(pockets_dir, "temp_calc.pdb")

    # Determine mode from toggle
    mode = "extract" if mode_selector.value == "Auto Detect" else "direct"

    # Remove the result of a previous pocket so a failed run cannot leave the
    # wrong structure on display.
    try:
        os.remove(temp_pocket_path)
    except OSError:
        pass  # usually just means there was nothing to remove

    with output_log: print(f"Processing {selected_pocket}...")
    center, size, success = run_processing_isolated(receptor_filename, src_pocket_path, temp_pocket_path, mode=mode)

    used_fallback = not success or center is None or size is None
    if used_fallback:
        center = [0.0, 0.0, 0.0]
        size = [20.0, 20.0, 20.0]

    cx, cy, cz = center
    sx, sy, sz = size

    # 2. Build Control Rows (Reset controls)
    cx_w, cx_box = create_control_group("Center X", cx, 0.5, "#FF0000")
    sx_w, sx_box = create_control_group("Size X  ", sx, 1.0, "#FF0000")

    cy_w, cy_box = create_control_group("Center Y", cy, 0.5, "#00AA00")
    sy_w, sy_box = create_control_group("Size Y  ", sy, 1.0, "#00AA00")

    cz_w, cz_box = create_control_group("Center Z", cz, 0.5, "#0000FF")
    sz_w, sz_box = create_control_group("Size Z  ", sz, 1.0, "#0000FF")

    controls['cx'] = cx_w; controls['sx'] = sx_w
    controls['cy'] = cy_w; controls['sy'] = sy_w
    controls['cz'] = cz_w; controls['sz'] = sz_w

    # Header
    header = widgets.HTML("<h3>Manual Gridbox Adjustment</h3>")

    row_x = widgets.HBox([cx_box, widgets.HTML("&nbsp;&nbsp;|&nbsp;&nbsp;"), sx_box])
    row_y = widgets.HBox([cy_box, widgets.HTML("&nbsp;&nbsp;|&nbsp;&nbsp;"), sy_box])
    row_z = widgets.HBox([cz_box, widgets.HTML("&nbsp;&nbsp;|&nbsp;&nbsp;"), sz_box])

    panel_children = [header]
    if used_fallback:
        # Part of the panel, so the clear_output() below cannot erase it
        # (a printed message was wiped before the user could read it).
        panel_children.append(widgets.HTML(
            "<b style='color:#b36b00;'>⚠️ Failed calculating defaults. "
            "Using a 20 x 20 x 20 box centred at the origin.</b>"
        ))
    panel_children += [
        widgets.HTML("<hr style='border-top: 1px solid #ccc;'>"),
        row_x,
        row_y,
        row_z,
        widgets.HTML("<hr style='border-top: 1px solid #ccc;'>"),
        confirm_btn
    ]
    ui_container = widgets.VBox(panel_children)

    # Display UI
    with output_log:
        clear_output()
        display(ui_container)

    # Trigger first viz
    update_visual(None, create_new=True)


def update_visual(b, create_new=False):
    """Redraw the 3D scene: receptor, processed pocket, raw pocket and gridbox.

    Args:
        b: Widget/event that triggered the call (unused).
        create_new: Kept for backward compatibility. A fresh viewer is built
            on every call, so the camera is now always fitted to the scene
            instead of only on the first draw.
    """
    viz_output.clear_output(wait=True)
    with viz_output:
        if 'cx' not in controls: return

        # Get Values
        cx = controls['cx'].value; sx = controls['sx'].value
        cy = controls['cy'].value; sy = controls['sy'].value
        cz = controls['cz'].value; sz = controls['sz'].value

        # Setup View
        view = py3Dmol.view(width=800, height=600)

        # 0. Receptor Surface (Background)
        if os.path.exists(receptor_filename):
            with open(receptor_filename, 'r') as f:
                view.addModel(f.read(), "pdb")
            view.setStyle({'model': -1}, {}) # Hide atoms
            view.addSurface(py3Dmol.SES, {'opacity': 0.3, 'color': 'gray'}, {'model': -1})

        # 1. Processed pocket (Cartoon) - temp_calc.pdb written by initialize_ui()
        temp_pocket_path = os.path.join(pockets_dir, "temp_calc.pdb")
        if os.path.exists(temp_pocket_path):
            with open(temp_pocket_path, 'r') as f:
                view.addModel(f.read(), "pdb")
            # Cartoon Representation
            view.setStyle({'model': -1}, {'cartoon': {'color': 'cyan', 'opacity': 0.8}})

        # 2. Original pocket file (White Surface)
        selected_pocket = pocket_dropdown.value
        # The dropdown may have no selection; os.path.join() would raise on None.
        src_pocket_path = os.path.join(fpocket_storage_dir, selected_pocket) if selected_pocket else None
        if src_pocket_path and os.path.exists(src_pocket_path):
            with open(src_pocket_path, 'r') as f:
                view.addModel(f.read(), "pdb")
            # White Surface
            view.addSurface(py3Dmol.SES, {'opacity': 0.5, 'color': 'white'}, {'model': -1})
            # Hide atoms of this one, just surface
            view.setStyle({'model': -1}, {})

            # Calculate Volume
            vol = get_pocket_volume_hull(src_pocket_path)
            print(f"Original Pocket Volume (Convex Hull): {vol:.2f} A^3")

            # 3. Volume Representation (Red Surface)
            view.addSurface(py3Dmol.SAS, {'opacity': 0.3, 'color': 'red'}, {'model': -1})

        # 4. Gridbox: translucent cyan body plus a black wireframe outline
        view.addBox({
            'center': {'x': cx, 'y': cy, 'z': cz},
            'dimensions': {'w': sx, 'h': sy, 'd': sz},
            'color': 'cyan',
            'opacity': 0.2,
            'wireframe': False
        })
        view.addBox({
            'center': {'x': cx, 'y': cy, 'z': cz},
            'dimensions': {'w': sx, 'h': sy, 'd': sz},
            'color': 'black',
            'wireframe': True
        })

        # Every call creates a new viewer with a default camera, so it has to
        # be fitted to the scene each time, not just on the first draw.
        view.zoomTo()

        view.show()

def finalize_process(b):
    """Write the final pocket PDB and publish the chosen gridbox.

    The pocket is processed into ``pockets/pocket.pdb``; on success the box
    centre/size from the controls are stored in the ``box_center``,
    ``box_size`` and ``extracted_pocket_path`` globals and in the pipeline
    state file, and the pocket is copied to ``initial_path``.

    Args:
        b: The clicked button (unused).
    """
    global box_center, box_size, extracted_pocket_path

    output_log.clear_output()
    with output_log:
        if 'cx' not in controls: return

        selected_pocket = pocket_dropdown.value
        if not selected_pocket:
            # Without a selection os.path.join() below would raise.
            print("Error saving final pocket.")
            return

        print("Finalizing extraction with custom box...")

        final_center = [controls['cx'].value, controls['cy'].value, controls['cz'].value]
        final_size   = [controls['sx'].value, controls['sy'].value, controls['sz'].value]

        src_pocket_path = os.path.join(fpocket_storage_dir, selected_pocket)
        final_pocket_name = "pocket.pdb"
        final_pocket_path = os.path.join(pockets_dir, final_pocket_name)

        mode = "extract" if mode_selector.value == "Auto Detect" else "direct"

        # Only the success flag is used: the box comes from the controls, not
        # from the values recomputed by the helper.
        _, _, success = run_processing_isolated(
            receptor_filename, src_pocket_path, final_pocket_path, mode=mode, buffer=0.0
        )

        if not success:
            print("Error saving final pocket.")
            return

        print(f"\u2705 Box Center: {final_center}")
        print(f"\u2705 Box Size:   {final_size}")

        # Save State
        box_center = final_center
        box_size = final_size
        extracted_pocket_path = os.path.abspath(final_pocket_path)

        save_pipeline_state({
            "box_center": box_center,
            "box_size": box_size,
            "extracted_pocket_path": extracted_pocket_path
        })

        # Save to root for visibility/Step 2
        root_pocket = os.path.join(initial_path, "pocket.pdb")
        try:
            shutil.copy(final_pocket_path, root_pocket)
        except (OSError, shutil.Error) as err:
            # Still best effort, but no longer silent.
            print(f"⚠️ Could not copy pocket to {root_pocket}: {err}")
        else:
            print(f"\u2705 Ready! (Copied to {root_pocket})")

# Main Buttons for Box Section
# init_btn is created but currently neither wired to a callback nor displayed
# (its on_click registration below is commented out).
init_btn = widgets.Button(description='Reset/Refresh View', button_style='primary', icon='refresh', layout=widgets.Layout(width='200px'))
# confirm_btn is embedded in the control panel built by initialize_ui().
confirm_btn = widgets.Button(description='Confirm & Extract', button_style='success', icon='check', layout=widgets.Layout(width='100%'))

# init_btn.on_click(initialize_ui) # initialize_ui is triggered by dropdown change now
confirm_btn.on_click(finalize_process)

print("\n--- Gridbox Adjustment ---")
# 3D view on top, control panel / log messages below.
display(widgets.VBox([viz_output, output_log]))


In [None]:
#@title 2. Structure-Guided Peptide Generation
#@markdown **Instructions:**
#@markdown 1. Configure parameters.
#@markdown 2. Run this cell to start generation.
#@markdown 3. The 3D view will update every 30 seconds if new fragments are found.

import os
import sys
import json
import time
import glob
import subprocess
import threading
import multiprocessing
import ipywidgets as widgets
from IPython.display import display

# Import Viz Utils
try:
    # First attempt: the module may already be importable as-is.
    import viz_utils
except ImportError:
    # Otherwise extend sys.path with the likely locations relative to the CWD
    # (repo layout: /content/FrankPEPstein/functions/viz_utils.py) and retry.
    possible_paths = [
        os.path.join(os.getcwd(), *parts)
        for parts in (("FrankPEPstein", "functions"), ("functions",))
    ]
    for p in possible_paths:
        if p in sys.path or not os.path.exists(p):
            continue
        sys.path.append(p)

    try:
        import viz_utils
    except ImportError:
        # Downstream code treats None as "visualization unavailable".
        viz_utils = None
        print("⚠️ Warning: viz_utils not found. Visualization will be disabled.")

# --- Dependency Check ---
try:
    import py3Dmol
except ImportError:
    # Install through the interpreter that is running this kernel so the
    # package lands in the environment the following import will search.
    # An argument list (no shell) avoids depending on which `pip` is on PATH.
    subprocess.run([sys.executable, "-m", "pip", "install", "-q", "py3dmol"])
    import py3Dmol

# --- Parameters ---
# Colab form fields for the generation step. They are forwarded to
# run_FrankPEPstein.py by run_step_2 as:
#   peptide_size -> -w, threads -> -t, candidates -> -c, sampling_limit -> -s
# A non-positive `threads` value means "use every CPU reported by the machine".
# `modeller_key` is the license string written into Modeller's config.py.
peptide_size = 8 #@param {type:"slider", min:5, max:15, step:1}
threads = 0 #@param {type:"integer"}
if threads <= 0:
    threads = multiprocessing.cpu_count()
candidates = 10 #@param {type:"integer"}
sampling_limit = 500 #@param {type:"integer"}
#@markdown *Subsampling limit (peptides to simulate). Total combinations are usually much higher; we sample for speed.*
modeller_key = 'MODELIRANJE'


# --- Configuration & State ---
# All paths are resolved against the directory the notebook is running in.
initial_path = os.getcwd()
# Location of the cloned repository (scripts are launched from here).
repo_folder = os.path.join(initial_path, "FrankPEPstein")
# JSON file (relative to the CWD) from which the pipeline state is loaded below.
state_file = "pipeline_state.json"

# Fix permissions
def fix_permissions():
    """Mark the bundled Vina and Click binaries as executable (mode 0o755).

    Paths are resolved under ``initial_path``; binaries that are not present
    are skipped silently.
    """
    bundled_tools = (
        "utilities/vina_1.2.4_linux_x86_64",
        "utilities/click/click",
    )
    for relative in bundled_tools:
        exe = f"{initial_path}/{relative}"
        if not os.path.exists(exe):
            continue
        os.chmod(exe, 0o755)

def ensure_modeller_config(key='MODELIRANJE'):
    """Locate Modeller's ``config.py`` and rewrite it with the given license key.

    The file is looked up next to the importable ``modeller`` package; if the
    package cannot be imported, a couple of known conda locations are globbed
    instead. The file is then overwritten with a ``license`` line and an
    ``install_dir`` line.

    ``install_dir`` is derived from the location of the config file that was
    actually found (``<install_dir>/modlib/modeller/config.py``), so a Modeller
    version or environment other than the default one is configured correctly.
    The historical hard-coded path is only used when that layout is not
    recognised.

    Args:
        key: Modeller license key to write.

    Returns:
        ``(True, config_path)`` when the file was written, otherwise
        ``(False, message)`` where ``message`` is ``"Not found"`` or the text
        of the I/O error.
    """
    default_install_dir = "/usr/local/envs/FrankPEPstein/lib/modeller-10.8"

    try:
        import modeller
        modeller_path = os.path.dirname(modeller.__file__)
        config_path = os.path.join(modeller_path, "config.py")
    except ImportError:
        # Fallback search if the module is not importable in this context.
        config_path = None
        possible_paths = [
            "/usr/local/envs/FrankPEPstein/lib/modeller-*/modlib/modeller/config.py",
            f"{sys.prefix}/lib/modeller-*/modlib/modeller/config.py",
        ]
        for pattern in possible_paths:
            found = glob.glob(pattern)
            if found:
                config_path = found[0]
                break

    if not (config_path and os.path.exists(config_path)):
        return False, "Not found"

    # Resolve symlinks first so a package linked into site-packages still maps
    # back to the real <install_dir>/modlib/modeller directory.
    package_dir = os.path.dirname(os.path.realpath(config_path))
    modlib_dir = os.path.dirname(package_dir)
    if (os.path.basename(package_dir) == "modeller"
            and os.path.basename(modlib_dir) == "modlib"):
        install_dir = os.path.dirname(modlib_dir)
    else:
        install_dir = default_install_dir

    try:
        with open(config_path, 'w') as f:
            f.write(f"license = '{key}'\n")
            f.write(f"install_dir = r'{install_dir}'\n")  # Basic config
    except OSError as e:
        return False, str(e)
    return True, config_path

fix_permissions()

# Load State
# Best-effort: a missing, unreadable or malformed state file leaves the state
# empty so the defaults below apply. Only I/O and JSON decoding errors are
# tolerated (json.JSONDecodeError is a ValueError); anything else propagates.
pipeline_state = {}
if os.path.exists(state_file):
    try:
        with open(state_file, "r") as f:
            pipeline_state = json.load(f)
    except (OSError, ValueError) as e:
        print(f"⚠️ Warning: could not read {state_file}: {e}")
    if not isinstance(pipeline_state, dict):
        # Valid JSON that is not an object cannot be queried with .get().
        pipeline_state = {}

receptor_path = pipeline_state.get("receptor_filename", None)
standard_pocket_path = os.path.join(initial_path, "pocket.pdb")

# Prefer the pocket.pdb sitting in the working directory; otherwise fall back
# to the path recorded in the state file.
if os.path.exists(standard_pocket_path):
    extracted_pocket_path = standard_pocket_path
else:
    extracted_pocket_path = pipeline_state.get("extracted_pocket_path", None)

box_center = pipeline_state.get("box_center", [0.0, 0.0, 0.0])
box_size = pipeline_state.get("box_size", [20.0, 20.0, 20.0])

if not box_center or not box_size:
    # Fallback to defaults if the stored values are empty/null
    # (shouldn't happen if Step 1 ran).
    box_center = [0.0, 0.0, 0.0]
    box_size = [20.0, 20.0, 20.0]

# --- UI Layout ---
# Image pane for the static Matplotlib rendering (replaces the earlier HTML widget).
out_vis = widgets.Image(
    layout=widgets.Layout(width='600px', height='500px', border='1px solid #ddd'),
)

# Progress of the current pipeline phase, expressed as a percentage.
progress_bar = widgets.FloatProgress(
    description='Progress:',
    min=0.0,
    max=100.0,
    value=0.0,
    bar_style='info',
    style={'bar_color': '#4287f5'},
    layout=widgets.Layout(width='100%'),
)

# One-line status text shown under the progress bar.
status_label = widgets.Label(value="Ready to start...", layout=widgets.Layout(width='100%'))

# Scrollable area that receives the pipeline's log lines.
log_output = widgets.Output(
    layout={'border': '1px solid #ccc', 'height': '200px', 'overflow_y': 'scroll'},
)

# Container: centered image on top, then progress bar, status and log.
ui_container = widgets.VBox([
    widgets.HBox([out_vis], layout=widgets.Layout(justify_content='center')),
    widgets.HBox([progress_bar]),
    status_label,
    log_output,
])

# --- Logic ---

def update_static_viz(extra_pdbs=None, title="Pipeline Running..."):
    """Re-render the static preview image and push it into ``out_vis``.

    Nothing happens when no pocket file is available on disk or when the
    renderer returns an empty result. Any error raised while rendering is
    swallowed on purpose so a visualization problem never interrupts the
    pipeline.

    Args:
        extra_pdbs: Optional list of fragment PDB paths to draw on top.
        title: Title passed through to the renderer.
    """
    try:
        pocket_available = bool(extracted_pocket_path) and os.path.exists(extracted_pocket_path)
        if not pocket_available:
            return

        rendered = viz_utils.render_static_view(
            receptor_path=receptor_path,
            pocket_path=extracted_pocket_path,
            box_center=box_center,
            box_size=box_size,
            fragments_paths=extra_pdbs or [],
            title=title,
        )
        if rendered:
            out_vis.value = rendered
    except Exception:
        # Best effort: missing paths or renderer failures are ignored.
        pass

# Initial view: render once with the "Ready" title, then show the whole UI.
update_static_viz(title="Ready")
display(ui_container)

# --- Threading & Execution ---
# stop_event tells the fragment monitor loop to exit; run_step_2 clears it
# before starting a run and sets it when the run ends.
stop_event = threading.Event()
# Human-readable name of the current phase; run_step_2 updates it and the
# monitor uses it in the image title.
pipeline_phase = "Initializing"

def monitor_fragments():
    """Refresh the static preview with the newest fragments until told to stop.

    Looks for the first ``superpockets_residuesAligned3_RMSD*`` directory under
    ``<initial_path>/FrankPEPstein_run``. Once it exists, each pass collects
    its ``patch_file_*.pdb`` files, orders them newest first and re-renders the
    image with up to 100 of them; the title carries the current
    ``pipeline_phase`` and the total fragment count.

    The body always runs at least once, even when ``stop_event`` is already
    set, so calling this function directly after a run has finished performs
    exactly one final refresh. Between passes it waits about 10 seconds,
    checking ``stop_event`` every second.
    """
    def _mtime_or_zero(path):
        # Fragment files may be removed by the pipeline between the glob and
        # the sort; a vanished file must not kill the monitor thread.
        try:
            return os.path.getmtime(path)
        except OSError:
            return 0.0

    fragments_dir_pattern = os.path.join(
        initial_path, "FrankPEPstein_run", "superpockets_residuesAligned3_RMSD*"
    )
    fragments_dir = None

    while True:
        # Keep looking for the directory until the pipeline has created it.
        if not fragments_dir:
            matching_dirs = glob.glob(fragments_dir_pattern)
            if matching_dirs:
                fragments_dir = matching_dirs[0]  # Take the first one found

        if fragments_dir and os.path.exists(fragments_dir):
            files = glob.glob(os.path.join(fragments_dir, "patch_file_*.pdb"))
            files.sort(key=_mtime_or_zero, reverse=True)
            # Re-render on every pass (not only when new files appear) so the
            # phase shown in the title stays current.
            update_static_viz(
                files[:100],
                title=f"{pipeline_phase} (Fragments: {len(files)})",
            )

        # Wait ~10s in total, but notice a stop request within a second.
        for _ in range(10):
            if stop_event.is_set():
                break
            time.sleep(1)

        if stop_event.is_set():
            break

import re

def run_step_2():
    """Run the FrankPEPstein generation script and mirror its progress in the UI.

    Steps:
      1. Reset the widgets and (re)write the Modeller license file.
      2. Validate that Step 1 produced a receptor, a pocket and a grid box.
      3. Launch ``scripts/run_FrankPEPstein.py`` as a subprocess, preferring
         the interpreter of the FrankPEPstein conda environment.
      4. Read the merged stdout/stderr line by line: phase markers update the
         status label, tqdm-style lines drive the progress bar, everything
         else goes to the log widget.
      5. On success, render one final preview with the "Completed" title.

    A background thread running ``monitor_fragments`` refreshes the preview
    while the subprocess is alive. The subprocess handle is stored in the
    module-level ``process``; it is terminated if the run is interrupted or
    fails unexpectedly while the child is still running.
    """
    global pipeline_phase, process

    # Fresh run: clear the log and reset progress/status.
    log_output.clear_output()
    progress_bar.value = 0
    progress_bar.bar_style = 'info'
    status_label.value = "Initializing..."
    pipeline_phase = "Initializing"

    # 0. Fix Modeller License
    with log_output:
        print("Checking Modeller License...")
        success, msg = ensure_modeller_config(modeller_key)
        if success:
            print(f"✅ Modeller license configured in {msg}")
        else:
            print(f"⚠️ Warning: Could not configure Modeller license: {msg}")

    # Input validation
    if not receptor_path or not extracted_pocket_path:
        with log_output:
            print("❌ Error: Receptor or Pocket not found. Please run Step 1 successfully.")
        return
    if not box_center or not box_size:
        with log_output:
            print("❌ Error: Pocket Gridbox not defined. Please run Step 1 successfully.")
        return

    with log_output:
        print("--- Starting FrankPEPstein Generation ---")
        print(f"Peptide Size: {peptide_size}")
        print(f"Threads: {threads}")

    # Determine Python Executable
    conda_python = "/usr/local/envs/FrankPEPstein/bin/python"
    python_exe = conda_python if os.path.exists(conda_python) else sys.executable

    script_path = os.path.join(repo_folder, "scripts/run_FrankPEPstein.py")

    cmd_list = [
        python_exe, "-u", script_path,
        "-w", str(peptide_size),
        "-t", str(threads),
        "-c", str(candidates),
        "-xc", str(box_center[0]),
        "-yc", str(box_center[1]),
        "-zc", str(box_center[2]),
        "-xs", str(box_size[0]),
        "-ys", str(box_size[1]),
        "-zs", str(box_size[2]),
        "-s", str(sampling_limit),
    ]

    # (marker printed by the script, phase shown to the user, progress-bar
    # style to switch to or None to leave it unchanged). First match wins.
    phase_markers = (
        ("--- Running Superposer ---", "Scanning minipockets", None),
        ("--- Running FrankVINA 1 ---", "Selecting fragment candidates", 'warning'),
        ("--- Checking for patches ---",
         "Clustering fragments and obtaining combinations of peptides", 'info'),
        ("--- Running FrankVINA 2 ---", "Refining peptide candidates and selecting", 'success'),
    )

    # tqdm lines look like: "  10%|#         | 10/100 [00:01<00:09,  9.15it/s]"
    tqdm_pattern = re.compile(r'(\d+)%\|.*\| (\d+)/(\d+) \[(.*)\]')

    # Reset the handle so the error paths below can tell whether a child from
    # THIS run exists (a stale handle from a previous run must not be reused).
    process = None
    stop_event.clear()
    pipeline_phase = "Scanning minipockets"  # Default start

    # Start Monitor Thread
    monitor_thread = threading.Thread(target=monitor_fragments, daemon=True)
    monitor_thread.start()

    try:
        process = subprocess.Popen(
            cmd_list,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,  # Line buffered
        )

        while True:
            line = process.stdout.readline()
            if not line and process.poll() is not None:
                break
            if not line:
                continue

            clean_line = line.strip()

            # Check for Phase Markers
            for marker, phase, bar_style in phase_markers:
                if marker in clean_line:
                    pipeline_phase = phase
                    status_label.value = pipeline_phase
                    if bar_style is not None:
                        progress_bar.bar_style = bar_style
                    break

            # Check for progress bar
            match = tqdm_pattern.search(line)
            if match:
                pct = int(match.group(1))
                current = match.group(2)
                total = match.group(3)
                timing = match.group(4)

                progress_bar.value = pct
                status_label.value = f"{pipeline_phase}: {pct}% ({current}/{total}) - {timing}"
            elif clean_line:
                # Normal Log
                with log_output:
                    print(clean_line)

        process.wait()
        stop_event.set()  # Stop monitor
        # Give the monitor a moment to finish an in-flight render so it cannot
        # overwrite the final image afterwards.
        monitor_thread.join(timeout=5)

        if process.returncode == 0:
            with log_output:
                print("\n✅ Pipeline Finished Successfully.")
            progress_bar.value = 100
            progress_bar.bar_style = 'success'
            status_label.value = "Completed Successfully"

            # Final Viz Update. Rendered directly here: the monitor loop is
            # gated on stop_event, which has just been set.
            pipeline_phase = "Completed"
            fragment_dirs = glob.glob(os.path.join(
                initial_path, "FrankPEPstein_run", "superpockets_residuesAligned3_RMSD*"
            ))
            if fragment_dirs:
                final_files = glob.glob(os.path.join(fragment_dirs[0], "patch_file_*.pdb"))
                final_files.sort(key=os.path.getmtime, reverse=True)
                update_static_viz(
                    final_files[:100],
                    title=f"{pipeline_phase} (Fragments: {len(final_files)})",
                )
        else:
            with log_output:
                print(f"\n❌ Pipeline failed with exit code {process.returncode}")
            progress_bar.bar_style = 'danger'
            status_label.value = "Failed"

    except KeyboardInterrupt:
        with log_output:
            print("\n🛑 Pipeline interrupted by user.")
        stop_event.set()
        if process is not None and process.poll() is None:
            process.terminate()
        cleanup()
    except Exception as e:
        stop_event.set()
        # Do not leave an orphaned child behind when the reader loop fails.
        if process is not None and process.poll() is None:
            process.terminate()
        with log_output:
            print(f"\n❌ Execution Error: {e}")
            
def cleanup():
    """Remove the intermediate working folders of an interrupted run.

    Deletes every ``superpockets_residuesAligned3_RMSD*`` and
    ``temp_folder_residuesAligned3_RMSD*`` entry under
    ``<initial_path>/FrankPEPstein_run`` and reports each removal in the log
    widget. Other content of the run folder is left untouched.
    """
    run_folder_name = "FrankPEPstein_run"
    base_run_dir = os.path.join(initial_path, run_folder_name)

    # Dynamic Cleanup
    patterns = [
        os.path.join(base_run_dir, "superpockets_residuesAligned3_RMSD*"),
        os.path.join(base_run_dir, "temp_folder_residuesAligned3_RMSD*"),
    ]

    with log_output:
        for pat in patterns:
            for folder in glob.glob(pat):
                # Argument list instead of a shell string: paths containing
                # spaces or shell metacharacters are passed to rm verbatim.
                subprocess.run(["rm", "-rf", "--", folder])
                print(f"Removed {folder}")
        print("Cleanup complete.")

# Start the generation step when this code runs as the main module
# (presumably always true when executed as a notebook cell — TODO confirm).
if __name__ == "__main__":
    run_step_2()


In [None]:

#@title 2.5. Visualize Peptide Candidates
#@markdown **Instructions:**
#@markdown Run this cell to visualize the final receptor-pocket-peptide candidates in 3D.

import os
import glob
import json
import ipywidgets as widgets
from IPython.display import display

def visualize_candidates():
    """Show receptor, pocket, grid box and the newest peptide candidates in 3D.

    Reads ``pipeline_state.json`` from the working directory for the receptor
    path and the grid box, picks the pocket file (``pocket.pdb`` in the working
    directory, otherwise the path stored in the state file), finds the most
    recently modified ``top_*_peps`` folder under ``FrankPEPstein_run`` and
    renders everything with py3Dmol.

    Each surface is restricted to the model it was created for, so the orange
    pocket surface does not also wrap the receptor.

    Returns:
        None. Problems (py3Dmol missing, no results, rendering errors) are
        reported with a printed message.
    """
    # Dependency check inside the function so the cell can still be run
    # without py3Dmol installed.
    try:
        import py3Dmol
    except ImportError:
        print("py3Dmol not installed.")
        return

    initial_path = os.getcwd()

    # 1. Load State (best effort: an unreadable file just means no extras).
    state_file = "pipeline_state.json"
    state = {}
    if os.path.exists(state_file):
        try:
            with open(state_file, "r") as f:
                loaded = json.load(f)
            if isinstance(loaded, dict):
                state = loaded
        except (OSError, ValueError) as e:  # JSONDecodeError is a ValueError
            print(f"⚠️ Warning: could not read {state_file}: {e}")

    receptor_path = state.get("receptor_filename")
    box_center = state.get("box_center")
    box_size = state.get("box_size")

    # Pocket Path: prefer the copy in the working directory, then the path
    # recorded in the state file (same precedence as the generation step).
    standard_pocket_path = os.path.join(initial_path, "pocket.pdb")
    if os.path.exists(standard_pocket_path):
        extracted_pocket_path = standard_pocket_path
    else:
        extracted_pocket_path = state.get("extracted_pocket_path")

    # 2. Find Candidates
    run_base = os.path.join(initial_path, "FrankPEPstein_run")
    # Recursive glob to match: FrankPEPstein_run/**/top_*_peps
    candidate_folders = glob.glob(os.path.join(run_base, "**", "top_*_peps"), recursive=True)

    if not candidate_folders:
        print("❌ No candidate results found. Run fragments generation first.")
        return

    # Most recently modified results folder.
    target_folder = max(candidate_folders, key=os.path.getmtime)
    print(f"Visualizing candidates from: {target_folder}")

    pdb_files = glob.glob(os.path.join(target_folder, "*.pdb"))
    if not pdb_files:
        print("❌ No PDB files found in target folder.")
        return

    # 3. Render
    last_model = {'model': -1}  # selection: the model added most recently
    try:
        view = py3Dmol.view(width=1000, height=800, js='https://3dmol.org/build/3Dmol.js')

        # Receptor: White Surface + Cartoon
        if receptor_path and os.path.exists(receptor_path):
            with open(receptor_path, 'r') as f:
                view.addModel(f.read(), "pdb")
            view.setStyle(last_model, {'cartoon': {'color': 'white', 'opacity': 0.4}})
            view.addSurface(py3Dmol.SES, {'opacity': 0.3, 'color': 'white'}, last_model)

        # Pocket: Orange Surface
        if extracted_pocket_path and os.path.exists(extracted_pocket_path):
            with open(extracted_pocket_path, 'r') as f:
                view.addModel(f.read(), "pdb")
            # Without an explicit selection the surface would be built over
            # every atom loaded so far, i.e. the receptor as well.
            view.addSurface(py3Dmol.SES, {'opacity': 0.6, 'color': 'orange'}, last_model)
            # Fully transparent spheres: atoms stay hidden, only the surface shows.
            view.setStyle(last_model, {'sphere': {'radius': 0.5, 'color': 'orange', 'opacity': 0.0}})

        # Gridbox: red wireframe
        if box_center and box_size:
            cx, cy, cz = box_center
            sx, sy, sz = box_size
            bounds = (
                (cx - sx / 2, cx + sx / 2),
                (cy - sy / 2, cy + sy / 2),
                (cz - sz / 2, cz + sz / 2),
            )
            # Corners keyed by (i, j, k) with 0 = min and 1 = max on each axis.
            corners = {
                (i, j, k): {'x': bounds[0][i], 'y': bounds[1][j], 'z': bounds[2][k]}
                for i in (0, 1) for j in (0, 1) for k in (0, 1)
            }
            # An edge joins two corners that differ on exactly one axis; walking
            # only from the "min" side draws each of the 12 edges once.
            for index, start in corners.items():
                for axis in range(3):
                    if index[axis] != 0:
                        continue
                    neighbour = list(index)
                    neighbour[axis] = 1
                    view.addLine({
                        'start': start, 'end': corners[tuple(neighbour)],
                        'color': 'red', 'linewidth': 10,
                    })

        # Peptides: Sticks
        for pdb_file in pdb_files:
            with open(pdb_file, 'r') as f:
                view.addModel(f.read(), "pdb")
            view.setStyle(last_model, {'stick': {'colorscheme': 'greenCarbon', 'radius': 0.2}})

        view.zoomTo()
        # view.show() displays the viewer in the notebook and returns None.
        view.show()

    except Exception as e:
        print(f"Visualization Error: {e}")

# Render the candidates when this code runs as the main module
# (presumably always true when executed as a notebook cell — TODO confirm).
if __name__ == "__main__":
    visualize_candidates()


In [None]:
#@title 3. Sequence Alignment & Logo Analysis
#@markdown **Instructions:** 
#@markdown This step analyzes the finalized peptide candidates from Step 2.
#@markdown 1. Extracts amino acid sequences from the best PDB candidates.
#@markdown 2. Generates a Multiple Sequence Alignment (Multifasta).
#@markdown 3. Visualizes conserved motifs using a Sequence Logo.

import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import logomaker
from IPython.display import display, Image
import re

# Identify the latest run folder
initial_path = os.getcwd()
run_base = os.path.join(initial_path, "FrankPEPstein_run")

# Find the specific run folder with results (frankPEPstein_X/top_Y_peps)
# We need to search recursively or assume standard structure.
# Structure: FrankPEPstein_run/frankPEPstein_{pep_size}/top_{candidates}_peps/

# Recursive glob to match: FrankPEPstein_run/**/top_*_peps
candidate_folders = glob.glob(os.path.join(run_base, "**", "top_*_peps"), recursive=True)

if not candidate_folders:
    print("❌ No candidate results found from Step 2.")
else:
    # Use the most recent one if multiple (though pipeline likely cleans up)
    # Sort by modification time
    target_folder = sorted(candidate_folders, key=os.path.getmtime, reverse=True)[0]
    print(f"Analyzing results from: {target_folder}")
    
    # 1. Extract Sequences
    pdb_files = glob.glob(os.path.join(target_folder, "*.pdb")) # Actually frag*.pdb or similar? frankVINA_2 outputs top PDBs.
    
    sequences = []
    
    # Check if files exist
    if not pdb_files:
        # Maybe they are still pdbqt? frankVINA_2 converts to pdb at the end.
        print("⚠️ No PDB files found in target folder.")
    else:
        for pdb_path in pdb_files:
            filename = os.path.basename(pdb_path)
            # Expecting filename to contain sequence?
            # User said "si la contiene" (filename contains info).
            # frankVINA_2 output format typically: {score}_{sequence}.pdb or similar?
            # Let's try to find a sequence string (UPPERCASE letters).
            # Usually frag_SEQUENCE_score.pdb or SEQUENCE.pdb
            
            # Simple heuristic: extracting the longest string of uppercase letters
            # Or if user standard is specific...
            # Looking at frankVINA_2.py again, it seems it runs `vina_scorer`.
            # If we don't know exact format, we can extract from PDB SEQRES or Atoms (Chain 'p' or 'x').
            # User said: "desde el nombre del archivo mejor" AND "filename contains info".
            # Let's try to extract sequence from filename.
            # Assuming standard amino acids.
            
            # Attempt to match sequence chars [ACDEFGHIKLMNPQRSTVWY]
            # Pattern: Longest contiguous string of AAs?
            # Or maybe the whole filename is the sequence?
            
            # Fallback: Parse PDB if filename parsing is ambiguous, but user insists on filename.
            # Let's deduce from typical FrankPEPstein behavior. 
            # Often it is `SEQ_score.pdb` or `rank_SEQ_score.pdb`.
            
            # Heuristic: Find all caps string length > 4.
            matches = re.findall(r'[ACDEFGHIKLMNPQRSTVWY]{5,}', filename)
            if matches:
                 # Take the longest one
                 seq = max(matches, key=len)
                 sequences.append(seq)
            else:
                 # Fallback: try reading PDB?
                 # No, user said filename. Let's assume filename IS sequence if simple.
                 # Example: "AAAAA.pdb"
                 base = os.path.splitext(filename)[0]
                 if all(c in "ACDEFGHIKLMNPQRSTVWY_" for c in base): # Allow underscore
                     sequences.append(base.split('_')[0]) # Split score if present
                 else:
                     print(f"Skipping {filename}: Could not deduce sequence from name.")

        if sequences:
            print(f"Extracted {len(sequences)} sequences.")
            
            # 2. Generate Multifasta (MSA)
            fasta_path = os.path.join(target_folder, "candidates.fasta")
            with open(fasta_path, "w") as f:
                for i, seq in enumerate(sequences):
                    f.write(f">candidate_{i+1}\n{seq}\n")
            print(f"✅ Generated Multifasta: {fasta_path}")
            
            # 3. Generate Sequence Logo
            # Create a matrix for logomaker
            # Sequences must be same length for simple logo.
            lengths = [len(s) for s in sequences]
            if len(set(lengths)) > 1:
                print("⚠️ Sequences have varying lengths, alignment needed for proper Logo. Using simple left-alignment.")
                # Pad with gaps? or just ignore? Logomaker needs DataFrame.
                max_len = max(lengths)
                padded_seqs = [s.ljust(max_len, '-') for s in sequences]
                seq_list = padded_seqs
            else:
                seq_list = sequences

            try:
                # Create counts matrix
                logo_matrix = logomaker.alignment_to_matrix(seq_list)
                
                # Plot
                fig, ax = plt.subplots(figsize=(10, 4))
                logo = logomaker.Logo(logo_matrix, ax=ax)
                
                ax.set_title("Conserved Peptide Motifs", fontsize=14)
                ax.set_xlabel("Position", fontsize=12)
                ax.set_ylabel("Probability / Information", fontsize=12)
                
                logo_path = os.path.join(target_folder, "logo.png")
                plt.savefig(logo_path, bbox_inches='tight', dpi=300)
                plt.show() # Display in notebook
                print(f"✅ Generated Logo Plot: {logo_path}")
                
            except Exception as e:
                print(f"Error creating logo: {e}")
                
        else:
            print("No valid sequences found to align.")


In [None]:
#@title 4. Download Results
#@markdown **Instructions:**
#@markdown Click the button below to download a ZIP archive containing:
#@markdown - Candidate Peptide PDBs
#@markdown - Sequence Alignment (Fasta)
#@markdown - Sequence Motif Plot (Logo)

import os
import shutil
import glob
from google.colab import files
import ipywidgets as widgets
from IPython.display import display
from datetime import datetime

# Locate the results folder (same lookup as Step 3):
# any FrankPEPstein_run/**/top_*_peps directory below the working directory.
initial_path = os.getcwd()
run_base = os.path.join(initial_path, "FrankPEPstein_run")
candidate_folders = glob.glob(os.path.join(run_base, "**", "top_*_peps"), recursive=True)

# Newest folder by modification time, or None when nothing was found.
target_folder = max(candidate_folders, key=os.path.getmtime) if candidate_folders else None

# Output area used by the download callback for its messages.
out_log = widgets.Output()

def download_results(b):
    """Zip the results folder and hand the archive to the browser.

    Click handler for the download button. Messages are written to
    ``out_log``, which is cleared first. The archive is created in
    ``initial_path`` with a timestamped name and contains the content of
    ``target_folder``.

    Args:
        b: The button that triggered the callback (unused).
    """
    out_log.clear_output()
    with out_log:
        results_available = bool(target_folder) and os.path.exists(target_folder)
        if not results_available:
            print("❌ No results found to download.")
            return

        print(f"Compressing results from: {target_folder}")

        # Timestamped base name; make_archive appends the ".zip" extension.
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        archive_base = os.path.join(initial_path, f"FrankPEPstein_Results_{stamp}")

        try:
            shutil.make_archive(archive_base, 'zip', target_folder)
            final_zip = archive_base + ".zip"

            print(f"✅ Created archive: {final_zip}")
            print("Downloading...")

            files.download(final_zip)
        except Exception as e:
            print(f"Error during download: {e}")

# Download button: half-width, wired to the archive-and-download callback.
btn_download = widgets.Button(
    icon='download',
    description='Download Results (ZIP)',
    button_style='info',
    layout=widgets.Layout(width='50%'),
)
btn_download.on_click(download_results)

# Show the button with its message area underneath.
display(btn_download, out_log)
