In [None]:
# @title **ipSAE_batch: Protein Interface Analysis**
# @markdown ---
# @markdown ### üß¨ **Batch Processing for Protein Structure Predictions**
# @markdown
# @markdown This notebook runs **ipSAE_batch v1.0.0** analysis on protein structure predictions to calculate interaction scores and generate visualizations.
# @markdown
# @markdown **What ipSAE_batch Does:**
# @markdown - Calculates **ipSAE** (interface predicted Structural Alignment Error) scores
# @markdown - Generates **AlphaBridge-style** matrix and ribbon visualizations
# @markdown - Supports multiple backends: AlphaFold3, ColabFold, Boltz2, IntelliFold
# @markdown - Outputs per-contact and per-residue scores
# @markdown
# @markdown **Required Input:**
# @markdown - Structure prediction output folder (ZIP or from Google Drive)
# @markdown - Selection of appropriate backend
# @markdown
# @markdown **Expected Runtime:** 1-5 minutes per job depending on number of models
# @markdown
# @markdown ---
# @markdown
# @markdown üìö **GitHub:** [JKourelis/ipSAE_batch](https://github.com/JKourelis/ipSAE_batch)
# @markdown
# @markdown üìÑ **Citations:**
# @markdown - ipSAE: [Dunbrack 2025](https://doi.org/10.1101/2025.02.10.637595)
# @markdown - AlphaBridge: [Álvarez-Salmoral et al. 2024](https://doi.org/10.1101/2024.10.23.619601)
# @markdown
# @markdown ---

# Welcome banner followed by the workflow overview, emitted one entry per line.
# The horizontal rule spans the 59-column interior used by the two title rows.
_rule = "‚ïê" * 59
_intro_lines = (
    "‚ïî" + _rule + "‚ïó",
    "‚ïë         ipSAE_batch v1.0.0 - Interface Analysis           ‚ïë",
    "‚ïë   Protein Structure Prediction Scoring & Visualization    ‚ïë",
    "‚ïö" + _rule + "‚ïù",
    "\nüìã Follow the cells in order:",
    "   1. Install ipSAE_batch and dependencies",
    "   2. Connect to Google Drive (optional)",
    "   3. Configure analysis parameters",
    "   4. Upload prediction data",
    "   5. Run analysis",
    "   6. Download results\n",
    "üî¨ Supported backends:",
    "   ‚Ä¢ AlphaFold3 (AF3 server or local)",
    "   ‚Ä¢ ColabFold (AF2 multimer)",
    "   ‚Ä¢ Boltz2",
    "   ‚Ä¢ IntelliFold\n",
)
for _intro_line in _intro_lines:
    print(_intro_line)

In [None]:
# @title **Install ipSAE_batch** { display-mode: "form" }
# @markdown Install ipSAE_batch and all required dependencies.
# @markdown This takes approximately 1-2 minutes.

import subprocess
import sys
import os
from datetime import datetime

def _query_gpus():
    """Return one (name, driver, memory) tuple per GPU reported by nvidia-smi.

    An empty list means nvidia-smi is missing, timed out, exited non-zero,
    or printed nothing that could be parsed.
    """
    try:
        gpu_info = subprocess.run(
            ['nvidia-smi', '--query-gpu=name,driver_version,memory.total', '--format=csv,noheader'],
            capture_output=True, text=True, timeout=10)
    except (OSError, subprocess.SubprocessError):
        # OSError: binary not found / not executable; SubprocessError: timeout.
        return []
    if gpu_info.returncode != 0:
        return []

    gpus = []
    # nvidia-smi prints one CSV row per device, so parse row by row instead of
    # unpacking the whole output (which breaks as soon as there are two GPUs).
    for row in gpu_info.stdout.splitlines():
        # Split from the right so a comma inside the device name stays in the name.
        fields = [field.strip() for field in row.rsplit(',', 2)]
        if len(fields) == 3 and all(fields):
            gpus.append(tuple(fields))
    return gpus


def _pip_install(requirement):
    """Quietly pip-install `requirement` into the running interpreter; return the CompletedProcess."""
    return subprocess.run([sys.executable, "-m", "pip", "install", "-q", requirement],
                          capture_output=True, text=True)


def _report_version(label, module):
    """Print the version of an imported module, tolerating a missing `__version__`."""
    print(f"‚úÖ {label} version: {getattr(module, '__version__', 'unknown')}")


print("="*60)
print("ENVIRONMENT SNAPSHOT")
print("="*60)
print(f"‚è∞ Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"üêç Python: {sys.version.split()[0]}")

# Check GPU
detected_gpus = _query_gpus()
if detected_gpus:
    for gpu_name, driver, memory in detected_gpus:
        print(f"üéÆ GPU: {gpu_name}")
        print(f"   Driver: {driver}")
        print(f"   Memory: {memory}")
else:
    print("üéÆ GPU: None detected (CPU mode)")

print("="*60)
print("\nüì¶ Installing ipSAE_batch...\n")

# Install pycirclize and igraph first (not pre-installed on Colab)
packages = [
    "pycirclize==1.7.1",
    "igraph==0.11.8",
]

for pkg in packages:
    print(f"   Installing {pkg}...")
    result = _pip_install(pkg)
    if result.returncode != 0:
        # Best effort: keep going so the final import check reports what is missing.
        print(f"   ‚ö†Ô∏è Warning: {result.stderr}")

# Install ipSAE_batch from GitHub
# NOTE(review): this installs the default branch head; pin a tag or commit
# for reproducible runs once a suitable ref is confirmed.
print("   Installing ipSAE_batch from GitHub...")
result = _pip_install("git+https://github.com/JKourelis/ipSAE_batch.git")

if result.returncode != 0:
    print(f"‚ùå Installation failed: {result.stderr}")
else:
    print("\n" + "="*60)
    print("VERIFICATION")
    print("="*60)

    # Verify installation by importing every runtime dependency.
    try:
        import ipsae_batch
        _report_version("ipSAE_batch", ipsae_batch)

        import numpy as np
        _report_version("numpy", np)

        import pandas as pd
        _report_version("pandas", pd)

        import scipy
        _report_version("scipy", scipy)

        import matplotlib
        _report_version("matplotlib", matplotlib)

        import pycirclize
        _report_version("pycirclize", pycirclize)

        import igraph
        _report_version("igraph", igraph)

        print("\nüéâ Installation successful! Proceed to the next cell.")

    except ImportError as e:
        print(f"‚ùå Import error: {e}")
        print("   Please try running this cell again.")

In [None]:
# @title **Connect to Google Drive** (Optional) { display-mode: "form" }
# @markdown Mount your Google Drive to access files and save results persistently.
# @markdown Skip this cell if you prefer to upload files directly.

import os
from google.colab import drive

# Mount Drive and prepare a persistent results folder. Any failure (mount or
# folder creation) is reported and the notebook carries on with local storage.
try:
    drive.mount('/content/drive', force_remount=False)
    for _status in (
        "‚úÖ Google Drive connected successfully!",
        "üìÅ Drive mounted at: /content/drive/MyDrive/",
    ):
        print(_status)

    # Create ipSAE results directory
    results_dir = "/content/drive/MyDrive/ipSAE_Results"
    os.makedirs(results_dir, exist_ok=True)
    print("üìÇ Results will be saved to: " + results_dir)

except Exception as mount_error:
    print("‚ö†Ô∏è Google Drive not connected: " + str(mount_error))
    print("   Results will be saved locally. Download before session ends.")

In [None]:
# @title **Configure Analysis Parameters** { display-mode: "form" }
# @markdown Set the parameters for your ipSAE analysis.

# @markdown ---
# @markdown ### Backend Selection
backend = "alphafold3" # @param ["alphafold3", "colabfold", "boltz2", "intellifold"]
# @markdown - **alphafold3**: AF3 server or local AlphaFold3 outputs
# @markdown - **colabfold**: ColabFold/AF2 multimer outputs
# @markdown - **boltz2**: Boltz2 prediction outputs
# @markdown - **intellifold**: IntelliFold server outputs

# @markdown ---
# @markdown ### Analysis Parameters
pae_cutoff = 10.0 # @param {type:"number"}
# @markdown PAE cutoff for ipSAE calculation (default: 10.0)

distance_cutoff = 10.0 # @param {type:"number"}
# @markdown Distance cutoff for interface residues in √Öngstr√∂ms (default: 10.0)

# @markdown ---
# @markdown ### Output Options
generate_png = True # @param {type:"boolean"}
# @markdown Generate PNG graphics (matrix and ribbon plots)

generate_pdf = False # @param {type:"boolean"}
# @markdown Generate PDF report

per_contact_output = True # @param {type:"boolean"}
# @markdown Output per-contact scores CSV

per_residue_output = False # @param {type:"boolean"}
# @markdown Output per-residue scores CSV

# @markdown ---

# Bundle the form values into the single mapping that the later cells read.
config = dict(
    backend=backend,
    pae_cutoff=pae_cutoff,
    distance_cutoff=distance_cutoff,
    generate_png=generate_png,
    generate_pdf=generate_pdf,
    per_contact_output=per_contact_output,
    per_residue_output=per_residue_output,
)

# Echo the stored settings back so the user can check them before uploading.
_divider = "=" * 60
_summary_lines = [
    _divider,
    "CONFIGURATION SUMMARY",
    _divider,
    f"üîß Backend: {config['backend']}",
    f"üìä PAE cutoff: {config['pae_cutoff']}",
    f"üìè Distance cutoff: {config['distance_cutoff']} √Ö",
    f"üñºÔ∏è  Generate PNG: {config['generate_png']}",
    f"üìÑ Generate PDF: {config['generate_pdf']}",
    f"üìã Per-contact output: {config['per_contact_output']}",
    f"üìã Per-residue output: {config['per_residue_output']}",
    _divider,
    "\n‚úÖ Configuration saved. Proceed to upload your data.",
]
print("\n".join(_summary_lines))

In [None]:
# @title **Upload Prediction Data** { display-mode: "form" }
# @markdown Upload your structure prediction output folder.

# @markdown ---
upload_method = "Upload ZIP file" # @param ["Upload ZIP file", "Use Google Drive path"]
drive_path = "" # @param {type:"string"}
# @markdown **Upload Method:**
# @markdown - `Upload ZIP file` - Upload a ZIP archive of your prediction folder
# @markdown - `Use Google Drive path` - Path to folder in your Drive (e.g., `/content/drive/MyDrive/AF3_predictions`)

import os
import zipfile
import shutil
from google.colab import files

def _preview_folder(folder, limit=10):
    """Print up to `limit` entries of `folder`, directories shown with a trailing slash."""
    entries = sorted(os.listdir(folder))
    print(f"\nüìÇ Contents:")
    for item in entries[:limit]:
        if os.path.isdir(os.path.join(folder, item)):
            print(f"   üìÅ {item}/")
        else:
            print(f"   üìÑ {item}")
    if len(entries) > limit:
        print(f"   ... and {len(entries) - limit} more items")


def _extract_archive(archive, extract_dir):
    """Unpack `archive` into a fresh `extract_dir` and return the folder holding the data.

    Returns None, after printing the reason, when `archive` is not a readable ZIP.
    The uploaded archive is deleted once it has been extracted successfully.
    """
    # Validate before wiping the previous extraction so a bad upload is harmless.
    if not zipfile.is_zipfile(archive):
        print(f"‚ùå {archive} is not a valid ZIP archive. Please run this cell again.")
        return None

    if os.path.exists(extract_dir):
        shutil.rmtree(extract_dir)
    os.makedirs(extract_dir)

    print(f"\nüì¶ Extracting {archive}...")
    try:
        with zipfile.ZipFile(archive, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)
    except zipfile.BadZipFile as error:
        print(f"‚ùå Could not extract {archive}: {error}")
        return None

    # Clean up ZIP file
    os.remove(archive)

    # ZIPs created on macOS commonly carry a "__MACOSX" metadata folder; drop it
    # so it neither hides a single wrapping folder nor looks like a job folder.
    shutil.rmtree(os.path.join(extract_dir, "__MACOSX"), ignore_errors=True)

    # Find the actual data directory (handle nested folders); hidden entries
    # such as .DS_Store do not count as content.
    contents = [entry for entry in os.listdir(extract_dir) if not entry.startswith('.')]
    if len(contents) == 1 and os.path.isdir(os.path.join(extract_dir, contents[0])):
        # Single folder inside - use it directly
        return os.path.join(extract_dir, contents[0])
    return extract_dir


input_folder = None
input_folder_marker = '/content/input_folder.txt'

# Forget the folder recorded by an earlier run: if this upload fails, the
# analysis cell must not silently pick up the previous data set.
if os.path.exists(input_folder_marker):
    os.remove(input_folder_marker)

print("üìÅ Data Upload\n")

if upload_method == "Upload ZIP file":
    print("Please select your prediction data ZIP file...")
    print("The ZIP should contain one or more job folders.\n")

    uploaded = files.upload()

    if uploaded:
        filename = next(iter(uploaded))
        if len(uploaded) > 1:
            print(f"‚ö†Ô∏è Multiple files uploaded; only {filename} will be used.")

        input_folder = _extract_archive(filename, "/content/prediction_data")
        if input_folder:
            print(f"‚úÖ Extracted to: {input_folder}")
            _preview_folder(input_folder)
    else:
        print("‚ùå No file uploaded. Please run this cell again.")

elif upload_method == "Use Google Drive path":
    # Use the stripped value everywhere: a trailing space pasted into the form
    # would otherwise pass the emptiness check and then fail the lookup.
    drive_folder = drive_path.strip()
    if drive_folder:
        if os.path.isdir(drive_folder):
            input_folder = drive_folder
            print(f"‚úÖ Using folder: {drive_folder}")
            _preview_folder(input_folder)
        else:
            print(f"‚ùå Folder not found: {drive_folder}")
            print("   Please check the path and ensure Google Drive is mounted.")
    else:
        print("‚ùå Please enter a valid Google Drive path.")

# Save input folder path
if input_folder:
    with open(input_folder_marker, 'w') as f:
        f.write(input_folder)
    print(f"\n‚úÖ Data ready for analysis!")

In [None]:
# @title **Run ipSAE Analysis** { display-mode: "form" }
# @markdown Execute the ipSAE_batch analysis on your uploaded data.

import subprocess
import sys
import os
from datetime import datetime

import shlex  # cell-local: only needed to echo the command in copy-pasteable form

# Load input folder
try:
    with open('/content/input_folder.txt', 'r') as f:
        input_folder = f.read().strip()
except FileNotFoundError:
    print("‚ùå No input folder specified. Please run the 'Upload Prediction Data' cell first.")
    raise SystemExit

if not os.path.exists(input_folder):
    print(f"‚ùå Input folder not found: {input_folder}")
    raise SystemExit

# The input folder survives a kernel restart (it is read from disk) but
# `config` lives only in memory, so fail with guidance instead of a NameError.
try:
    config
except NameError:
    print("‚ùå No configuration found. Please run the 'Configure Analysis Parameters' cell first.")
    raise SystemExit

# Create output directory
output_dir = "/content/ipSAE_output"
os.makedirs(output_dir, exist_ok=True)

# Invalidate the marker of a previous successful run; it is rewritten below
# only if this run succeeds, so the download cell never exports stale results.
output_marker = '/content/output_dir.txt'
if os.path.exists(output_marker):
    os.remove(output_marker)

print("="*60)
print("üî¨ ipSAE BATCH ANALYSIS")
print("="*60)
print(f"üìÅ Input: {input_folder}")
print(f"üìÇ Output: {output_dir}")
print(f"üîß Backend: {config['backend']}")
print(f"‚è∞ Started: {datetime.now().strftime('%H:%M:%S')}")
print("="*60 + "\n")

# Build command
cmd = [
    sys.executable, "-m", "ipsae_batch",
    input_folder,
    "--backend", config['backend'],
    "--output_dir", output_dir,
    "--pae_cutoff", str(config['pae_cutoff']),
    "--dist_cutoff", str(config['distance_cutoff']),
]

# Optional boolean switches, appended only when enabled in the configuration.
optional_flags = (
    ('generate_png', "--png"),
    ('generate_pdf', "--pdf"),
    ('per_contact_output', "--per_contact"),
    ('per_residue_output', "--per_residue"),
)
for config_key, flag in optional_flags:
    if config[config_key]:
        cmd.append(flag)

# shlex.join quotes arguments, so paths containing spaces are shown correctly.
print(f"üìã Command: {shlex.join(cmd)}\n")

# Run analysis with live output; the context manager closes the pipe and
# waits for the child process even if printing is interrupted.
start_time = datetime.now()
with subprocess.Popen(
    cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
    bufsize=1
) as process:
    for line in process.stdout:
        print(line, end='')

elapsed = (datetime.now() - start_time).total_seconds()

print("\n" + "="*60)
if process.returncode == 0:
    print(f"‚úÖ Analysis completed in {elapsed:.1f} seconds")

    # List output files (single directory scan reused for the counts below)
    output_files = [
        item for item in sorted(os.listdir(output_dir))
        if os.path.isfile(os.path.join(output_dir, item))
    ]
    all_entries = os.listdir(output_dir)

    print("\nüìÇ Output files:")
    for item in output_files:
        size_kb = os.path.getsize(os.path.join(output_dir, item)) / 1024
        print(f"   üìÑ {item} ({size_kb:.1f} KB)")

    # Count PNG and CSV files
    png_count = sum(1 for name in all_entries if name.endswith('.png'))
    csv_count = sum(1 for name in all_entries if name.endswith('.csv'))
    print(f"\nüìä Generated: {csv_count} CSV files, {png_count} PNG files")

    # Save output directory path
    with open(output_marker, 'w') as f:
        f.write(output_dir)

    print("\n‚úÖ Proceed to the next cell to download results.")
else:
    print(f"‚ùå Analysis failed (exit code {process.returncode})")
    print("\nCommon issues:")
    print("   ‚Ä¢ Wrong backend selected for your data")
    print("   ‚Ä¢ Incorrect folder structure")
    print("   ‚Ä¢ Missing required files (PDB, PAE JSON)")
print("="*60)

In [None]:
# @title **Download Results** { display-mode: "form" }
# @markdown Download your analysis results as a ZIP file or save to Google Drive.

# @markdown ---
save_to_drive = True # @param {type:"boolean"}
# @markdown Save results to Google Drive (if mounted)

download_zip = True # @param {type:"boolean"}
# @markdown Download results as ZIP file

# @markdown ---

import os
import shutil
from datetime import datetime
from google.colab import files

# Load output directory
try:
    with open('/content/output_dir.txt', 'r') as f:
        output_dir = f.read().strip()
except FileNotFoundError:
    print("‚ùå No output directory found. Please run the analysis first.")
    raise SystemExit

if not os.path.exists(output_dir):
    print(f"‚ùå Output directory not found: {output_dir}")
    raise SystemExit

# The output location is read from disk and therefore survives a kernel
# restart, but `config` does not; fall back to an empty mapping so the export
# still works and only the optional summary lines are skipped.
try:
    export_config = config
except NameError:
    export_config = {}

print("="*60)
print("üì¶ EXPORT RESULTS")
print("="*60)

# One timestamp names both the Drive copy and the ZIP so they can be matched up.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
archive_name = f"ipSAE_results_{timestamp}"

# Save to Google Drive
if save_to_drive:
    drive_results = "/content/drive/MyDrive/ipSAE_Results"
    if os.path.exists("/content/drive/MyDrive"):
        os.makedirs(drive_results, exist_ok=True)
        drive_output = os.path.join(drive_results, archive_name)

        print(f"\nüíæ Copying to Google Drive...")
        shutil.copytree(output_dir, drive_output)
        print(f"‚úÖ Saved to: {drive_output}")

        # Report how many top-level files were copied (sub-folders not counted)
        file_count = sum(
            1 for name in os.listdir(drive_output)
            if os.path.isfile(os.path.join(drive_output, name))
        )
        print(f"   üìÅ {file_count} files saved")
    else:
        print("‚ö†Ô∏è Google Drive not mounted. Skipping Drive save.")

# Create downloadable ZIP
if download_zip:
    print(f"\nüì¶ Creating ZIP archive...")
    zip_path = f"/content/{archive_name}"
    # make_archive appends the ".zip" suffix to the base name itself.
    shutil.make_archive(zip_path, 'zip', output_dir)
    zip_file = f"{zip_path}.zip"

    zip_size_mb = os.path.getsize(zip_file) / (1024 * 1024)
    print(f"‚úÖ Created: {archive_name}.zip ({zip_size_mb:.2f} MB)")

    print("\nüì• Starting download...")
    files.download(zip_file)
    print("‚úÖ Download initiated!")

print("\n" + "="*60)
print("üéâ Analysis Complete!")
print("="*60)
print("\nüìä Output includes:")
print("   ‚Ä¢ ipSAE_combined.csv - Aggregate scores for all jobs")
print("   ‚Ä¢ ipSAE_comparison.html - Interactive comparison plots")
if export_config.get('per_contact_output'):
    print("   ‚Ä¢ *_contacts.csv - Per-contact scores")
if export_config.get('generate_png'):
    print("   ‚Ä¢ *_matrix.png - PMC/Contact probability plots")
    print("   ‚Ä¢ *_ribbon.png - Circular ribbon diagrams")
print("\nüìö For more information, visit:")
print("   https://github.com/JKourelis/ipSAE_batch")

In [None]:
# @title **Preview Results** (Optional) { display-mode: "form" }
# @markdown Display sample outputs inline.

# @markdown ---
show_summary_table = True # @param {type:"boolean"}
show_sample_plots = True # @param {type:"boolean"}
max_plots_to_show = 4 # @param {type:"integer"}
# @markdown ---

import os
import pandas as pd
from IPython.display import display, Image, HTML

# Load output directory
try:
    with open('/content/output_dir.txt', 'r') as f:
        output_dir = f.read().strip()
except FileNotFoundError:
    print("‚ùå No output directory found. Please run the analysis first.")
    raise SystemExit

# Same guard as the download cell: the recorded folder may be gone (for
# example after a runtime reset), and os.listdir below would raise on it.
if not os.path.exists(output_dir):
    print(f"‚ùå Output directory not found: {output_dir}")
    raise SystemExit

print("="*60)
print("üìä RESULTS PREVIEW")
print("="*60)

# Show summary table
if show_summary_table:
    combined_csv = os.path.join(output_dir, "ipSAE_combined.csv")
    if os.path.exists(combined_csv):
        print("\nüìã Summary Table (ipSAE_combined.csv):")
        df = pd.read_csv(combined_csv)
        # Only the first 20 rows are rendered; the total is printed underneath.
        display(df.head(20))
        print(f"\n   Total rows: {len(df)}")
    else:
        print("‚ö†Ô∏è ipSAE_combined.csv not found")

# Show sample plots
if show_sample_plots:
    png_files = sorted(f for f in os.listdir(output_dir) if f.endswith('.png'))

    # A negative form value would turn the slice below into "all but the
    # last N" and print a negative count, so clamp it at zero.
    plot_limit = max(0, int(max_plots_to_show))

    if png_files:
        print(f"\nüñºÔ∏è Sample Plots ({min(len(png_files), plot_limit)} of {len(png_files)}):")

        for png_file in png_files[:plot_limit]:
            png_path = os.path.join(output_dir, png_file)
            print(f"\n   üìÑ {png_file}")
            display(Image(filename=png_path, width=600))
    else:
        print("‚ö†Ô∏è No PNG files found. Enable --png option to generate plots.")

# Show HTML comparison link
html_file = os.path.join(output_dir, "ipSAE_comparison.html")
if os.path.exists(html_file):
    print("\nüìà Interactive comparison available: ipSAE_comparison.html")
    print("   Download and open in browser for interactive plots.")