<a href="https://colab.research.google.com/github/RuizhiPeng/ColabFold/blob/main/batch/AlphaFold2_batch_initial_guess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<img src="https://raw.githubusercontent.com/sokrypton/ColabFold/main/.github/ColabFold_Marv_Logo_Small.png" height="200" align="right" style="height:240px">

## ColabFold v1.5.5: AlphaFold2 using MMseqs2

Easy-to-use protein structure and complex prediction using [AlphaFold2](https://www.nature.com/articles/s41586-021-03819-2) and [AlphaFold2-multimer](https://www.biorxiv.org/content/10.1101/2021.10.04.463034v1). Sequence alignments/templates are generated through [MMseqs2](https://mmseqs.com) and [HHsearch](https://github.com/soedinglab/hh-suite). For more details, see the <a href="#Instructions">bottom</a> of the notebook, and check out the [ColabFold GitHub](https://github.com/sokrypton/ColabFold) and [Nature Protocols](https://www.nature.com/articles/s41596-024-01060-5).

Old versions: [v1.4](https://colab.research.google.com/github/sokrypton/ColabFold/blob/v1.4.0/AlphaFold2.ipynb), [v1.5.1](https://colab.research.google.com/github/sokrypton/ColabFold/blob/v1.5.1/AlphaFold2.ipynb), [v1.5.2](https://colab.research.google.com/github/sokrypton/ColabFold/blob/v1.5.2/AlphaFold2.ipynb), [v1.5.3-patch](https://colab.research.google.com/github/sokrypton/ColabFold/blob/56c72044c7d51a311ca99b953a71e552fdc042e1/AlphaFold2.ipynb)

[Mirdita M, Schütze K, Moriwaki Y, Heo L, Ovchinnikov S, Steinegger M. ColabFold: Making protein folding accessible to all.
*Nature Methods*, 2022](https://www.nature.com/articles/s41592-022-01488-1)

In [None]:
#@title Mount Google Drive
# Mount Google Drive at /content/drive; the default input, output and
# initial-guess directories configured in the next cell live under that path.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#@title Configuration and Setup
# The `#@param` / `#@markdown` comments below are Colab form annotations.
# The values assigned here are read by the rest of this cell and by the
# following cells.
#@markdown ### Input/Output Settings
input_dir = '/content/drive/Othercomputers/prometheus/design/cirbp_86_108/af2_input_split1' #@param {type:"string"}
input_type = 'fasta' #@param ["fasta", "pdb"]
output_dir = '/content/drive/Othercomputers/prometheus/design/cirbp_86_108/af2_output' #@param {type:"string"}

#@markdown ### Basic Model Settings
num_relax = 0 #@param [0, 1, 5] {type:"raw"}
#@markdown - specify how many of the top ranked structures to relax using amber
# NOTE(review): the parameter-processing code further down only enables
# templates for "pdb100"; "custom" currently behaves like "none".
template_mode = "none" #@param ["none", "pdb100","custom"]
#@markdown - `none` = no template information is used. `pdb100` = detect templates in pdb100. `custom` - upload and search own templates

#@markdown ### MSA Settings
msa_mode = "mmseqs2_uniref_env" #@param ["mmseqs2_uniref_env", "mmseqs2_uniref","single_sequence","custom"]
pair_mode = "unpaired_paired" #@param ["unpaired_paired","paired","unpaired"] {type:"string"}
#@markdown - "unpaired_paired" = pair sequences from same species + unpaired MSA, "unpaired" = separate MSA for each chain, "paired" - only use paired sequences.

#@markdown ### Advanced Model Settings
model_type = "alphafold2_multimer_v3" #@param ["auto", "alphafold2_ptm", "alphafold2_multimer_v1", "alphafold2_multimer_v2", "alphafold2_multimer_v3", "deepfold_v1", "alphafold2"]
# Kept as strings here so "auto" can be selected; converted to int/float (or
# None for "auto") in the parameter-processing step of this cell.
num_recycles = "20" #@param ["auto", "0", "1", "3", "6", "12", "20", "24", "48"]
recycle_early_stop_tolerance = "0.5" #@param ["auto", "0.0", "0.5", "1.0"]
relax_max_iterations = 200 #@param [0, 200, 2000] {type:"raw"}
pairing_strategy = "greedy" #@param ["greedy", "complete"] {type:"string"}
calc_extra_ptm = False #@param {type:"boolean"}

#@markdown ### Sampling Settings
max_msa = "auto" #@param ["auto", "512:1024", "256:512", "64:128", "32:64", "16:32"]
num_seeds = 1 #@param [1,2,4,8,16] {type:"raw"}
use_dropout = False #@param {type:"boolean"}

#@markdown ### Initial Guess Settings
use_initial_guess = True #@param {type:"boolean"}
# Directory scanned for .pdb/.cif files that are matched to FASTA files by
# basename (exact match first, then prefix match).
initial_guess_dir = "/content/drive/Othercomputers/prometheus/design/cirbp_86_108/logos_output" #@param {type:"string"}
#@markdown - `use_initial_guess` = soft initialization with desired coordinates (useful for binder design)

#@markdown ### Save Settings
save_all = False #@param {type:"boolean"}
save_recycles = False #@param {type:"boolean"}
# NOTE(review): save_to_google_drive is not read anywhere in the visible
# cells - confirm whether a later cell uses it.
save_to_google_drive = True #@param {type:"boolean"}
dpi = 200 #@param {type:"integer"}

# =============================================================================
# Helper Functions
# =============================================================================

def build_fasta_from_pdb(pdb_files, input_dir):
    """Write one multi-chain FASTA file per PDB structure.

    For every path in ``pdb_files`` a file ``<input_dir>/<pdb basename>.fasta``
    is created with one ``>chain_<id>`` record per chain that yields a
    non-empty peptide sequence. An existing FASTA file is left untouched and
    still reported in the result.

    Only the first model of each structure is used: multi-model files (for
    example NMR ensembles) repeat the same chains in every model, and
    emitting all of them would turn one protein into a many-copy complex.

    Args:
        pdb_files: Iterable of paths to PDB files.
        input_dir: Directory the FASTA files are written to.

    Returns:
        List of FASTA file paths that exist after the call (created or
        already present), in the order of ``pdb_files``.
    """
    import os
    from Bio.PDB import PDBParser, PPBuilder

    parser = PDBParser(QUIET=True)
    ppb = PPBuilder()

    fasta_files = []
    for pdb_file in pdb_files:
        structure_id = os.path.splitext(os.path.basename(pdb_file))[0]
        fasta_file_path = os.path.join(input_dir, f'{structure_id}.fasta')

        # Skip if FASTA already exists
        if os.path.exists(fasta_file_path):
            print(f"  ‚è≠Ô∏è  Skipped: {structure_id}.fasta (already exists)")
            fasta_files.append(fasta_file_path)
            continue

        structure = parser.get_structure(structure_id, pdb_file)

        # Extract sequences for each chain of the first model only.
        chain_sequences = []
        first_model = next(iter(structure), None)
        if first_model is not None:
            for chain in first_model:
                # A chain with gaps is returned as several peptides; join them.
                chain_sequence = "".join(
                    str(pp.get_sequence()) for pp in ppb.build_peptides(chain)
                )
                if chain_sequence:
                    chain_sequences.append((chain.get_id(), chain_sequence))

        if not chain_sequences:
            print(f"  Warning: no peptide sequence found in {pdb_file}; no FASTA written")
            continue

        # Write multi-chain FASTA file
        with open(fasta_file_path, 'w') as f:
            for chain_id, sequence in chain_sequences:
                f.write(f">chain_{chain_id}\n")
                f.write(f"{sequence}\n")

        fasta_files.append(fasta_file_path)
        chain_info = ", ".join([f"{chain_id}({len(seq)} aa)" for chain_id, seq in chain_sequences])
        print(f"  ‚úì Created: {structure_id}.fasta [{chain_info}]")

    return fasta_files

def get_fasta_files(input_dir):
    """Recursively collect FASTA files below ``input_dir``.

    A file counts as FASTA when its name ends with ``.fasta``, ``.fa`` or
    ``.fna`` (compared case-insensitively). Entries are visited in name order
    inside every directory, so the result is the same on every run regardless
    of filesystem enumeration order.

    Args:
        input_dir: Directory to scan.

    Returns:
        List of file paths; subdirectory contents appear at the position of
        the subdirectory's name.

    Raises:
        OSError: If ``input_dir`` (or a subdirectory) cannot be listed.
    """
    import os

    fasta_extensions = ('.fasta', '.fa', '.fna')
    fasta_files = []

    # The context manager releases the directory handle even if an entry
    # raises while being inspected.
    with os.scandir(input_dir) as entries:
        for entry in sorted(entries, key=lambda e: e.name):
            if entry.is_file():
                if entry.name.lower().endswith(fasta_extensions):
                    fasta_files.append(entry.path)
            elif entry.is_dir():
                # Recursively scan subdirectories
                fasta_files.extend(get_fasta_files(entry.path))

    return fasta_files

def get_sequence_from_fasta(file_path):
    """Read a FASTA file and return its records as one ':'-joined query.

    Every record (text between two ``>`` header lines) becomes one chain; the
    chains are joined with ``:``. Header text is ignored and the job name is
    the file name without extension.

    Args:
        file_path: Path of the FASTA file.

    Returns:
        ``{basename: "SEQ1:SEQ2:..."}`` on success, or an empty dict (after
        printing a warning) when the file holds no sequence at all or any
        character outside the 20 standard upper-case amino-acid letters.
    """
    import os
    base_name = os.path.splitext(os.path.basename(file_path))[0]

    sequences = []
    current_parts = []
    with open(file_path, 'r') as f:
        for line in f:
            line = line.strip()
            if line.startswith(">"):
                # A header closes the record collected so far.
                if current_parts:
                    sequences.append("".join(current_parts))
                    current_parts = []
            elif line:
                current_parts.append(line)
    if current_parts:
        sequences.append("".join(current_parts))

    # An empty or header-only file must not become an empty-sequence job.
    if not sequences:
        print(f"Warning: No sequence found in {file_path}. Skipping this file.")
        return {}

    combined_sequence = ':'.join(sequences)

    # Check legit sequence
    valid_aa = set("ACDEFGHIKLMNPQRSTVWY")
    if not all(residue in valid_aa for residue in combined_sequence.replace(":", "")):
        print(f"Warning: Sequence in {file_path} contains non-standard amino acids. Skipping this file.")
        return {}
    return {base_name: combined_sequence}

def add_hash(x, y):
    """Return ``x`` followed by ``_`` and the first 5 hex digits of SHA-1(``y``)."""
    import hashlib
    digest = hashlib.sha1(y.encode()).hexdigest()
    return "_".join((x, digest[:5]))

def load_completion_cache(output_dir):
    """Load the completion cache stored in ``output_dir``.

    The cache is the JSON file ``<output_dir>/.completion_cache.json``.
    Loading is best effort: a missing, unreadable or malformed file, or a
    JSON payload that is not an object, yields an empty cache instead of an
    exception.

    Args:
        output_dir: Directory that holds the cache file.

    Returns:
        The cached dict, or ``{}`` when no usable cache exists.
    """
    import os
    import json
    cache_file = os.path.join(output_dir, '.completion_cache.json')
    try:
        with open(cache_file, 'r') as f:
            cache = json.load(f)
    except FileNotFoundError:
        # No cache yet - the normal case on a first run.
        return {}
    except (OSError, ValueError) as err:
        # ValueError covers json.JSONDecodeError and decoding errors.
        print(f"Warning: ignoring unreadable completion cache {cache_file}: {err}")
        return {}

    # Callers index the cache by job directory, so anything but a dict is unusable.
    if not isinstance(cache, dict):
        print(f"Warning: ignoring completion cache {cache_file}: not a JSON object")
        return {}
    return cache

def save_completion_cache(output_dir, cache):
    """Write the completion cache to ``<output_dir>/.completion_cache.json``.

    Saving is best effort: a failure is reported with a warning and never
    raised. The cache is serialized before the file is opened, so a cache
    that cannot be serialized does not truncate an existing cache file.

    Args:
        output_dir: Directory that holds the cache file.
        cache: JSON-serializable dict to store.
    """
    import os
    import json
    cache_file = os.path.join(output_dir, '.completion_cache.json')
    try:
        payload = json.dumps(cache, indent=2)
        with open(cache_file, 'w') as f:
            f.write(payload)
    except (OSError, TypeError, ValueError) as err:
        print(f"Warning: could not save completion cache to {cache_file}: {err}")

def is_sequence_completed(job_dir, cache=None, use_cache=True):
    """Report whether ``job_dir`` contains a ``*.done.txt`` marker file.

    Only a positive cache entry is trusted. A cached "not completed" may be
    stale, because the job can finish after the entry was stored, so it is
    always re-checked on disk.

    Args:
        job_dir: Job output directory to inspect.
        cache: Optional dict mapping job directory -> completion flag. It is
            updated in place with the result of every disk check.
        use_cache: When false, the cache is not consulted (it is still updated).

    Returns:
        True if the job is known or found to be completed, otherwise False.
        A missing ``job_dir`` is always False.
    """
    import os
    import glob

    if not os.path.exists(job_dir):
        return False

    # Check cache first if available - positive entries only.
    if use_cache and cache is not None and cache.get(job_dir):
        return True

    # Escape the directory so characters such as '[' are matched literally.
    pattern = os.path.join(glob.escape(job_dir), '*.done.txt')
    result = bool(glob.glob(pattern))

    # Update cache if provided
    if cache is not None:
        cache[job_dir] = result

    return result

def is_queries_csv_ready(job_dir, jobname):
    """Return True when ``<job_dir>/<jobname>.csv`` exists and is not empty."""
    import os
    csv_path = os.path.join(job_dir, f"{jobname}.csv")
    try:
        size = os.path.getsize(csv_path)
    except (OSError, ValueError):
        # Missing or unreachable path: treated as "not ready".
        return False
    return size > 0

def build_existing_dirs_mapping(output_dir):
    """Map the name of every subdirectory of ``output_dir`` to its path.

    The scan is best effort: a missing ``output_dir`` gives an empty mapping,
    and any other listing error is reported with a warning while the entries
    collected so far are returned.

    Args:
        output_dir: Directory whose immediate subdirectories are listed.

    Returns:
        Dict ``{directory name: directory path}``.
    """
    import os
    existing_dirs = {}
    try:
        with os.scandir(output_dir) as entries:
            for entry in entries:
                if entry.is_dir():
                    existing_dirs[entry.name] = entry.path
    except FileNotFoundError:
        # Nothing has been written yet.
        pass
    except OSError as err:
        print(f"Warning: could not list {output_dir}: {err}")
    return existing_dirs

# =============================================================================
# Process Input Files
# =============================================================================

import os
import re
from sys import version_info

# "major.minor" of the running interpreter; used later to build conda and
# site-packages paths.
python_version = ".".join(str(part) for part in version_info[:2])
# Amber relaxation is only needed when at least one structure is relaxed.
use_amber = num_relax > 0

# STEP 1: for PDB input, derive one FASTA file per PDB file; FASTA input
# needs no conversion.
if input_type != 'pdb':
    print("=" * 70)
    print("STEP 1: Skipping PDB conversion (input_type='fasta')")
    print("=" * 70 + "\n")
else:
    print("=" * 70)
    print("STEP 1: Converting PDB files to FASTA format")
    print("=" * 70)

    # Biopython is required by build_fasta_from_pdb.
    os.system("pip install -q --no-warn-conflicts biopython")

    # Every *.pdb file below input_dir, in os.walk order.
    pdb_files = [
        os.path.join(root, file)
        for root, _dirs, files in os.walk(input_dir)
        for file in files
        if file.endswith(".pdb")
    ]
    if not pdb_files:
        raise ValueError(f"No PDB files found in the directory: {input_dir}")

    print(f"Found {len(pdb_files)} PDB files in {input_dir}")
    print("Extracting sequences and creating FASTA files...")
    fasta_files_created = build_fasta_from_pdb(pdb_files, input_dir)
    print(f"‚úì Processed {len(pdb_files)} PDB files ({len(fasta_files_created)} FASTA files ready)\n")

# STEP 2: collect every FASTA file below the input directory.
print("=" * 70)
print("STEP 2: Discovering FASTA files")
print("=" * 70)

fasta_files = get_fasta_files(input_dir)
if not fasta_files:
    raise ValueError(f"No FASTA files found in the directory: {input_dir}")
print(f"‚úì Found {len(fasta_files)} FASTA files in {input_dir}\n")

# The output directory must exist before the cache is read or written.
os.makedirs(output_dir, exist_ok=True)

# STEP 3: read each FASTA file and split the jobs into "already completed"
# and "still to run".
print("=" * 70)
print("STEP 3: Processing sequences and checking completion status")
print("=" * 70)

print("Loading completion cache...")
completion_cache = load_completion_cache(output_dir)
print(f"‚úì Cache loaded with {len(completion_cache)} entries")

print("Building existing directories mapping...")
existing_dirs = build_existing_dirs_mapping(output_dir)
print(f"‚úì Found {len(existing_dirs)} existing output directories\n")

sequences_dict = {}      # job name -> query sequence, for jobs still to run
fasta_file_mapping = {}  # job name -> FASTA file the sequence came from
skipped_sequences = []   # job names that already have a .done.txt marker

for fasta_file in fasta_files:
    sequence_data = get_sequence_from_fasta(fasta_file)
    if not sequence_data:
        # Invalid files were already reported by get_sequence_from_fasta.
        continue

    for jobname, sequence in sequence_data.items():
        # Output directory name: whitespace- and punctuation-free job name
        # plus a short hash of the sequence.
        clean_sequence = "".join(sequence.split())
        base_jobname = re.sub(r'\W+', '', "".join(jobname.split()))
        hashed_jobname = add_hash(base_jobname, clean_sequence)

        # Reuse the path of an existing output directory when there is one.
        job_dir = existing_dirs.get(hashed_jobname, os.path.join(output_dir, hashed_jobname))

        if not is_sequence_completed(job_dir, cache=completion_cache, use_cache=True):
            sequences_dict[jobname] = sequence
            fasta_file_mapping[jobname] = fasta_file
            print(f"üìã {jobname} ‚Üí {hashed_jobname} [QUEUED]")
        else:
            print(f"‚è≠Ô∏è  {jobname} ‚Üí {hashed_jobname} [COMPLETED - has .done.txt]")
            skipped_sequences.append(jobname)

# Persist whatever the completion checks added to the cache.
save_completion_cache(output_dir, completion_cache)
print(f"\n‚úì Status: {len(sequences_dict)} sequences queued, {len(skipped_sequences)} already completed\n")

# =============================================================================
# Prepare Batch Jobs
# =============================================================================

# STEP 4: create one output directory and one query CSV per queued job and
# record everything the prediction cell needs in batch_jobs.
print("=" * 70)
print("STEP 4: Preparing batch job directories and query files")
print("=" * 70)

batch_jobs = []
queries_created = 0
queries_skipped = 0

for current_jobname, current_query_sequence in sequences_dict.items():
    clean_query_sequence = "".join(current_query_sequence.split())

    # Same naming scheme as in the completion check: sanitized name + hash.
    base_jobname = re.sub(r'\W+', '', "".join(current_jobname.split()))
    hashed_jobname = add_hash(base_jobname, clean_query_sequence)
    final_jobname = hashed_jobname

    job_dir = os.path.join(output_dir, final_jobname)
    os.makedirs(job_dir, exist_ok=True)

    queries_path = os.path.join(job_dir, f"{final_jobname}.csv")

    if not is_queries_csv_ready(job_dir, final_jobname):
        # Write the two-line query CSV: header plus "<id>,<sequence>".
        with open(queries_path, "w") as text_file:
            text_file.write(f"id,sequence\n{final_jobname},{clean_query_sequence}")
        print(f"  ‚úì Created {final_jobname}.csv (length: {len(clean_query_sequence.replace(':', ''))} aa)")
        queries_created += 1
    else:
        # A non-empty CSV from an earlier run is kept as is.
        print(f"  ‚è≠Ô∏è  {final_jobname}.csv (already exists)")
        queries_skipped += 1

    # Basename of the source FASTA file; it is the lookup key for the
    # initial-guess mapping.
    fasta_file_path = fasta_file_mapping.get(current_jobname, "")
    if fasta_file_path:
        fasta_basename = os.path.splitext(os.path.basename(fasta_file_path))[0]
    else:
        fasta_basename = ""

    batch_jobs.append(dict(
        jobname=final_jobname,
        original_jobname=current_jobname,
        fasta_basename=fasta_basename,
        query_sequence=clean_query_sequence,
        queries_path=queries_path,
        job_dir=job_dir,
    ))

print(f"\n‚úì Query files: {queries_created} created, {queries_skipped} already existed")
print(f"‚úì Total batch jobs ready: {len(batch_jobs)}\n")

# STEP 5: convert the form values into the types the prediction call expects.
print("=" * 70)
print("STEP 5: Processing parameters")
print("=" * 70)

# "auto" is represented as None; every other choice is a numeric string.
num_recycles = None if num_recycles == "auto" else int(num_recycles)
recycle_early_stop_tolerance = None if recycle_early_stop_tolerance == "auto" else float(recycle_early_stop_tolerance)
if max_msa == "auto":
    max_msa = None

# Template settings: only the pdb100 search is wired up in this notebook,
# so no custom template path is ever set.
custom_template_path = None
use_templates = template_mode == "pdb100"
if template_mode == "custom":
    # The form offers "custom", but no upload step exists here; say so
    # instead of silently running without templates.
    print('  Warning: template_mode "custom" is not supported in this batch notebook; running without templates')

print(f"  Model type: {model_type}")
print(f"  MSA mode: {msa_mode}")
print(f"  Num recycles: {num_recycles if num_recycles is not None else 'auto'}")
print(f"  Num seeds: {num_seeds}")
print(f"  Use templates: {use_templates}")
print(f"  Num relax: {num_relax}\n")

# STEP 6: pair every FASTA file with a structure file from initial_guess_dir.
print("=" * 70)
print("STEP 6: Building initial guess mapping")
print("=" * 70)

# FASTA basename -> path of the structure used as initial guess.
initial_guess_mapping = {}
if use_initial_guess and not initial_guess_dir:
    print("‚ö†Ô∏è  Warning: use_initial_guess enabled but no initial_guess_dir provided")
    use_initial_guess = False
elif use_initial_guess and not os.path.isdir(initial_guess_dir):
    # Also covers a path that exists but is not a directory.
    print(f"‚ö†Ô∏è  Warning: Initial guess directory not found: {initial_guess_dir}")
    use_initial_guess = False
elif use_initial_guess:
    # Collect all PDB/CIF files by basename. Names are sorted so the result
    # does not depend on directory listing order; if both X.cif and X.pdb
    # exist, X.pdb is the one kept.
    pdb_files = {}
    for file in sorted(os.listdir(initial_guess_dir)):
        if file.endswith(('.pdb', '.cif')):
            pdb_files[os.path.splitext(file)[0]] = os.path.join(initial_guess_dir, file)

    print(f"Found {len(pdb_files)} PDB/CIF files in initial guess directory")

    exact_matches = 0
    prefix_matches = 0
    no_matches = 0

    for fasta_file in fasta_files:
        fasta_basename = os.path.splitext(os.path.basename(fasta_file))[0]

        # First try exact match
        if fasta_basename in pdb_files:
            initial_guess_mapping[fasta_basename] = pdb_files[fasta_basename]
            print(f"  ‚úì Exact match: {fasta_basename} ‚Üí {os.path.basename(pdb_files[fasta_basename])}")
            exact_matches += 1
            continue

        # Otherwise use the structure whose name is the LONGEST prefix of
        # the FASTA name, so "design_10_s1" picks "design_10" even when
        # "design_1" is also present.
        prefix_candidates = [name for name in pdb_files if fasta_basename.startswith(name)]
        if prefix_candidates:
            pdb_path = pdb_files[max(prefix_candidates, key=len)]
            initial_guess_mapping[fasta_basename] = pdb_path
            print(f"  ‚úì Prefix match: {fasta_basename} ‚Üí {os.path.basename(pdb_path)}")
            prefix_matches += 1
            continue

        print(f"  ‚ö†Ô∏è  No match: {fasta_basename}")
        no_matches += 1

    print("\n‚úì Initial guess summary:")
    print(f"  - Exact matches: {exact_matches}")
    print(f"  - Prefix matches: {prefix_matches}")
    print(f"  - No matches: {no_matches}")
    print(f"  - Total mapped: {len(initial_guess_mapping)}/{len(fasta_files)}\n")
else:
    print("Initial guess disabled\n")

print("=" * 70)
print("‚úì CONFIGURATION COMPLETE - Ready to run predictions")
print("=" * 70)
print("\nSummary:")
print(f"  - Total FASTA files: {len(fasta_files)}")
print(f"  - Sequences to process: {len(batch_jobs)}")
print(f"  - Sequences already completed: {len(skipped_sequences)}")
print(f"  - Initial guess enabled: {use_initial_guess}")
if use_initial_guess:
    print(f"  - Initial guess mappings: {len(initial_guess_mapping)}")
print()

In [None]:
#@title Install All Dependencies
#@markdown This cell installs ColabFold, Amber (if needed), and HHsuite (if needed)

%%time
import os

# Snapshot of the settings computed in the configuration cell.
USE_AMBER = use_amber
USE_TEMPLATES = use_templates
PYTHON_VERSION = python_version

# Each install step creates an empty marker file (COLABFOLD_READY, CONDA_READY,
# HH_READY, AMBER_READY) in the working directory so that re-running the cell
# skips work that was already done.
# NOTE(review): os.system exit statuses are ignored, so a marker is written
# even when the install command failed - confirm this is acceptable.

# Install ColabFold
if not os.path.isfile("COLABFOLD_READY"):
    print("Installing ColabFold...")
    os.system("pip install -q --no-warn-conflicts 'colabfold[alphafold-minus-jax] @ git+https://github.com/sokrypton/ColabFold'")
    # NOTE(review): this branch runs when TPU_NAME is set, yet it installs a
    # CUDA build of jax - confirm the intent.
    if os.environ.get('TPU_NAME', False) != False:
        os.system("pip uninstall -y jax jaxlib")
        os.system("pip install --no-warn-conflicts --upgrade dm-haiku==0.0.10 'jax[cuda12_pip]'==0.3.25 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html")
    # Symlink the installed packages into the working directory.
    os.system("ln -s /usr/local/lib/python3.*/dist-packages/colabfold colabfold")
    os.system("ln -s /usr/local/lib/python3.*/dist-packages/alphafold alphafold")
    # Fix TF crash
    os.system("rm -f /usr/local/lib/python3.*/dist-packages/tensorflow/core/kernels/libtfkernel_sobol_op.so")
    os.system("touch COLABFOLD_READY")
    print("‚úì ColabFold installed")

# Install Conda if needed
# (Miniforge is only required for the mamba installs below.)
if USE_AMBER or USE_TEMPLATES:
    if not os.path.isfile("CONDA_READY"):
        print("Installing Conda...")
        os.system("wget -qnc https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh")
        os.system("bash Miniforge3-Linux-x86_64.sh -bfp /usr/local")
        os.system("mamba config --set auto_update_conda false")
        os.system("touch CONDA_READY")
        print("‚úì Conda installed")

# Install HHsuite and/or Amber
# When both are still missing they are installed with a single mamba call;
# otherwise each missing one is installed on its own.
if USE_TEMPLATES and not os.path.isfile("HH_READY") and USE_AMBER and not os.path.isfile("AMBER_READY"):
    print("Installing HHsuite and Amber...")
    os.system(f"mamba install -y -c conda-forge -c bioconda kalign2=2.04 hhsuite=3.3.0 openmm=8.2.0 python='{PYTHON_VERSION}' pdbfixer")
    os.system("touch HH_READY")
    os.system("touch AMBER_READY")
    print("‚úì HHsuite and Amber installed")
else:
    if USE_TEMPLATES and not os.path.isfile("HH_READY"):
        print("Installing HHsuite...")
        os.system(f"mamba install -y -c conda-forge -c bioconda kalign2=2.04 hhsuite=3.3.0 python='{PYTHON_VERSION}'")
        os.system("touch HH_READY")
        print("‚úì HHsuite installed")
    if USE_AMBER and not os.path.isfile("AMBER_READY"):
        print("Installing Amber...")
        os.system(f"mamba install -y -c conda-forge openmm=8.2.0 python='{PYTHON_VERSION}' pdbfixer")
        os.system("touch AMBER_READY")
        print("‚úì Amber installed")

# NOTE(review): printed unconditionally; no install result is checked above.
print("\n‚úì All dependencies installed successfully")

In [None]:
#@title Run Prediction
display_images = False #@param {type:"boolean"}

import sys
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from Bio import BiopythonDeprecationWarning
warnings.simplefilter(action='ignore', category=BiopythonDeprecationWarning)
from pathlib import Path
from colabfold.download import download_alphafold_params, default_data_dir
from colabfold.utils import setup_logging
from colabfold.batch import get_queries, run, set_model_type
from colabfold.plot import plot_msa_v2

import os
import numpy as np

# Check for K80 GPU: count the nvidia-smi output lines that mention it.
try:
    # The context manager closes the pipe once the output has been read.
    with os.popen('nvidia-smi | grep "Tesla K80" | wc -l') as _smi_pipe:
        K80_chk = _smi_pipe.read()
except (OSError, ValueError):
    K80_chk = "0"

# Parse the count instead of searching for the character "1", so any
# positive number of matching lines is detected.
try:
    k80_count = int(K80_chk.strip() or 0)
except ValueError:
    k80_count = 0

if k80_count > 0:
    print("WARNING: found GPU Tesla K80: limited to total length < 1000")
    # Drop the memory-related settings when running on this GPU.
    if "TF_FORCE_UNIFIED_MEMORY" in os.environ:
        del os.environ["TF_FORCE_UNIFIED_MEMORY"]
    if "XLA_PYTHON_CLIENT_MEM_FRACTION" in os.environ:
        del os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"]

from colabfold.colabfold import plot_protein
from pathlib import Path
import matplotlib.pyplot as plt

# Add pdbfixer to path if using amber: put the site-packages directory under
# /usr/local for this Python version first on sys.path, unless it is already
# listed.
if use_amber:
    _amber_site_packages = f"/usr/local/lib/python{python_version}/site-packages/"
    if _amber_site_packages not in sys.path:
        sys.path.insert(0, _amber_site_packages)

def input_features_callback(input_features):
    """Plot the MSA for ``input_features`` when ``display_images`` is enabled.

    Passed to ``run`` as ``input_features_callback``.
    """
    if not display_images:
        return
    plot_msa_v2(input_features)
    plt.show()
    plt.close()

def prediction_callback(protein_obj, length, prediction_result, input_features, mode):
    """Show the predicted structure of an unrelaxed model when ``display_images`` is enabled.

    Passed to ``run`` as ``prediction_callback``. ``mode`` is unpacked as
    ``(model_name, relaxed)``; nothing is drawn for relaxed models.
    """
    _model_name, is_relaxed = mode
    if is_relaxed or not display_images:
        return
    plot_protein(protein_obj, Ls=length, dpi=150)
    plt.show()
    plt.close()

# Batch processing: run the prediction for every prepared job.
#
# The model type is resolved per job from the type selected in the form and
# the configured value is never overwritten. With model_type == "auto" a
# batch mixing monomers and complexes therefore gets the right model for
# each job instead of reusing the type resolved for the first one.
# Parameters are fetched the first time each resolved type is needed.
import subprocess

requested_model_type = model_type
downloaded_model_types = set()

print(f"Starting batch processing for {len(batch_jobs)} sequences...")
batch_results = []

for i, job in enumerate(batch_jobs):
    current_jobname = job['jobname']
    current_query_sequence = job['query_sequence']
    current_queries_path = job['queries_path']
    current_job_dir = job['job_dir']
    fasta_basename = job.get('fasta_basename', '')

    print(f"\n=== Processing {i+1}/{len(batch_jobs)}: {current_jobname} ===")

    # Setup MSA file path based on mode
    if "mmseqs2" in msa_mode:
        a3m_file = os.path.join(current_job_dir, f"{current_jobname}.a3m")
    else:  # single_sequence mode
        a3m_file = os.path.join(current_job_dir, f"{current_jobname}.single_sequence.a3m")
        with open(a3m_file, "w") as text_file:
            text_file.write(f">1\n{current_query_sequence}")

    # Setup logging: one log file per job directory.
    log_filename = os.path.join(current_job_dir, "log.txt")
    setup_logging(Path(log_filename))

    # Get queries and resolve the model type for THIS job.
    queries, is_complex = get_queries(current_queries_path)
    job_model_type = set_model_type(is_complex, requested_model_type)

    # Download AlphaFold parameters once per model type.
    if job_model_type not in downloaded_model_types:
        download_alphafold_params(job_model_type, Path("."))
        downloaded_model_types.add(job_model_type)

    # The cluster profile is disabled only for multimer models with an
    # explicit max_msa setting.
    use_cluster_profile = not ("multimer" in job_model_type and max_msa is not None)

    # Setup initial guess: structure matched to this job's FASTA file, if any.
    current_initial_guess = None
    if use_initial_guess and fasta_basename:
        if fasta_basename in initial_guess_mapping:
            current_initial_guess = initial_guess_mapping[fasta_basename]
            print(f"Using initial guess from: {os.path.basename(current_initial_guess)}")
        else:
            print(f"Warning: No initial guess PDB found for {fasta_basename}")

    # Run prediction. One failing job is recorded and must not stop the batch.
    try:
        results = run(
            queries=queries,
            result_dir=current_job_dir,
            use_templates=use_templates,
            custom_template_path=custom_template_path,
            num_relax=num_relax,
            msa_mode=msa_mode,
            model_type=job_model_type,
            num_models=5,
            num_recycles=num_recycles,
            relax_max_iterations=relax_max_iterations,
            recycle_early_stop_tolerance=recycle_early_stop_tolerance,
            num_seeds=num_seeds,
            use_dropout=use_dropout,
            model_order=[1,2,3,4,5],
            initial_guess=current_initial_guess,
            is_complex=is_complex,
            data_dir=Path("."),
            keep_existing_results=False,
            rank_by="auto",
            pair_mode=pair_mode,
            pairing_strategy=pairing_strategy,
            stop_at_score=float(100),
            prediction_callback=prediction_callback,
            dpi=dpi,
            zip_results=False,
            save_all=save_all,
            max_msa=max_msa,
            use_cluster_profile=use_cluster_profile,
            input_features_callback=input_features_callback,
            save_recycles=save_recycles,
            user_agent="colabfold/google-colab-main",
            calc_extra_ptm=calc_extra_ptm,
        )

        # Create result zip for this job. The command is passed as an
        # argument list with cwd, so directories containing spaces or shell
        # metacharacters work; a zip failure does not fail the job.
        zip_name = f"{current_jobname}.result.zip"
        results_zip = os.path.join(current_job_dir, zip_name)
        try:
            zip_ok = subprocess.run(
                ["zip", "-r", zip_name, "."],
                cwd=current_job_dir,
                stdout=subprocess.DEVNULL,
            ).returncode == 0
        except OSError:
            zip_ok = False
        if not zip_ok:
            print(f"Warning: could not create {zip_name}")
            results_zip = None

        batch_results.append({
            'jobname': current_jobname,
            'results': results,
            'zip_path': results_zip,
            'job_dir': current_job_dir
        })

        print(f"‚úì Completed {current_jobname}")

    except Exception as e:
        print(f"‚úó Failed {current_jobname}: {str(e)}")
        batch_results.append({
            'jobname': current_jobname,
            'results': None,
            'zip_path': None,
            'job_dir': current_job_dir,
            'error': str(e)
        })

# A job counts as failed exactly when an error was recorded for it.
failed_count = sum(1 for r in batch_results if 'error' in r)
successful_count = len(batch_results) - failed_count
print(f"\nBatch processing completed: {successful_count} successful, {failed_count} failed")