# In [None]:
# Simple script to pull predicted structures from the ESM atlas based on a structure search JSON

import wget
import os
import json

def download_pdb_files(json_path, download_folder, min_db_len=200, max_db_len=500, min_eval=1e-10):
    """Download ESM Atlas predicted structures for selected hits in a search-result JSON.

    The JSON file must contain a list of items. Each item may carry an
    "alignments" list whose entries provide "target", "dbLen" and "eval".
    An alignment is downloaded when its "dbLen" lies within
    [min_db_len, max_db_len], its "eval" is at most ``min_eval`` and its
    "target" ends with ".pdb.gz". Every alignment examined is reported on
    stdout, together with the reason it was skipped if it was not downloaded.

    Args:
        json_path: Path to the search-result JSON file.
        download_folder: Directory that receives the downloaded files; it is
            created if it does not exist yet.
        min_db_len: Smallest accepted "dbLen" (inclusive).
        max_db_len: Largest accepted "dbLen" (inclusive).
        min_eval: Largest accepted E-value (inclusive). Despite its name this
            is an upper bound: hits with ``eval <= min_eval`` are kept.

    Raises:
        OSError: If the JSON file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.

    Any error raised by ``wget.download`` is not caught and propagates to the
    caller. The function relies on the module-level ``wget`` import.
    """
    # Ensure the download folder exists
    os.makedirs(download_folder, exist_ok=True)

    # Load JSON data (JSON is UTF-8 by specification, so do not depend on the
    # platform's default encoding)
    with open(json_path, 'r', encoding="utf-8") as file:
        data = json.load(file)

    # Base URL for downloading structures
    base_url = "https://api.esmatlas.com/fetchPredictedStructure/"

    # Iterate over each item to extract .pdb filenames and form URLs
    for item in data:
        # "or []" also covers an explicit JSON null, which .get() would pass through
        for alignment in item.get("alignments") or []:
            target = alignment.get("target", "")
            db_len = alignment.get("dbLen", 0)  # Get dbLen, default to 0 if missing
            eval_value = alignment.get("eval", 1)  # Get E-value, default to 1 if missing

            # Print statement to verify dbLen and eval values (for debugging)
            print(f"Checking {target}: dbLen = {db_len}, E-value = {eval_value}")

            # Filter by dbLen range and E-value threshold
            if not (min_db_len <= db_len <= max_db_len and eval_value <= min_eval):
                print(f"Skipping {target} due to dbLen = {db_len} or E-value = {eval_value} (outside thresholds)")
                continue

            # Only gzipped PDB targets are fetched. The isinstance check keeps a
            # null/non-string target from raising on .endswith().
            if not (isinstance(target, str) and target.endswith(".pdb.gz")):
                print(f"Skipping {target}: target name does not end with .pdb.gz")
                continue

            # Drop exactly the trailing ".gz". str.rstrip(".gz") would strip any
            # run of the characters '.', 'g' and 'z' instead of the suffix.
            pdb_id = target[:-len(".gz")]
            download_url = f"{base_url}{pdb_id}"
            output_path = os.path.join(download_folder, pdb_id)

            print(f"Downloading {pdb_id} with dbLen = {db_len} and E-value = {eval_value}...")
            wget.download(download_url, out=output_path)  # Download using Python wget
            print(f"\nDownloaded {pdb_id} with dbLen = {db_len} and E-value = {eval_value}")

# Specify paths (edit these as needed)
json_path = r"path_to_input_file.json"
download_folder = r"path_to_output_folder"

# Specify thresholds for dbLen and E-value.
# NOTE: min_eval is the largest E-value that is still accepted (hits with
# eval <= min_eval are kept), so a smaller value means a stricter filter.
min_db_len = 0
max_db_len = 1000000
min_eval = 1

# Run the download function only when this file is executed directly (as a
# script or notebook cell), so that importing it does not start any file or
# network I/O.
if __name__ == "__main__":
    download_pdb_files(json_path, download_folder, min_db_len, max_db_len, min_eval)
    print("\nDownload completed!")
