# extract PDB resolution

# In [None]:
import pandas as pd
import requests
import time

def get_pdb_resolution(pdb_id, timeout=10):
    """Query the RCSB API for the resolution of a given PDB ID.

    Args:
        pdb_id: PDB entry identifier, interpolated into the entry URL.
        timeout: Seconds to wait for the server before giving up, so that a
            single stalled connection cannot hang a long batch run.

    Returns:
        The first element of ``rcsb_entry_info.resolution_combined`` from the
        JSON response, or ``None`` when the ID is blank or not a string, the
        request fails, or the response carries no resolution value.
    """
    # A blank or non-string ID can never resolve to an entry; skip the request.
    if not isinstance(pdb_id, str) or not pdb_id.strip():
        print(f"Skipping invalid PDB ID: {pdb_id!r}")
        return None

    url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id.strip()}"
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException: connection, timeout and HTTP status errors.
        # ValueError: the response body was not valid JSON.
        print(f"Error fetching resolution for {pdb_id}: {e}")
        return None

    if not isinstance(data, dict):
        return None

    # Either key may be missing or explicitly null; `or` normalises both
    # cases so a missing resolution is not reported as a fetch error.
    entry_info = data.get("rcsb_entry_info") or {}
    combined = entry_info.get("resolution_combined") or []
    return combined[0] if combined else None

def extract_pdb_id(description):
    """Extract the PDB ID from a description string.

    The ID is taken to be the first whitespace-delimited token of
    *description*, lower-cased. The token's length is not validated.

    Args:
        description: Text whose first token is the PDB ID.

    Returns:
        The lower-cased first token, or ``None`` when *description* is not a
        string (for example the NaN pandas produces for an empty CSV cell)
        or contains no non-whitespace characters.
    """
    if not isinstance(description, str):
        return None
    # split() with no arguments already discards leading/trailing whitespace.
    tokens = description.split()
    return tokens[0].lower() if tokens else None

def main(input_csv, output_csv, delay=0.2):
    """Add a ``resolution`` column to a CSV of PDB entries.

    Reads *input_csv*, derives a ``pdb_id`` column from its ``description``
    column via ``extract_pdb_id``, looks up every ID with
    ``get_pdb_resolution`` and writes the augmented table to *output_csv*.

    Args:
        input_csv: Path of the CSV to read; it must contain a
            ``description`` column.
        output_csv: Path the augmented CSV is written to (index omitted).
        delay: Seconds to pause after each API request.
    """
    # Load the CSV
    df = pd.read_csv(input_csv)

    # Extract PDB IDs from the 'description' column
    df["pdb_id"] = df["description"].apply(extract_pdb_id)

    # Fetch the resolution once per distinct PDB ID; repeated IDs reuse the
    # earlier answer instead of issuing (and waiting for) another request.
    fetched = {}
    resolutions = []
    for pdb_id in df["pdb_id"]:
        if pdb_id not in fetched:
            fetched[pdb_id] = get_pdb_resolution(pdb_id)
            time.sleep(delay)  # polite delay to avoid hammering the API
        resolutions.append(fetched[pdb_id])

    # Add resolution column
    df["resolution"] = resolutions

    # Save the updated CSV
    df.to_csv(output_csv, index=False)
    print(f"Done! Output written to {output_csv}")


if __name__ == "__main__":
    # The dataset location lives here, at the call site, so main() stays
    # reusable with any pair of paths.
    data_dir = (
        "/Users/qingshuzhao/Library/CloudStorage/"
        "OneDrive-SharedLibraries-UWM/Arjun Saha - qingshu_project/"
        "Paper/SI/validation_set"
    )
    input_csv_path = f"{data_dir}/PDBind_testset.csv"
    output_csv_path = f"{data_dir}/PDBind_testset_resolution.csv"
    main(input_csv_path, output_csv_path)