In [None]:
# Ch08-3 - Using PDB & UniProt

In [None]:
# Import libraries
import requests
import sys
import json

In [None]:
# 1. Fetch protein data from UniProt given an Accession
def fetch_protein_data_json(accession):
    """
    Fetch protein data from the EBI Proteins API in JSON format.

    The accession is passed through ``params`` so that ``requests``
    URL-encodes it instead of it being spliced verbatim into the query string.

    Parameters:
        accession (str): Protein accession number (e.g., P21802).

    Returns:
        The decoded JSON payload exactly as returned by the API.
        NOTE(review): this hits the ``/proteins`` search endpoint, which
        presumably answers with a list of entries rather than a single
        dict -- confirm against the API docs before indexing into it.

    Raises:
        SystemExit: With exit status 1 if the request fails, the server
            answers with an HTTP error status, or the body is not valid JSON.
    """
    request_url = "https://www.ebi.ac.uk/proteins/api/proteins"
    query = {"offset": 0, "size": 100, "accession": accession}
    headers = {"Accept": "application/json"}  # Request JSON format

    try:
        print(f"Fetching data for accession: {accession}")
        response = requests.get(request_url, params=query, headers=headers, timeout=30)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.json()  # Parse the JSON body into Python objects
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError is caught as well because, depending on the installed
        # requests version, a malformed JSON body may surface as a plain
        # ValueError rather than a RequestException subclass.
        print(f"Error fetching protein data: {e}")
        sys.exit(1)

In [None]:
# Function to save the JSON data to a file
def save_json_to_file(data, filename):
    """
    Serialize ``data`` as JSON (four-space indent) and write it to ``filename``.

    Parameters:
        data: Any JSON-serializable object to write out.
        filename (str): Path of the file to create or overwrite.

    Returns:
        None

    Raises:
        SystemExit: With exit status 1 if the file cannot be opened or written.
    """
    try:
        with open(filename, "w") as out_file:
            json.dump(data, out_file, indent=4)
        print(f"Protein data saved to {filename}")
    except OSError as write_error:  # IOError is an alias of OSError in Python 3
        print(f"Error saving data to file: {write_error}")
        sys.exit(1)

In [None]:
# Execute functions for accession number of interest
def main():
    """
    Run the UniProt example end to end.

    Fetches the entry for the example accession, prints the parsed response,
    and writes it to ``protein_data.json`` in the current working directory.
    """
    example_accession = "P21802"  # Example accession
    json_path = "protein_data.json"  # Destination for the JSON response

    fetched = fetch_protein_data_json(example_accession)

    # Show the parsed response as a Python object
    print("Protein Data (JSON):")
    print(fetched)

    save_json_to_file(fetched, json_path)


if __name__ == "__main__":
    main()

In [None]:
# Move file to output (create the directory first so this cell also works
# when output/ does not exist yet)
! mkdir -p output && mv protein_data.json output/

In [None]:
# 2. Query PDB

In [None]:
# Import Libraries
import os
import requests
from Bio import PDB

In [None]:
# Download from PDB given an ID
def _save_url_to_file(url, file_path, timeout=30):
    """
    Download ``url`` and write the raw response body to ``file_path``.

    :param url: URL to fetch with an HTTP GET
    :param file_path: Path of the file to create or overwrite
    :param timeout: Seconds to wait for the server before giving up
    :return: True if the server answered with HTTP 200 and the file was
        written; False on any other status or on a network-level error
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Request to {url} failed: {e}")
        return False

    if response.status_code != 200:
        return False

    # Write the bytes exactly as received: going through response.text and a
    # text-mode file would re-encode with the platform default encoding and
    # translate newlines on Windows.
    with open(file_path, "wb") as file:
        file.write(response.content)
    return True


def download_pdb(pdb_id, output_dir="output", timeout=30):
    """
    Downloads a PDB file and associated metadata from the Protein Data Bank.

    Each download is attempted independently: a failure is reported on
    stdout and does not prevent the other download from being tried.

    :param pdb_id: The 4-character PDB ID (e.g., '1A8M'); it is lowercased
        before being used in URLs and file names
    :param output_dir: Directory where files will be saved (created if missing)
    :param timeout: Seconds to wait for each HTTP request before giving up
    :return: None
    """
    pdb_id = pdb_id.lower()  # Ensure the PDB ID is lowercase
    base_url = "https://files.rcsb.org/download"
    metadata_url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"
    pdb_url = f"{base_url}/{pdb_id}.pdb"

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Download PDB file
    pdb_file_path = os.path.join(output_dir, f"{pdb_id}.pdb")
    if _save_url_to_file(pdb_url, pdb_file_path, timeout=timeout):
        print(f"PDB file saved at: {pdb_file_path}")
    else:
        print(f"Failed to download PDB file for {pdb_id}.")

    # Download metadata
    metadata_file_path = os.path.join(output_dir, f"{pdb_id}_metadata.json")
    if _save_url_to_file(metadata_url, metadata_file_path, timeout=timeout):
        print(f"Metadata saved at: {metadata_file_path}")
    else:
        print(f"Failed to download metadata for {pdb_id}.")

In [None]:
# Download the example PDB entry and its metadata into the output directory
pdb_id = "1A8M"
download_pdb(pdb_id, output_dir="output")

In [None]:
## End of Notebook ##