In [1]:
import requests

def fetch_fasta_files_by_ids(pdb_ids, timeout=10):
    """Download the FASTA record for each given PDB entry ID from RCSB.

    Args:
        pdb_ids: Iterable of PDB entry IDs (e.g. ``['7nb4']``). A single ID
            passed as a plain string is treated as a one-element list.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        dict mapping every successfully fetched ID (exactly as supplied) to
        ``{'fasta': <response text>, 'link': <RCSB structure page URL>}``.
        IDs whose request fails are reported with ``print`` and left out.
    """
    base_url = "https://www.rcsb.org/fasta/entry/"
    pdb_base_link = "https://www.rcsb.org/structure/"
    fasta_files_with_links = {}

    # A bare string would otherwise be iterated one character at a time.
    if isinstance(pdb_ids, str):
        pdb_ids = [pdb_ids]

    for pdb_id in pdb_ids:
        try:
            # Without a timeout a stalled connection blocks the cell forever.
            fasta_response = requests.get(f"{base_url}{pdb_id}", timeout=timeout)
        except requests.RequestException as exc:
            # Treat network errors like any other failed ID: report and go on,
            # so one bad request does not discard the rest of the batch.
            print(f"Failed to fetch FASTA for ID: {pdb_id} ({exc})")
            continue

        if fasta_response.status_code == 200:
            fasta_files_with_links[pdb_id] = {
                'fasta': fasta_response.text,
                'link': f"{pdb_base_link}{pdb_id}"
            }
        else:
            print(f"Failed to fetch FASTA for ID: {pdb_id}")

    return fasta_files_with_links


In [3]:
# The function returns a dict keyed by PDB ID, so look up the ID rather than a position.
fetch_fasta_files_by_ids(['7nb4'])['7nb4']['fasta']

'>7NB4_1|Chain A|Induced myeloid leukemia cell differentiation protein Mcl-1|Homo sapiens (9606)\nMHHHHHHLVPRGSEDELYRQSLEIISRYLREQATGAKDTKPMGRSGATSRKALETLRRVGDGVQRNHETAFQGMLRKLDIKNEDDVKSLSRVMIHVFSDGVTNWGRIVTLISFGAFVAKHLKTINQESCIEPLAESITDVLVRTKRDWLVKQRGWDGFVEFFHVEDLEGG\n'

In [15]:
def search_pdb_by_string(search_string, rows=10, timeout=10):
    """Run a full-text search against the RCSB PDB search service.

    Args:
        search_string: Text to search for.
        rows: Maximum number of entry IDs to return.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        list of matching entry identifiers, at most ``rows`` long, in the
        order the service returned them. An empty list is returned when
        nothing matches, when ``rows`` is not positive, or when the request
        fails (the failure is reported with ``print``).
    """
    if rows <= 0:
        # Nothing was asked for; also avoids a negative slice that would drop
        # results from the end instead of limiting them.
        return []

    search_url = "https://search.rcsb.org/rcsbsearch/v2/query"
    query = {
        "query": {
            "type": "terminal",
            "service": "full_text",
            "parameters": {
                "value": search_string
            }
        },
        # Ask the service for `rows` results; otherwise it returns only its
        # default page size and a larger `rows` would be silently ignored.
        "request_options": {
            "paginate": {
                "start": 0,
                "rows": rows
            }
        },
        "return_type": "entry"
    }

    try:
        response = requests.post(search_url, json=query, timeout=timeout)
    except requests.RequestException as exc:
        print("Search failed.", exc)
        return []

    if response.status_code == 204:
        # The service answers 204 (no body) when the query matched nothing;
        # that is an empty result, not a failure.
        return []
    if response.status_code != 200:
        print("Search failed.", response.text)
        return []

    matches = response.json().get('result_set', [])
    return [match['identifier'] for match in matches[:rows]]

In [16]:
# Full-text search; rows is left at its default, so at most 10 entry IDs are shown.
search_pdb_by_string(search_string='rhodopsin')

['7MT8',
 '7MT9',
 '6SQG',
 '1H2S',
 '3AM6',
 '1H68',
 '7MTA',
 '7MTB',
 '2ZIY',
 '6I9K']