In [1]:
import requests
import json
import pandas as pd

# Set gene_name to the gene symbol of interest (it can be looked up on UniProt).
gene_name = "OPRM1"

# RCSB PDB search API endpoint (no surrounding whitespace in the URL).
url = "https://search.rcsb.org/rcsbsearch/v2/query"

# Full-text search for the gene name, restricted to experimental entries and
# sorted by descending score. Only the first page of 25 hits is requested, so
# genes with more matching entries are truncated to the 25 best-scoring ones.
query = {
    "query": {
        "type": "terminal",
        "label": "full_text",
        "service": "full_text",
        "parameters": {
            "value": gene_name,
        },
    },
    "return_type": "entry",
    "request_options": {
        "paginate": {
            "start": 0,
            "rows": 25,
        },
        "results_content_type": [
            "experimental",
        ],
        "sort": [
            {
                "sort_by": "score",
                "direction": "desc",
            },
        ],
        "scoring_strategy": "combined",
    },
}

# Make the request. The timeout keeps a stalled connection from hanging the
# run, and HTTP error statuses are raised instead of being parsed as results.
response = requests.post(url, json=query, timeout=30)
response.raise_for_status()

# A search without any hits comes back with an empty body (HTTP 204), which
# response.json() cannot parse; treat that case as "no results".
if response.status_code == 204 or not response.content:
    data = {}
else:
    data = response.json()

# Collect the PDB IDs of the hits (empty list when nothing matched).
pdb_ids = [item["identifier"] for item in data.get("result_set", [])]

print(pdb_ids)

# RCSB PDB data API (GraphQL) endpoint.
url = 'https://data.rcsb.org/graphql'

# GraphQL query asking for release date, primary citation identifiers and
# resolution of every entry in pdb_ids. json.dumps renders the Python list as
# a double-quoted string list, which is also valid GraphQL list syntax.
query = '''
{
  entries(entry_ids: %s) {
    rcsb_id
    rcsb_accession_info {
      initial_release_date
    }
    rcsb_primary_citation {
      pdbx_database_id_PubMed
      pdbx_database_id_DOI
    }
    rcsb_entry_info {
      resolution_combined
    }
  }
}
''' % json.dumps(pdb_ids)

if pdb_ids:
    # Make the request; fail fast on a stalled connection or an HTTP error.
    response = requests.post(url, json={'query': query}, timeout=30)
    response.raise_for_status()

    # Extract the data from the response
    data = response.json()

    # A GraphQL failure may carry an "errors" list and no usable "data";
    # surface that explicitly instead of failing on a missing key below.
    entries = (data.get('data') or {}).get('entries')
    if entries is None:
        raise RuntimeError(
            f"RCSB GraphQL query returned no entries: {data.get('errors')}"
        )
else:
    # Nothing to look up: skip the request and continue with an empty result.
    data = {'data': {'entries': []}}
    entries = []

# Column order of the resulting table; passed explicitly so that an empty
# result still produces a DataFrame (and CSV header) with these columns.
columns = ['ID', 'Initial Release Date', 'PubMed ID', 'DOI', 'Resolution']

# Initialize an empty list to hold the entry data
entry_data = []

for entry in entries:
    if not entry:
        continue
    # Any of these sub-objects can be null in the response (e.g. an entry
    # without a primary citation); fall back to {} so the field becomes None
    # instead of raising TypeError on subscripting None.
    accession = entry.get('rcsb_accession_info') or {}
    citation = entry.get('rcsb_primary_citation') or {}
    info = entry.get('rcsb_entry_info') or {}
    entry_data.append({
        'ID': entry.get('rcsb_id'),
        'Initial Release Date': accession.get('initial_release_date'),
        'PubMed ID': citation.get('pdbx_database_id_PubMed'),
        'DOI': citation.get('pdbx_database_id_DOI'),
        # Kept as returned by the API (a list of values, or None).
        'Resolution': info.get('resolution_combined'),
    })

# Create a DataFrame from the entry data
df = pd.DataFrame(entry_data, columns=columns)

print(df.head())

# Write the DataFrame to a CSV file named after the searched gene, so results
# for different genes do not overwrite each other ("pdb_data_oprm1.csv" for
# the default gene_name "OPRM1").
df.to_csv(f'pdb_data_{gene_name.lower()}.csv', index=False)

['4DKL', '7UL4', '5C1M', '7U2K', '6DDF', '7SBF', '7SCG', '7T2G', '7T2H', '7U2L', '8EFB', '8EFL', '8EFO', '8EFQ', '8F7R', '6DDE', '8EF5', '8EF6', '8F7Q']
     ID  Initial Release Date  PubMed ID                         DOI   
0  4DKL  2012-03-21T00:00:00Z   22437502         10.1038/nature10954  \
1  7UL4  2022-06-29T00:00:00Z   36396979  10.1038/s41594-022-00859-8   
2  5C1M  2015-08-05T00:00:00Z   26245379         10.1038/nature14886   
3  7U2K  2022-12-07T00:00:00Z   36450356  10.1038/s41586-022-05588-y   
4  6DDF  2018-06-13T00:00:00Z   29899455   10.1038/s41586-018-0219-7   

  Resolution  
0      [2.8]  
1      [2.8]  
2     [2.07]  
3      [3.3]  
4      [3.5]  
