In [5]:
import requests
import json
import pandas as pd

# RCSB Search API (v2) endpoint; the query is POSTed as a JSON document.
url = "https://search.rcsb.org/rcsbsearch/v2/query"


def text_node(attribute, operator, value, **extra):
    """Build one terminal ``text``-service node of a search query.

    Args:
        attribute: Name of the searchable attribute to test.
        operator: Comparison operator name (e.g. ``"exact_match"``).
        value: Value the attribute is compared against.
        **extra: Additional entries copied verbatim into ``parameters``
            (e.g. ``negation=False``).

    Returns:
        A dict with ``type``, ``service`` and ``parameters`` keys.
    """
    return {
        "type": "terminal",
        "service": "text",
        "parameters": {
            "attribute": attribute,
            "operator": operator,
            "value": value,
            **extra,
        },
    }


def pdb_ids_from_search(payload):
    """Return the ``identifier`` of every hit in a search response payload.

    Args:
        payload: Decoded JSON body of the search response, or a falsy value
            (``None`` / ``{}``) when the response carried no body.

    Returns:
        List of identifiers in the order they appear in ``result_set``;
        empty when the payload is falsy or has no ``result_set`` key.
    """
    return [item["identifier"] for item in (payload or {}).get("result_set", [])]


# Define the query: all of the first three conditions must hold, plus at least
# one of the four annotation types listed in the nested "or" group.
query = {
    "query": {
        "type": "group",
        "logical_operator": "and",
        "nodes": [
            text_node(
                "rcsb_entity_source_organism.rcsb_gene_name.value",
                "exact_match",
                "CNR1",
            ),
            text_node(
                "rcsb_entity_source_organism.taxonomy_lineage.name",
                "exact_match",
                "Homo sapiens",
            ),
            text_node(
                "rcsb_entry_info.deposited_polymer_monomer_count",
                "greater_or_equal",
                100,
                negation=False,
            ),
            {
                "type": "group",
                "logical_operator": "or",
                "nodes": [
                    text_node(
                        "rcsb_polymer_entity_annotation.type",
                        "exact_match",
                        annotation_type,
                    )
                    for annotation_type in ("PDBTM", "MemProtMD", "OPM", "mpstruc")
                ],
            },
        ],
    },
    "return_type": "entry",
    "request_options": {
        # NOTE: only the first page (at most 100 hits) is requested; any
        # further hits are not fetched.
        "paginate": {
            "start": 0,
            "rows": 100,
        },
        "results_content_type": [
            "experimental",
        ],
        "sort": [
            {
                "sort_by": "score",
                "direction": "desc",
            },
        ],
        "scoring_strategy": "combined",
    },
}

if __name__ == "__main__":
    # Make the request. A timeout keeps a stalled connection from hanging the
    # run forever, and raise_for_status surfaces 4xx/5xx responses as errors
    # instead of letting them fail later as confusing JSON/Key errors.
    response = requests.post(url, json=query, timeout=30)
    response.raise_for_status()

    # Extract the data from the response. A search with no hits is answered
    # with HTTP 204 and an empty body, which response.json() cannot decode.
    if response.status_code == 204 or not response.content:
        data = {}
    else:
        data = response.json()

    # Get the pdb ids
    pdb_ids = pdb_ids_from_search(data)

    print(pdb_ids)

# RCSB Data API GraphQL endpoint.
url = 'https://data.rcsb.org/graphql'

# GraphQL document template; %s is replaced by the list of entry IDs.
ENTRIES_QUERY_TEMPLATE = '''
{
  entries(entry_ids: %s) {
    rcsb_id
    rcsb_accession_info {
      initial_release_date
    }
    rcsb_primary_citation {
      pdbx_database_id_PubMed
      pdbx_database_id_DOI
    }
    rcsb_entry_info {
      resolution_combined
    }
  }
}
'''

# Column order of the resulting table / CSV file.
ENTRY_COLUMNS = ['ID', 'Initial Release Date', 'PubMed ID', 'DOI', 'Resolution']


def build_entries_query(entry_ids):
    """Return the GraphQL document requesting metadata for ``entry_ids``.

    The IDs are serialised with ``json.dumps``, which yields a double-quoted,
    comma-separated list that is substituted into the query text.
    """
    return ENTRIES_QUERY_TEMPLATE % json.dumps(list(entry_ids))


def entries_from_graphql(payload):
    """Extract the list of entry objects from a decoded GraphQL response.

    Args:
        payload: Decoded JSON body of the GraphQL response.

    Returns:
        The objects under ``data.entries`` with ``None`` items dropped.

    Raises:
        RuntimeError: If the response has no ``data.entries``; the message
            includes whatever the response carried under ``errors``.
    """
    entries = (payload.get('data') or {}).get('entries')
    if entries is None:
        raise RuntimeError(
            'RCSB Data API returned no entries: %r' % (payload.get('errors'),)
        )
    return [entry for entry in entries if entry is not None]


def entry_to_row(entry):
    """Flatten one GraphQL entry object into a row dict for the table.

    Nested objects may come back as ``None`` (e.g. an entry without a primary
    citation); the affected columns are then ``None`` instead of raising.
    """
    accession = entry.get('rcsb_accession_info') or {}
    citation = entry.get('rcsb_primary_citation') or {}
    info = entry.get('rcsb_entry_info') or {}
    return {
        'ID': entry['rcsb_id'],
        'Initial Release Date': accession.get('initial_release_date'),
        'PubMed ID': citation.get('pdbx_database_id_PubMed'),
        'DOI': citation.get('pdbx_database_id_DOI'),
        'Resolution': info.get('resolution_combined'),
    }


if __name__ == "__main__":
    if pdb_ids:
        # Define the query
        query = build_entries_query(pdb_ids)

        # Make the request; fail fast on a stalled connection or HTTP error.
        response = requests.post(url, json={'query': query}, timeout=30)
        response.raise_for_status()

        # Extract the data from the response
        data = response.json()

        # Extract the desired data
        entries = entries_from_graphql(data)
    else:
        # Nothing to look up, so skip the request entirely.
        entries = []

    # One row per entry
    entry_data = [entry_to_row(entry) for entry in entries]

    # Create a DataFrame from the entry data; explicit columns keep the
    # header stable even when there are no rows.
    df = pd.DataFrame(entry_data, columns=ENTRY_COLUMNS)

    print(df.head())

    # Write the DataFrame to a CSV file
    df.to_csv('pdb_data.csv', index=False)

['5XR8', '5TGZ', '5U09', '5XRA', '6KPG', '6KQI', '6N4B', '7V3Z', '7FEE', '7WV9']
     ID  Initial Release Date  PubMed ID                         DOI   
0  5XR8  2017-07-12T00:00:00Z   28678776         10.1038/nature23272  \
1  5TGZ  2016-11-02T00:00:00Z   27768894  10.1016/j.cell.2016.10.004   
2  5U09  2016-12-07T00:00:00Z   27851727         10.1038/nature20613   
3  5XRA  2017-07-12T00:00:00Z   28678776         10.1038/nature23272   
4  6KPG  2020-02-12T00:00:00Z   32004463  10.1016/j.cell.2020.01.008   

  Resolution  
0     [2.95]  
1      [2.8]  
2      [2.6]  
3      [2.8]  
4      [3.0]  
