# SUBPROCESS
```python
import subprocess

# Run `ls -l` directly from an argument list (no intermediate shell) and
# capture its standard output as text.
sp = subprocess.run(['ls', '-l'], capture_output=True, text=True)
print(sp.stdout)
```
Possiamo anche ottenere l'output di un programma .py usando subprocess

```python
# Run the script from an explicit argument list instead of a shell string:
# `number` and `iterations` are each passed as exactly one argument and are
# never interpreted by a shell.
sp = subprocess.run(
    ['python3', 'sqrt_iterative.py', str(number), str(iterations)],
    capture_output=True,
    text=True,
)
```

# REQUEST( ) --> PDB

## Query PDB using REST calls

```python
import requests

entry_url = 'https://data.rcsb.org/rest/v1/core/entry/4GYD'
r = requests.get(entry_url)

# Decode the JSON body of the response into a Python object.
data = r.json()

type(data)  # check that the decoded payload is a dictionary

data.keys()
data.items()  # (key, value) pairs
```
**We can also query multiple IDs**

```python
protein_ids = ['4GYD', '4H0J', '4H0K']

# Collect the 'cell' section of each entry, keyed by PDB ID.
protein_dict = {}
for pdb_id in protein_ids:
    r = requests.get(f'https://data.rcsb.org/rest/v1/core/entry/{pdb_id}')
    data = r.json()
    protein_dict[pdb_id] = data['cell']

# Report length_a, length_b and length_c for every protein.
for pdb_id, cell in protein_dict.items():
    lengths = tuple(cell[name] for name in ('length_a', 'length_b', 'length_c'))
    print('Protein %s: a=%f, b=%f, c=%f' % (pdb_id, *lengths))
```

### 1.1 Polymer entity
Per ottenere dati su un **polimero**, dobbiamo fornire il *PDB ID* e il *polymer ID*
```python
# Polymer entity 1 of PDB entry 4GYD.
polymer_url = 'https://data.rcsb.org/rest/v1/core/polymer_entity/4GYD/1'
r = requests.get(polymer_url)
data = r.json()
data.keys()
data['entity_poly']
```
### 1.2 Get annotations

```python
# PubMed annotations attached to PDB entry 4GYD.
pubmed_url = 'https://data.rcsb.org/rest/v1/core/pubmed/4GYD'
r2 = requests.get(pubmed_url)
data2 = r2.json()
data2.keys()
data2['rcsb_pubmed_abstract_text']
```
### 1.3 Chemical component
```python
# Chemical component record for the component ID 'CFF'.
chemcomp_url = 'https://data.rcsb.org/rest/v1/core/chemcomp/CFF'
r = requests.get(chemcomp_url)
data = r.json()
data.keys()
data['chem_comp']
```

### 1.4 DrugBank
```python
# DrugBank record for the component ID 'CFF'.
drugbank_url = 'https://data.rcsb.org/rest/v1/core/drugbank/CFF'
r = requests.get(drugbank_url)
data = r.json()
data.keys()
data['drugbank_info'].keys()  # 'drugbank_info' is itself a dictionary
```

### 1.5 Getting sequence data in FASTA format
In questo caso l'output sarà in formato **testo** e non JSON
```python
protein_ids = ['4GYD', '4H0J', '4H0K']

# The body is read through `.text` here instead of being decoded as JSON.
for pdb_id in protein_ids:
    r = requests.get(f'https://www.rcsb.org/fasta/entry/{pdb_id}/download')
    print(r.text)
    
```

# 2. PDB Search API
```python
fasta = "MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDLPARTVETRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGPGCMNCKCVIS"

# Sequence-search request; the '%s' placeholder under "value" receives the
# FASTA sequence defined above.
# Keep notes like this one OUTSIDE the template: JSON has no comment syntax,
# and requote_uri() leaves '#' unescaped, so a '#' inside the template would
# start the URL fragment and cut the query short.
my_query = '''{
  "query": {
    "type": "terminal",
    "service": "sequence",
    "parameters": {
      "evalue_cutoff": 1,
      "identity_cutoff": 0.9,
      "sequence_type": "protein",
      "value": "%s"
    }
  },
  "request_options": {
    "scoring_strategy": "sequence"
  },
  "return_type": "polymer_entity"
}
''' % fasta

r = requests.get('https://search.rcsb.org/rcsbsearch/v2/query?json=%s' % requests.utils.requote_uri(my_query))
data = r.json()
print(data.keys())
```
### 2.1 PROSITE
```python
# Sequence-motif search with a PROSITE pattern, returning polymer entities.
my_query = '''
{
  "query": {
    "type": "terminal",
    "service": "seqmotif",
    "parameters": {
      "value": "C-x(2,4)-C-x(3)-[LIVMFYWC]-x(8)-H-x(3,5)-H",
      "pattern_type": "prosite",
      "sequence_type": "protein"
    }
  },
  "return_type": "polymer_entity"
}
'''
search_url = 'http://search.rcsb.org/rcsbsearch/v2/query?json=%s' % requests.utils.requote_uri(my_query)
r = requests.get(search_url)
j = r.json()
j.keys()

print('This is the detailed info for the first result:')
hits = j['result_set']
print(hits[0])
print('\nThe identifiers for the returned results are:')
for hit in hits:
    print(hit['identifier'])
```

The following code queries the PDB and gets the **common_name** found in the **rcsb_entity_source_organism** structure for each of the returned results. \
For instance, the common_name for 1A1F_3 is ‘house mouse’. Hint: keep in mind that the returned values for the Zinc finger example are polymers.

```python
# Sequence-motif search with a PROSITE pattern, returning polymer entities.
my_query = '''
{
  "query": {
    "type": "terminal",
    "service": "seqmotif",
    "parameters": {
      "value": "C-x(2,4)-C-x(3)-[LIVMFYWC]-x(8)-H-x(3,5)-H",
      "pattern_type": "prosite",
      "sequence_type": "protein"
    }
  },
  "return_type": "polymer_entity"
}
'''
r = requests.get('https://search.rcsb.org/rcsbsearch/v2/query?json=%s' % requests.utils.requote_uri(my_query))
j = r.json()

# Each identifier looks like '<PDB ID>_<entity ID>' (e.g. '1A1F_3'); split it
# so the two parts can be placed separately in the polymer_entity URL.
list_id = [hit['identifier'].split("_") for hit in j['result_set']]

for el in list_id:
    pdb_id, entity_id = el[0], el[1]
    r = requests.get('https://data.rcsb.org/rest/v1/core/polymer_entity/%s/%d' % (pdb_id, int(entity_id)))
    # Use a separate name so the search result in `j` is not overwritten.
    entity = r.json()
    # Only the first source organism listed for the entity is reported.
    common_name = entity['rcsb_entity_source_organism'][0]['common_name']
    print('The source organism for %s is: %s ' % (pdb_id + "_" + entity_id, common_name))



```

## 3. GRAPHQL

```python
# GraphQL query asking for the `cell` fields of entry 4GYD.
my_query = '''
{
    entry(entry_id: "4GYD") {
        cell {
            Z_PDB
            angle_alpha
            angle_beta
            angle_gamma
            formula_units_Z
            length_a
            length_b
            length_c
            pdbx_unique_axis
            volume
        }
    }
}
'''

graphql_url = 'https://data.rcsb.org/graphql?query=%s' % requests.utils.requote_uri(my_query)
r = requests.get(graphql_url)
j = r.json()
j.keys()

# Print every returned cell field as "name : value".
params = j['data']['entry']['cell']
for field in params:
    print(field, ':', params[field])

```

### This function takes a PDB ID as input and returns two strings: the structure title and the citation.
```python
def get_citation(PDB_id: str) -> tuple[str,str]:
    """Return the structure title and the primary citation of a PDB entry.

    The title and the PubMed ID of the primary citation are read from the
    RCSB GraphQL endpoint. The PubMed ID is then sent to the NCBI citation
    endpoint (api.ncbi.nlm.nih.gov/lit/ctxp), and the 'ama' -> 'orig' string
    of its JSON answer is used as the citation.

    Args:
        PDB_id: PDB entry identifier, e.g. "4GYD".

    Returns:
        A ``(title, citation)`` tuple.

    Raises:
        requests.HTTPError: If either service answers with an error status.
    """
    my_query = ''' {
  entry(entry_id:"%s") {
    rcsb_id
    rcsb_accession_info {
      initial_release_date
    }
    rcsb_primary_citation {
      pdbx_database_id_DOI
      pdbx_database_id_PubMed
    }
    audit_author {
      identifier_ORCID
      name
    }
    struct {
      pdbx_descriptor
      title
    }
    exptl {
      method
    }
  }
} ''' % PDB_id

    r = requests.get('https://data.rcsb.org/graphql?query=%s' % requests.utils.requote_uri(my_query))
    # Fail on an HTTP error status instead of trying to decode an error page.
    r.raise_for_status()
    entry = r.json()['data']['entry']
    title = entry['struct']['title']
    pubmed_id = entry['rcsb_primary_citation']['pdbx_database_id_PubMed']

    r2 = requests.get('https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/?format=citation&id=%s' % pubmed_id)
    r2.raise_for_status()
    citation = r2.json()['ama']['orig']
    return (title, citation)
```

# REQUEST( ) --> UNIPROT
Possiamo ottenere i dati da Uniprot usando il Proteins API. \
**Otteniamo una lista, non un dizionario** \
Bisogna specificare che vogliamo un output in formato JSON usando il parametro *headers*
```python
requestURL = "https://www.ebi.ac.uk/proteins/api/proteins?offset=0&size=10&accession=P0A3X7&reviewed=true"
# Request a JSON answer through the Accept header.
r = requests.get(requestURL, headers={"Accept" : "application/json"})
j = r.json()

type(j)  # j is a list that holds a single element
j[0].keys()  # that first (and only) element is a dictionary

entry = j[0]
print("Data for accession %s (ID: %s)" % (entry['accession'], entry['id']))
print("List of Gene Ontologies:")
# Skip every cross-reference whose type is not "GO".
for ref in entry['dbReferences']:
    if ref['type'] != "GO":
        continue
    print("  id: %s, term: %s, source: %s" % (
            ref['id'],
            ref['properties']['term'],
            ref['properties']['source']))


```

# REQUEST( ) --> NCBI
```python

gene_id = 8291
# Request a JSON answer through the Accept header.
headers = {'Accept': 'application/json'}
r = requests.get(f'https://api.ncbi.nlm.nih.gov/datasets/v1alpha/gene/id/{gene_id}', headers=headers)
j = r.json()

j.keys()
gene = j['genes'][0]['gene']
gene['description']  # 'dysferlin'

```