In [None]:
from Bio import SeqIO
# Walk the SEQRES records of the 6DWB entry and report each chain.
seqres_records = SeqIO.parse("pdb/6dwb.pdb", "pdb-seqres")
for record in seqres_records:
    chain_id = record.annotations["chain"]
    print(f"Record id {record.id}, chain {chain_id}")
    print(record.dbxrefs)
# `record` is still bound to the last record yielded above; display its sequence length.
len(record.seq)

In [None]:
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.Chain import Chain
from Bio.PDB.internal_coords import *
from Bio.PDB.PICIO import write_PIC, read_PIC, read_PIC_seq
from Bio.PDB.ic_rebuild import write_PDB, IC_duplicate, structure_rebuild_test
from Bio.PDB.SCADIO import write_SCAD
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.PDB.PDBIO import PDBIO
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
from Bio.PDB import PDBList

# Build `phi_psi`: one row per interior residue of every chain (model 0) of the
# listed PDB entries, holding the residue-triplet key, the phi/psi angles
# returned by Biopython's internal coordinates, the chain id, the residue
# number and the structure id.
protein_ids = ['7B3Y', '8C9N', '7LYJ', '6YUN', '7QCT', '6XRZ', '8UYS', '8FU7']

PHI_PSI_COLUMNS = ['seq', 'phi', 'psi', 'chain', 'pos_in_chain', 'protein_id']

# A single parser instance is reused for every structure.
parser = PDBParser()

# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end; growing a DataFrame with `.loc[len(df)]` copies data on every insert.
phi_psi_rows = []

for protein_id in protein_ids:
    # Prefer a local copy (either naming scheme) and download only if neither exists.
    filename = f"pdb/{protein_id}.pdb"
    filename_ent = f"pdb/pdb{str(protein_id).lower()}.ent"
    if Path(filename).is_file():
        structure_path = filename
    elif Path(filename_ent).is_file():
        structure_path = filename_ent
    else:
        print(f"Downloading {protein_id}")
        pdbl = PDBList()
        structure_path = pdbl.retrieve_pdb_file(protein_id, pdir='pdb', file_format='pdb', obsolete=False)
        if not structure_path or not Path(structure_path).is_file():
            # Without this guard `protein` would be unbound (first entry) or
            # still refer to the previous entry, which would be processed again.
            print(f"Download failed for {protein_id}; skipping")
            continue

    protein = parser.get_structure(protein_id, structure_path)

    # Only model 0 is analysed. Iterating that model directly (instead of
    # every chain of every model and then looking the id up in model 0)
    # keeps multi-model entries from contributing the same chain repeatedly.
    for chain in protein[0].get_chains():
        chain.atom_to_internal_coordinates(verbose=True)
        resultDict = structure_rebuild_test(chain)
        if not resultDict['pass']:
            print(f"Failed {protein_id} {chain.id}")
            continue

        residues = list(chain.get_residues())
        # Skip the first and last residue: the key needs both neighbours.
        # NOTE(review): neighbours are taken by list position, so the triplet
        # may span chain breaks or non-amino-acid residues — confirm this is intended.
        for i in range(1, len(residues) - 1):
            key = f'{residues[i-1].resname}, {residues[i].resname}, {residues[i+1].resname}'
            internal_coord = residues[i].internal_coord
            if not internal_coord:
                psi, phi = np.nan, np.nan
            else:
                psi = internal_coord.get_angle("psi")
                phi = internal_coord.get_angle("phi")
            phi_psi_rows.append(
                [key, phi, psi, chain.id, residues[i].get_full_id()[3][1], protein.id]
            )

phi_psi = pd.DataFrame(phi_psi_rows, columns=PHI_PSI_COLUMNS)
# Missing angles (None / NaN) become NaN in proper float columns.
phi_psi = phi_psi.astype({'phi': float, 'psi': float})

In [None]:
# Display only the rows in which no column is missing.
phi_psi.dropna(axis=0, how='any')
# phi_psi.protein_id.unique()

In [None]:
# Per-triplet counts of the remaining columns, ordered by the phi count (largest first).
(
    phi_psi
    .groupby('seq')
    .count()
    .sort_values(by='phi', ascending=False)
)

In [None]:
import seaborn as sns

# Plot the phi/psi angles recorded for one residue triplet in two stacked
# panels that share the phi axis.
key = 'SER, ALA, LEU'
df = phi_psi.loc[phi_psi.seq == key, ['phi', 'psi']]
x = df.phi
y = df.psi

fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True)
density_ax, scatter_ax = axes

# Top panel: bivariate kernel density estimate.
# Optional styling kept for reference: cmap="Blues", fill=True, thresh=0.05
sns.kdeplot(data=df, x='phi', y='psi', ax=density_ax)
density_ax.set_ylabel('psi')

# Bottom panel: the individual observations.
scatter_ax.scatter(x, y, marker='.')
scatter_ax.set_xlabel('phi')
scatter_ax.set_ylabel('psi')

plt.show()

# PDB Mine API

In [45]:
# Read dotenv
from dotenv import load_dotenv
import os
import requests
import time
# Submit a residue-chain query to the PDBMine service and keep the returned
# query id in `query_id` for the follow-up request.
load_dotenv()
PDBMINE_URL = os.getenv("PDBMINE_URL")
if not PDBMINE_URL:
    # os.getenv returns None when the variable is absent; fail with a clear
    # message instead of a TypeError on the string concatenation below.
    raise RuntimeError("PDBMINE_URL is not set; define it in the environment or in a .env file")

# response = requests.get(PDBMINE_URL + f'/v1/api/protein/7B3Y')
# assert(response.ok)
# protein_data = response.json()

response = requests.post(
    PDBMINE_URL + '/v1/api/query',
    json={
        "residueChain": "AKYVCKICGYIYDEDAGDPDNGVSPG",
        "codeLength": 1,
        "windowSize": 7
    },
    # Without a timeout requests waits indefinitely for an unresponsive server.
    timeout=30,
)
# Raises requests.HTTPError carrying the status code and reason on 4xx/5xx.
response.raise_for_status()
query_id = response.json().get('queryID')
if not query_id:
    raise RuntimeError(f"PDBMine response did not contain a queryID: {response.text[:200]}")

In [47]:
# Wait for the query to finish: poll the query endpoint until the service
# reports status 'Complete', then display the final payload.
POLL_INTERVAL_S = 2   # pause between two status checks
MAX_WAIT_S = 300      # give up polling after this many seconds

poll_deadline = time.monotonic() + MAX_WAIT_S
while True:
    response = requests.get(PDBMINE_URL + f'/v1/api/query/{query_id}', timeout=30)
    # Raises requests.HTTPError carrying the status code and reason on 4xx/5xx.
    response.raise_for_status()
    query_result = response.json()
    if query_result.get('status') == 'Complete':
        break
    if time.monotonic() >= poll_deadline:
        # Best effort: still show the last response rather than failing the cell.
        print(f"Query {query_id} still has status {query_result.get('status')!r} "
              f"after {MAX_WAIT_S}s; showing the last response")
        break
    time.sleep(POLL_INTERVAL_S)
query_result

{'status': 'Complete',
 'queryID': '484e33bb-e581-11ee-91d6-0242ac110002',
 'frames': {'000_AKYVCKI': {'1IU5_A': [[{'residueName': 'A',
      'phi': 360,
      'psi': 162.3},
     {'residueName': 'K', 'phi': -120.8, 'psi': 145.5},
     {'residueName': 'Y', 'phi': -128.7, 'psi': 139.4},
     {'residueName': 'V', 'phi': -106.8, 'psi': 134.7},
     {'residueName': 'C', 'phi': -76.2, 'psi': 119},
     {'residueName': 'K', 'phi': -68.1, 'psi': -17.5},
     {'residueName': 'I', 'phi': -80.2, 'psi': -50.8}]],
   '1IU6_A': [[{'residueName': 'A', 'phi': 360, 'psi': 163.1},
     {'residueName': 'K', 'phi': -126.7, 'psi': 142.3},
     {'residueName': 'Y', 'phi': -129.4, 'psi': 141.5},
     {'residueName': 'V', 'phi': -109.9, 'psi': 140.9},
     {'residueName': 'C', 'phi': -81.6, 'psi': 118},
     {'residueName': 'K', 'phi': -65.5, 'psi': -19.1},
     {'residueName': 'I', 'phi': -82.5, 'psi': -48.9}]],
   '1RWD_A': [[{'residueName': 'A', 'phi': 360, 'psi': -54.2},
     {'residueName': 'K', 'phi': 