# Retrieving cofactor-protein interatomic interactions through PDBe API

This repository contains a Jupyter Notebook that queries and analyzes cofactor information through the Protein Data Bank in Europe (PDBe) API.

**Quick start**  
1. Install dependencies: `pip install -r requirements.txt`  
2. Open the notebook: `jupyter lab` or `jupyter notebook`  
3. Run cells top-to-bottom.

**What this notebook does**  
- Queries the PDBe REST/JSON API to retrieve structural and annotation data.
- Assigns interatomic cofactor-protein interactions to each PDB entry from every coenzyme class.
- Distinguishes between side-chain and backbone amino acid interactions.
- Generates and saves clean, structured CSV files for downstream analysis.


In [24]:
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
from IPython.display import SVG, display
from pprint import pprint
import sys
sys.path.insert(0,'..') # to ensure the below import works in all Jupyter notebooks
from python_modules.api_modules import run_sequence_search, explode_dataset, get_ligand_site_data, run_search, pandas_dataset, pandas_count, pandas_plot, pandas_plot_multi_groupby #This module is adopted from PDBe API jupyter notebooks
import csv


## Get bound molecule codes


In [26]:
def get_bound_molecules(pdbId):
    """
    Request the bound-molecule records of one PDB entry from the PDBe graph API.

    :param str pdbId: PDB entry identifier appended to the endpoint URL
    :return: the decoded JSON body when the server answers with HTTP 200;
        otherwise None, after printing "No data available"
    """
    endpoint = "https://www.ebi.ac.uk/pdbe/graph-api/pdb/bound_molecules"
    reply = requests.get("/".join((endpoint, pdbId)))
    # Guard clause: any status other than 200 is reported as missing data.
    if reply.status_code != 200:
        print("No data available")
        return None
    return reply.json()

In [27]:
def save_pdb_chain_auth(pdbId, filename):
    """
    Append one comma-separated line per bound ligand of a PDB entry to a file.

    Each line holds: pdbId, chain_id, author_residue_number, chem_comp_id, entity.
    A TypeError raised anywhere in the process (for instance when
    get_bound_molecules returned None) is reported as "bad pdb code".

    :param str pdbId: PDB entry identifier, also used as the key into the reply
    :param str filename: path of the file opened in append mode
    :return None:
    """
    ligand_fields = ("chain_id", "author_residue_number", "chem_comp_id", "entity")
    try:
        payload = get_bound_molecules(pdbId)
        with open(filename, "a") as out:
            for molecule in payload[pdbId]:
                for lig in molecule["composition"]["ligands"]:
                    # Look up every field before writing so a missing key
                    # never leaves a partial line behind.
                    record = [pdbId] + [lig[field] for field in ligand_fields]
                    out.write(",".join(map(str, record)) + "\n")
    except TypeError:
        print("bad pdb code")
    return None
 
    

In [28]:
# Put here the list of PDB codes for the coenzyme class being analyzed
#pdb_list = ["6may", "2r98", "1a59", "1aca", "1aj8", "1al6", "1amz", "1b6t", "1b87", "1bo4", "1bob", "1bq6", "1buc", "1chw", "1cjw", "1cm0", "1cml", "1cqi", "1cqj", "1csc", "1csh", "1csi", "1csr", "1css", "1d6h", "1dlv", "1dm3", "1dq8", "1dq9", "1dqa", "1dub", "1e1c", "1eab", "1eac", "1ead", "1ebl", "1ee0", "1ef9", "1egc", "1esm", "1f0y", "1f12", "1f7l", "1fy7", "1ghe", "1h16", "1h17", "1h1t", "1h9g", "1hbk", "1hm8", "1hm9", "1hnd", "1hnh", "1hnj", "1hv9", "1i12", "1i1d", "1ib1", "1iic", "1iid", "1il0", "1ixe", "1iyk", "1j4j", "1jkj", "1jll", "1jqi", "1jtb", "1jxz", "1k39", "1kgq", "1kgt", "1khr", "1kk4", "1kqa", "1krr", "1kru", "1krv", "1kuv", "1kux", "1kuy", "1l0c", "1lo7", "1lo8", "1lo9", "1m1o", "1m3z", "1m4d", "1m4g", "1m4i", "1m75", "1m76", "1mj3", "1mj9", "1mja", "1mjb", "1mr9", "1mzj", "1n71", "1n8w", "1ndi", "1nl7", "1nvl", "1nzy", "1od2", "1on3", "1on9", "1ozp", "1p0h", "1p5r", "1p7t", "1pg3", "1pg4", "1ps9", "1pt5", "1pt8", "1pu9", "1pua", "1q2c", "1q2d", "1q4s", "1q4t", "1q4u", "1q51", "1q6y", "1qax", "1qfl", "1qr0", "1qsm", "1qsn", "1qsr", "1r31", "1req", "1rjn", "1s3z", "1s5k", "1s60", "1s7l", "1s7n", "1scu", "1sg4", "1sst", "1sui", "1t3z", "1t4c", "1t7q", "1tiq", "1txt", "1u6s", "1v0c", "1vgq", "1vgr", "1vi0", "1vpm", "1w6u", "1wdk", "1wdl", "1wdm", "1wl4", "1wlv", "1wn3", "1wtf", "1wwz", "1xa4", "1xet", "1xny", "1xpk", "1xpl", "1xpm", "1xvt", "1xvu", "1y7u", "1y81", "1yli", "1yqz", "1yre", "1ysl", "1yvk", "1z4r", "2a4n", "2a81", "2af3", "2af4", "2ahv", "2ahw", "2b3v", "2b4b", "2b4d", "2b58", "2bei", "2bsw", "2bue", "2bwo", "2c27", "2c43", "2c6x", "2cb8", "2cg5", "2cnm", "2cns", "2cnt", "2csc", "2cts", "2cy2", "2cye", "2d3m", "2d3t", "2d52", "2d5a", "2deb", "2dqz", "2dub", "2e1t", "2e6u", "2eft", "2eis", "2f2s", "2f3x", "2f6r", "2fa0", "2fa3", "2fiw", "2ft0", "2fxf", "2fy4", "2fy5", "2g2z", "2gd2", "2gd6", "2ge3", "2gf6", "2giv", "2gq3", "2grj", "2gyo", "2h12", "2h3p", "2h3u", "2h3w", "2h5m", "2h7c", "2hqy", "2hw5", "2i79", "2i7n", "2i7p", "2ibu", 
"2ibw", "2iby", "2ii3", "2ii4", "2ii5", "2il4", "2ix5", "2jbz", "2jdc", "2jdd", "2jev", "2jfk", "2ji6", "2ji8", "2jib", "2k5t", "2nmt", "2nu6", "2nu7", "2nu8", "2nu9", "2nua", "2nyg", "2o28", "2oas", "2ob0", "2oi5", "2oi6", "2oi7", "2onf", "2ou2", "2ozg", "2ozu", "2p0w", "2p2b", "2p2f", "2p2j", "2p6e", "2p6f", "2p6g", "2p8u", "2pfr", "2pq8", "2pr1", "2prb", "2psw", "2q29", "2q4v", "2q4y", "2q78", "2qf7", "2qir", "2qx1", "2r26", "2r8v", "2r9e", "2rc4", "2ref", "2req", "2rkv", "2scu", "2tdt", "2ux9", "2uzf", "2v18", "2v19", "2v1o", "2vat", "2vbq", "2vez", "2vfc", "2vhe", "2vjk", "2vjl", "2vjm", "2vjo", "2vqy", "2vss", "2vsu", "2vtz", "2vu0", "2vxk", "2vyx", "2vzz", "2wat", "2wdo", "2wds", "2wdy", "2wh5", "2wkt", "2wkv", "2wl4", "2wl5", "2wle", "2wlf", "2wlg", "2wpw", "2wpx", "2wsa", "2wuu", "2wya", "2x58", "2x7b", "2xat", "2xiq", "2xr7", "2xta", "2y0m", "2y0p", "2yiz", "2yj0", "2ync", "2ynd", "2yne", "2zba", "2zfn", "2zpa", "2zsd", "2zw4", "2zw5", "2zw7", "3ang", "3anp", "3awj", "3b2s", "3b6z", "3b7k", "3b8g", "3b96", "3biy", "3bj7", "3bj8", "3bli", "3bsy", "3cgb", "3cgc", "3cgd", "3cge", "3csc", "3cts", "3cv2", "3cw9", "3cz7", "3d2m", "3d2p", "3ddd", "3dr8", "3epy", "3eq6", "3exn", "3f0a", "3f5o", "3f8k", "3fbu", "3flv", "3fs8", "3fsb", "3fsc", "3fsy", "3gf3", "3glm", "3gma", "3gpc", "3gy9", "3gya", "3h5z", "3h77", "3ho8", "3hqj", "3icr", "3ics", "3ict", "3igj", "3ijw", "3il4", "3iu1", "3iu2", "3iwe", "3jtk", "3k9u", "3kvu", "3kzl", "3l92", "3lbe", "3lcj", "3ld2", "3lnb", "3lsj", "3mde", "3mgd", "3mk6", "3mp5", "3mpi", "3mqg", "3mqh", "3n0m", "3n0s", "3ne7", "3nfd", "3nt6", "3nta", "3ntd", "3nwz", "3nyq", "3nyr", "3nz2", "3otw", "3owc", "3oxo", "3oyz", "3p3i", "3pgp", "3pm5", "3pnb", "3pp9", "3pvr", "3pvt", "3pvy", "3pw8", "3pxu", "3pzc", "3q0g", "3q0j", "3q33", "3q35", "3q9n", "3q9u", "3qb8", "3qdq", "3qm0", "3qmn", "3r1k", "3r32", "3r34", "3r35", "3r37", "3r3a", "3r3b", "3r3c", "3r3d", "3r3f", "3r5c", "3r95", "3r96", "3r9e", "3r9f", "3r9g", "3rba", "3rhs", "3rq5", 
"3rt9", "3rta", "3rtg", "3ryo", "3s6f", "3s6g", "3s6h", "3slb", "3slf", "3sma", "3smp", "3spt", "3sqz", "3st8", "3sxn", "3t6s", "3t88", "3t8a", "3tdt", "3te4", "3tea", "3tfy", "3to6", "3to7", "3to9", "3tw6", "3u9e", "3u9s", "3ubm", "3uf6", "3v1u", "3v4e", "3vbi", "3vbj", "3vbk", "3vbl", "3vbm", "3vbn", "3vbp", "3vwd", "3vwe", "3vzs", "3wd7", "3whb", "3wr7", "3wxy", "3wy0", "3x1j", "3x1m", "3zbn", "3zj0", "3zw9", "3zwa", "3zwb", "3zwc", "4a0s", "4a0z", "4a2z", "4a30", "4a31", "4a32", "4a33", "4a95", "4ag7", "4ag9", "4ava", "4avb", "4avc", "4b10", "4b11", "4b12", "4b13", "4b14", "4b3i", "4b3j", "4b5o", "4b5p", "4bbh", "4bhw", "4bqn", "4bqo", "4c2j", "4c2x", "4c2y", "4c2z", "4c68", "4c7h", "4c7i", "4cae", "4caf", "4cav", "4caw", "4cax", "4cgl", "4cgm", "4cgn", "4cgo", "4cgp", "4cry", "4crz", "4cs0", "4csc", "4cyn", "4cyo", "4cyp", "4cyq", "4dpm", "4ea7", "4ea8", "4eaa", "4eab", "4em3", "4em4", "4emw", "4eqr", "4eqs", "4eqw", "4eu4", "4eu5", "4eu6", "4eu7", "4eu8", "4eu9", "4eua", "4eub", "4eud", "4fc6", "4fc7", "4fn8", "4fnb", "4fnd", "4fx9", "4gah", "4gs4", "4h6u", "4h6z", "4hkf", "4hur", "4hzd", "4hzo", "4hzp", "4i42", "4i49", "4i4b", "4i4z", "4i52", "4i56", "4i6a", "4ien", "4if5", "4ii4", "4isx", "4jae", "4jap", "4jaq", "4jd3", "4jd6", "4jvt", "4jwp", "4jxr", "4kec", "4ku2", "4ku3", "4ku5", "4kub", "4kuh", "4kvm", "4kvo", "4kvx", "4l80", "4l89", "4l8a", "4l9y", "4l9z", "4lrt", "4lx9", "4m20", "4m99", "4mfp", "4mfq", "4mob", "4moc", "4mrt", "4mxe", "4my0", "4mzq", "4mzu", "4n5m", "4n6b", "4n8i", "4n8j", "4nbu", "4nhd", "4nsq", "4nv7", "4o9c", "4ocg", "4omr", "4pdk", "4pk2", "4pk3", "4psw", "4psx", "4pv6", "4pze", "4pzr", "4pzs", "4pzt", "4q36", "4q38", "4qbj", "4qii", "4qij", "4qjk", "4qjl", "4qvh", "4qvt", "4r1l", "4r3k", "4r3l", "4r3u", "4r4u", "4r57", "4r87", "4req", "4ri1", "4rpm", "4rs2", "4ruk", "4rvn", "4u89", "4u9v", "4u9w", "4u9y", "4u9z", "4ua3", "4ubt", "4ubu", "4ubv", "4ucm", "4ucn", "4ucp", "4ufv", "4ufw", "4ufx", "4uwi", "4uwj", "4was", "4x0o", "4x5k", 
"4xc7", "4xc8", "4xl4", "4xnh", "4xpd", "4xpl", "4xx0", "4xyl", "4xym", "4xz3", "4y49", "4yak", "4yrh", "4z3y", "4zbg", "4zdb", "4zdc", "4zm6", "4zrb", "4zv3", "5a27", "5a28", "5ab6", "5ab7", "5ag4", "5ag5", "5ag6", "5ag7", "5age", "5ahs", "5ayv", "5bsr", "5byu", "5bz4", "5c88", "5cae", "5cjt", "5cju", "5cjv", "5cjw", "5csl", "5cts", "5cuo", "5cyv", "5d4e", "5dbv", "5ddk", "5dwn", "5e3q", "5ebv", "5ec4", "5egj", "5egl", "5eo2", "5f38", "5f48", "5f49", "5fal", "5fan", "5frd", "5fvj", "5g1f", "5g1z", "5g20", "5g21", "5g22", "5gcn", "5gi5", "5gi6", "5gi7", "5gi8", "5gi9", "5gif", "5gig", "5gih", "5gii", "5gk9", "5gxd", "5h84", "5h86", "5hbr", "5hgz", "5hh0", "5hh1", "5hmn", "5ht0", "5hwo", "5hwp", "5hwq", "5hwr", "5i0k", "5ib0", "5icv", "5icw", "5inf", "5ini", "5iv0", "5j9w", "5jbx", "5jfm", "5jfn", "5jph", "5jrh", "5k04", "5k18", "5k7h", "5k7z", "5k85", "5kf1", "5kf2", "5kf8", "5kf9", "5kga", "5kgh", "5kgj", "5kgp", "5kjt", "5kl9", "5klq", "5kp2", "5ktc", "5ktd", "5l1n", "5lbx", "5lkt", "5lku", "5lkx", "5lkz", "5lnq", "5lot", "5ls7", "5mgb", "5mu6", "5my0", "5my2", "5n1u", "5nji", "5nnp", "5npq", "5o08", "5o48", "5o4v", "5o6h", "5o6j", "5o9t", "5o9u", "5o9v", "5omo", "5req", "5suv", "5szu", "5szv", "5szy", "5szz", "5t02", "5t06", "5t53", "5t5u", "5t6c", "5t6e", "5t6h", "5t7d", "5t7e", "5trl", "5ts2", "5tva", "5tvj", "5u2k", "5uqr", "5uqu", "5us1", "5uut", "5v0p", "5v0w", "5v0x", "5v0z", "5v3a", "5vj1", "5vxc", "5vxo", "5w3x", "5w3y", "5w40", "5w8c", "5wc4", "5wci", "5wjd", "5wje", "5wpk", "5wx3", "5wx6", "5wx7", "5x8f", "5x8g", "5xuh", "5xuk", "5xun", "5xxr", "5xxs", "5yge", "5yh7", "5yo2", "5yo9", "5yoa", "5yrr", "5zai", "5zba", "5zzc", "6a6d", "6a75", "6a7d", "6abw", "6aby", "6add", "6ag4", "6ag5", "6ajn", "6ao7", "6aqp", "6arb", "6are", "6as5", "6axe", "6b0u", "6b1l", "6b2m", "6b3t", "6ba4", "6bc3", "6bc4", "6bc5", "6be0", "6bja", "6bjb", "6bon", "6boo", "6bvc", "6c28", "6c32", "6ciq", "6csc", "6ct5", "6cts", "6cxx", "6cy3", "6cyj", "6cyy", "6cz6", "6e1j", "6e3a", 
"6edd", "6edv", "6edz", "6ee1", "6ehj", "6el2", "6es9", "6esq", "6eu5", "6ewf", "6f56", "6fz2", "6fz3", "6fz5", "6g96", "6ge9", "6gnh", "6gns", "6gnt", "6gnu", "6gnv", "6gtp", "6gtr", "6gw3", "6gyr", "6gzt", "6he0", "6he2", "6hsj", "6hsp", "6hxh", "6hxi", "6hxj", "6hxl", "6hxm", "6hxn", "6hxp", "6hxq", "6i2u", "6ia6", "6iix", "6ioi", "6iox", "6iuf", "6j0p", "6j1e", "6j1f", "6j1g", "6j1i", "6j1j", "6jqn", "6jqo", "6k3c", "6k80", "6k8t", "6kkw", "6ksb", "6ktq", "6l2c", "6l3p", "6l7j", "6lpv", "6lpy", "6lq0", "6lq1", "6lq2", "6lq3", "6lq4", "6lq5", "6lq6", "6lq7", "6lq8", "6mak", "6maz", "6mb0", "6mb1", "6mb6", "6mb9", "6mfd", "6mgg", "6mn0", "6mn1", "6mn2", "6n2o", "6nas", "6nbe", "6nbw", "6nds", "6nxg", "6nzy", "6o07", "6omk", "6op5", "6p2j", "6p2p", "6p5u", "6p7k", "6pav", "6pcb", "6pcc", "6pcd", "6pf1", "6pfn", "6pfz", "6pgu", "6poe", "6ppl", "6pw9", "6qcl", "6qd9", "6qda", "6qdb", "6qdc", "6qdd", "6qde", "6qdf", "6qdg", "6qdh", "6qfb", "6qrm", "6qwu", "6qxq", "6qxr", "6qyf", "6qyg", "6r1e", "6rcx", "6req", "6rft", "6rop", "6ruz", "6rvb", "6rvh", "6sjz", "6sk1", "6sk2", "6sk3", "6sk8", "6skj", "6sll", "6sp0", "6tdf", "6tdg", "6tdh", "6tgx", "6th0", "6u9c", "6ui9", "6uia", "6uuw", "6uuz", "6uv5", "6v3t", "6v8k", "6vfy", "6vo5", "6vou", "6vp0", "6vp9", "6vr2", "6vr3", "6vta", "6vum", "6vz1", "6wcv", "6wf2", "6wf3", "6wf7", "6wfg", "6wfk", "6wfn", "6wfo", "6wn0", "6wuk", "6x7q", "6x7r", "6x7s", "6xbq", "6xbt", "6xka", "6xru", "6ybp", "6ybq", "6yca", "6ygb", "6ygc", "6ygd", "6ysw", "6yug", "6yus", "6yzz", "6z00", "6z2h", "6z5f", "6z5o", "6z5v", "6zmp", "6zng", "6znt", "6znu", "7amd", "7bor", "7c1l", "7c1r", "7c1s", "7c3o", "7c4e", "7c4f", "7c4g", "7cw5", "7cz3", "7jm1", "7jzs", "7k09", "7k0a", "7kes", "7kpp", "7kps", "7kvy", "7kye", "7kyj", "7l1k", "7l3q", "7l7y", "7l7z", "7l81", "7liw", "7lj9", "7lla", "7req"]
# Active list of PDB entries handed to the iterate_* calls below
# (the longer list above is commented out).
pdb_list = ["1hbo", "6may", "2r98", "1a59"]

In [29]:
def iterate_list1(my_list):
    """
    Append the bound-ligand rows of every listed PDB entry to cofmapping.csv.

    :param my_list: iterable of PDB entry identifiers; each one is passed to
        save_pdb_chain_auth together with the output file "cofmapping.csv"
    :return None:
    """
    # Iterate the argument rather than the global pdb_list, so the caller
    # decides which entries are processed.
    for pdbId in my_list:
        save_pdb_chain_auth(pdbId, "cofmapping.csv")
        
#print()
#iterate_list1(pdb_list)

In [30]:
# Write the bound-ligand rows for the entries in pdb_list to cofmapping.csv.
iterate_list1(pdb_list)

No data available
bad pdb code


In [31]:
# Filter for cofactors: keep only the lines that contain one of the 453 cofactor-associated chem_comp_id codes listed below:

import csv

my_file_name = "cofmapping.csv"
cleaned_file = "cofmapping_cofactors.csv"

# chem_comp_id codes of the cofactor-associated ligands whose rows are kept.
save_words = ['ASC', 'F43', 'M43', 'MDO', 'PNS', '0WD', '1DG', '3AA', '3CD', '5J8', '6V0', '80F', '8ID', 'A3D', 'AP0', 'CNA', 'CND', 'DG1', 'DN4', 'DND', 'DQV', 'EAD', 'ENA', 'LNC', 'N01', 'NA0', 'NAD', 'NAE', 'NAI', 'NAJ', 'NAP', 'NAQ', 'NAX', 'NBD', 'NBP', 'NDA', 'NDC', 'NDE', 'NDO', 'NDP', 'NHD', 'NHO', 'NJP', 'NPW', 'ODP', 'P1H', 'PAD', 'SAD', 'SAE', 'SND', 'TAD', 'TAP', 'TDT', 'TXD', 'TXE', 'TXP', 'ZID', '18W', '29P', 'DPM', '2MD', 'MCN', 'MGD', 'MSS', 'MTE', 'MTQ', 'MTV', 'PCD', 'PGD', 'XAX', 'B12', 'B1M', 'CNC', 'COB', 'COY', '6FA', 'FA8', 'FAA', 'FAB', 'FAD', 'FAE', 'FAO', 'FAS', 'FCG', 'FDA', 'FED', 'FNK', 'FSH', 'P5F', 'RFL', 'SFD', '1YJ', 'C2F', 'DHF', 'FFO', 'FOL', 'FON', 'FOZ', 'THF', 'THG', 'THH', '01A', '01K', '0ET', '1C4', '1CV', '1CZ', '1HA', '1VU', '1XE', '2CP', '2NE', '3CP', '3H9', '3HC', '3VV', '4CA', '4CO', '52O', '7L1', '8JD', '8Z2', '94Q', 'ACO', 'AMX', 'BCA', 'BCO', 'BSJ', 'BYC', 'CA3', 'CA5', 'CA6', 'CA8', 'CAA', 'CAJ', 'CAO', 'CIC', 'CMC', 'CMX', 'CO6', 'CO7', 'CO8', 'COA', 'COD', 'COF', 'COO', 'COT', 'COW', 'COZ', 'DCA', 'DCC', 'FAM', 'FCX', 'FRE', 'FYN', 'GRA', 'HAX', 'HMG', 'HSC', 'HXC', 'IVC', 'MCA', 'MCD', 'MDE', 'MLC', 'MYA', 'NHM', 'NHQ', 'NHW', 'NMX', 'OXK', 'Q5B', 'QHD', 'RMW', 'S0N', 'SCA', 'SCD', 'SCO', 'SDX', 'SOP', 'T1G', 'TC6', 'TUY', 'UT7', 'UTA', 'WCA', 'YNC', 'ZOZ', 'SHT', 'TP7', 'TPZ', 'TXZ', 'XP8', 'XP9', '4LS', '4LU', '9O9', '9P3', '9PF', '9Q6', '9QF', 'F7F', 'FMN', 'FNR', 'FNS', 'IRF', 'RBF', 'MQ7', 'COM', '1FH', '2FH', '522', '6HE', '76R', '7HE', 'BW9', 'CCH', 'COH', 'CV0', 'DDH', 'DHE', 'F0L', 'F0X', 'FDD', 'FDE', 'FEC', 'FMI', 'H02', 'HAS', 'HDD', 'HDE', 'HEA', 'HEB', 'HEC', 'HEM', 'HEO', 'HEV', 'HIF', 'HP5', 'ISW', 'MH0', 'MI9', 'MNH', 'MNR', 'MP1', 'N7H', 'OBV', 'PP9', 'SH0', 'SIR', 'SRM', 'UFE', 'VEA', 'VER', 'VOV', 'ZEM', 'ZNH', '4AB', '7AP', 'BHS', 'BIO', 'H2B', 'H4B', 'HBI', 'WSD', 'PQQ', 'BC4', 'BTI', 'BTN', 'BYT', 'DTB', 'Y7Y', 'LPA', 'LPB', '4YP', '9BL', 'AT5', 'DBT', 'RQX', 'UHD', 'UQ1', 'UQ2', 'UQ5',
'UQ6', '0HG', '0HH', '1JO', '1JP', '1R4', '3GC', '48T', '5AU', '6SG', 'ABY', 'AHE', 'ATA', 'BOB', 'BWS', 'BYG', 'EPY', 'ESG', 'GBI', 'GBP', 'GBX', 'GDN', 'GDS', 'GF5', 'GGC', 'GIP', 'GNB', 'GPR', 'GPS', 'GS8', 'GSB', 'GSF', 'GSH', 'GSM', 'GSN', 'GSO',
'GTB', 'GTD', 'GTS', 'GTX', 'GTY', 'GVX', 'HAG', 'HGS', 'IBG', 'ICY', 'JM2', 'JM5', 'JM7', 'L9X', 'LEE', 'LZ6', 'P9H', 'RGE', 'TGG', 'TS5', 'VDW', 'VWW', 'ZBF', '0AF', 'TOQ', 'TQQ', 'TRQ', '0UM', '0XU', '0Y0', '0Y1', '0Y2', '36A', '37H', '4IK', '62X', '6D6', '6NR', '76H', '76J', '76K', '76L', '76M', 'AN6', 'EEM', 'K15', 'P2J', 'SA8', 'SAH', 'SAM', 'SFG', 'SMM', 'SX0', 'TT8', '1TP', '1U0', '2TP', '5GY', '5SR', '8EF', '8EL', '8EO', '8FL', '8ML', '8N9', '8PA', 'A5X', 'D7K', 'EN0', 'HTL', 'M6T', 'N1T', 'N3T', 'NDQ', 'O2T', 'QSP', 'R1T', 'S1T', 'T5X', 'T6F', 'TD5', 'TD6', 'TD7', 'TD8', 'TD9', 'TDK', 'TDL', 'TDM', 'TDN', 'TDP', 'TDW', 'THD', 'THV', 'THW', 'THY', 'TOG', 'TOI', 'TP8', 'TPP', 'TPU', 'TPW', 'TZD', 'WWF', 'ZP1', 'EM2', 'MPL', 'NOP', 'NPL', 'PDP', 'PLP', 'PLR', 'PMP', 'PXP', 'PZP', 'UAH', 'X04',
'1TY', '2TY', '3TY', '4HL', 'AGQ', 'ESB', 'G27', 'HCC', 'P2Q', 'P3Q', 'PAQ', 'T0I', 'TPQ', 'TTS', 'TYQ', 'TYY', 'YPZ', 'ATP']

# Rows of cofmapping.csv are written as
#   pdb_id, chain_id, author_residue_number, chem_comp_id, entity
# so the ligand code is the fourth field.
chem_comp_id_column = 3
# Set for constant-time exact-match lookups.
cofactor_ids = set(save_words)

with open(my_file_name, 'r', newline='') as infile, \
     open(cleaned_file, 'w', newline='') as outfile:
    writer = csv.writer(outfile)
    for line in csv.reader(infile, delimiter=','):
        # Compare the ligand code exactly. Substring matching against every
        # field also kept non-cofactor rows, e.g. any ligand whose residue
        # number contains "522" (which is one of the listed codes).
        if (len(line) > chem_comp_id_column
                and line[chem_comp_id_column].strip() in cofactor_ids):
            writer.writerow(line)

## Get PDB bound ligand interatomic interactions 


In [33]:
import csv
import pandas as pd
import requests

# from pprint import pprint

# PDBe API settings: every endpoint below is derived from base_url.
base_url = "https://www.ebi.ac.uk/pdbe/"  # root shared by all PDBe web services used here
search_url = "".join((base_url, 'search/pdb/select?'))  # PDBe search API endpoint

# UniProt-centred graph API root and its ligand-site endpoint.
pdbe_kb_api_uniprot_base_url = "".join((base_url, "graph-api/uniprot/"))
pdbe_kb_interacting_residues_api = "".join((base_url, "graph-api/uniprot/ligand_sites/"))


def get_ligand_site_url():
    """
    Build the URL prefix of the ligand_sites endpoint.

    :return str: pdbe_kb_api_uniprot_base_url followed by "ligand_sites/"
    """
    endpoint = "ligand_sites/"
    return "".join((pdbe_kb_api_uniprot_base_url, endpoint))


def get_interaction_site_url():
    """
    Build the URL prefix of the interface_residues endpoint.

    :return str: pdbe_kb_api_uniprot_base_url followed by "interface_residues/"
    """
    endpoint = "interface_residues/"
    return "".join((pdbe_kb_api_uniprot_base_url, endpoint))


def get_url_with_accession(url, accession):
    """
    Request url + accession and return the part of the reply keyed by accession.

    :param str url: endpoint prefix to which the accession is appended
    :param str accession: identifier appended to the URL and used as reply key
    :return: the value stored under accession in the reply, or an empty dict
        when that key is absent
    """
    payload = get_url(url + accession)
    return payload.get(accession, {})


def get_url(url):
    """
    Send a GET request to a URL and decode the JSON reply.

    :param str url: address to request
    :return dict: the decoded JSON body on HTTP 200; otherwise an empty dict,
        after printing the status code and the response text
    """
    reply = requests.get(url)

    # Guard clause: report anything but a 200 and fall back to an empty result.
    if reply.status_code != 200:
        print("[No data retrieved - %s] %s" % (reply.status_code, reply.text))
        return {}

    return reply.json()



In [34]:
#Add to python api_modules

# Root of the PDB-entry endpoints of the graph API, plus the
# bound_ligand_interactions endpoint that hangs off it.
pdbe_kb_api_pdb_base_url = "".join((base_url, "graph-api/pdb/"))
pdbe_kb_api_bound_ligand_interactions = "".join((base_url, "graph-api/pdb/bound_ligand_interactions/"))

# Root used by get_sifts_mappings_url.
pdbe_sifts_mappings_api_url = "".join((base_url, "api/"))

def get_bound_ligand_interactions_url():
    """
    Build the URL prefix of the bound_ligand_interactions endpoint.

    :return str: pdbe_kb_api_pdb_base_url followed by "bound_ligand_interactions/"
    """
    endpoint = "bound_ligand_interactions/"
    return "".join((pdbe_kb_api_pdb_base_url, endpoint))


def get_sifts_mappings_url():
    """
    Build the URL prefix of the mappings endpoint.

    :return str: pdbe_sifts_mappings_api_url followed by "mappings/"
    """
    endpoint = "mappings/"
    return "".join((pdbe_sifts_mappings_api_url, endpoint))


In [35]:
def bound_ligand_interactions(pdbId, chain, seqId, chem_comp_id, entity):
    """
    Fetch the interactions of one bound ligand and flatten them into dicts.

    Requests <bound_ligand_interactions URL>/<pdbId>/<chain>/<seqId>, prints
    the URL, and returns one dict per interaction found in the reply. Each
    interaction dict from the reply is updated in place: its nested "end"
    record and its "distance" value are replaced by the flat keys
    amino_acid, chain_id, sequence_residue, atom_names_features and
    distance_A, and the request parameters plus the ligand identifiers
    reported by the API are added.

    :param str pdbId: PDB entry identifier
    :param str chain: chain identifier of the ligand
    :param seqId: author residue number of the ligand; converted with str()
        when the URL is built, stored unchanged in auth_res_num_request
    :param chem_comp_id: not used for the lookup; accepted so a whole CSV row
        can be passed through
    :param entity: copied verbatim into the entity_b key of every result
    :return list: list of flattened interaction dicts (empty when get_url
        returned no data)
    """
    url = get_bound_ligand_interactions_url() + pdbId + "/" + chain + "/" + str(seqId)
    print(url)
    data = get_url(url=url)
    # The results are accumulated into a single flat list.
    data_to_ret = []
    for accession_data in data.values():
        for row in accession_data:
            ligand = row.get('ligand', {})
            chem_comp_id_api_i = ligand.get('chem_comp_id')
            chain_id_api_i = ligand.get('chain_id')
            author_residue_number_api_i = ligand.get('author_residue_number')
            for interaction in row.get('interactions', []):
                # Pop with a default: an interaction lacking "end" or
                # "distance" yields None values instead of a KeyError,
                # matching the defensive .get() reads used for the ligand.
                end = interaction.pop('end', None) or {}
                distance = interaction.pop('distance', None)
                interaction['amino_acid'] = end.get('chem_comp_id')
                interaction['chain_id'] = end.get('chain_id')
                interaction['sequence_residue'] = end.get('author_residue_number')
                interaction['atom_names_features'] = end.get('atom_names')
                interaction['distance_A'] = distance
                interaction['pdb_id'] = pdbId
                interaction['chain_request'] = chain
                interaction['auth_res_num_request'] = seqId
                interaction['chem_comp_id_api'] = chem_comp_id_api_i
                interaction['chain_id_api'] = chain_id_api_i
                interaction['author_residue_number_api'] = author_residue_number_api_i
                # Carry the entity given by the caller along with the row.
                interaction['entity_b'] = entity
                data_to_ret.append(interaction)
    return data_to_ret

In [36]:
#Function to retrieve data with pd.Dataframe and joining multiple values in one

def get_interactions_tidy(filename):
    """
    Collect the interactions of every ligand listed in a CSV file.

    Each row of the file must hold exactly five fields: pdbId, chain id,
    author residue number, chem_comp_id and entity. The rows are passed one by
    one to bound_ligand_interactions and all results are handed to
    explode_dataset.

    :param str filename: path of the comma-separated input file
    :return: whatever explode_dataset returns for the collected interactions
    """
    collected = []  # created before the loop so it accumulates across rows
    with open(filename) as handle:
        for pdbId, chain_Id, author_residue_number, chem_comp_id, entity in csv.reader(handle, delimiter=","):
            # Grow the list with the interactions found for this ligand.
            collected += bound_ligand_interactions(
                pdbId, chain_Id, author_residue_number, chem_comp_id, entity)
    return explode_dataset(collected)

In [37]:
# Gather the interactions of every ligand row in the filtered cofactor file.
results = get_interactions_tidy("cofmapping_cofactors.csv")

https://www.ebi.ac.uk/pdbe/graph-api/pdb/bound_ligand_interactions/1hbo/A/1550
https://www.ebi.ac.uk/pdbe/graph-api/pdb/bound_ligand_interactions/1hbo/A/1551
https://www.ebi.ac.uk/pdbe/graph-api/pdb/bound_ligand_interactions/1hbo/D/1550
https://www.ebi.ac.uk/pdbe/graph-api/pdb/bound_ligand_interactions/1hbo/D/1551
https://www.ebi.ac.uk/pdbe/graph-api/pdb/bound_ligand_interactions/1hbo/D/1552
https://www.ebi.ac.uk/pdbe/graph-api/pdb/bound_ligand_interactions/1hbo/A/1552
https://www.ebi.ac.uk/pdbe/graph-api/pdb/bound_ligand_interactions/6may/A/501
https://www.ebi.ac.uk/pdbe/graph-api/pdb/bound_ligand_interactions/1a59/A/380
https://www.ebi.ac.uk/pdbe/graph-api/pdb/bound_ligand_interactions/1a59/A_2/380


In [38]:
# Save the interaction table before the classification columns are added.
results.to_csv("cofmapping_cofactors_results.csv")

 ## Get ligand info for mappings

In [40]:
def get_ligands(pdbId):
    """
    Request the ligand-monomer records of one PDB entry from the PDBe API.

    :param str pdbId: PDB entry identifier appended to the endpoint URL
    :return: the decoded JSON body when the server answers with HTTP 200;
        otherwise None, after printing "No data available"
    """
    endpoint = "https://www.ebi.ac.uk/pdbe/api/pdb/entry/ligand_monomers"
    reply = requests.get("/".join((endpoint, pdbId)))
    succeeded = reply.status_code == 200
    if not succeeded:
        print("No data available")
    return reply.json() if succeeded else None

In [41]:
def save_ligands(pdbId, filename):
    """
    Append one comma-separated line per ligand monomer of a PDB entry to a file.

    Each line holds: pdbId, entity_id, author_residue_number, chain_id,
    chem_comp_id. When get_ligands returns no data, or the reply has no entry
    for pdbId, "bad pdb code" is printed and nothing is written, so one
    unknown identifier no longer aborts a batch with a TypeError/KeyError.

    :param str pdbId: PDB entry identifier, also used as the key into the reply
    :param str filename: path of the file opened in append mode
    :return None:
    """
    response = get_ligands(pdbId)
    ligands = response.get(pdbId) if isinstance(response, dict) else None
    # The file is opened even for a bad code, as the sibling save_* functions
    # do, so the output file exists for the steps that read it afterwards.
    with open(filename, "a") as file:
        if ligands is None:
            print("bad pdb code")
            return None
        for ligand in ligands:
            print(pdbId, ligand["entity_id"], ligand["author_residue_number"], ligand["chain_id"], ligand["chem_comp_id"], sep=",", file=file)
    return None
 
    

In [42]:
def iterate_list_save_ligands(my_list):
    """
    Append the ligand rows of every listed PDB entry to get_ligands_list.csv.

    :param my_list: iterable of PDB entry identifiers; each one is passed to
        save_ligands together with the output file "get_ligands_list.csv"
    :return None:
    """
    # Iterate the argument rather than the global pdb_list, so the caller
    # decides which entries are processed.
    for pdbId in my_list:
        save_ligands(pdbId, "get_ligands_list.csv")
        
#print()
#iterate_list1(pdb_list)

In [43]:
# Write the ligand rows for the entries in pdb_list to get_ligands_list.csv.
iterate_list_save_ligands(pdb_list)

In [44]:
# Keep only the lines that contain one of the 453 cofactor-associated chem_comp_id codes listed below:

import csv

my_file_name = "get_ligands_list.csv"
cleaned_file = "get_ligands_list_cofactors.csv"
# chem_comp_id codes of the cofactor-associated ligands whose rows are kept.
save_words = ['ASC', 'F43', 'M43', 'MDO', 'PNS', '0WD', '1DG', '3AA', '3CD', '5J8', '6V0', '80F', '8ID', 'A3D', 'AP0', 'CNA', 'CND', 'DG1', 'DN4', 'DND', 'DQV', 'EAD', 'ENA', 'LNC', 'N01', 'NA0', 'NAD', 'NAE', 'NAI', 'NAJ', 'NAP', 'NAQ', 'NAX', 'NBD', 'NBP', 'NDA', 'NDC', 'NDE', 'NDO', 'NDP', 'NHD', 'NHO', 'NJP', 'NPW', 'ODP', 'P1H', 'PAD', 'SAD', 'SAE', 'SND', 'TAD', 'TAP', 'TDT', 'TXD', 'TXE', 'TXP', 'ZID', '18W', '29P', 'DPM', '2MD', 'MCN', 'MGD', 'MSS', 'MTE', 'MTQ', 'MTV', 'PCD', 'PGD', 'XAX', 'B12', 'B1M', 'CNC', 'COB', 'COY', '6FA', 'FA8', 'FAA', 'FAB', 'FAD', 'FAE', 'FAO', 'FAS', 'FCG', 'FDA', 'FED', 'FNK', 'FSH', 'P5F', 'RFL', 'SFD', '1YJ', 'C2F', 'DHF', 'FFO', 'FOL', 'FON', 'FOZ', 'THF', 'THG', 'THH', '01A', '01K', '0ET', '1C4', '1CV', '1CZ', '1HA', '1VU', '1XE', '2CP', '2NE', '3CP', '3H9', '3HC', '3VV', '4CA', '4CO', '52O', '7L1', '8JD', '8Z2', '94Q', 'ACO', 'AMX', 'BCA', 'BCO', 'BSJ', 'BYC', 'CA3', 'CA5', 'CA6', 'CA8', 'CAA', 'CAJ', 'CAO', 'CIC', 'CMC', 'CMX', 'CO6', 'CO7', 'CO8', 'COA', 'COD', 'COF', 'COO', 'COT', 'COW', 'COZ', 'DCA', 'DCC', 'FAM', 'FCX', 'FRE', 'FYN', 'GRA', 'HAX', 'HMG', 'HSC', 'HXC', 'IVC', 'MCA', 'MCD', 'MDE', 'MLC', 'MYA', 'NHM', 'NHQ', 'NHW', 'NMX', 'OXK', 'Q5B', 'QHD', 'RMW', 'S0N', 'SCA', 'SCD', 'SCO', 'SDX', 'SOP', 'T1G', 'TC6', 'TUY', 'UT7', 'UTA', 'WCA', 'YNC', 'ZOZ', 'SHT', 'TP7', 'TPZ', 'TXZ', 'XP8', 'XP9', '4LS', '4LU', '9O9', '9P3', '9PF', '9Q6', '9QF', 'F7F', 'FMN', 'FNR', 'FNS', 'IRF', 'RBF', 'MQ7', 'COM', '1FH', '2FH', '522', '6HE', '76R', '7HE', 'BW9', 'CCH', 'COH', 'CV0', 'DDH', 'DHE', 'F0L', 'F0X', 'FDD', 'FDE', 'FEC', 'FMI', 'H02', 'HAS', 'HDD', 'HDE', 'HEA', 'HEB', 'HEC', 'HEM', 'HEO', 'HEV', 'HIF', 'HP5', 'ISW', 'MH0', 'MI9', 'MNH', 'MNR', 'MP1', 'N7H', 'OBV', 'PP9', 'SH0', 'SIR', 'SRM', 'UFE', 'VEA', 'VER', 'VOV', 'ZEM', 'ZNH', '4AB', '7AP', 'BHS', 'BIO', 'H2B', 'H4B', 'HBI', 'WSD', 'PQQ', 'BC4', 'BTI', 'BTN', 'BYT', 'DTB', 'Y7Y', 'LPA', 'LPB', '4YP', '9BL', 'AT5', 'DBT', 'RQX', 'UHD', 'UQ1', 'UQ2', 'UQ5',
'UQ6', '0HG', '0HH', '1JO', '1JP', '1R4', '3GC', '48T', '5AU', '6SG', 'ABY', 'AHE', 'ATA', 'BOB', 'BWS', 'BYG', 'EPY', 'ESG', 'GBI', 'GBP', 'GBX', 'GDN', 'GDS', 'GF5', 'GGC', 'GIP', 'GNB', 'GPR', 'GPS', 'GS8', 'GSB', 'GSF', 'GSH', 'GSM', 'GSN', 'GSO',
'GTB', 'GTD', 'GTS', 'GTX', 'GTY', 'GVX', 'HAG', 'HGS', 'IBG', 'ICY', 'JM2', 'JM5', 'JM7', 'L9X', 'LEE', 'LZ6', 'P9H', 'RGE', 'TGG', 'TS5', 'VDW', 'VWW', 'ZBF', '0AF', 'TOQ', 'TQQ', 'TRQ', '0UM', '0XU', '0Y0', '0Y1', '0Y2', '36A', '37H', '4IK', '62X', '6D6', '6NR', '76H', '76J', '76K', '76L', '76M', 'AN6', 'EEM', 'K15', 'P2J', 'SA8', 'SAH', 'SAM', 'SFG', 'SMM', 'SX0', 'TT8', '1TP', '1U0', '2TP', '5GY', '5SR', '8EF', '8EL', '8EO', '8FL', '8ML', '8N9', '8PA', 'A5X', 'D7K', 'EN0', 'HTL', 'M6T', 'N1T', 'N3T', 'NDQ', 'O2T', 'QSP', 'R1T', 'S1T', 'T5X', 'T6F', 'TD5', 'TD6', 'TD7', 'TD8', 'TD9', 'TDK', 'TDL', 'TDM', 'TDN', 'TDP', 'TDW', 'THD', 'THV', 'THW', 'THY', 'TOG', 'TOI', 'TP8', 'TPP', 'TPU', 'TPW', 'TZD', 'WWF', 'ZP1', 'EM2', 'MPL', 'NOP', 'NPL', 'PDP', 'PLP', 'PLR', 'PMP', 'PXP', 'PZP', 'UAH', 'X04',
'1TY', '2TY', '3TY', '4HL', 'AGQ', 'ESB', 'G27', 'HCC', 'P2Q', 'P3Q', 'PAQ', 'T0I', 'TPQ', 'TTS', 'TYQ', 'TYY', 'YPZ', 'ATP']

# Rows of get_ligands_list.csv are written as
#   pdb_id, entity_id, author_residue_number, chain_id, chem_comp_id
# so the ligand code is the fifth field.
chem_comp_id_column = 4
# Set for constant-time exact-match lookups.
cofactor_ids = set(save_words)

with open(my_file_name, 'r', newline='') as infile, \
     open(cleaned_file, 'w', newline='') as outfile:
    writer = csv.writer(outfile)
    for line in csv.reader(infile, delimiter=','):
        # Compare the ligand code exactly. Substring matching against every
        # field also kept non-cofactor rows, e.g. any ligand whose residue
        # number contains "522" (which is one of the listed codes).
        if (len(line) > chem_comp_id_column
                and line[chem_comp_id_column].strip() in cofactor_ids):
            writer.writerow(line)

## Filter Cofactors

In [46]:
def get_cofactors(pdbId):
    """
    Request the cofactor records of one PDB entry from the PDBe API.

    :param str pdbId: PDB entry identifier appended to the endpoint URL
    :return: the decoded JSON body when the server answers with HTTP 200;
        otherwise None, after printing "No data available"
    """
    endpoint = "https://www.ebi.ac.uk/pdbe/api/pdb/entry/cofactor"
    target = "/".join((endpoint, pdbId))
    reply = requests.get(target)
    # Anything but a 200 is treated as "entry has no cofactor data".
    if reply.status_code != 200:
        print("No data available")
        return None
    return reply.json()

In [47]:
def save_cofactors(pdbId, filename):
    """
    Append one comma-separated line per cofactor of a PDB entry to a file.

    Each line holds: pdbId, entity_id, author_residue_number, chain_id,
    chem_comp_id, class. A TypeError raised anywhere in the process (for
    instance when get_cofactors returned None) is reported as
    "badpdbforcofactor".

    :param str pdbId: PDB entry identifier, also used as the key into the reply
    :param str filename: path of the file opened in append mode
    :return None:
    """
    cofactor_fields = ("entity_id", "author_residue_number", "chain_id", "chem_comp_id", "class")
    try:
        payload = get_cofactors(pdbId)
        with open(filename, "a") as out:
            for cofactor in payload[pdbId]:
                # Look up every field before writing so a missing key never
                # leaves a partial line behind.
                record = [pdbId] + [cofactor[field] for field in cofactor_fields]
                out.write(",".join(map(str, record)) + "\n")
    except TypeError:
        print("badpdbforcofactor")
    return None
 
    

In [48]:
def iterate_list_save_cofactors(my_list):
    """
    Append the cofactor rows of every listed PDB entry to get_cofactors_list.csv.

    :param my_list: iterable of PDB entry identifiers; each one is passed to
        save_cofactors together with the output file "get_cofactors_list.csv"
    :return None:
    """
    # Iterate the argument rather than the global pdb_list, so the caller
    # decides which entries are processed.
    for pdbId in my_list:
        save_cofactors(pdbId, "get_cofactors_list.csv")
#print()
#iterate_list1(pdb_list)

In [49]:
# Write the cofactor rows for the entries in pdb_list to get_cofactors_list.csv.
iterate_list_save_cofactors(pdb_list)

## Distinguishing between side chain or backbone amino acid interactions

Whether an interaction involves the side chain or the main chain is decided from the name of the protein atom taking part in it: for standard and modified amino acids, interactions made through the atoms "N", "C", "CA" or "O" are assumed to be main-chain interactions, and interactions through any other atom are treated as side-chain interactions.


In [51]:
import numpy as np

In [52]:
# Preview the first rows of the interaction table before classification.
results.head()

Unnamed: 0,ligand_atoms,interaction_type,interaction_details,amino_acid,chain_id,sequence_residue,atom_names_features,distance_A,pdb_id,chain_request,auth_res_num_request,chem_comp_id_api,chain_id_api,author_residue_number_api,entity_b
0,C5C,atom-atom,hydrophobic,LEU,C,117,CD2,4.03,1hbo,A,1550,F43,A,1550,4
1,C5C,atom-atom,hydrophobic,LEU,C,117,CB,4.4,1hbo,A,1550,F43,A,1550,4
2,O7C,atom-atom,weak_polar,LEU,C,117,CB,3.46,1hbo,A,1550,F43,A,1550,4
3,O7C,atom-atom,hbond,SER,C,118,OG,3.38,1hbo,A,1550,F43,A,1550,4
4,O7C,atom-atom,polar,SER,C,118,OG,3.38,1hbo,A,1550,F43,A,1550,4


In [53]:
# Main-chain vs side-chain: an interaction through one of these backbone atom
# names is labelled 'main_chain'; everything else falls back to 'side_chain'.
backbone_atom_names = ['N', 'C', 'CA', 'O']
conditions = [results['atom_names_features'] == atom_name
              for atom_name in backbone_atom_names]
choices = ['main_chain'] * len(conditions)
results['chain_atom_type'] = np.select(conditions, choices, default='side_chain')

# Amino-acid grouping: residues in the first list are labelled 'early_AA',
# residues in the second 'late_AA', and any other residue name 'other'.
early_amino_acids = ['GLY', 'ALA', 'VAL', 'LEU', 'ILE',
                     'PRO', 'THR', 'SER', 'GLU', 'ASP']
late_amino_acids = ['ARG', 'LYS', 'HIS', 'PHE', 'TRP',
                    'TYR', 'CYS', 'MET', 'GLN', 'ASN']
conditions1 = [results['amino_acid'] == residue_name
               for residue_name in early_amino_acids + late_amino_acids]
choices = (['early_AA'] * len(early_amino_acids)
           + ['late_AA'] * len(late_amino_acids))
results['aminoacid_type'] = np.select(conditions1, choices, default='other')
print(results)
 

     ligand_atoms interaction_type interaction_details amino_acid chain_id  \
0             C5C        atom-atom         hydrophobic        LEU        C   
1             C5C        atom-atom         hydrophobic        LEU        C   
2             O7C        atom-atom          weak_polar        LEU        C   
3             O7C        atom-atom               hbond        SER        C   
4             O7C        atom-atom               polar        SER        C   
...           ...              ...                 ...        ...      ...   
1620          CDP        atom-atom         hydrophobic        LEU        A   
1621          C3P        atom-atom          weak_polar        CIT      A_2   
1622          C3P        atom-atom          weak_polar        CIT      A_2   
1623          C3P        atom-atom          weak_polar        CIT      A_2   
1624          C3P        atom-atom          weak_polar        CIT      A_2   

      sequence_residue atom_names_features  distance_A pdb_id c

In [54]:
# Save the table including the chain_atom_type and aminoacid_type columns.
results.to_csv("results_api.csv")

In [55]:
# Preview the first rows, now with the two classification columns.
results.head()

Unnamed: 0,ligand_atoms,interaction_type,interaction_details,amino_acid,chain_id,sequence_residue,atom_names_features,distance_A,pdb_id,chain_request,auth_res_num_request,chem_comp_id_api,chain_id_api,author_residue_number_api,entity_b,chain_atom_type,aminoacid_type
0,C5C,atom-atom,hydrophobic,LEU,C,117,CD2,4.03,1hbo,A,1550,F43,A,1550,4,side_chain,early_AA
1,C5C,atom-atom,hydrophobic,LEU,C,117,CB,4.4,1hbo,A,1550,F43,A,1550,4,side_chain,early_AA
2,O7C,atom-atom,weak_polar,LEU,C,117,CB,3.46,1hbo,A,1550,F43,A,1550,4,side_chain,early_AA
3,O7C,atom-atom,hbond,SER,C,118,OG,3.38,1hbo,A,1550,F43,A,1550,4,side_chain,early_AA
4,O7C,atom-atom,polar,SER,C,118,OG,3.38,1hbo,A,1550,F43,A,1550,4,side_chain,early_AA
