# Contact sites of protein-DNA interaction

This example reproduces [Biotite's example](https://www.biotite-python.org/examples/gallery/structure/contact_sites.html#sphx-glr-examples-gallery-structure-contact-sites-py) using crimm.

In [1]:
from collections import Counter

from Bio.PDB import NeighborSearch
from Bio.PDB.Selection import unfold_entities
from crimm.Fetchers import fetch_rcsb
from crimm.Visualization import View



In [2]:
# Fetch PDB entry 2OR1 with crimm's RCSB fetcher, leaving out solvent
structure = fetch_rcsb('2or1', include_solvent=False)

In [3]:
# Display the fetched structure; as the last expression of the cell it is
# rendered by the notebook (see the output below)
structure

NGLWidget()

<Structure id=2OR1 Models=1>
│
├───<Model id=1 Chains=4>
	│
	├───<Polydeoxyribonucleotide id=A Residues=20>
	├──────Description: DNA (5'-D(*AP*AP*GP*TP*AP*CP*AP*AP*AP*CP*TP*TP*TP*CP*TP*TP*G P*TP*AP*T)-3')
	│
	├───<Polydeoxyribonucleotide id=B Residues=20>
	├──────Description: DNA (5'-D(*TP*AP*TP*AP*CP*AP*AP*GP*AP*AP*AP*GP*TP*TP*TP*GP*T P*AP*CP*T)-3')
	│
	├───<Polypeptide(L) id=C Residues=63>
	├──────Description: 434 REPRESSOR
	│
	├───<Polypeptide(L) id=D Residues=63>
	├──────Description: 434 REPRESSOR


In [4]:
# Work on the first model only and sort its chains by polymer type;
# chains of any other type are left out of both lists
model = structure.models[0]
dnas = [
    chain for chain in model
    if chain.chain_type == 'Polydeoxyribonucleotide'
]
proteins = [
    chain for chain in model
    if chain.chain_type == 'Polypeptide(L)'
]

In [5]:
# Quick sanity check that the two protein chains are really identical:
# compares the `masked_seq` of the first two entries in `proteins` and
# stops the notebook with an AssertionError if they differ
assert proteins[0].masked_seq == proteins[1].masked_seq

In [6]:
# The maximum distance between an atom in the repressor and an atom in
# the DNA for them to be considered 'in contact'
# NOTE(review): presumably in angstroms, the unit of PDB coordinates — confirm
THRESHOLD_DISTANCE = 4.0

In [7]:
# Fast identification of contacts via NeighborSearch (KD-tree):
# NeighborSearch is initialized with the atoms of the repressor chains
# and is later queried with the atom coordinates of the two DNA chains.
# 'A' unfolds the protein chains down to the atom level.
repressor_atoms = unfold_entities(proteins, 'A')
selector = NeighborSearch(repressor_atoms)

In [8]:
# Query the repressor search tree once per DNA atom and collect every
# repressor atom that lies within THRESHOLD_DISTANCE of it.
# The same repressor atom can be collected more than once when it is close
# to several DNA atoms.
selected_atoms = []
for atom in unfold_entities(dnas, 'A'):
    # An atom with no neighbours in range contributes nothing, so the
    # result can be extended unconditionally
    selected_atoms.extend(selector.search(atom.coord, THRESHOLD_DISTANCE))

In [9]:
# Go from the contacting repressor atoms up to the residues they belong to
# ('R' selects the residue level for unfold_entities)
contact_residues = unfold_entities(selected_atoms, 'R')

In [10]:
# Find the common residues between the two chains: a residue id that occurs
# more than once among the contact residues is treated as shared.
all_res_id = [res.id for res in contact_residues]
# Count every id once up front; calling list.count() for each element made
# this step quadratic in the number of contact residues
id_counts = Counter(all_res_id)
duplicates = [resid for resid in all_res_id if id_counts[resid] > 1]
shared_ids = set(duplicates)
common_res_id = list(shared_ids)
# Test membership against the set rather than the list for constant-time
# lookups; the order of contact_residues is kept in the result
common_res = [res for res in contact_residues if res.id in shared_ids]

In [11]:
# Load the whole model into a viewer and highlight the common contact
# residues, adding a licorice representation for them
view = View()
view.load_entity(model)
view.highlight_residues(common_res, add_licorice=True)
# Last expression of the cell, so the notebook displays the viewer
view

View()