# Calculating ligand-polymer interaction fingerprints

This demo shows how to calculate ligand-polymer interactions and map them onto polymer residues (interaction fingerprints).

In [1]:
from pyspark.sql import SparkSession                  
from mmtfPyspark.io import mmtfReader
from mmtfPyspark.interactions import InteractionExtractor, InteractionFilter                                 

## Configure Spark

In [2]:
# Obtain the Spark session for this demo; getOrCreate() reuses an already
# running session when one exists instead of starting a second one.
spark = (
    SparkSession.builder
    .appName("LigandInteractionFingerprintDemo")
    .getOrCreate()
)

## Download PDB structure 1OHR

In [3]:
# Download the MMTF data for PDB entry 1OHR; `pdb` is the input handed to the
# interaction extractor in the cells below.
pdb = mmtfReader.download_mmtf_files(['1OHR'])

## Find interactions of small molecules
The interaction filter is used to set up the interaction criteria:
1. interactions within 4 Å
2. ignore water interactions

In [4]:
# Build the filter that defines which contacts are reported as interactions.
interactionFilter = InteractionFilter()
# Only contacts within a distance of 4.0 are kept (criterion 1 of this section).
interactionFilter.set_distance_cutoff(4.0)
# NOTE(review): the leading False presumably means "exclude" (rather than
# "include") the listed groups - confirm against the InteractionFilter API.
interactionFilter.set_query_groups(False, "HOH") # ignore water interactions

## Print table of group (residue) interactions
Here, the query ligand (1UN) interacts with target chains A and B. Group numbers are the residue numbers of the interacting residues in the PDB entry. Sequence indices are zero-based indices of the interacting residues in the protein sequence.

In [5]:
# Extract ligand-polymer interactions that satisfy the filter criteria,
# using the extractor's default level (one row per interacting residue in the
# table shown below).
interactions = InteractionExtractor.get_ligand_polymer_interactions(
    pdb, interactionFilter
)
# Convert to a pandas DataFrame so the notebook renders it as a table.
interactions.toPandas()

Unnamed: 0,structureChainId,queryGroupId,queryChainId,queryGroupNumber,targetGroupId,targetChainId,targetGroupNumber,sequenceIndex,sequence
0,1OHR.A,1UN,A,201,ALA,A,28,27,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
1,1OHR.B,1UN,A,201,ASP,B,29,28,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
2,1OHR.B,1UN,A,201,ILE,B,50,49,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
3,1OHR.A,1UN,A,201,ASP,A,25,24,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
4,1OHR.B,1UN,A,201,GLY,B,48,47,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
5,1OHR.A,1UN,A,201,THR,A,80,79,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
6,1OHR.A,1UN,A,201,ILE,A,84,83,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
7,1OHR.A,1UN,A,201,PRO,A,81,80,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
8,1OHR.B,1UN,A,201,ALA,B,28,27,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
9,1OHR.B,1UN,A,201,LEU,B,23,22,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...


## Print table of atom interactions
Same as previous table, but interactions are listed at the atom level.

In [6]:
# Repeat the extraction with level='atom' to get one row per interacting atom
# pair (adds atom names and the distance, as shown in the table below).
# This rebinds `interactions`, replacing the group-level result from the
# previous cell.
interactions = InteractionExtractor.get_ligand_polymer_interactions(
    pdb,
    interactionFilter,
    level='atom',
)
# Convert to a pandas DataFrame so the notebook renders it as a table.
interactions.toPandas()

Unnamed: 0,structureChainId,queryGroupId,queryChainId,queryGroupNumber,queryAtomName,targetGroupId,targetChainId,targetGroupNumber,targetAtomName,distance,sequenceIndex,sequence
0,1OHR.A,1UN,A,201,O21,GLY,A,27,C,3.327191,26,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
1,1OHR.B,1UN,A,201,C14,VAL,B,32,CG2,3.810405,31,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
2,1OHR.B,1UN,A,201,C4,GLY,B,49,C,3.694572,48,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
3,1OHR.B,1UN,A,201,C5,ILE,B,50,CG1,3.595052,49,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
4,1OHR.A,1UN,A,201,C29,ASP,A,29,H,3.778979,28,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
5,1OHR.A,1UN,A,201,C31,ASP,A,29,N,3.660577,28,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
6,1OHR.A,1UN,A,201,C39,ALA,A,28,CB,3.874941,27,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
7,1OHR.B,1UN,A,201,O25,ILE,B,50,CG1,3.921487,49,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
8,1OHR.B,1UN,A,201,HOL,ASP,B,25,OD1,2.553169,24,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...
9,1OHR.A,1UN,A,201,O38,ASP,A,30,N,3.394716,29,PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...


## Terminate Spark

In [7]:
# Shut down the Spark session now that the demo is finished.
spark.stop()