# Solution 2 - Protein Interactions

This tutorial shows how to find proteins for a specific organism, how to calculate protein-protein interactions, and how to visualize the results.


In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import substring_index
from mmtfPyspark.datasets import pdbjMineDataset
from mmtfPyspark.webfilters import PdbjMineSearch
from mmtfPyspark.interactions import InteractionFilter, InteractionFingerprinter
from mmtfPyspark.io import mmtfReader
from ipywidgets import interact, IntSlider
import py3Dmol

#### Configure Spark


In [None]:
# Obtain the Spark session used by all cells below (created on first use).
spark = (
    SparkSession.builder
    .appName("mmtfPyspark-Solution-02")
    .getOrCreate()
)

## Find protein structures for mouse


For our first task, we need to run a taxonomy query using SIFTS data. See the [PDB metadata examples](https://github.com/sbl-sdsc/mmtf-pyspark/blob/master/demos/datasets/PDBMetaDataDemo.ipynb) and the [SIFTS demo](https://github.com/sbl-sdsc/mmtf-pyspark/blob/master/demos/datasets/SiftsDataDemo.ipynb).

To figure out how to query for taxonomy, the command below lists the first 10 entries of the SIFTS taxonomy table. As you can see, we can use the `scientific_name` field to query for a specific organism.


In [None]:
# Exploratory query: LIMIT 10 keeps the preview small while we inspect
# which columns the SIFTS taxonomy table offers.
taxonomy_query = "SELECT * FROM sifts.pdb_chain_taxonomy LIMIT 10"
taxonomy = pdbjMineDataset.get_dataset(taxonomy_query)
# Print the retrieved rows.
taxonomy.show()

### TODO-1: specify a taxonomy query where the scientific name is 'Mus musculus'


In [None]:
# Restrict the SIFTS taxonomy table to mouse entries. The query string is
# kept in `taxonomy_query` because it is reused to filter the structures.
taxonomy_query = (
    "SELECT * FROM sifts.pdb_chain_taxonomy "
    "WHERE scientific_name = 'Mus musculus'"
)
taxonomy = pdbjMineDataset.get_dataset(taxonomy_query)

# Show the first 10 matching rows.
taxonomy.show(n=10)

In [None]:
# Relative path to the sample data directory read below.
path = "../../data/mmtf_full_sample/"

# Read structures from the sequence file(s) at `path`.
# NOTE(review): fraction=0.1 presumably loads a ~10% random sample of the
# structures; confirm against the mmtfReader.read_sequence_file docs.
pdb = mmtfReader.read_sequence_file(path, fraction=0.1)

### TODO-2: Take the taxonomy from above and use it to filter the pdb structures


In [None]:
# Build a filter from the mouse taxonomy query, keep only the structures it
# accepts, and cache the subset for the interaction calculation below.
mouse_filter = PdbjMineSearch(taxonomy_query)
pdb = pdb.filter(mouse_filter).cache()

## Calculate polymer-polymer interactions for this subset of structures

Find protein-protein interactions with a 6 Å distance cutoff


In [None]:
# Interaction criteria: 6 Å distance cutoff, minInteractions set to 10.
distance_cutoff = 6.0
interactionFilter = InteractionFilter(distance_cutoff, minInteractions=10)

# Compute the polymer-polymer interactions for the filtered structures and
# cache the result, since several later cells read from it.
polymer_interactions = InteractionFingerprinter.get_polymer_interactions(pdb, interactionFilter)
interactions = polymer_interactions.cache()

In [None]:
# Add a "structureId" column holding the part of "structureChainId" that
# precedes the first "." (substring_index with count=1).
interactions = interactions.withColumn(
    "structureId",
    substring_index(interactions.structureChainId, ".", 1),
).cache()

# Preview only: apply the limit on the Spark side so that just 10 rows are
# transferred to the driver, instead of converting the entire dataset to
# pandas and then discarding all but the first 10 rows.
interactions.limit(10).toPandas()

## Visualize the protein-protein interactions


#### Extract id columns as lists (required for visualization)


In [None]:
# Collect the four columns needed for visualization in ONE Spark job.
# A single collect() avoids running four separate jobs and guarantees that
# position i in every list below refers to the same interaction row.
interaction_rows = interactions.select(
    "structureId",
    "queryChainID",
    "targetChainID",
    "groupNumbers",
).collect()

# Rows are read by position (the order of the select above) so the result
# does not depend on the exact spelling/case of the returned column names.
structure_ids = [row[0] for row in interaction_rows]
query_chain_ids = [row[1] for row in interaction_rows]
target_chain_ids = [row[2] for row in interaction_rows]
target_groups = [row[3] for row in interaction_rows]

Disable the scrollbar for the visualization below (optional: the cell is currently commented out; uncomment it to apply)


In [None]:
#%%javascript
# IPython.OutputArea.prototype._should_scroll = function(lines) {return false;}


#### Show protein-protein interactions within cutoff distance (query = orange, target = blue)


In [None]:
def view_protein_protein_interactions(
    structure_ids,
    query_chain_ids,
    target_chain_ids,
    target_groups,
    distance=4.5,
):
    """Browse protein-protein interactions with an interactive py3Dmol viewer.

    The four list arguments are parallel: position ``i`` in each of them
    describes one interaction. A slider selects ``i`` and the corresponding
    structure is rendered with the query chain in orange and the target
    chain in light blue.

    Parameters
    ----------
    structure_ids : list
        Structure identifiers; each is requested from py3Dmol as
        ``"pdb:" + structure_ids[i]``.
    query_chain_ids : list
        Chain id of the query chain for each interaction.
    target_chain_ids : list
        Chain id of the target chain for each interaction.
    target_groups : list
        For each interaction, the residue selection (passed as ``resi``) of
        the interacting groups in the target chain.
    distance : float, optional
        Query-chain atoms within this distance of the target chain are
        drawn as spheres. Defaults to 4.5.

    Returns
    -------
    The value returned by ``ipywidgets.interact``, or ``None`` when there
    are no interactions to display.
    """
    n_interactions = len(structure_ids)
    if n_interactions == 0:
        # With no entries the slider range would be min=0, max=-1, which is
        # not a valid range; report the situation instead of failing.
        print("No interactions to display.")
        return None

    def view3d(i=0):
        """Render interaction number ``i``."""
        print(
            f"PDB: {structure_ids[i]}, query: {query_chain_ids[i]}, target: {target_chain_ids[i]}"
        )

        # Selection of the interacting residues in the target chain.
        target = {"chain": target_chain_ids[i], "resi": target_groups[i]}

        viewer = py3Dmol.view(query="pdb:" + structure_ids[i], width=600, height=600)
        # Start from an empty style so only the selections styled below get
        # an explicit representation.
        viewer.setStyle({})

        # Query chain: lines overall ...
        viewer.setStyle(
            {"chain": query_chain_ids[i]},
            {"line": {"colorscheme": "orangeCarbon"}},
        )
        # ... and spheres for the part within `distance` of the target chain.
        viewer.setStyle(
            {
                "chain": query_chain_ids[i],
                "within": {"distance": distance, "sel": {"chain": target_chain_ids[i]}},
            },
            {"sphere": {"colorscheme": "orangeCarbon"}},
        )
        # Target chain: lines overall, sticks for the interacting residues.
        viewer.setStyle(
            {"chain": target_chain_ids[i]},
            {"line": {"colorscheme": "lightblueCarbon"}},
        )
        viewer.setStyle(target, {"stick": {"colorscheme": "lightblueCarbon"}})
        viewer.zoomTo(target)

        return viewer.show()

    s_widget = IntSlider(
        min=0,
        max=n_interactions - 1,
        description="Structure",
        # Only re-render when the slider is released, not on every step.
        continuous_update=False,
    )
    return interact(view3d, i=s_widget)

In [None]:
# Launch the interactive viewer, highlighting contacts at the same cutoff
# that was used to compute the interactions.
view_protein_protein_interactions(
    structure_ids=structure_ids,
    query_chain_ids=query_chain_ids,
    target_chain_ids=target_chain_ids,
    target_groups=target_groups,
    distance=distance_cutoff,
)

In [None]:
# Shut down the Spark session created at the top of the notebook.
spark.stop()