# Interactors search of AT1G80840 and AT4G34410


RD29A/LTI78 - AT5G52310

NPR1 - AT1G64280

# Setup

## Library import
We import all the required Python libraries

In [1]:
import sys, re, pickle
from pathlib import Path
from datetime import datetime
from IPython.display import Markdown, display
from collections import defaultdict

import networkx as nx

In [2]:
# vis
import matplotlib.pyplot as plt

The following allows us to import functions from the skm-tools package. 
Note the relative path to the folder containing the 
"skm-tools" directory. 

In [3]:
sys.path.append("../")
from skm_tools import load_networks, ckn_utils

In [4]:
# Date stamp (YYYY.MM.DD) for this run; shown as the cell output.
today = datetime.today().strftime('%Y.%m.%d')
today

'2024.07.29'

## Path and Parameter definitions

In [5]:
# Input and output folders, both resolved relative to the working directory.
base_dir = Path("./")
data_dir = base_dir.joinpath("data")
output_dir = base_dir.joinpath("output")

In [None]:
# Make sure the data and output folders exist.
# `exist_ok=True` replaces the separate exists() check (no check-then-create
# race, safe to re-run); `parents=True` also creates missing parent folders.
# NOTE: if one of these paths exists as a regular file, this now raises
# FileExistsError instead of silently continuing.
for directory in (data_dir, output_dir):
    directory.mkdir(parents=True, exist_ok=True)

In [6]:
# Gzipped TSV inputs: the CKN edge table and the matching node annotations.
ckn_edge_path = data_dir.joinpath("AtCKN-v2-2023.06.tsv.gz")
ckn_node_path = data_dir.joinpath("AtCKN-v2-2023.06_node-annot.tsv.gz")

# Load CKN

In [7]:
# Build the network object from the edge table and the node annotation table.
ckn = load_networks.ckn_to_networkx(edge_path=ckn_edge_path, node_path=ckn_node_path)

# Quick size check of the freshly loaded network.
print(f"\nNumber of nodes: {ckn.number_of_nodes()}\nNumber of edges: {ckn.number_of_edges()}")


Number of nodes: 26234
Number of edges: 898887


In [8]:
# Print the number of edges per rank for the unfiltered network.
# The return value is not needed here, hence the throwaway `_`.
_ = ckn_utils.rank_counts(ckn)

rank 0:	 2,837
rank 1:	 36,620
rank 2:	 67,572
rank 3:	 718,389
rank 4:	 73,469


## Filtering nodes
Remove foreign nodes

In [9]:
# Distinct values of the 'species' node attribute across the whole network.
species = {attrs['species'] for _, attrs in ckn.nodes(data=True)}
species

{'ath', 'ath/foreign', 'foreign', nan}

In [10]:
# Drop the foreign labels so that `species` holds only the values to keep.
# An in-place set difference (unlike `set.remove`) does not raise KeyError when
# a label is already gone, so the cell can be re-run safely.
species -= {'foreign', 'ath/foreign'}
species

{'ath', nan}

In [11]:
# Filter the network's nodes by the species labels kept above, also dropping
# isolates. `ckn` is not reassigned, so the filter acts on the network in place;
# whatever the helper returns is kept in `removed`.
removed = ckn_utils.filter_ckn_nodes(
    ckn,
    species=species,
    remove_isolates=True,
)

Removed 27 nodes from network.


## Filtering edge ranks

CKN edges are annotated with edge 'ranks', representing how reliable the edges are:

* 0 – manually curated interactions from PSS, 
* 1 – literature curated interactions detected using targeted experimental methods (e.g. luciferase reporter assay, co-immunoprecipitation, enzymatic assays), sometimes complemented with high-throughput technologies, 
* 2 – interactions detected using high-throughput technologies (e.g. high throughput yeast two-hybrid, chromatin immunoprecipitation sequencing, degradome sequencing), 
* 3 – interactions extracted from literature (co-citation, but not text mining) or predicted in silico and additionally validated with data, 
* 4 – interactions predicted using purely in silico binding prediction algorithms. 

Below, we can count the number of edges for each rank. 

In [12]:
# Print the per-rank edge counts again, now that foreign nodes have been removed.
_ = ckn_utils.rank_counts(ckn)

rank 0:	 2,661
rank 1:	 36,620
rank 2:	 67,572
rank 3:	 718,389
rank 4:	 73,469


Since our query space is quite large, for now we'll only consider the higher-reliability edges (rank 0 - rank 2), and filter out the rest. If our query result was not large enough (too large), we could re-run the queries using less strict (stricter) ranks. 

We'll use the skm-tools function to filter the network to our required ranks. 

In [13]:
# Keep only the higher-reliability edges (ranks 0, 1 and 2).
keep_edge_ranks = [0, 1, 2]
ckn_utils.filter_ckn_edges(
    ckn,
    keep_edge_ranks=keep_edge_ranks,
    remove_isolates=True,
)

# Show the per-rank edge counts after filtering.
_ = ckn_utils.rank_counts(ckn)

Removed 791858 edges from network.
rank 0:	 2,661
rank 1:	 36,620
rank 2:	 67,572
rank 3:	 0
rank 4:	 0


In [14]:
# Report the network size after the rank filter.
print(f"\nNumber of nodes: {ckn.number_of_nodes()}\nNumber of edges: {ckn.number_of_edges()}")


Number of nodes: 13477
Number of edges: 106853


# Extract neighbourhoods

In [15]:
# Node identifiers of the two seed genes whose neighbourhoods are extracted.
RD29A, NPR1 = "AT5G52310", "AT1G64280"
seed_list = [RD29A, NPR1]

In [16]:
# For each seed, extract the subgraph induced by the seed and its first
# neighbours. Neighbours are looked up on an undirected copy of the network so
# that edge direction is ignored.
# The undirected copy does not depend on the seed, so it is built once here
# instead of once per loop iteration.
ckn_undirected = ckn.to_undirected()

networks = {}
for n in seed_list:
    # Skip seeds that are not in the (filtered) network, as the later
    # neighbourhood cells do, instead of raising on the neighbour lookup.
    if n not in ckn.nodes():
        continue
    all_neighbours = list(ckn_undirected.neighbors(n))
    print(n, len(all_neighbours))
    # The induced subgraph is taken from the original network so that its
    # edges are kept as they are; .copy() detaches it from `ckn`.
    g = nx.induced_subgraph(ckn, all_neighbours + [n]).copy()
    networks[n] = g

AT5G52310 18
AT1G64280 44


# Cytoscape


Load the required library, and make sure you can connect to Cytoscape. 

More py4cytoscape documentation is here: https://py4cytoscape.readthedocs.io/

In [17]:
import py4cytoscape as p4c
p4c.cytoscape_ping();

You are connected to Cytoscape!


In [18]:
from skm_tools import cytoscape_utils

We set the Cytoscape collection name for this notebook. 

In [19]:
# Collection name: the edge ranks currently kept, plus today's date stamp.
rank_label = ','.join(str(rank) for rank in keep_edge_ranks)
COLLECTION = f"CKN searches (ranks {rank_label}) {today}"
COLLECTION

'CKN searches (ranks 0,1,2) 2024.07.29'

In [20]:
# Send each seed's first-neighbour network to Cytoscape, then style it and
# lay it out. `suids` maps each seed to the identifier returned by Cytoscape.
suids = {}
for n in seed_list:
    # Only seeds for which a neighbourhood was extracted can be exported.
    if n not in networks:
        continue

    g = networks[n]
    suid = p4c.networks.create_network_from_networkx(
        g,
        title=f"{n} - first neighbours",
        collection=COLLECTION,
    )
    # Record the identifier (the dict was previously created but never filled).
    suids[n] = suid
    cytoscape_utils.apply_builtin_style(suid, 'ckn')
    p4c.layout_network("cose", network=suid)
    print(n, suid)

Applying default style...
Applying preferred layout
Applied CKN-default to 6903
AT5G52310 6903
Applying default style...
Applying preferred layout
Applied CKN-default to 7374
AT1G64280 7374


# Rank 0 only

Redo the network extraction, but only use rank 0 edges. 

In [21]:
# Tighten the filter: keep rank 0 edges only.
keep_edge_ranks = [0]
ckn_utils.filter_ckn_edges(
    ckn,
    keep_edge_ranks=keep_edge_ranks,
    remove_isolates=True,
)

# Show the per-rank edge counts after filtering.
_ = ckn_utils.rank_counts(ckn)

Removed 104192 edges from network.
rank 0:	 2,661
rank 1:	 0
rank 2:	 0
rank 3:	 0
rank 4:	 0


In [22]:
# Report the network size once only rank 0 edges remain.
print(f"\nNumber of nodes: {ckn.number_of_nodes()}\nNumber of edges: {ckn.number_of_edges()}")


Number of nodes: 776
Number of edges: 2661


# Extract neighbourhoods

In [23]:
# For each seed still present in the rank-0 network, extract the subgraph
# induced by the seed and its first neighbours (edge direction is ignored
# when collecting neighbours).
# The undirected copy does not depend on the seed, so it is built once here
# instead of once per loop iteration.
ckn_undirected = ckn.to_undirected()

networks = {}
for n in seed_list:
    # Seeds may have been removed by the edge filtering; skip those.
    if n not in ckn.nodes():
        continue
    all_neighbours = list(ckn_undirected.neighbors(n))
    print(n, len(all_neighbours))
    # The induced subgraph is taken from the original network so that its
    # edges are kept as they are; .copy() detaches it from `ckn`.
    g = nx.induced_subgraph(ckn, all_neighbours + [n]).copy()
    networks[n] = g

AT5G52310 4
AT1G64280 31


## 2 Hops

Extract the network "2 hops" away from the seed list

Find the edges specifically to make an edge induced subgraph. 

See [networkx ego_graph](https://networkx.org/documentation/stable/reference/generated/networkx.generators.ego.ego_graph.html) for a more straightforward implementation, but here I want to find the edges specifically to make an edge induced subgraph. 

In [24]:
# For each seed, collect every edge of `ckn` that touches one of the seed's
# first neighbours (i.e. all edges within two hops of the seed), then build the
# edge-induced subgraph from that edge list.
#
# The undirected copy and the edge view do not change inside the loops, so both
# are created once here. Previously the undirected copy was rebuilt for every
# seed and again for every first neighbour.
ckn_undirected = ckn.to_undirected()
ckn_edges = ckn.edges()

networks_2hops_edges = {}
for n in seed_list:
    # Seeds may have been removed by the edge filtering; skip those.
    if n not in ckn.nodes():
        continue

    edges = []
    all_neighbours = list(ckn_undirected.neighbors(n))
    print(f"{n}: first neighbours {len(all_neighbours)}")
    for neighbor in all_neighbours:
        # First hop: edges between the seed and this neighbour, in whichever
        # direction(s) they exist in `ckn`.
        if (neighbor, n) in ckn_edges:
            edges.append((neighbor, n))
        if (n, neighbor) in ckn_edges:
            edges.append((n, neighbor))

        # Second hop: edges between this neighbour and each of its own
        # neighbours. The seed is one of them, so seed-neighbour edges are
        # appended a second time here.
        for neighbour_2nd in ckn_undirected.neighbors(neighbor):
            if (neighbour_2nd, neighbor) in ckn_edges:
                edges.append((neighbour_2nd, neighbor))
            if (neighbor, neighbour_2nd) in ckn_edges:
                edges.append((neighbor, neighbour_2nd))

    # NOTE: this is the length of the collected list, which may contain
    # repeated edges; it is not a count of unique edges.
    print(len(edges))
    g = nx.edge_subgraph(ckn, edges).copy()
    networks_2hops_edges[n] = g
    print(n, g.number_of_nodes())

AT5G52310: first neighbours 4
36
AT5G52310 12
AT1G64280: first neighbours 31
358
AT1G64280 150


### Cytoscape again


In [25]:
# Send each seed's two-hop (edge-induced) network to Cytoscape, then style it
# and lay it out. `suids` maps each seed to the identifier returned by Cytoscape.
# NOTE(review): COLLECTION is reused as-is here; confirm that its name still
# describes the edge ranks these networks were built from.
suids = {}
for n in seed_list:
    # Seeds without a two-hop network are skipped instead of raising KeyError.
    if n not in networks_2hops_edges:
        continue

    g = networks_2hops_edges[n]
    suid = p4c.networks.create_network_from_networkx(
        g,
        title=f"{n} - second neighbours (edge induced)",
        collection=COLLECTION,
    )
    # Record the identifier (the dict was previously created but never filled).
    suids[n] = suid
    cytoscape_utils.apply_builtin_style(suid, 'ckn')
    p4c.layout_network("cose", network=suid)
    print(n, suid)

Applying default style...
Applying preferred layout
Applied CKN-default to 8646
AT5G52310 8646
Applying default style...
Applying preferred layout
Applied CKN-default to 8970
AT1G64280 8970


In [26]:
# END