## Data Setup

In [None]:
#!pip install panel hvplot gdown datashader jupyter_bokeh
## Run the above if you are not using the Saturn Cloud Environment
import os
import panel as pn 
import pandas as pd 
import holoviews as hv 
import hvplot.pandas
import gdown

# Google Drive location of the PMDB graph-network dataframe and the local
# filename it is cached under.
url = "https://drive.google.com/uc?id=1BC-gHxPwg5vniOfYYAZMdnIRbQMhjJZx" # Downloading the PMDB Graph Network dataframe
graph_file = "PMDB_graph_data.pq"

# Download only when no local copy exists, so re-running the notebook reuses
# the file already on disk.
if not os.path.exists(graph_file):
    gdown.download(url, graph_file, quiet=True) # This file is ~ 1 GB, so it can take some time to download.
    # Fail here with a clear message, rather than later inside pd.read_parquet,
    # if the download did not leave a file on disk.
    if not os.path.exists(graph_file):
        raise FileNotFoundError(
            f"Download of {graph_file!r} from {url} did not produce a file; "
            "check network access and the Google Drive link, then re-run this cell."
        )

In [None]:
# Read the downloaded parquet file into the dataframe used by the cells below.
df = pd.read_parquet(path=graph_file)

## Level 1: Overview Graph

In [None]:
# Set up the overview network graph for viewing
# Base layer: every row of df as a rasterized scatter, colored by 'bray_all'.
all_graph1 = df.hvplot.scatter(
    x='x', y='y',
    rasterize=True,
    width=1000, height=1000,
    color='bray_all',
    colorbar=True,
)
# Highlight layer: rows with clu_deg_sum > 2 drawn as individual red points
# (not rasterized), with 'full_id' added to the hover tooltip.
all_graph2 = df.query('clu_deg_sum>2').hvplot.scatter(
    x='x', y='y',
    rasterize=False,
    width=1000, height=1000,
    color='#E50000',
    hover_cols=['full_id'],  # list form, as documented for hover_cols
)

In [None]:
# Overlay the red highlight layer on top of the rasterized overview.
all_graph1 * all_graph2

In [None]:
# Number of rows in the full dataframe.
df.shape[0]

**Because of the graph's size (10+ million points), rasterizing and zooming can take some time. For this reason, we recommend selecting a graph area manually and re-rendering only that area.**

## Level 2: Zoom Into Graph Area

In [None]:
# SELECT GRAPH SECTION
# Bounds of the window to zoom into; both comparisons below are strict,
# so points lying exactly on an edge are excluded.
x_min, x_max = 4, 9
y_min, y_max = 8, 12
# '@name' makes query read the bounds directly from the variables above.
df_subset = df.query('@x_min < x < @x_max and @y_min < y < @y_max')

In [None]:
# Number of rows that fall inside the selected window.
df_subset.shape[0]

In [None]:
# Level-2 view: the same two layers as the overview, restricted to df_subset.
# Base layer: rasterized scatter colored by 'bray_all'.
l2_graph1 = df_subset.hvplot.scatter(
    x='x', y='y',
    rasterize=True,
    dynamic=True,
    width=1000, height=1000,
    color='bray_all',
    colorbar=True,
)
# Highlight layer: rows with clu_deg_sum > 2 drawn as individual red points
# (not rasterized), with 'full_id' added to the hover tooltip.
l2_graph2 = df_subset.query('clu_deg_sum>2').hvplot.scatter(
    x='x', y='y',
    rasterize=False,
    width=1000, height=1000,
    color='#E50000',
    hover_cols=['full_id'],  # list form, as documented for hover_cols
)

In [None]:
# Overlay the red highlight layer on top of the rasterized level-2 view.
l2_graph1 * l2_graph2

## Level 3: Filter Genes of Interest

**Once you have selected an area of genes, we recommend manually analyzing genes of interest and looking them up in the PMDB Database!**

**Using pandas search terms, it is fairly simple to look for genes with**
* **Predicted annotations of interest**
* **High meta-omic observance similarity (Bray-Curtis or Jaccard) to known plastizymes**

In [None]:
# SELECT SUBGRAPH SECTION
# Bounds of the smaller window to inspect; both comparisons below are strict,
# so points lying exactly on an edge are excluded.
x2_min, x2_max = 7.65, 8.36
y2_min, y2_max = 10.13, 11.56
# '@name' makes query read the bounds directly from the variables above.
df_filter = df.query('@x2_min < x < @x2_max and @y2_min < y < @y2_max')

In [None]:
# An example search
# Rows of df_filter whose 'product' text contains 'oxidase', ordered from the
# highest to the lowest 'bray_all' value.
(df_filter
 .query(
     # na=False: a missing product counts as "no match" instead of leaving
     # NaN in the boolean mask, which query cannot filter on.
     "product.str.contains('oxidase', na=False)",
     # Evaluate with the Python engine, which handles .str accessor calls.
     engine='python',
 )
 .sort_values("bray_all", ascending=False)
)

**The ref_90 or full_id columns can be used for searches within the PMDB Database.**

**If you would like to manually look through the annotations on the graph also, we recommend doing this with a smaller subset of the protein network graph.**

In [None]:
# Level-3 view: both layers are drawn point-by-point so every point can show
# a hover tooltip.
l3_graph1 = df_filter.hvplot.scatter(
    x='x', y='y',
    rasterize=False,  # Turning off raster mode is required to view the annotations
    width=1000, height=1000,
    color='bray_all',
    hover_cols=['full_id'],  # Adding this line turns on the annotation in the tooltip (list form, as documented).
    colorbar=True,
)
# Highlight layer: rows with clu_deg_sum > 2 drawn in red on top.
l3_graph2 = df_filter.query('clu_deg_sum>2').hvplot.scatter(
    x='x', y='y',
    rasterize=False,
    width=1000, height=1000,
    color='#E50000',
    hover_cols=['full_id'],  # list form, as documented for hover_cols
)

In [None]:
# Overlay the red highlight layer on top of the level-3 scatter.
l3_graph1 * l3_graph2