## Radar Plots: number of instances of BioPAX classes in the BioPAX exports of several pathway databases

#### Import libraries

In [20]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

#### Get the results of the SPARQL queries

The results of the SPARQL queries generated in `01_count_biopax_classes.ipynb` are used to build the radar plots.

In [21]:
# Load the SPARQL query result file of each database
def _read_counts(csv_path):
    """Read one comma-separated counts file into a DataFrame.

    The first row of the file is skipped (presumably a header row — confirm
    against the files), no header is parsed, and the first column becomes
    the index.
    """
    return pd.read_csv(csv_path, sep=",", skiprows=1, header=None, index_col=0)


reactome_v65 = _read_counts("../Results/reactome_v65_counts_bp_classes.csv")
reactome_pc = _read_counts("../Results/reactome_pc_counts_bp_classes.csv")
panther_pc = _read_counts("../Results/panther_pc_counts_bp_classes.csv")
pathbank_pc = _read_counts("../Results/pathbank_pc_counts_bp_classes.csv")
humancyc_pc = _read_counts("../Results/humancyc_pc_counts_bp_classes.csv")
kegg_pc = _read_counts("../Results/kegg_pc_counts_bp_classes.csv")
pid_pc = _read_counts("../Results/pid_pc_counts_bp_classes.csv")
inoh_pc = _read_counts("../Results/inoh_pc_counts_bp_classes.csv")
netpath_pc = _read_counts("../Results/netpath_pc_counts_bp_classes.csv")

# All nine tables gathered in a single list
df_list = [
    reactome_v65,
    reactome_pc,
    panther_pc,
    pathbank_pc,
    humancyc_pc,
    kegg_pc,
    pid_pc,
    inoh_pc,
    netpath_pc,
]

In [22]:
# BioPAX classes represented on the radar plots, in plotting order.
# 'Pathway' is listed both first and last, so the label sequence ends on
# the label it starts with.
classes = [
    'Pathway',
    'PathwayStep',
    'BiochemicalReaction',
    'Rna',
    'Dna',
    'Protein',
    'SmallMolecule',
    'Interaction',
    'Pathway',
]
# Angles for the radar plot: the eight multiples of 360/8, from 45 up to 360
angles = [step * (360/8) for step in range(1, 9)]

In [23]:
# Place the SPARQL query results side by side in a single dataframe, one column
# per database. reactome_v65 is passed as a whole frame while the other tables
# contribute their column 1; ignore_index=True drops the original column labels,
# which are replaced by the database names right after.
concat_df_reduced = pd.concat(
    [
        reactome_v65,
        reactome_pc[1],
        panther_pc[1],
        pathbank_pc[1],
        humancyc_pc[1],
        kegg_pc[1],
        pid_pc[1],
        inoh_pc[1],
        netpath_pc[1],
    ],
    axis=1,
    ignore_index=True,
)
concat_df_reduced.columns = [
    "reactome_v65",
    "reactome_pc",
    "panther_pc",
    "pathbank_pc",
    "humancyc_pc",
    "kegg_pc",
    "pid_pc",
    "inoh_pc",
    "netpath_pc",
]
# NOTE(review): row labels are assigned by position, so this assumes the rows
# of the concatenated table come in exactly this order — confirm against the CSVs.
concat_df_reduced.index = [
    "BiochemicalReaction",
    "Pathway",
    "PathwayStep",
    "SmallMolecule",
    "Rna",
    "Interaction",
    "Protein",
    "Dna",
]
# Export the non-normalized counts to CSV
concat_df_reduced.to_csv("../Results/Counts_BP_Classes_All_Databases.csv", sep=",")

# Normalize each row (BioPAX class) by its maximal value across the databases,
# then arrange the rows in the order given by `classes`
concat_df_reduced_norm = concat_df_reduced.div(
    concat_df_reduced.max(axis=1), axis=0
).reindex(classes)
print(concat_df_reduced_norm)


                     reactome_v65  reactome_pc  panther_pc  pathbank_pc  \
Pathway                       1.0     0.849981    0.110363     0.484474   
PathwayStep                   1.0     0.831847    0.000000     0.000000   
BiochemicalReaction           1.0     0.828336    0.151064     0.137467   
Rna                           1.0     0.477064    0.000000     0.079511   
Dna                           1.0     0.601758    0.000000     0.006085   
Protein                       1.0     0.839900    0.168789     0.078075   
SmallMolecule                 1.0     0.766551    0.372594     0.640108   
Interaction                   1.0     0.833015    0.253722     0.224414   
Pathway                       1.0     0.849981    0.110363     0.484474   

                     humancyc_pc   kegg_pc    pid_pc   inoh_pc  netpath_pc  
Pathway                 0.090535  0.045642  0.278713  0.289562    0.010101  
PathwayStep             0.063510  0.000000  0.000000  0.197147    0.000000  
BiochemicalReactio

In [24]:
# Get the normalized counts of instances for each database into a list
def _closed_counts(database):
    """Return the normalized counts of one database as a closed radar line.

    Parameters
    ----------
    database : str
        Name of a column of ``concat_df_reduced_norm``.

    Returns
    -------
    list
        The column values in row order. The first value is repeated at the
        end only when the row labels do not already end on the label they
        start with.
    """
    values = concat_df_reduced_norm[database].to_list()
    labels = concat_df_reduced_norm.index
    # concat_df_reduced_norm is reindexed with `classes`; when that label list
    # already repeats its first entry at the end, the values are closed as-is
    # and appending the first value again would leave `r` one element longer
    # than `theta` in the Scatterpolar traces.
    if len(values) > 1 and labels[0] != labels[-1]:
        values.append(values[0])
    return values


counts_reactome_v65 = _closed_counts('reactome_v65')
counts_reactome_pc = _closed_counts('reactome_pc')
counts_panther = _closed_counts('panther_pc')
counts_pathbank = _closed_counts('pathbank_pc')
counts_humancyc = _closed_counts('humancyc_pc')
counts_kegg = _closed_counts('kegg_pc')
counts_pid = _closed_counts('pid_pc')
counts_inoh = _closed_counts('inoh_pc')
counts_netpath = _closed_counts('netpath_pc')

## Generate the radar plots

Here we generate the radar plots for the pathway databases that are maintained: Reactome (standalone export version 65 and PathwayCommons version), Panther (PathwayCommons), PathBank (PathwayCommons), HumanCyc (PathwayCommons), KEGG (PathwayCommons).

In [25]:
# 2 x 3 grid of polar subplots, one radar plot per database export
fig = make_subplots(
    rows=2,
    cols=3,
    start_cell="top-left",
    horizontal_spacing=0.1,
    vertical_spacing=0.1,
    specs=[[{"type": "scatterpolar"} for _ in range(3)] for _ in range(2)],
)

# (legend name, radial values) of each trace, listed row by row
main_radar_traces = [
    ("Reactome BioPAX v65", counts_reactome_v65),
    ("Reactome PC", counts_reactome_pc),
    ("Panther PC", counts_panther),
    ("PathBank PC", counts_pathbank),
    ("HumanCyc PC", counts_humancyc),
    ("KEGG PC", counts_kegg),
]
for panel_index, (trace_name, trace_values) in enumerate(main_radar_traces):
    # Fill the grid left to right, top to bottom (3 columns per row)
    panel_row, panel_col = divmod(panel_index, 3)
    fig.add_trace(
        go.Scatterpolar(name=trace_name, r=trace_values, theta=classes),
        panel_row + 1,
        panel_col + 1,
    )

fig.update_layout(width=1600, height=1300, margin=dict(t=150))
# update_polars applies to every polar subplot, whereas layout.polar only
# addresses the first one: set the shared [0, 1] radial range and the axis
# visibility here so that all panels are configured identically.
fig.update_polars(radialaxis=dict(range=[0, 1], visible=True))
fig.write_image("../Results/radar_plot_bp_classes_updated_databases.pdf")
fig.write_image("../Results/radar_plot_bp_classes_updated_databases.svg")
fig.show()

## Supplementary Figure

The supplementary figure represents the radar plots of the nine pathway databases listed on PathwayCommons: Reactome (standalone export version 65 and PathwayCommons version), Panther (PathwayCommons), PathBank (PathwayCommons), HumanCyc (PathwayCommons), KEGG (PathwayCommons), PID (PathwayCommons), INOH (PathwayCommons) and NetPath (PathwayCommons)

In [26]:
# 3 x 3 grid of polar subplots, one radar plot per database export
fig2 = make_subplots(
    rows=3,
    cols=3,
    start_cell="top-left",
    horizontal_spacing=0.1,
    vertical_spacing=0.1,
    specs=[[{"type": "scatterpolar"} for _ in range(3)] for _ in range(3)],
)

# (legend name, radial values) of each trace, listed row by row
all_radar_traces = [
    ("Reactome BioPAX v65", counts_reactome_v65),
    ("Reactome PC", counts_reactome_pc),
    ("Panther PC", counts_panther),
    ("PathBank PC", counts_pathbank),
    ("HumanCyc PC", counts_humancyc),
    ("KEGG PC", counts_kegg),
    ("PID PC", counts_pid),
    ("INOH PC", counts_inoh),
    ("NetPath PC", counts_netpath),
]
for panel_index, (trace_name, trace_values) in enumerate(all_radar_traces):
    # Fill the grid left to right, top to bottom (3 columns per row)
    panel_row, panel_col = divmod(panel_index, 3)
    fig2.add_trace(
        go.Scatterpolar(name=trace_name, r=trace_values, theta=classes),
        panel_row + 1,
        panel_col + 1,
    )

fig2.update_layout(width=1600, height=1300, margin=dict(t=150))
# update_polars applies to every polar subplot, whereas layout.polar only
# addresses the first one: set the shared [0, 1] radial range and the axis
# visibility here so that all panels are configured identically.
fig2.update_polars(radialaxis=dict(range=[0, 1], visible=True))
fig2.write_image("../Results/radar_plot_bp_classes_all_databases.pdf")
fig2.write_image("../Results/radar_plot_bp_classes_all_databases.svg")
fig2.show()