## Radar Plots: number of instances of BioPAX classes in the BioPAX exports of several pathway databases

#### Import libraries

In [21]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

#### Get the results of the SPARQL queries

The results of the SPARQL queries generated in `01_count_biopax_classes.ipynb` are used to build the radar plots.

In [22]:
# Load the SPARQL query result file of each database export
def _read_counts(csv_path):
    """Read one per-class count file into a DataFrame indexed by its first column.

    The first line of the file is skipped (presumably the header row written by
    the SPARQL export) and no header is parsed, so the remaining columns carry
    integer labels; the first value column is therefore addressed as ``1``.
    """
    return pd.read_csv(csv_path, sep=",", skiprows=1, header=None, index_col=0)


reactome_standalone = _read_counts("../Results/reactome_standalone_biopax_counts_bp_classes.csv")
reactome_pc = _read_counts("../Results/reactome_pc_counts_bp_classes.csv")
panther_standalone = _read_counts("../Results/panther_standalone_biopax_counts_bp_classes.csv")
panther_pc = _read_counts("../Results/panther_pc_counts_bp_classes.csv")
pathbank_pc = _read_counts("../Results/pathbank_pc_counts_bp_classes.csv")
humancyc_pc = _read_counts("../Results/humancyc_pc_counts_bp_classes.csv")
kegg_pc = _read_counts("../Results/kegg_pc_counts_bp_classes.csv")
pid_pc = _read_counts("../Results/pid_pc_counts_bp_classes.csv")
inoh_pc = _read_counts("../Results/inoh_pc_counts_bp_classes.csv")
netpath_pc = _read_counts("../Results/netpath_pc_counts_bp_classes.csv")

# Every loaded export, in the same order as the columns of the concatenated table.
# panther_standalone was previously loaded but left out of this list.
df_list = [
    reactome_standalone,
    reactome_pc,
    panther_standalone,
    panther_pc,
    pathbank_pc,
    humancyc_pc,
    kegg_pc,
    pid_pc,
    inoh_pc,
    netpath_pc,
]

In [23]:
# BioPAX classes drawn on the radar plots, in plotting order.
# 'Pathway' appears at both ends of the list.
classes = [
    'Pathway',
    'PathwayStep',
    'BiochemicalReaction',
    'Rna',
    'Dna',
    'Protein',
    'SmallMolecule',
    'Interaction',
    'Pathway',
]
# Eight evenly spaced angles in degrees (45, 90, ..., 360)
angles = [(360 / 8) * step for step in range(1, 8 + 1)]

In [24]:
# Put the counts of every export side by side in one table.
# The first entry contributes the whole reactome_standalone frame; every other
# export contributes its column labelled 1.
concat_df_reduced = pd.concat(
    [
        reactome_standalone,
        reactome_pc[1],
        panther_standalone[1],
        panther_pc[1],
        pathbank_pc[1],
        humancyc_pc[1],
        kegg_pc[1],
        pid_pc[1],
        inoh_pc[1],
        netpath_pc[1],
    ],
    axis=1,
    ignore_index=True,
)
concat_df_reduced.columns = [
    "reactome_standalone",
    "reactome_pc",
    "panther_standalone",
    "panther_pc",
    "pathbank_pc",
    "humancyc_pc",
    "kegg_pc",
    "pid_pc",
    "inoh_pc",
    "netpath_pc",
]
# NOTE(review): row labels are assigned by position, which assumes the rows of the
# concatenated table come out in exactly this class order - confirm against the CSV files.
concat_df_reduced.index = [
    "BiochemicalReaction",
    "Pathway",
    "PathwayStep",
    "SmallMolecule",
    "Rna",
    "Interaction",
    "Protein",
    "Dna",
]
# Save the raw (non-normalized) counts
concat_df_reduced.to_csv("../Results/Counts_BP_Classes_All_Databases.csv", sep=",")

# Scale every row by its largest value (the highest instance count observed for that
# BioPAX class across the exports), then order the rows as listed in `classes`.
concat_df_reduced_norm = (
    concat_df_reduced
    .div(concat_df_reduced.max(axis=1), axis=0)
    .reindex(classes)
)
print(concat_df_reduced_norm)


                     reactome_standalone  reactome_pc  panther_standalone  \
Pathway                         0.724247     0.712625            0.085578   
PathwayStep                     0.662164     0.650322            0.000000   
BiochemicalReaction             1.000000     0.981917            0.163939   
Rna                             1.000000     0.992447            0.000000   
Dna                             1.000000     0.988327            0.000000   
Protein                         1.000000     0.991037            0.190418   
SmallMolecule                   0.892480     0.888568            0.379891   
Interaction                     0.791676     0.775048            0.211987   
Pathway                         0.724247     0.712625            0.085578   

                     panther_pc  pathbank_pc  humancyc_pc   kegg_pc    pid_pc  \
Pathway                0.085578     1.000000     0.094559  0.051506  0.196778   
PathwayStep            0.000000     1.000000     0.079528  0.000000

In [25]:
# Collect the normalized counts of each database export into a list usable as the
# radial values (r) of a radar trace whose angular labels (theta) are `classes`.
def _closed_counts(column):
    """Return the values of one column of ``concat_df_reduced_norm`` as a closed polygon.

    ``concat_df_reduced_norm`` is reindexed on ``classes``, and ``classes`` ends with
    the same label it starts with, so the column normally already ends with the
    closing value. Appending the first value unconditionally produced one more
    radial value than there are labels in ``classes``. The first value is therefore
    appended only when the list is shorter than ``classes``, i.e. when it has not
    been closed yet.
    """
    values = concat_df_reduced_norm[column].to_list()
    if values and len(values) < len(classes):
        # Duplicate the first value to close the line
        values.append(values[0])
    return values


counts_reactome_standalone = _closed_counts('reactome_standalone')
counts_panther_standalone = _closed_counts('panther_standalone')

counts_reactome_pc = _closed_counts('reactome_pc')
counts_panther = _closed_counts('panther_pc')
counts_pathbank = _closed_counts('pathbank_pc')
counts_humancyc = _closed_counts('humancyc_pc')
counts_kegg = _closed_counts('kegg_pc')
counts_pid = _closed_counts('pid_pc')
counts_inoh = _closed_counts('inoh_pc')
counts_netpath = _closed_counts('netpath_pc')

## Generate the radar plots

Here we generate the radar plots for the pathway databases that are still maintained: Reactome (standalone export version 87 and PathwayCommons version), PANTHER (standalone export and PathwayCommons version), PathBank (PathwayCommons), HumanCyc (PathwayCommons) and KEGG (PathwayCommons).

In [26]:
# 2 x 4 grid of polar subplots; the last cell of the second row is left empty
fig = make_subplots(
    rows=2,
    cols=4,
    start_cell="top-left",
    horizontal_spacing=0.1,
    vertical_spacing=0.1,
    specs=[
        [{"type": "scatterpolar"} for _ in range(4)],
        [{"type": "scatterpolar"} for _ in range(3)] + [None],
    ],
)

# One radar trace per export: (legend name, radial values, grid row, grid column)
main_panels = [
    ("Reactome Standalone BioPAX", counts_reactome_standalone, 1, 1),
    ("Reactome PC", counts_reactome_pc, 1, 2),
    ("Panther Standalone BioPAX", counts_panther_standalone, 1, 3),
    ("PANTHER Pathway PC", counts_panther, 1, 4),
    ("PathBank PC", counts_pathbank, 2, 1),
    ("HumanCyc PC", counts_humancyc, 2, 2),
    ("KEGG Pathway PC", counts_kegg, 2, 3),
]
for panel_name, panel_values, panel_row, panel_col in main_panels:
    fig.add_trace(
        go.Scatterpolar(name=panel_name, r=panel_values, theta=classes),
        row=panel_row,
        col=panel_col,
    )

# Figure size and top margin
fig.update_layout(
    margin=dict(t=150),
    width=1600,
    height=1300,
    polar=dict(radialaxis=dict(visible=True)),
)
# Same radial scale on every subplot (the values are normalized to [0, 1])
fig.update_polars(radialaxis=dict(range=[0, 1]))

# Export the figure as vector graphics
fig.write_image("../Results/radar_plot_bp_classes_updated_databases.pdf")
fig.write_image("../Results/radar_plot_bp_classes_updated_databases.svg")

# Render the figure in the notebook
fig.show()


## Supplementary Figure

The supplementary figure shows the radar plots of the ten BioPAX exports of the pathway databases listed on PathwayCommons: Reactome (standalone export version 87 and PathwayCommons version), PANTHER (standalone export and PathwayCommons version), PathBank (PathwayCommons), HumanCyc (PathwayCommons), KEGG (PathwayCommons), PID (PathwayCommons), INOH (PathwayCommons) and NetPath (PathwayCommons).

In [27]:
# 3x4 grid (3 rows, 4 columns) for 10 plots.
# The two grid cells that receive no trace are declared as None, as in the first
# figure, so that no polar subplot is allocated for them.
fig2 = make_subplots(
    rows=3,
    cols=4,
    start_cell="top-left",
    horizontal_spacing=0.1,
    vertical_spacing=0.1,
    specs=[
        [{"type": "scatterpolar"} for _ in range(4)],
        [{"type": "scatterpolar"} for _ in range(4)],
        [{"type": "scatterpolar"}, {"type": "scatterpolar"}, None, None],
    ],
)

# One radar trace per export: (legend name, radial values, grid row, grid column)
all_panels = [
    ("Reactome Standalone BioPAX", counts_reactome_standalone, 1, 1),
    ("Reactome PC", counts_reactome_pc, 1, 2),
    ("Panther Standalone BioPAX", counts_panther_standalone, 1, 3),
    ("PANTHER Pathway PC", counts_panther, 1, 4),
    ("PathBank PC", counts_pathbank, 2, 1),
    ("HumanCyc PC", counts_humancyc, 2, 2),
    ("KEGG Pathway PC", counts_kegg, 2, 3),
    ("PID PC", counts_pid, 2, 4),
    ("INOH PC", counts_inoh, 3, 1),
    ("NetPath PC", counts_netpath, 3, 2),
]
for panel_name, panel_values, panel_row, panel_col in all_panels:
    fig2.add_trace(
        go.Scatterpolar(name=panel_name, r=panel_values, theta=classes),
        row=panel_row,
        col=panel_col,
    )

# Figure size and top margin
fig2.update_layout(
    margin=dict(t=150),
    width=1600,
    height=1300,
)

# Show the radial axis with the same [0, 1] range on every polar subplot.
# (Passing `polar=...` to update_layout would only address the first subplot.)
fig2.update_polars(radialaxis=dict(visible=True, range=[0, 1]))

# Save as PDF and SVG
fig2.write_image("../Results/radar_plot_bp_classes_all_databases.pdf")
fig2.write_image("../Results/radar_plot_bp_classes_all_databases.svg")

# Display the plot
fig2.show()
