#### The goal of this notebook is to provide an evaluation overview.

In [6]:
import plotly.express as px
import plotly.io as pio
import pandas as pd
import numpy as np
# Use plotly's "seaborn" template as the default for figures created in this notebook.
pio.templates.default = "seaborn"

In [7]:
# Read the evaluation sheet, duplicate "Query" into a new "Topic" column, and
# give the unnamed first column the name "Index".
results = (
    pd.read_excel("results.xlsx")
    .assign(Topic=lambda sheet: sheet["Query"].copy())
    .rename(columns={"Unnamed: 0": "Index"})
)
# Blank the topic on every second row (labels 1, 3, 5, ...) so the forward
# fill below repeats the preceding row's topic there.
results.loc[1::2, "Topic"] = np.nan

# Plotting frame: forward-fill every gap, then drop rows whose Recall is exactly 0.
plt_df = results.ffill().loc[lambda filled: filled["Recall"] != 0]


In [8]:
# Metrics drawn as grouped bars. Each column is listed exactly once: a repeated
# entry (the original list had "Cosine F2" twice) risks that metric being
# plotted or aggregated twice.
plot_metrics = [
    "Recall",
    "Cosine Precision", "Cosine F2",
    "Cluster Precision", "Cluster F2",
    "MVEE Precision", "MVEE F2",
    "Hull Precision", "Hull F2",
]

# One bar group per topic, one facet row per value of the "Index" column.
fig = px.histogram(plt_df, x="Topic", y=plot_metrics,
                   title="Metrics for Predicted and Baseline Queries", barmode="group",
                   facet_row="Index", facet_row_spacing=0.1)

# Reverse the order of the points inside every trace.
fig.for_each_trace(lambda t: t.update(x=t.x[::-1], y=t.y[::-1]))
# Keep only the text after the last "=" in each facet annotation.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
# Drop the y-axis titles and put a tick every 0.2 on both facet rows.
fig.update_layout(yaxis1_title="", yaxis2_title="", yaxis1_dtick=0.2, yaxis2_dtick=0.2)
fig.show()

In [9]:
# Metric columns compared between the two query variants; defined once and
# reused for both selections and for the column labels of the result.
metric_cols = [
    "Recall",
    "Cosine Precision", "Cosine F2",
    "Cluster Precision", "Cluster F2",
    "MVEE Precision", "MVEE F2",
    "Hull Precision", "Hull F2",
]

# Going through .values discards the index, so the subtraction pairs rows purely
# by position: both subsets must list their rows in the same order.
predicted = results.loc[results["Index"] == "Predicted", metric_cols].values
baseline = results.loc[results["Index"] == "Baseline", metric_cols].values
# Guard against numpy silently broadcasting mismatched row counts.
assert predicted.shape == baseline.shape, (
    f"Predicted rows {predicted.shape} do not match Baseline rows {baseline.shape}"
)
metric_diff = (predicted - baseline).astype(float)

# Row labels: the non-missing entries of "Topic". Dropping NaN on that column
# alone (instead of results.dropna()) keeps the labels aligned even when some
# other column contains a missing value.
topics = results["Topic"].dropna()


def _shade_negative(x):
    """Return the CSS for one cell: dark red below -0.5, lighter red below 0, else none."""
    if x < -0.5:
        return 'background-color: #6b0801'
    if x < 0:
        return 'background-color: #a82b22'
    return ''


# From here on diff_df is the Styler wrapping the difference table.
diff_df = (
    pd.DataFrame(np.round(metric_diff, 3), columns=metric_cols, index=topics)
    .style.map(_shade_negative)
    .format("{:.3f}")
    .set_caption("Difference in metrics between predicted and baseline (Negative means the baseline is better)")
)

# First view: everything except the last 7 rows.
# NOTE(review): Styler.hide appears to update the styler in place, and the -7
# here is a hard-coded row count -- confirm it still splits the table as
# intended when results.xlsx changes.
display(diff_df.hide(subset=diff_df.index[-7:]))

Unnamed: 0_level_0,Recall,Cosine Precision,Cosine F2,Cluster Precision,Cluster F2,MVEE Precision,MVEE F2,Hull Precision,Hull F2
Topic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Robotic Arthroplasty,0.0,-0.761,-0.56,-0.528,-0.49,-0.703,-0.55,-0.64,-0.56
Soft Robotics,0.111,-0.134,-0.13,-0.147,-0.09,-0.297,-0.37,-0.197,-0.3
Crop Yield Prediction,0.109,-0.28,-0.21,-0.118,-0.23,-0.294,-0.67,-0.265,-0.55
Synthetic Biology,0.31,-0.05,0.03,-0.185,0.08,0.656,-0.29,0.335,-0.04
Resilience in Business and management,0.185,-0.022,0.06,-0.838,0.17,0.103,0.29,0.046,0.18
Cervical Myelopathy,0.085,-0.298,-0.33,-0.299,-0.25,-0.086,-0.7,-0.055,-0.64
Drones in Agriculture,0.48,-0.184,0.22,0.298,0.16,0.133,0.32,0.061,0.22
Tourism Growth Nexus,0.0,-0.562,-0.07,0.31,-0.04,0.0,0.0,0.0,0.0
Sustainable Biofuel Economy,0.26,-0.122,0.19,0.733,0.0,0.636,0.06,0.402,0.26
Perovskite Solar Cells Stability,0.103,-0.237,-0.46,-0.213,-0.43,-0.005,-0.43,0.042,-0.48


In [10]:
# Second view of the difference table: hide the rows labelled by the first 15
# index entries; the styler returned by hide() is rendered as the cell output.
# NOTE(review): 15 is a hard-coded row count -- confirm it matches the table size.
leading_topics = diff_df.index[:15]
diff_df.hide(subset=leading_topics)

Unnamed: 0_level_0,Recall,Cosine Precision,Cosine F2,Cluster Precision,Cluster F2,MVEE Precision,MVEE F2,Hull Precision,Hull F2
Topic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Data Stream Processing Latency,-0.073,-0.008,-0.07,-0.321,-0.1,-0.132,-0.11,-0.083,-0.1
Business Process Meta Models,0.269,0.039,0.19,-0.082,0.09,0.148,0.11,0.172,0.17
Multicore Performance Prediction,0.0,0.217,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cloud Migration,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Software Fault Prediction Metrics,0.562,-0.596,0.0,0.061,0.33,-0.018,-0.08,-0.036,-0.1
Software Defect Prediction,-0.129,-0.584,-0.28,-0.446,-0.21,-0.619,-0.76,-0.498,-0.76
