#### The goal of this notebook is to provide an evaluation overview.

In [38]:
import plotly.express as px
import plotly.io as pio
import pandas as pd
import numpy as np
# Make "seaborn" the default Plotly template for figures created in this notebook.
pio.templates.default = "seaborn"
# Shared layout keyword arguments, meant to be unpacked into
# `fig.update_layout(**PLOT_CONFIGS)`.
PLOT_CONFIGS = {
    # Title and global font
    "title_x": 0.5,
    "title_font_size": 15,
    "title_font_family": "Modern Computer",
    "font_family": "Modern Computer",
    # Axis titles and legend visibility
    "xaxis_title": "",
    "yaxis_title": "",
    "showlegend": True,
    "legend_title": "",
    # Font sizes and tick rotation
    "xaxis_tickfont_size": 8,
    "yaxis_tickfont_size": 8,
    "legend_font_size": 10,
    "xaxis_title_font_size": 12,
    "yaxis_title_font_size": 12,
    "xaxis_tickangle": 45,
    # Figure size in pixels
    "width": 800,
    "height": 500,
    # Horizontal legend, centred, placed below the plot area
    "legend_orientation": "h",
    "legend_yanchor": "bottom",
    "legend_y": -0.8,
    "legend_xanchor": "center",
    "legend_x": 0.5,
}

In [39]:
# Load the evaluation sheet, add a "Topic" column that starts as a copy of
# "Query", and give the unnamed first column the name "Index".
results = (
    pd.read_excel("results.xlsx")
    .assign(Topic=lambda frame: frame["Query"].copy())
    .rename(columns={"Unnamed: 0": "Index"})
)
# Blank "Topic" on every second row (labels 1, 3, 5, ...) so that the forward
# fill below copies the value from the row directly above.
results.loc[1::2, "Topic"] = np.nan

# Plotting frame: same data with all gaps forward-filled.
plt_df = results.ffill()
# Optional filter, currently disabled:
# plt_df = plt_df[plt_df["Recall"] != 0]


In [None]:
# Metric columns drawn for each topic. Each name must be followed by a comma:
# Python concatenates adjacent string literals, and a missing comma previously
# merged "Hull Precision" and "Cosine F2" into one non-existent column name.
metric_cols = [
    "Recall",
    "Cosine Precision", "Cluster Precision", "MVEE Precision", "Hull Precision",
    "Cosine F2", "Cluster F2", "MVEE F2", "Hull F2",
]

# Grouped bars per topic, one facet row per value of the "Index" column.
fig = px.histogram(plt_df, x="Topic", y=metric_cols,
                   title="Metrics for Predicted and Baseline Queries", barmode="group",
                   facet_row="Index", facet_row_spacing=0.1)

# Reverse the order of the points inside every trace.
fig.for_each_trace(lambda t: t.update(x=t.x[::-1], y=t.y[::-1]))
# Keep only the text after the last "=" in each facet annotation.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
# fig.update_layout(yaxis1_dtick=0.2, yaxis2_dtick=0.2, yaxis2_title="",
#                    **PLOT_CONFIGS)
# fig.write_image("LitQEval-report/pics/all-metrics-1.pdf")
fig.show()

In [54]:
# Pairwise correlation of the four precision metrics, computed only on rows
# whose "Cluster Precision" is not exactly 1.
precision_cols = ["Cosine Precision", "Cluster Precision", "MVEE Precision", "Hull Precision"]
cluster_precision_not_one = plt_df["Cluster Precision"] != 1
corr = plt_df.loc[cluster_precision_not_one, precision_cols].corr()

corr_fig = px.imshow(corr, title="Correlation between Precision Metrics")
corr_fig.show()

In [None]:
# Metric columns to compare. Each name must be followed by a comma: Python
# concatenates adjacent string literals, and a missing comma previously merged
# "Hull Precision" and "Cosine F2" into one non-existent column name.
metric_cols = [
    "Recall",
    "Cosine Precision", "Cluster Precision", "MVEE Precision", "Hull Precision",
    "Cosine F2", "Cluster F2", "MVEE F2", "Hull F2",
]


def _negative_highlight(value):
    """Return the cell CSS: dark red below -0.5, lighter red below 0, otherwise none."""
    if value < -0.5:
        return 'background-color: #6b0801'
    if value < 0:
        return 'background-color: #a82b22'
    return ''


# Row-wise difference (Predicted - Baseline). The subtraction is positional,
# so it assumes the i-th "Predicted" row and the i-th "Baseline" row belong to
# the same topic -- TODO confirm against the layout of results.xlsx.
predicted = results.loc[results["Index"] == "Predicted", metric_cols].to_numpy()
baseline = results.loc[results["Index"] == "Baseline", metric_cols].to_numpy()
assert predicted.shape == baseline.shape, (
    "Predicted and Baseline must have the same number of rows to be compared pairwise"
)
diff_values = np.round((predicted - baseline).astype(float), 3)

# One label per topic. Only NaNs in "Topic" itself are dropped, so a missing
# value in an unrelated column cannot shorten the index.
topics = results["Topic"].dropna()

diff_df = (
    pd.DataFrame(diff_values, columns=metric_cols, index=topics)
    .style
    .map(_negative_highlight)
    .format("{:.3f}")
    .set_caption("Difference in metrics between predicted and baseline (Negative means the baseline is better)")
)
# display(diff_df.hide(subset=diff_df.index[-7:]))
diff_df

Unnamed: 0_level_0,Recall,Cosine Precision,Cosine F2,Cluster Precision,Cluster F2,MVEE Precision,MVEE F2,Hull Precision,Hull F2
Topic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Robotic Arthroplasty,0.0,-0.761,-0.56,-0.528,-0.49,-0.707,-0.57,-0.679,-0.57
Soft Robotics,0.111,-0.134,-0.13,-0.147,-0.09,-0.292,-0.4,-0.152,-0.3
Crop Yield Prediction,0.109,-0.28,-0.21,-0.118,-0.23,-0.261,-0.74,-0.234,-0.53
Synthetic Biology,0.31,-0.05,0.03,-0.185,0.08,0.637,-0.42,0.51,-0.33
Resilience in Business and management,0.185,-0.022,0.06,-0.838,0.17,0.15,0.33,0.071,0.24
Cervical Myelopathy,0.085,-0.298,-0.33,-0.299,-0.25,-0.061,-0.7,-0.017,-0.59
Drones in Agriculture,0.48,-0.184,0.22,0.298,0.16,0.069,0.24,0.028,0.12
Tourism Growth Nexus,0.0,-0.562,-0.07,0.31,-0.04,0.0,0.0,0.0,0.0
Sustainable Biofuel Economy,0.26,-0.122,0.19,0.733,0.0,0.513,0.15,0.343,0.32
Perovskite Solar Cells Stability,0.103,-0.237,-0.46,-0.213,-0.43,0.051,-0.47,0.082,-0.54
