#### The goal of this notebook is provide an evaluation overview.

In [808]:
import plotly.express as px
import plotly.io as pio
import pandas as pd
import numpy as np

pio.templates.default = "seaborn"
PLOT_CONFIGS = dict(
    title_x=0.5, title_font_size=25, title_font_family="Modern Computer", font_family="Modern Computer",
    xaxis_title="", yaxis_title="", showlegend=True, legend_title="",
    xaxis_tickfont_size=15, yaxis_tickfont_size=13, legend_font_size=15,
    xaxis_title_font_size=12, yaxis_title_font_size=12, xaxis_tickangle=45,
    width=800, height=500,
    legend_orientation="h", legend_yanchor="bottom", legend_y=-1.5, legend_xanchor="center", legend_x=0.5
    )
COLORS = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
    '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
    '#ff33cc', '#00cc99', '#ffcc00', '#3399ff', '#9933cc',
    '#66ff66', '#ff0066', '#669999', '#996633', '#00cccc'
]

In [809]:
results = pd.read_excel("results.xlsx")
results = results.rename(columns={"Unnamed: 0": "Index"})
predicted_query = results[results["Index"] == "Predicted"]["Query"].values
baseline_query = results[results["Index"] == "Baseline"]["Query"].values
results.loc[results["Index"] == "Predicted", "Query"] = baseline_query
results.loc[results["Index"] == "Baseline", "Query"] = predicted_query

results["Topic"] = results["Query"].copy()
results.loc[1::2, "Topic"] = np.nan
results.loc[results["Index"] == "Baseline", "Query"] = predicted_query
results = results.rename(columns={"Unnamed: 0": "Index"})
plt_df = results.ffill()

In [810]:
fig = px.histogram(plt_df, x="Topic", y=["Recall",
                                        "Cosine Precision", "Cluster Precision", "MVEE Precision", "Hull Precision",
                                        "Cosine F2","Cluster F2", "MVEE F2", "Hull F2"
                                                    ],
                    title="Metrics for Predicted and Baseline Queries", barmode="group",
                   facet_row="Index", facet_row_spacing=0.1)

fig.for_each_trace(lambda t: t.update(x=t.x[::-1], y=t.y[::-1]))
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1], font_size=15))
fig.update_layout(yaxis1_dtick=0.2, yaxis2_dtick=0.2, yaxis2_title="",
                   **PLOT_CONFIGS)
fig.update_layout(yaxis1_tickfont_size=15, yaxis2_tickfont_size=15)
# fig.write_image("LitQEval-report/pics/all-metrics-1.pdf", width=1300, height=600)
fig.show()

In [811]:
diff_df = (results[results["Index"] == "Predicted"][["Recall",
                                                    "Cosine Precision", "Cluster Precision", "MVEE Precision", "Hull Precision",
                                                    "Cosine F2","Cluster F2", "MVEE F2", "Hull F2",
                                                    ]].values\
      - results[results["Index"] == "Baseline"][["Recall",
                                                    "Cosine Precision", "Cluster Precision", "MVEE Precision", "Hull Precision",
                                                    "Cosine F2","Cluster F2", "MVEE F2", "Hull F2",
                                                    ]].values).astype(float)

diff_df = pd.DataFrame(np.round(diff_df,2), columns=["Recall",
                                                    "Cosine Precision", "Cluster Precision", "MVEE Precision", "Hull Precision",
                                                    "Cosine F2","Cluster F2", "MVEE F2", "Hull F2",
                                                    ], index=results.dropna()["Topic"])
fig = px.imshow(diff_df, title="Performance Difference between Predicted and Baseline Queries",
                color_continuous_midpoint=0, color_continuous_scale="RdBu",
                labels=dict(x="Metric", y="Topic", color="Difference"), aspect="auto",
                text_auto=".2f")
fig.update_layout(
        margin={"t": 50, "b": 0, "r": 0, "l": 0,},
        **PLOT_CONFIGS)
fig.update_traces(textfont_size=15)
fig.show()
# pio.write_image(fig, "LitQEval-report/pics/eval1_results.pdf", width=1200)

In [812]:
q_type = "Predicted"
temp = plt_df[plt_df["Index"] == q_type].round(2).drop(["Query", "Index"], axis=1)
heatmap_color = np.array(
    [temp[clm].values for clm in temp.columns if "Relevant" in clm]
).T
temp.index = temp["Topic"]
temp = temp[
    [
        "Recall",
        "Cosine Precision",
        "Cluster Precision",
        "MVEE Precision",
        "Hull Precision",
        "Cosine F2",
        "Cluster F2",
        "MVEE F2",
        "Hull F2",
    ]
]
customdata = np.full((len(temp), len(temp.columns)), "", dtype=object)
customdata[:, -4:] = heatmap_color
fig = px.imshow(
    temp,
    title=f"Evaluation Results of the {q_type} Queries",
    labels=dict(x="Metric", y="Topic", color="Value"),
    aspect="auto",
    color_continuous_scale="RdBu",
)
fig.data[0].text = customdata
fig.data[0].texttemplate = "%{text:.2s}"
fig.update_layout(**PLOT_CONFIGS)
fig.update_layout(
    yaxis_tickfont_size=20,
    title_x=0.8,
    margin={"t": 50, "b": 0, "r": 0, "l": 0},
     )
fig.update_traces(textfont_size=15)
fig.show()
# pio.write_image(
#     fig, f"LitQEval-report/pics/{q_type.lower()}_results.pdf", width=1100, height=800
# )


In [813]:
results = pd.read_excel("slr_results.xlsx")
results = results.rename(columns={"Unnamed: 0": "Index"})
predicted_query = results[results["Index"] == "Predicted"]["Query"].values
baseline_query = results[results["Index"] == "Baseline"]["Query"].values
results.loc[results["Index"] == "Predicted", "Query"] = baseline_query
results.loc[results["Index"] == "Baseline", "Query"] = predicted_query
results["Topic"] = results["Query"].copy()
results.loc[1::2, "Topic"] = np.nan
results = results.rename(columns={"Unnamed: 0": "Index"})
plt_df = results.ffill()
plt_df["Index"].replace("Predicted", "SLR", inplace=True)


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.





In [814]:
fig = px.histogram(plt_df, x="Topic", y=["Recall",
                                        "Cosine Precision", "Cluster Precision", "MVEE Precision", "Hull Precision",
                                        "Cosine F2","Cluster F2", "MVEE F2", "Hull F2"
                                                    ],
                    title="Metrics for Predicted and Baseline Queries", barmode="group",
                   facet_row="Index", facet_row_spacing=0.1)

fig.for_each_trace(lambda t: t.update(x=t.x[::-1], y=t.y[::-1]))
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
# fig.update_layout(yaxis1_dtick=0.2, yaxis2_dtick=0.2, yaxis2_title="",
#                    **PLOT_CONFIGS)
# fig.write_image("LitQEval-report/pics/all-metrics-2.pdf")
fig.show()

In [815]:
diff_df = (results[results["Index"] == "Predicted"][["Recall",
                                                    "Cosine Precision", "Cluster Precision", "MVEE Precision", "Hull Precision",
                                                    "Cosine F2","Cluster F2", "MVEE F2", "Hull F2",
                                                    ]].values\
      - results[results["Index"] == "Baseline"][["Recall",
                                                    "Cosine Precision", "Cluster Precision", "MVEE Precision", "Hull Precision",
                                                    "Cosine F2","Cluster F2", "MVEE F2", "Hull F2",
                                                    ]].values).astype(float)

diff_df = pd.DataFrame(np.round(diff_df,3), columns=["Recall",
                                                    "Cosine Precision", "Cluster Precision", "MVEE Precision", "Hull Precision",
                                                    "Cosine F2","Cluster F2", "MVEE F2", "Hull F2",
                                                    ], index=results.dropna()["Topic"])
fig = px.imshow(diff_df, title="Performance Difference between the Original SLR and Baseline Queries",
                color_continuous_midpoint=0, color_continuous_scale="RdBu",
                labels=dict(x="Metric", y="Topic", color="Difference"), aspect="auto",
                text_auto=".2f")
fig.update_layout(
        margin={"t": 50, "b": 0, "r": 0, "l": 0,},
        **PLOT_CONFIGS)
fig.update_traces(textfont_size=15)
fig.show()
# pio.write_image(fig, "LitQEval-report/pics/eval2_results.pdf", width=900)

In [816]:
q_type = "SLR"
temp = plt_df[plt_df["Index"] == q_type].round(2).drop(["Query", "Index"], axis=1)
heatmap_color = np.array(
    [temp[clm].values for clm in temp.columns if "Relevant" in clm]
).T
temp.index = temp["Topic"]
temp = temp[
    [
        "Recall",
        "Cosine Precision",
        "Cluster Precision",
        "MVEE Precision",
        "Hull Precision",
        "Cosine F2",
        "Cluster F2",
        "MVEE F2",
        "Hull F2",
    ]
]
customdata = np.full((len(temp), len(temp.columns)), "", dtype=object)
customdata[:, -4:] = heatmap_color
fig = px.imshow(
    temp,
    title=f"Evaluation Results of the {q_type} Queries",
    labels=dict(x="Metric", y="Topic", color="Value"),
    aspect="auto",
    color_continuous_scale="RdBu",zmin=0, zmax=1
)
fig.data[0].text = customdata
fig.data[0].texttemplate = "%{text:.2s}"
fig.update_layout(**PLOT_CONFIGS)
fig.update_layout(
    yaxis_tickfont_size=20,
    title_x=0.8,
    margin={"t": 50, "b": 0, "r": 0, "l": 0},
)
fig.update_traces(textfont_size=15)
fig.show()
# pio.write_image(
#     fig, f"LitQEval-report/pics/{q_type.lower()}_results.pdf", width=1100, height=800
# )
