In [2]:
from sklearn.metrics.pairwise import cosine_similarity
from litQeval.eval_utils import *
import plotly.express as px
import plotly.io as pio
from tqdm.auto import tqdm
import pandas as pd
import numpy as np
import json
pio.templates.default = "seaborn"

In [3]:
for i in json.load(open('data/queries.json')):
    if i["baseline"] == "Software Defect Prediction":
        baseline = i["baseline"]
        predicted = i["predicted"]
        break
data = get_data(baseline, predicted)
core_pubs = data["core_pubs"]
core_mean_embedding = data["core_mean_embedding"]
baseline_pubs = data["baseline_pubs"]
predicted_pubs = data["predicted_pubs"]
baseline_vs = data["baseline_vs"]
predicted_vs = data["predicted_vs"]
core_vs = data["core_vs"]
core_embeddigs = data["core_embeddings"]
core_threshold = data["core_threshold"]
core_embeddings = data["core_embeddings"]
predicted_embeddings = np.array([embedding for embedding in predicted_vs.get(include=["embeddings"])["embeddings"]])
baseline_embeddings = np.array([embedding for embedding in baseline_vs.get(include=["embeddings"])["embeddings"]])
core_mean_embedding.shape, predicted_embeddings.shape, baseline_embeddings.shape

((1, 1536), (14651, 1536), (3563, 1536))

In [4]:
cosine_sim = cosine_similarity(core_mean_embedding, baseline_embeddings).flatten()
baseline_pubs["similarity"] = cosine_sim

core_pubs_in_baseline = baseline_pubs[baseline_pubs["id"].isin(core_pubs)]
relevent_baseline_pubs = baseline_pubs[baseline_pubs["similarity"] >= core_threshold].copy()
print(f"Number of core publications in the baseline: {core_pubs_in_baseline.shape[0]}")
print(f"Number of relevant publications in the baseline: {relevent_baseline_pubs.shape[0]}")

Number of core publications in the baseline: 18
Number of relevant publications in the baseline: 3366


In [5]:
# predicted
cosine_sim = cosine_similarity(core_mean_embedding, predicted_embeddings).flatten()
predicted_pubs["similarity"] = cosine_sim

core_pubs_in_predicted = predicted_pubs[predicted_pubs["id"].isin(core_pubs)]
relevant_predicted_pubs = predicted_pubs[predicted_pubs["similarity"] >= core_threshold].copy()
print(f"Number of core publications in the predicted: {core_pubs_in_predicted.shape[0]}")
print(f"Number of relevant publications in the predicted: {relevant_predicted_pubs.shape[0]}")

Number of core publications in the predicted: 11
Number of relevant publications in the predicted: 6485


### Cosine Similarity Measure

In [6]:
recall = evaluate_recall(core_pubs, baseline_pubs, predicted_pubs)
# semnatic precision: every element that is more similar than the least similar core publication is considered relevant
# relevant_predicted_pubs: publications that are more similar than the least similar core publication.
pred_precision = relevant_predicted_pubs.shape[0] / predicted_pubs.shape[0] # total number of found publications
baseline_precision = (relevent_baseline_pubs.shape[0] / baseline_pubs.shape[0]) if baseline_pubs.shape[0] > 0 else 0
pred_f2 = fscore(pred_precision, recall["predicted_recall"], 2)
baseline_f2 = fscore(baseline_precision, recall["baseline_recall"], 2)
df = pd.DataFrame({
    "Semantic Precision": [pred_precision, baseline_precision],
    "Recall": [recall["predicted_recall"], recall["baseline_recall"]],
    "Semantic F2": [pred_f2, baseline_f2]
}, index=["Predicted", "Baseline"])
df

Unnamed: 0,Semantic Precision,Recall,Semantic F2
Predicted,0.442632,0.2,0.224626
Baseline,0.94471,0.327273,0.376485


### Minimum Volume Enclosing Ellipsoid MMVE

In [7]:
A, c = mvee(core_embeddings)
base_is_inside = is_inside_ellipse(A, c, baseline_embeddings)
predicted_is_inside = is_inside_ellipse(A, c, predicted_embeddings)

100%|██████████| 3563/3563 [00:00<00:00, 5374.07it/s]
100%|██████████| 14651/14651 [00:02<00:00, 5705.92it/s]


In [8]:
mvve_prec_baseline = base_is_inside.sum() / len(base_is_inside)
mvve_prec_predicted = predicted_is_inside.sum() / len(predicted_is_inside)

mvve_df = pd.DataFrame({
    "MVVE Precision": [mvve_prec_predicted, mvve_prec_baseline],
    "Recall": [recall["predicted_recall"], recall["baseline_recall"]],
    "MVVE F2": [fscore(mvve_prec_predicted, recall["predicted_recall"], 2), fscore(mvve_prec_baseline, recall["baseline_recall"], 2)]
}, index=["Predicted", "Baseline"])
print(f"Baseline - Inside: {base_is_inside.sum()}")
print(f"Predicted - Inside: {predicted_is_inside.sum()}")
mvve_df

Baseline - Inside: 1979
Predicted - Inside: 9151


Unnamed: 0,MVVE Precision,Recall,MVVE F2
Predicted,0.624599,0.2,0.23147
Baseline,0.555431,0.327273,0.356567


In [9]:
results = pd.DataFrame({
    "Query": [predicted] + [baseline],
    "Recall": [recall["predicted_recall"], recall["baseline_recall"]],
    "Semantic Precision": [pred_precision, baseline_precision],
    "Semantic F2": [pred_f2, baseline_f2],
    "MVVE Precision": [mvve_prec_predicted, mvve_prec_baseline],
    "MVVE F2": [fscore(mvve_prec_predicted, recall["predicted_recall"], 2), fscore(mvve_prec_baseline, recall["baseline_recall"], 2)]
}, index=["Predicted", "Baseline"])

try:
    old_results = pd.read_excel("results.xlsx", index_col=0)
    results = pd.concat([old_results, results]).drop_duplicates(subset=["Query"]).round(3)
    results.to_excel("results.xlsx")
except FileNotFoundError:
    results.to_excel("results.xlsx")

display(results)

Unnamed: 0,Query,Recall,Semantic Precision,Semantic F2,MVVE Precision,MVVE F2
Predicted,"""digital surgery"" OR ""surgical navigation"" OR ...",0.957,0.081,0.302,0.523,0.821
Baseline,Robotic Arthroplasty,0.957,0.72,0.898,0.483,0.8
Predicted,"""compliant materials"" OR (soft robotics) OR ""s...",0.722,0.507,0.666,0.62,0.699
Baseline,Soft Robotics,0.556,0.755,0.587,0.563,0.557
Predicted,"""crop yield estimation"" OR ""crop simulation mo...",0.652,0.25,0.494,0.527,0.623
Baseline,Crop Yield Prediction,0.543,0.622,0.558,0.551,0.545
Predicted,"(synthetic biology) OR ""genome editing"" OR ""mi...",0.586,0.552,0.579,0.532,0.574
Baseline,Synthetic Biology,0.207,0.619,0.239,0.563,0.237
Predicted,"""resilience assessment"" OR ""resilience strateg...",0.222,0.29,0.233,0.419,0.245
Baseline,Resilience in Business and management,0.0,0.484,0.0,0.45,0.0


In [10]:
results["Topic"] = results["Query"].copy()
results["Topic"][0::2] = np.nan
plt_df = results.bfill()
plt_df.reset_index(inplace=True)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  results["Topic"][0::2] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results["Topic"][0::2] = np.nan

In [11]:
fig = px.histogram(plt_df, x="Topic", y=["Recall", "Semantic Precision", "MVVE Precision", "Semantic F2", "MVVE F2"], title="Metrics for Predicted and Baseline Queries", barmode="group",
                   facet_row="index", facet_row_spacing=0.1)

# Sort by value
fig.for_each_trace(lambda t: t.update(x=t.x[::-1], y=t.y[::-1]))
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(yaxis1_title="", yaxis2_title="", yaxis1_dtick=0.2, yaxis2_dtick=0.2)
fig.show()

In [70]:
# difference in metrics between predicted and baseline (Recall	Semantic Precision	Semantic F2	MVVE Precision	MVVE F2	)
diff_df = results.loc["Predicted"][["Recall","Semantic Precision" ,"Semantic F2" ,"MVVE Precision", "MVVE F2"]].values\
      - results.loc["Baseline"][["Recall","Semantic Precision" ,"Semantic F2" ,"MVVE Precision", "MVVE F2"]].values

diff_df = pd.DataFrame(diff_df.round(3), columns=["Recall","Semantic Precision" ,"Semantic F2" ,"MVVE Precision", "MVVE F2"], index=results.loc["Baseline"]["Query"])
diff_df = diff_df.style.map(lambda x: 'background-color: #6b0801' if x < -0.5 else 'background-color: #a82b22' if x < 0 else '')
diff_df.format("{:.3f}").set_caption("Difference in metrics between predicted and baseline (Negative means the baseline is better)")
# hide the first row
diff_df.hide(subset=diff_df.index[-7:])

Unnamed: 0_level_0,Recall,Semantic Precision,Semantic F2,MVVE Precision,MVVE F2
Query,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Robotic Arthroplasty,0.0,-0.639,-0.596,0.04,0.021
Soft Robotics,0.166,-0.248,0.079,0.057,0.142
Crop Yield Prediction,0.109,-0.372,-0.064,-0.024,0.078
Synthetic Biology,0.379,-0.067,0.34,-0.031,0.337
Resilience in Business and management,0.222,-0.194,0.233,-0.031,0.245
Cervical Myelopathy,0.085,-0.294,-0.042,-0.037,0.038
Drones in Agriculture,0.48,-0.297,0.39,-0.008,0.465
Tourism Growth Nexus,0.0,-0.357,-0.076,0.019,0.0
Sustainable Biofuel Economy,0.34,-0.146,0.353,0.015,0.358
Perovskite Solar Cells Stability,0.103,-0.291,-0.033,-0.007,0.057


In [48]:
diff_df.hide(subset=diff_df.index[:15])


Unnamed: 0_level_0,Recall,Semantic Precision,Semantic F2,MVVE Precision,MVVE F2
Query,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Software Process Line,-0.233,0.324,-0.126,-0.043,-0.184
Data Stream Processing Latency,-0.058,0.007,-0.064,0.021,-0.061
Business Process Meta Models,0.346,0.091,0.306,-0.004,0.287
Multicore Performance Prediction,0.0,0.017,0.0,-0.074,0.0
Cloud Migration,0.0,0.007,0.0,-0.029,0.0
Software Fault Prediction Metrics,0.562,-0.672,0.113,0.016,0.427
Software Defect Prediction,-0.127,-0.502,-0.151,0.07,-0.126
