In [1]:
import pandas as pd
import numpy as np
import pickle


In [2]:
# Read the review table and cast the text column to str in one step so the
# later substring checks always operate on strings.
df = pd.read_csv("processed_reviews.csv").astype({"review_text": str})

# NOTE: pickle.load can execute arbitrary code while deserializing; only
# load this file if it comes from a trusted source.
with open("minilm_results.pkl", "rb") as f:
    minilm_results = pickle.load(f)

# Queries to evaluate; each one is looked up as a key in `minilm_results`.
queries = [
    "battery life", "screen quality", "sound quality",
    "build quality", "camera performance",
]

print("Documents:", len(df))
print("Queries:", queries)


Documents: 61
Queries: ['battery life', 'screen quality', 'sound quality', 'build quality', 'camera performance']


In [3]:
def label_relevance(df, query):
    """
    Weak supervision: label each review as relevant (1) or not (0).

    A review is relevant if its text contains the query as a
    case-insensitive substring (the whole query phrase, not the
    individual words).

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a "review_text" column.
    query : str
        Query phrase to look for.

    Returns
    -------
    numpy.ndarray
        Integer array of 0/1 labels, one per row of `df`, in row order.
    """
    q = query.lower()

    # Lowercase the text as well as the query; otherwise "Battery Life"
    # in a review would never match the query "battery life".
    # Missing values become empty strings so they can never match and
    # never raise on the `in` check.
    texts = df["review_text"].fillna("").astype(str).str.lower()

    return np.array([1 if q in text else 0 for text in texts], dtype=int)


In [5]:
def precision_at_k(rel, k):
    """
    Precision@k: sum of the first `k` relevance labels divided by `k`.

    The denominator is always `k`, even when `rel` holds fewer than `k`
    entries. Returns 0.0 when `rel` is empty.
    """
    if len(rel) > 0:
        hits = np.sum(rel[:k])
        return hits / k
    return 0.0

def recall_at_k(rel, total_rel, k):
    """
    Recall@k: sum of the first `k` relevance labels divided by `total_rel`.

    `total_rel` is supplied by the caller. Returns 0.0 when it is zero so
    the division is never attempted.
    """
    return np.sum(rel[:k]) / total_rel if total_rel != 0 else 0.0

def ndcg_at_k(rel, k):
    """
    Normalized discounted cumulative gain at cutoff `k`.

    Gain is `2**rel - 1` and the discount at 1-based position p is
    `log2(p + 1)`. The ideal ordering is the given `rel` values sorted
    in descending order, so the score is relative to the best possible
    arrangement of the labels passed in.

    Parameters
    ----------
    rel : array-like
        Relevance labels in ranked order (best-ranked first).
    k : int
        Cutoff; only the first `k` entries contribute.

    Returns
    -------
    float
        nDCG@k, or 0.0 when `rel` is empty, `k` is not positive, or the
        ideal DCG is zero (no relevant items).
    """
    # Accept plain lists too; float dtype keeps `2 ** rel` well defined.
    rel = np.asarray(rel, dtype=float)
    if rel.size == 0 or k <= 0:
        return 0.0

    top = rel[:k]
    # Size the discounts by the number of items actually available, so a
    # ranking shorter than k does not trigger a shape mismatch.
    discounts = np.log2(np.arange(2, top.size + 2))
    dcg = np.sum((2 ** top - 1) / discounts)

    ideal = np.sort(rel)[::-1][:k]
    ideal_dcg = np.sum((2 ** ideal - 1) / discounts)

    if ideal_dcg == 0:
        return 0.0

    return dcg / ideal_dcg


In [6]:
results = []

for q in queries:
    # Score only the queries that have a stored ranking; warn about the rest.
    if q in minilm_results:
        labels = label_relevance(df, q)
        total_rel = np.sum(labels)

        # Drop ranked ids that fall beyond the label array before using
        # them as an index into it.
        ranks = [i for i in minilm_results[q] if i < len(labels)]
        rel_at_k = labels[ranks]

        results.append({
            "query": q,
            "model": "MiniLM",
            "Precision@10": precision_at_k(rel_at_k, 10),
            "Recall@10": recall_at_k(rel_at_k, total_rel, 10),
            "nDCG@10": ndcg_at_k(rel_at_k, 10)
        })
    else:
        print(f"Warning: no results for query '{q}'")

# One row per evaluated query.
results_df = pd.DataFrame(results)
results_df


Unnamed: 0,query,model,Precision@10,Recall@10,nDCG@10
0,battery life,MiniLM,0.0,0.0,0.0
1,screen quality,MiniLM,0.0,0.0,0.0
2,sound quality,MiniLM,0.0,0.0,0.0
3,build quality,MiniLM,0.0,0.0,0.0
4,camera performance,MiniLM,0.0,0.0,0.0


In [7]:
# Average every numeric metric column within each model.
by_model = results_df.groupby("model")
avg_results = by_model.mean(numeric_only=True)
avg_results


Unnamed: 0_level_0,Precision@10,Recall@10,nDCG@10
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MiniLM,0.0,0.0,0.0


In [8]:
# Write the per-query table without its row index, and the averaged table
# with its index.
results_df.to_csv(path_or_buf="retrieval_metrics.csv", index=False)
avg_results.to_csv(path_or_buf="average_metrics.csv", index=True)

print("Saved retrieval_metrics.csv and average_metrics.csv")


Saved retrieval_metrics.csv and average_metrics.csv


In [9]:
# Qualitative check: print the first 150 characters of the top three
# ranked documents for each query.
for q in queries:
    print("\nQuery:", q)

    # A query may have no stored ranking; report it instead of raising
    # KeyError.
    if q not in minilm_results:
        print(f"Warning: no results for query '{q}'")
        continue

    # Keep only ids that address a row of `df` (iloc would raise on
    # anything past the end), then take the three best-ranked.
    top_ids = [i for i in minilm_results[q] if 0 <= i < len(df)][:3]
    for i in top_ids:
        print("-", df.iloc[i]["review_text"][:150])



Query: battery life
- (34gb) - all 233.1 million reviews
- - time of the review (unix time)
- (14.3gb) - subset of the data in which all users and items have at least 5 reviews (75.26 million reviews)

Query: screen quality
- - url of the high resolution product image
- we appreciate any help or feedback to improve the quality of our dataset! feel free to reach us at jin018@ucsd.edu if you meet any following questions
- (34gb) - all 233.1 million reviews

Query: sound quality
- (34gb) - all 233.1 million reviews
- (14.3gb) - subset of the data in which all users and items have at least 5 reviews (75.26 million reviews)
- we appreciate any help or feedback to improve the quality of our dataset! feel free to reach us at jin018@ucsd.edu if you meet any following questions

Query: build quality
- (34gb) - all 233.1 million reviews
- - bullet-point format features of the product
- - the first technical detail table of the product

Query: camera performance
- (34gb) - all 233.1 million revi