In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Inventory of the input directory: lazily walk ./data/ and print the path of
# every file found (sub-directories such as .ipynb_checkpoints are included).
data_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('./data/')
    for name in names
)
for path in data_file_paths:
    print(path)

./data/aisles.csv
./data/departments.csv
./data/orders.csv
./data/order_products__prior.csv
./data/order_products__train.csv
./data/products.csv
./data/.ipynb_checkpoints\aisles-checkpoint.csv


In [3]:
# Inventory of the output directory: lazily walk ./results/ and print the path
# of every file found, including those in nested folders.
result_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('./results/')
    for name in names
)
for path in result_file_paths:
    print(path)

./results/7-Fine Tuned Sentence Bert-Pytorch.ipynb
./results/fine_tuned_sentence_bert_FAISS_recommendation.csv
./results/fine_tuned_sentence_bert_product_embeddings.csv
./results/product_pairs_for_finetune.csv
./results/.ipynb_checkpoints\7-Fine Tuned Sentence Bert-Pytorch-checkpoint.ipynb
./results/fine_tuned_model\config.json
./results/fine_tuned_model\config_sentence_transformers.json
./results/fine_tuned_model\model.safetensors
./results/fine_tuned_model\modules.json
./results/fine_tuned_model\README.md
./results/fine_tuned_model\sentence_bert_config.json
./results/fine_tuned_model\special_tokens_map.json
./results/fine_tuned_model\tokenizer.json
./results/fine_tuned_model\tokenizer_config.json
./results/fine_tuned_model\vocab.txt
./results/fine_tuned_model\1_Pooling\config.json


In [4]:
# Input CSVs are read from ./data/; previously generated artefacts from ./results/.
read_path = './data/'
save_path = './results/'

# Product catalogue.
prod_df = pd.read_csv(f"{read_path}products.csv")

# Per-product nearest-neighbour recommendations.
# NOTE(review): products.csv is read with the default encoding while this file
# is forced to latin1. If product names contain non-ASCII characters the two
# frames may decode them differently and fail to match by name -- confirm the
# file's real encoding.
faiss_recom_df = pd.read_csv(
    f"{save_path}fine_tuned_sentence_bert_FAISS_recommendation.csv",
    encoding='latin1',
)

In [5]:
# Remove the rank-0 neighbour column (presumably the query product itself --
# confirm against the notebook that produced the file).
# errors="ignore" keeps this self-assigning cell idempotent: re-running it after
# the column is already gone is a no-op instead of a KeyError.
faiss_recom_df = faiss_recom_df.drop(columns="nearest_0", errors="ignore")

In [6]:
# Peek at the first five catalogue rows (head() defaults to n=5).
prod_df.head()

Unnamed: 0,product_id,product_name,aisle_id,department_id
0,1,Chocolate Sandwich Cookies,61,19
1,2,All-Seasons Salt,104,13
2,3,Robust Golden Unsweetened Oolong Tea,94,7
3,4,Smart Ones Classic Favorites Mini Rigatoni Wit...,38,1
4,5,Green Chile Anytime Sauce,5,13


In [7]:
# Peek at the first five recommendation rows (head() defaults to n=5).
faiss_recom_df.head()

Unnamed: 0,product_id,product_name,nearest_1,nearest_2,nearest_3,nearest_4,nearest_5,nearest_6,nearest_7,nearest_8,...,nearest_11,nearest_12,nearest_13,nearest_14,nearest_15,nearest_16,nearest_17,nearest_18,nearest_19,nearest_20
0,1,Chocolate Sandwich Cookies,Chocolate Creme Sandwich Cookies,Chocolate Berry Creme Sandwich Cookies,Vanilla Sandwich Cookies,Chocolate Mint Creme Sandwich Cookies,Birthday Cake Chocolate Sandwich Cookies,Crispy Chocolate Chip Cookies,Montanas Chocolate Chip Cookies,Oreo Chocolate Sandwich Cookies,...,Oreo Mini Bite Size Chocolate Sandwich Cookies,Chocolate Chip Crunchy Cookies,Mini Crunchy Chocolate Chip Cookies,Chocolate Peanut Butter Creme Sandwich Cookies,Chocolate Chip Cookies,Chunky Chocolate Chip Cookies,Chocolate Mini Big Bag Sandwich Cookies,Cookie Bites Chocolate Chip Cookies,Mini Chocolate Chunk Cookies,Chewy Chocolate Chip Cookies
1,2,All-Seasons Salt,Epsom Salt,Season All Seasoned Salt,Table Salt,Lavender Epsom Salt,Premium Epsom Salt,Himalania Fine Pink Salt,Salt Substitute,Canning & Pickling Salt,...,Organic Celery Salt,Coarse Pink Salt,Original Mixed Up Salt,Celery Salt,Original Seasoned Salt,Plain Salt,Lite Salt,Eucalyptus Epsom Salt,Iodized Salt,Lemon & Pepper Seasoning Salt
2,3,Robust Golden Unsweetened Oolong Tea,Unsweetened Golden Oolong Tea,Unsweetened Black Tea,Earl Grey Decaffeinated Black Tea,Just Green Unsweetened Tea,Unsweetened Pure & Smooth Green Tea,100% Pure Oolong Tea,Unsweetened Mint Green Tea,Decaffeinated Tea,...,Pure Leaf Unsweetened Iced Tea,Decaffeinated Earl Grey Tea,Pure Leaf Unsweetened Real Brewed Tea,Unsweetened Zero Calorie Green White Tea,Classic Decaffeinated Green Tea,Unsweetened Green Tea,Amber Sun Turmeric Tea,Pure Leaf Unsweetened Green Tea,Organic Unsweetened Black Tea,Caffeine Free Echinacea Complete Care Tea
3,4,Smart Ones Classic Favorites Mini Rigatoni Wit...,Gluten-Free Yellow Cheddar Cheese Sauce Mix,Creamy Vodka Italian Sauce,Riserva Artichokes & Asiago Cheese Sauce,"In a creamy sauce, topped with aged parmesan c...",Creamy Garlic Alfredo Sauce Mix,"The Original Picante Sauce, Medium",Creamy Pesto Sauce Mix,Sweet Italian Sasuage,...,Italian Herb Spaghetti Sauce Mix,"Grille Sauce, Maple Chipotle",Cheese Creations Creamy Mozzarella Cheese Sauce,Parma Rosa Sauce Mix,Maui Mountain Sweet 'n Sour Sauce & Marinade,Super Blue Cheese Dressing + Dip,Thick & Zesty Spaghetti Sauce Mix,Edamame Fettucine With Thai Coconut Sauce,Raspberry Chipotle Sauce,"All-Purpose Sauce, Kale Tahini"
4,5,Green Chile Anytime Sauce,Mild Enchilada Green Chile Sauce,Green Chile Enchilada Sauce,Green Chile Medium Enchilada Sauce,Mild Green Chile Enchilada Sauce,Medium Green Chile Enchilada Sauce,Red Chile Enchilada Sauce,Original Vegan Alfredo Sauce,Mild Red Chile Enchilada Sauce,...,Picante Hot Green Chile Enchilada Sauce,Green Enchilada Sauce,Chunky Garden Combo Italian Sauce,Classic Red Chile Enchilada Sauce,Mild Green Enchilada Sauce,Best Chili Beans Mild Sauce,Black Bean Garlic Sauce,Garlic & Green Onion Teriyaki Sauce,Roasted Garlic Herb Italian Sauce,Garlic Teriyaki Sauce


# Precision@K, Recall@K, NDCG@K
Precision@K: we count a recommendation as a "hit" if the recommended product and the original (anchor) product belong to the same category (aisle or department). The evaluation code below uses the aisle as the category.

In [8]:
def evaluate_recommendation(products_df, neighbors_df, k=20):
    """Score name-based recommendations with aisle-level Precision@K, Recall@K and NDCG@K.

    For every anchor row in ``neighbors_df`` the ground truth is the set of
    *other* product names that share the anchor's aisle. A recommendation is
    relevant when its name is in that set; the anchor itself and repeated
    recommendations of the same name are not counted, so Recall@K and NDCG@K
    stay within [0, 1].

    Parameters
    ----------
    products_df : pandas.DataFrame
        Catalogue with ``product_name`` and ``aisle_id`` columns.
    neighbors_df : pandas.DataFrame
        One row per anchor with a ``product_name`` column and recommendation
        columns ``nearest_1`` ... ``nearest_k`` holding product names.
        A missing ``nearest_i`` column is treated as a miss at that rank.
    k : int, default 20
        Cut-off rank; must be at least 1.

    Returns
    -------
    dict
        ``{"Precision@K": ..., "Recall@K": ..., "NDCG@K": ...}`` averaged over
        the anchors that could be evaluated. Anchors whose name is not in the
        catalogue, or that are alone in their aisle, are skipped. When no
        anchor can be evaluated every metric is ``0.0``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # Product name -> aisle id.
    name_to_aisle = products_df.set_index("product_name")["aisle_id"].to_dict()
    # Aisle id -> set of product names (recall denominator / relevance lookup).
    aisle_to_items = products_df.groupby("aisle_id")["product_name"].apply(set).to_dict()

    # Loop-invariant pieces, computed once instead of once per anchor.
    neighbor_cols = [f"nearest_{rank}" for rank in range(1, k + 1)]
    discounts = [np.log2(position + 2) for position in range(k)]
    # ideal_dcg[m] is the DCG of a ranking whose first m positions are all relevant.
    ideal_dcg = [0.0]
    for discount in discounts:
        ideal_dcg.append(ideal_dcg[-1] + 1 / discount)

    precisions, recalls, ndcgs = [], [], []

    for _, row in neighbors_df.iterrows():
        anchor_name = row["product_name"]

        anchor_aisle = name_to_aisle.get(anchor_name)
        if anchor_aisle is None:
            continue  # anchor is not in the catalogue

        ground_truth_set = aisle_to_items.get(anchor_aisle, set()) - {anchor_name}
        if not ground_truth_set:
            continue  # anchor is the only product in its aisle

        # Each ground-truth item may be credited at most once, otherwise hits
        # could exceed len(ground_truth_set) and push recall / NDCG above 1.
        credited = set()
        rels = []
        for col in neighbor_cols:
            rec_name = row.get(col)
            is_hit = rec_name in ground_truth_set and rec_name not in credited
            if is_hit:
                credited.add(rec_name)
            rels.append(1 if is_hit else 0)
        hits = len(credited)

        # Precision@K
        precisions.append(hits / k)

        # Recall@K
        recalls.append(hits / len(ground_truth_set))

        # NDCG@K: the ideal ranking places min(|ground truth|, k) relevant items first.
        dcg = sum(rel / discount for rel, discount in zip(rels, discounts))
        idcg = ideal_dcg[min(len(ground_truth_set), k)]
        ndcgs.append(dcg / idcg)

    total_valid = len(precisions)
    if total_valid == 0:
        # Nothing could be evaluated; report zeros rather than dividing by zero.
        return {"Precision@K": 0.0, "Recall@K": 0.0, "NDCG@K": 0.0}

    return {
        "Precision@K": sum(precisions) / total_valid,
        "Recall@K": sum(recalls) / total_valid,
        "NDCG@K": sum(ndcgs) / total_valid
    }


In [9]:
# Report the aisle-based metrics of the fine-tuned SBERT + FAISS recommendations
# at several cut-offs.
print("fine tuned sbert+FAISS")
for k_ in (5, 10, 15, 20):
    evaluation = evaluate_recommendation(
        products_df=prod_df,
        neighbors_df=faiss_recom_df,
        k=k_,
    )
    print(f"k={k_} ", evaluation)

fine tuned sbert+FAISS
k=5  {'Precision@K': 0.5986429005468908, 'Recall@K': 0.0075120066333178755, 'NDCG@K': 0.6110854266352317}
k=10  {'Precision@K': 0.5663783674296131, 'Recall@K': 0.01409477929798361, 'NDCG@K': 0.5843054483207635}
k=15  {'Precision@K': 0.5458672608196611, 'Recall@K': 0.020244831343824658, 'NDCG@K': 0.5664965759644934}
k=20  {'Precision@K': 0.5304213084869354, 'Recall@K': 0.02609477732143434, 'NDCG@K': 0.552723751495889}
