**INSTALLS**


In [1]:
!pip install pandas numpy nltk scikit-learn matplotlib seaborn

# --- Imports ---
import pandas as pd
import numpy as np
import nltk
import string
import math
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# --- NLTK setup (tokenizer, stopwords) ---
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords

print("‚úÖ Setup complete! Libraries imported.")



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


‚úÖ Setup complete! Libraries imported.


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [2]:
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

# üß± Part 1: Indexing

In [5]:
from google.colab import files
import pandas as pd

# Step 1: Upload the CSV file from your computer
print("Please select your 'fashion_products_clean.csv' file")
uploaded = files.upload()  # This will open a file picker

# Step 2: Load the uploaded CSV
file_name = list(uploaded.keys())[0]  # Get the uploaded file name
# Try with UTF-8 first, then fallback to latin1 if needed
try:
    df = pd.read_csv(file_name, encoding='utf-8', engine='python', on_bad_lines='skip')
except Exception:
    df = pd.read_csv(file_name, encoding='latin1', engine='python', on_bad_lines='skip')

print("\n‚úÖ Dataset loaded successfully!")
print("Shape:", df.shape)
print("Columns:", df.columns.tolist())

if 'combined_info' in df.columns:
    df['combined_info'] = df['combined_info'].astype(str)
else:
    print("‚ö†Ô∏è 'combined_info' column not found. Columns available:", df.columns.tolist())

df.head(3)



Please select your 'fashion_products_clean.csv' file


Saving fashion_products_clean.csv to fashion_products_clean (2).csv

‚úÖ Dataset loaded successfully!
Shape: (28053, 24)
Columns: ['_id', 'actual_price', 'average_rating', 'brand', 'category', 'crawled_at', 'description', 'discount', 'images', 'out_of_stock', 'pid', 'product_details', 'seller', 'selling_price', 'sub_category', 'title', 'url', 'clean_title', 'clean_description', 'product_details_text', 'combined_info', 'discount_percent', 'title_length', 'desc_length']


Unnamed: 0,_id,actual_price,average_rating,brand,category,crawled_at,description,discount,images,out_of_stock,...,sub_category,title,url,clean_title,clean_description,product_details_text,combined_info,discount_percent,title_length,desc_length
0,fa8e22d6-c0b6-5229-bb9e-ad52eda39a0a,2999.0,3.9,York,Clothing and Accessories,1612987911000,Yorker trackpants made from 100% rich combed c...,69% off,['https://rukminim1.flixcart.com/image/128/128...,False,...,Bottomwear,Solid Women Multicolor Track Pants,https://www.flipkart.com/yorker-solid-men-mult...,solid women multicolor track pant,yorker trackpant made 100 rich comb cotton giv...,1005COMBO2 Elastic Side Pockets Cotton Blend S...,Clothing and Accessories Bottomwear York Shyam...,69.0,5.0,21.0
1,893e6980-f2a0-531f-b056-34dd63fe912c,1499.0,3.9,York,Clothing and Accessories,1612987912000,Yorker trackpants made from 100% rich combed c...,66% off,['https://rukminim1.flixcart.com/image/128/128...,False,...,Bottomwear,Solid Men Blue Track Pants,https://www.flipkart.com/yorker-solid-men-blue...,solid men blue track pant,yorker trackpant made 100 rich comb cotton giv...,"1005BLUE Drawstring, Elastic Side Pockets Cott...",Clothing and Accessories Bottomwear York Shyam...,66.0,5.0,21.0
2,eb4c8eab-8206-59d0-bcd1-a724d96bf74f,2999.0,3.9,York,Clothing and Accessories,1612987912000,Yorker trackpants made from 100% rich combed c...,68% off,['https://rukminim1.flixcart.com/image/128/128...,False,...,Bottomwear,Solid Men Multicolor Track Pants,https://www.flipkart.com/yorker-solid-men-mult...,solid men multicolor track pant,yorker trackpant made 100 rich comb cotton giv...,1005COMBO4 Elastic Side Pockets Cotton Blend S...,Clothing and Accessories Bottomwear York Shyam...,68.0,5.0,21.0


## 1.1 Inverted Index

In [6]:
import nltk
from nltk.corpus import stopwords
from collections import defaultdict

# Ensure stopwords are available
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Function to tokenize and clean text
def tokenize(text):
    if not isinstance(text, str):
        return []
    # Lowercase, remove punctuation, split
    tokens = nltk.word_tokenize(text.lower())
    # Keep only alphabetic tokens and remove stopwords
    tokens = [t for t in tokens if t.isalpha() and t not in stop_words]
    return tokens

# Build the inverted index
inverted_index = defaultdict(set)


for idx, row in df.iterrows():
    tokens = tokenize(row.get('combined_info', ''))
    for token in set(tokens):  # set() to avoid duplicates per document
        inverted_index[token].add(idx)

print(f"Inverted index with {len(inverted_index)} unique terms.")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Inverted index with 5112 unique terms.


In [29]:
def and_conjunctive_lookup(query):
    """
    Returns the set of document indices that contain ALL query terms.
    Uses the inverted_index built from tokenize().
    """
    # tokenize query same way as docs
    q_tokens = tokenize(query)
    if not q_tokens:
        return set()

    # start with posting list of the first token
    postings_lists = []
    for tok in q_tokens:
        postings_lists.append(inverted_index.get(tok, set()))

    # intersection of all posting lists (AND semantics)
    if not postings_lists:
        return set()

    doc_set = postings_lists[0]
    for pl in postings_lists[1:]:
        doc_set = doc_set & pl

    return doc_set

# Example test:
and_docs = and_conjunctive_lookup("women cotton sweatshirt")
print("Documents matching ALL terms:", len(and_docs))
list(and_docs)[:10]


Documents matching ALL terms: 72


[23809, 23864, 23299, 23940, 9094, 9095, 16392, 22538, 23307, 22540]

In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer
import re

# Custom tokenizer for better cleaning
def clean_tokenizer(text):
    # Lowercase, keep only alphabetic tokens
    tokens = nltk.word_tokenize(text.lower())
    tokens = [t for t in tokens if t.isalpha()]
    return tokens

# Create vectorizer
vectorizer = TfidfVectorizer(
    tokenizer=clean_tokenizer,
    stop_words='english',
    lowercase=True
)

tfidf_matrix = vectorizer.fit_transform(df['combined_info'].astype(str))

print(f"Clean TF-IDF matrix with shape: {tfidf_matrix.shape}")
terms = vectorizer.get_feature_names_out()

print("\nSample clean terms:", terms[:10])




Clean TF-IDF matrix with shape: (28053, 5045)

Sample clean terms: ['aaa' 'aadikart' 'aahe' 'aakashi' 'aalae' 'aami' 'aao' 'aapna' 'aasami'
 'aashray']


## 1.2/3 Test Queries & TF-IDF Ranking


In [9]:
# --- Query Search and Ranking ---

from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def search_query(query, top_k=10):
    """
    Search the dataset using TF-IDF and cosine similarity.
    Returns the top_k most relevant documents.
    """
    if not isinstance(query, str) or not query.strip():
        print("‚ö†Ô∏è Invalid query input.")
        return pd.DataFrame()

    # Transform query using the fitted vectorizer
    query_vec = vectorizer.transform([query])

    # Compute cosine similarity between query and documents
    cosine_sim = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # Get top_k indices (sorted descending)
    top_indices = np.argsort(cosine_sim)[::-1][:top_k]

    # Safely select available columns
    cols_to_show = [c for c in ['pid', 'title', 'brand', 'selling_price', 'average_rating'] if c in df.columns]
    results = df.iloc[top_indices][cols_to_show].copy()

    # Add similarity scores
    results['similarity_score'] = cosine_sim[top_indices].astype(float)

    print(f"\nüîç Top {top_k} results for query: '{query}'")
    return results

# --- Run example test queries (Total 6) ---
search_query("women blue cotton tshirt")




üîç Top 10 results for query: 'women blue cotton tshirt'


Unnamed: 0,pid,title,brand,selling_price,average_rating,similarity_score
17906,TSHFPCXCMQNAWNPV,Printed Women Round Neck Multicolor T-Shirt,RodZ,376.0,4.1,0.666471
1173,TSHFUTG5JZGFTXDF,Color Block Men Round Neck Multicolor T-Shirt,Clo,351.0,4.1,0.582994
9401,TSHFGF4FQQGQ8RPD,Printed Women Round Neck Dark Blue T-Shirt,CupidSto,499.0,3.2,0.452769
9229,TSHFGF3SGGHCAMGK,Printed Women Round Neck Dark Blue T-Shirt,CupidSto,499.0,3.2,0.451795
9228,TSHFGF3VUUSW9BJE,Printed Women Round Neck Blue T-Shirt,CupidSto,499.0,3.2,0.451223
9209,TSHFGF3VFDHJDQ7Z,Printed Women Round Neck Blue T-Shirt,CupidSto,499.0,2.0,0.451223
21403,TSHFNUHHUNJYBFDA,Solid Women Round Neck Blue T-Shirt,ARBO,426.0,5.0,0.438857
9040,TSHFVGC84CZZFQHT,Solid Women Round Neck Black T-Shirt¬†¬†(Pack of 2),Lucky Bi,939.0,2.7,0.425164
9041,TSHFVGC8HKEBBHFS,Solid Women Round Neck Black T-Shirt¬†¬†(Pack of 2),Lucky Bi,939.0,2.7,0.424238
9039,TSHFVGC8BKN9PCQE,Solid Women Round Neck Green T-Shirt¬†¬†(Pack of 2),Lucky Bi,939.0,2.7,0.421456


In [10]:
search_query("men black jeans slim fit")


üîç Top 10 results for query: 'men black jeans slim fit'


Unnamed: 0,pid,title,brand,selling_price,average_rating,similarity_score
5822,JEAFH7N5ZWSHZ2BQ,Slim Men Black Jeans,Reliable Ca,691.0,3.8,0.441987
11502,JEAFXUE2ZGDUHGRU,Skinny Men Black Jeans,ECKO Unl,1598.0,2.5,0.430917
11654,JEAFUZXSZB3FZEKJ,Skinny Men Black Jeans,ECKO Unl,1616.0,2.5,0.430917
12095,JEAFUZXSNGYAXHJM,Tapered Fit Men Black Jeans,ECKO Unl,1319.0,2.5,0.430917
11688,JEAFUZXRTGKMXCNK,Super Skinny Men Black Jeans,ECKO Unl,1423.0,2.5,0.412001
11687,JEAFUZXSDTXBFSVG,Super Skinny Men Blue Jeans,ECKO Unl,1539.0,2.5,0.410746
11631,JEAFUZXSZFBQZFZU,Super Skinny Men Blue Jeans,ECKO Unl,1516.0,2.5,0.410746
11611,JEAFUZXSVVFXQWTG,Tapered Fit Men Blue Jeans,ECKO Unl,1516.0,2.5,0.410746
11747,JEAFUZXSGPAQ2A3A,Super Skinny Men Blue Jeans,ECKO Unl,1462.0,2.5,0.410746
24526,JEAFJM2PHHPFTAH8,Slim Women Black Jeans,Absolu,545.0,4.1,0.407483


In [11]:
search_query("cotton round neck sweatshirt")


üîç Top 10 results for query: 'cotton round neck sweatshirt'


Unnamed: 0,pid,title,brand,selling_price,average_rating,similarity_score
19326,SWSFNMS57EXWPNFD,Full Sleeve Color Block Men Sweatshirt,FLEXIM,699.0,4.1,0.485386
19239,SWSFNMS5QHGAJQ3A,Full Sleeve Color Block Men Sweatshirt,FLEXIM,699.0,4.1,0.485386
19250,SWSFNMS5FFRGFQTK,Full Sleeve Color Block Women Sweatshirt,FLEXIM,699.0,4.1,0.485386
19389,SWSFNMS5N4TEXFSG,Full Sleeve Color Block Women Sweatshirt,FLEXIM,699.0,4.1,0.485386
19241,SWSFNMS5HZGZK8QX,Full Sleeve Color Block Men Sweatshirt,FLEXIM,699.0,4.1,0.485386
21357,SWSFMTNHCG3SRHJR,Full Sleeve Solid Men Sweatshirt,ARBO,711.0,4.2,0.459958
21521,SWSFMTNHZR59ZUXR,Full Sleeve Solid Men Sweatshirt,ARBO,711.0,4.2,0.459372
25430,SWSFUY8ATUXFKJPZ,Full Sleeve Printed Women Sweatshirt,Pu,1264.0,4.1,0.453483
9090,SWSFFVKBAZGKCQCX,Full Sleeve Graphic Print Women Sweatshirt,CupidSto,999.0,4.3,0.447058
25312,SWSFUFGRYFEH8HBG,Full Sleeve Printed Women Sweatshirt,Pu,1199.0,4.1,0.446337


In [12]:
search_query("women red dress long sleeve")


üîç Top 10 results for query: 'women red dress long sleeve'


Unnamed: 0,pid,title,brand,selling_price,average_rating,similarity_score
5427,KTAFP4ZHCEMZWGSG,Women Solid Pure Cotton Ethnic Dress¬†¬†(Blue),SATDEVANGIKHADIBHAND,599.0,3.6,0.39393
5421,KTAFP4ZQ86ZAMHK6,Women Solid Pure Cotton Ethnic Dress¬†¬†(Blue),SATDEVANGIKHADIBHAND,599.0,3.6,0.39393
5446,KTAFP4ZHFSJWTPJQ,Women Solid Pure Cotton Ethnic Dress¬†¬†(Blue),SATDEVANGIKHADIBHAND,599.0,3.6,0.39393
5436,KTAFP4ZHXPZACRCB,Women Solid Pure Cotton Ethnic Dress¬†¬†(Blue),SATDEVANGIKHADIBHAND,599.0,3.6,0.39393
5441,KTAFP4ZHH5NBHZTJ,Women Solid Pure Cotton Ethnic Dress¬†¬†(Brown),SATDEVANGIKHADIBHAND,599.0,3.6,0.380618
5502,KTAFP4ZQCZTU83XD,Women Solid Pure Cotton Ethnic Dress¬†¬†(Brown),SATDEVANGIKHADIBHAND,544.0,3.6,0.380618
5505,KTAFP5Y2ZTHSYHWG,Women Solid Pure Cotton Ethnic Dress¬†¬†(Pink),SATDEVANGIKHADIBHAND,502.0,3.6,0.364765
5447,KTAFP4ZHPEQTZBFT,Men Solid Pure Cotton Ethnic Dress¬†¬†(Brown),SATDEVANGIKHADIBHAND,599.0,3.6,0.36009
5434,KTAFP4ZHFHQRGSYP,Men Solid Pure Cotton Ethnic Dress¬†¬†(Brown),SATDEVANGIKHADIBHAND,599.0,3.6,0.36009
5430,KTAFP4ZHZSSNZRSR,Men Solid Pure Cotton Ethnic Dress¬†¬†(Brown),SATDEVANGIKHADIBHAND,599.0,3.6,0.36009


In [13]:
search_query("men leather jacket brown")


üîç Top 10 results for query: 'men leather jacket brown'


Unnamed: 0,pid,title,brand,selling_price,average_rating,similarity_score
3992,JCKFXY6F9UFABRGP,Full Sleeve Solid Women Leather Jacket,Cher,1299.0,3.0,0.729836
3993,JCKFXY6FHKFVSJEZ,Full Sleeve Solid Women Leather Jacket,Cher,1299.0,3.0,0.729836
4002,JCKFWZBYFHGWZ6RF,Full Sleeve Solid Men Leather Jacket,Cher,1299.0,3.2,0.714739
3996,JCKFWZBYHDRNMSZF,Full Sleeve Solid Men Leather Jacket,Cher,1299.0,3.2,0.714739
4011,JCKFXY6FKF36GMNN,Full Sleeve Solid Men Leather Jacket,Cher,1299.0,3.0,0.669001
3981,JCKFXY6FZMWUMTD4,Full Sleeve Solid Men Leather Jacket,Cher,1299.0,3.0,0.66378
4012,JCKFXY6FPHVHGFG4,Full Sleeve Solid Women Leather Jacket,Cher,1299.0,3.0,0.65359
4005,JCKFXY6FPFZTG5WN,Full Sleeve Solid Women Leather Jacket,Cher,1299.0,3.0,0.652295
4009,JCKFXY6FC8Z6YENR,Full Sleeve Solid Men Leather Jacket,Cher,1299.0,3.0,0.651976
3974,JCKFXY6FKFXW66DD,Full Sleeve Solid Women Leather Jacket,Cher,1299.0,3.0,0.648491


In [14]:
search_query("kids white sneakers")


üîç Top 10 results for query: 'kids white sneakers'


Unnamed: 0,pid,title,brand,selling_price,average_rating,similarity_score
25548,TSHFRAMF4PNC3YKZ,Printed Men Round Neck White T-Shirt,Pu,599.0,4.3,0.396624
25651,TSHFRAMFY2MAVNNF,Printed Men Round Neck Blue T-Shirt,Pu,974.0,4.3,0.338766
13653,JEAFVN95DF6FQHPC,Skinny Women Blue Jeans,Marca Disa,879.0,4.2,0.271131
13267,VESFUQXYMWBRQ3XQ,Sheny Men Vest,Unknown,199.0,3.0,0.141756
17202,TSHFYRYBKCSQNGGS,Solid Women Round Neck White T-Shirt¬†¬†(Pack of 4),Onei,799.0,3.7,0.137566
8859,CAPFFNNYKSGCHZF8,Self Design Roy Caps Combo Pack white & Blue C...,R,349.0,3.3,0.136618
8860,CAPFFEZFVDYZJFBN,Self Design Roy Caps Combo Pack white & Blue C...,R,349.0,3.3,0.135354
19551,RNCFGGTR5JWV8ES9,Solid Men Raincoat,Wet Off Ho,799.0,3.8,0.126647
5995,SHTFG3GFQAJ9YSQK,Women Regular Fit Checkered Casual Shirt,Mo,799.0,2.9,0.122265
5966,SHTFGYHEWBFYCBXD,Women Regular Fit Checkered Casual Shirt,Mo,768.0,2.9,0.122265


# üìä Part 2: Evaluation

In [15]:

import pandas as pd
import numpy as np
from google.colab import files

# ------------------------------------------------------------
# Upload and load validation_labels.csv
# ------------------------------------------------------------
print("üìÇ Please upload your 'validation_labels.csv' file")
uploaded = files.upload()

val_file = list(uploaded.keys())[0]
val_df = pd.read_csv(val_file)

print("\n‚úÖ Validation file loaded successfully!")
print("Shape:", val_df.shape)
print("Columns:", val_df.columns.tolist())
display(val_df.head(5))


üìÇ Please upload your 'validation_labels.csv' file


Saving validation_labels.csv to validation_labels.csv

‚úÖ Validation file loaded successfully!
Shape: (40, 4)
Columns: ['title', 'pid', 'query_id', 'labels']


Unnamed: 0,title,pid,query_id,labels
0,Full Sleeve Printed Women Sweatshirt,SWSFFVKBCQG5FHPF,1,1
1,Full Sleeve Striped Women Sweatshirt,SWSFJY5ZFHQ7HXKW,1,0
2,Full Sleeve Printed Women Sweatshirt,SWSFUY89NHMZHZPX,1,1
3,Full Sleeve Graphic Print Women Sweatshirt,SWSFXQ5YX6RZKHP4,1,1
4,Full Sleeve Solid Women Sweatshirt,JCKFTZBC3DMCVYXH,1,0


In [16]:
# ------------------------------------------------------------
# Normalize both DataFrames for safe matching
# ------------------------------------------------------------
df['pid'] = df['pid'].astype(str).str.strip().str.upper()
val_df['pid'] = val_df['pid'].astype(str).str.strip().str.upper()

# Check overlap between datasets
overlap = val_df['pid'].isin(df['pid']).sum()
print(f"\nüîç Found {overlap} of {len(val_df)} validation PIDs in dataset.")





üîç Found 40 of 40 validation PIDs in dataset.


## 2.1 Metric Implementation

In [17]:
# ============================================================
# üìè Evaluation Metrics
# ============================================================

def precision_at_k(predicted, relevant, k):
    predicted = predicted[:k]
    if not predicted:
        return 0.0
    return len(set(predicted) & set(relevant)) / len(predicted)

def recall_at_k(predicted, relevant, k):
    predicted = predicted[:k]
    if not relevant:
        return 0.0
    return len(set(predicted) & set(relevant)) / len(relevant)

def f1_at_k(predicted, relevant, k):
    p = precision_at_k(predicted, relevant, k)
    r = recall_at_k(predicted, relevant, k)
    if p + r == 0:
        return 0.0
    return 2 * p * r / (p + r)

def average_precision_at_k(predicted, relevant, k):
    predicted = predicted[:k]
    score = 0.0
    hits = 0
    for i, pid in enumerate(predicted, start=1):
        if pid in relevant:
            hits += 1
            score += hits / i
    return score / hits if hits > 0 else 0.0

def mean_average_precision(all_ap):
    return np.mean(all_ap) if all_ap else 0.0

def reciprocal_rank(predicted, relevant):
    for i, pid in enumerate(predicted, start=1):
        if pid in relevant:
            return 1 / i
    return 0.0

def ndcg_at_k(predicted, relevant, k):
    predicted = predicted[:k]
    dcg = 0.0
    for i, pid in enumerate(predicted, start=1):
        if pid in relevant:
            dcg += 1 / np.log2(i + 1)
    ideal_dcg = sum(1 / np.log2(i + 1) for i in range(1, min(k, len(relevant)) + 1))
    return dcg / ideal_dcg if ideal_dcg > 0 else 0.0


In [21]:
# ============================================================
# üîç Query Evaluation Functions
# ============================================================

def search_and_get_pids(query_text, top_k=10):
    """Run query and return list of predicted PIDs (uppercase)."""
    results = search_query(query_text, top_k=top_k)
    if results.empty:
        return []
    return [str(pid).strip().upper() for pid in results['pid'].tolist()]

def evaluate_all_metrics(query_text, query_id, top_k=10, verbose=True):
    """Compute all metrics for one query, for a given cutoff k."""
    predicted = search_and_get_pids(query_text, top_k)
    relevant = val_df[(val_df['query_id'] == query_id) & (val_df['labels'] == 1)]['pid'].tolist()

    if verbose:
        print(f"\nüîπ Evaluating Query {query_id}: '{query_text}'")
        print(f"Top {top_k} predicted PIDs ({len(predicted)}): {predicted[:10]}")
        print(f"All relevant PIDs ({len(relevant)}): {relevant}")

    if not predicted:
        print("‚ö†Ô∏è No predictions found for this query.")
        return None

    metrics = {
        "Precision@K": precision_at_k(predicted, relevant, top_k),
        "Recall@K": recall_at_k(predicted, relevant, top_k),
        "F1@K": f1_at_k(predicted, relevant, top_k),
        "AP@K": average_precision_at_k(predicted, relevant, top_k),
        "RR": reciprocal_rank(predicted, relevant),
        "NDCG@K": ndcg_at_k(predicted, relevant, top_k)
    }

    if verbose:
        print("\nüìè Metrics:")
        for key, value in metrics.items():
            print(f"{key:12}: {value:.3f}")

    return metrics



## 2.2 Evaluation with validation_labels.csv

In [26]:
# ============================================================
# üß™ Evaluate the Two Predefined Queries (Detailed Comparison)
# ============================================================

# Query definitions
queries = {
    1: "women full sleeve sweatshirt cotton",
    2: "men slim jeans blue"
}

# --- Run Evaluation @10 ---
metrics_q1_at10 = evaluate_all_metrics(queries[1], 1, top_k=10, verbose=True)
metrics_q2_at10 = evaluate_all_metrics(queries[2], 2, top_k=10, verbose=True)

# --- Run Evaluation @100 (to detect if relevant docs appear later) ---
metrics_q1_at100 = evaluate_all_metrics(queries[1], 1, top_k=100, verbose=False)
metrics_q2_at100 = evaluate_all_metrics(queries[2], 2, top_k=100, verbose=False)

# --- Display Comparison Summary ---
print("\nüí° Comparison of Retrieval Performance")
print("======================================")

print("\nüî∏ Query 1: women full sleeve sweatshirt cotton")
print("Precision@10 :", f"{metrics_q1_at10['Precision@K']:.3f}")
print("Precision@100:", f"{metrics_q1_at100['Precision@K']:.3f}")
print("Recall@10    :", f"{metrics_q1_at10['Recall@K']:.3f}")
print("Recall@100   :", f"{metrics_q1_at100['Recall@K']:.3f}")

print("\nüî∏ Query 2: men slim jeans blue")
print("Precision@10 :", f"{metrics_q2_at10['Precision@K']:.3f}")
print("Precision@100:", f"{metrics_q2_at100['Precision@K']:.3f}")
print("Recall@10    :", f"{metrics_q2_at10['Recall@K']:.3f}")
print("Recall@100   :", f"{metrics_q2_at100['Recall@K']:.3f}")

# --- Compute MAP across both queries (using AP@K from @100 cutoff for completeness) ---
map_score = mean_average_precision([
    metrics_q1_at100["AP@K"],
    metrics_q2_at100["AP@K"]
])
print(f"\nüìà Mean Average Precision (MAP, using @100 cutoff): {map_score:.3f}")



üîç Top 10 results for query: 'women full sleeve sweatshirt cotton'

üîπ Evaluating Query 1: 'women full sleeve sweatshirt cotton'
Top 10 predicted PIDs (10): ['SWSFVZRFS7GHGKSF', 'SWSFWEF3XGHFBCJC', 'SWSFWEF36Z3RKTJ7', 'SWSFWCXMJRZDWKVU', 'SWSFWCXHFRX2DBSG', 'SWSFWEF2T9VGRUXA', 'SWSFWCS9DZGKEGVG', 'SWSFVZRFNFMYYRCK', 'SWSFVZRFVZ2SZ3HZ', 'SWSFVZRFTM4P9XGF']
All relevant PIDs (13): ['SWSFFVKBCQG5FHPF', 'SWSFUY89NHMZHZPX', 'SWSFXQ5YX6RZKHP4', 'SWSFWCTDHRABJFCR', 'SWSFVGKENHBSVUWJ', 'JCKF7M8DNBB6WZ8Y', 'SWSFWCSPEGMDH8FV', 'SWSFXYRYTHZWSZPE', 'SWSFW6BQ74JCVHHH', 'SWSF5R7F38FNMWXG', 'SWSFWEFYEW4AZYTZ', 'SWSFN8YZXFSUCAJX', 'SWSFMJGS8HVBPEH6']

üìè Metrics:
Precision@K : 0.000
Recall@K    : 0.000
F1@K        : 0.000
AP@K        : 0.000
RR          : 0.000
NDCG@K      : 0.000

üîç Top 10 results for query: 'men slim jeans blue'

üîπ Evaluating Query 2: 'men slim jeans blue'
Top 10 predicted PIDs (10): ['JEAFUZXSGPAQ2A3A', 'JEAFUZXSDTXBFSVG', 'JEAFUZXSVVFXQWTG', 'JEAFUZXSZFBQZFZU', 'JEAFU

In [27]:
print("\n=== Final Evaluation Results (Numeric Only) ===")
print(f"Query 1 - P@10: {metrics_q1_at10['Precision@K']:.3f}, R@10: {metrics_q1_at10['Recall@K']:.3f}, MAP@100: {metrics_q1_at100['AP@K']:.3f}")
print(f"Query 2 - P@10: {metrics_q2_at10['Precision@K']:.3f}, R@10: {metrics_q2_at10['Recall@K']:.3f}, MAP@100: {metrics_q2_at100['AP@K']:.3f}")
print(f"Mean Average Precision (MAP): {map_score:.3f}")



=== Final Evaluation Results (Numeric Only) ===
Query 1 - P@10: 0.000, R@10: 0.000, MAP@100: 0.055
Query 2 - P@10: 0.000, R@10: 0.000, MAP@100: 0.062
Mean Average Precision (MAP): 0.059


In [28]:
def check_relevant_presence(query_text, query_id, top_k=100):
    """Inspect which relevant PIDs were retrieved."""
    print(f"\nüìä Checking overlap for Query {query_id}: '{query_text}'")
    predicted = search_and_get_pids(query_text, top_k)
    relevant = val_df[(val_df['query_id'] == query_id) & (val_df['labels'] == 1)]['pid'].tolist()

    overlap = set(predicted) & set(relevant)
    print(f"Relevant retrieved: {len(overlap)} / {len(relevant)}")
    if overlap:
        print("Matched PIDs:", list(overlap)[:10], "...")

# Example usage:
check_relevant_presence("women full sleeve sweatshirt cotton", 1)
check_relevant_presence("men slim jeans blue", 2)


üìä Checking overlap for Query 1: 'women full sleeve sweatshirt cotton'

üîç Top 100 results for query: 'women full sleeve sweatshirt cotton'
Relevant retrieved: 3 / 13
Matched PIDs: ['SWSFWEFYEW4AZYTZ', 'SWSFWCSPEGMDH8FV', 'SWSF5R7F38FNMWXG'] ...

üìä Checking overlap for Query 2: 'men slim jeans blue'

üîç Top 100 results for query: 'men slim jeans blue'
Relevant retrieved: 4 / 10
Matched PIDs: ['JEAFSKYHRVZSABPR', 'JEAFUZXSMRFFNGC2', 'JEAFUZXSQQHZXRYM', 'JEAFUPSWVCTKXMHF'] ...


In [30]:
# Final numeric evaluation output (required by statement)

queries = {
    1: "women full sleeve sweatshirt cotton",
    2: "men slim jeans blue"
}

metrics_q1_at10 = evaluate_all_metrics(queries[1], 1, top_k=10, verbose=False)
metrics_q2_at10 = evaluate_all_metrics(queries[2], 2, top_k=10, verbose=False)

map_score = mean_average_precision([
    metrics_q1_at10["AP@K"],
    metrics_q2_at10["AP@K"]
])

print(f"{'Query':<8} {'P@10':>8} {'R@10':>8} {'F1@10':>8} {'AP@10':>8} {'RR':>8} {'NDCG@10':>10}")
print(f"{'Q1':<8} "
      f"{metrics_q1_at10['Precision@K']:.3f} "
      f"{metrics_q1_at10['Recall@K']:.3f} "
      f"{metrics_q1_at10['F1@K']:.3f} "
      f"{metrics_q1_at10['AP@K']:.3f} "
      f"{metrics_q1_at10['RR']:.3f} "
      f"{metrics_q1_at10['NDCG@K']:.3f}")
print(f"{'Q2':<8} "
      f"{metrics_q2_at10['Precision@K']:.3f} "
      f"{metrics_q2_at10['Recall@K']:.3f} "
      f"{metrics_q2_at10['F1@K']:.3f} "
      f"{metrics_q2_at10['AP@K']:.3f} "
      f"{metrics_q2_at10['RR']:.3f} "
      f"{metrics_q2_at10['NDCG@K']:.3f}")
print(f"{'MAP':<8} {map_score:.3f}")



üîç Top 10 results for query: 'women full sleeve sweatshirt cotton'

üîç Top 10 results for query: 'men slim jeans blue'
Query        P@10     R@10    F1@10    AP@10       RR    NDCG@10
Q1       0.000 0.000 0.000 0.000 0.000 0.000
Q2       0.000 0.000 0.000 0.000 0.000 0.000
MAP      0.000


In [24]:
def debug_query_ranking(query_text, query_id, top_k=15):
    print(f"\nüîé DEBUG for Query {query_id}: '{query_text}'")

    # Run retrieval
    results_df = search_query(query_text, top_k=top_k).copy()

    # Safety in case the search returns empty
    if results_df.empty:
        print("‚ö†Ô∏è No results returned for this query.")
        return results_df

    # Normalize pid for matching
    results_df['pid'] = results_df['pid'].astype(str).str.strip().str.upper()

    # Get ground truth relevant PIDs
    relevant_set = set(
        val_df[(val_df['query_id'] == query_id) & (val_df['labels'] == 1)]['pid'].tolist()
    )

    # Add a relevance flag column
    results_df['is_relevant'] = results_df['pid'].apply(lambda x: 1 if x in relevant_set else 0)

    # Add rank column (1-based rank)
    results_df.insert(0, 'rank', range(1, len(results_df) + 1))

    # Show debug info
    print("\nüß† Ground truth relevant PIDs:")
    print(list(relevant_set))

    print("\nüìã Top ranked documents with relevance flag:")
    display(results_df[['rank', 'pid', 'title', 'similarity_score', 'is_relevant']])

    # Where's the first relevant?
    relevant_rows = results_df[results_df['is_relevant'] == 1]
    if not relevant_rows.empty:
        first_rel_rank = relevant_rows['rank'].iloc[0]
        print(f"\n‚úÖ First relevant item appears at rank {first_rel_rank}")
    else:
        print("\n‚ùå No relevant items in the top_k shown.")

    return results_df

# Run for both queries:
_ = debug_query_ranking("women full sleeve sweatshirt cotton", 1, top_k=15)
_ = debug_query_ranking("men slim jeans blue", 2, top_k=15)



üîé DEBUG for Query 1: 'women full sleeve sweatshirt cotton'

üîç Top 15 results for query: 'women full sleeve sweatshirt cotton'

üß† Ground truth relevant PIDs:
['SWSFWCSPEGMDH8FV', 'JCKF7M8DNBB6WZ8Y', 'SWSFXYRYTHZWSZPE', 'SWSF5R7F38FNMWXG', 'SWSFMJGS8HVBPEH6', 'SWSFUY89NHMZHZPX', 'SWSFN8YZXFSUCAJX', 'SWSFVGKENHBSVUWJ', 'SWSFW6BQ74JCVHHH', 'SWSFXQ5YX6RZKHP4', 'SWSFWEFYEW4AZYTZ', 'SWSFFVKBCQG5FHPF', 'SWSFWCTDHRABJFCR']

üìã Top ranked documents with relevance flag:


Unnamed: 0,rank,pid,title,similarity_score,is_relevant
22558,1,SWSFVZRFS7GHGKSF,Full Sleeve Solid Women Sweatshirt,0.49045,0
23316,2,SWSFWEF3XGHFBCJC,Full Sleeve Graphic Print Women Sweatshirt,0.477308,0
23516,3,SWSFWEF36Z3RKTJ7,Full Sleeve Graphic Print Women Sweatshirt,0.467162,0
23329,4,SWSFWCXMJRZDWKVU,Full Sleeve Graphic Print Women Sweatshirt,0.465524,0
23523,5,SWSFWCXHFRX2DBSG,Full Sleeve Graphic Print Women Sweatshirt,0.465524,0
23929,6,SWSFWEF2T9VGRUXA,Full Sleeve Graphic Print Women Sweatshirt,0.463932,0
23630,7,SWSFWCS9DZGKEGVG,Full Sleeve Graphic Print Women Sweatshirt,0.459595,0
22547,8,SWSFVZRFNFMYYRCK,Full Sleeve Solid Men Sweatshirt,0.457662,0
22500,9,SWSFVZRFVZ2SZ3HZ,Full Sleeve Solid Men Sweatshirt,0.457662,0
22494,10,SWSFVZRFTM4P9XGF,Full Sleeve Solid Men Sweatshirt,0.457662,0



‚úÖ First relevant item appears at rank 15

üîé DEBUG for Query 2: 'men slim jeans blue'

üîç Top 15 results for query: 'men slim jeans blue'

üß† Ground truth relevant PIDs:
['JEAFUZXRDYSVFK4Y', 'JEAFUZXRDZWSYWGG', 'JEAFTGSGTYKZGAEZ', 'JEAFSKYHRVZSABPR', 'JEAFUZXSMRFFNGC2', 'JEAFUPSWVCTKXMHF', 'JEAFUZXSQQHZXRYM', 'JEAFVXG4GGZH9VFA', 'JEAF8CHSVCP5GUH9', 'JEAE32FSQ4JXYJK6']

üìã Top ranked documents with relevance flag:


Unnamed: 0,rank,pid,title,similarity_score,is_relevant
11747,1,JEAFUZXSGPAQ2A3A,Super Skinny Men Blue Jeans,0.504086,0
11687,2,JEAFUZXSDTXBFSVG,Super Skinny Men Blue Jeans,0.504086,0
11611,3,JEAFUZXSVVFXQWTG,Tapered Fit Men Blue Jeans,0.504086,0
11631,4,JEAFUZXSZFBQZFZU,Super Skinny Men Blue Jeans,0.504086,0
11771,5,JEAFUZXTZDGEVFPJ,Super Skinny Men Blue Jeans,0.482773,0
11630,6,JEAFWBJKGNRYCY7Y,Skinny Men Blue Jeans,0.481658,0
11731,7,JEAFWBJK8FTXWTT6,Super Skinny Men Blue Jeans,0.478572,0
14340,8,JEAFRB6ZTVQ9HA46,Slim Men Blue Jeans,0.473802,0
11520,9,JEAFUZXSPGQFPYFC,Super Skinny Men Blue Jeans,0.473735,0
12198,10,JEAFXUE7CHZYHWYE,Jogger Fit Men Blue Jeans,0.473735,0



‚úÖ First relevant item appears at rank 15


## 2.3 Expert-Labeled Evaluation

In [32]:
# ============================================================
# üß† Step 1 ‚Äî Build our own validation-like labels
# ============================================================

def build_manual_validation(queries, top_k=100):
    """
    For each query, get top_k TF-IDF ranked docs,
    mark label=1 if in AND set, else 0.
    Returns a combined DataFrame like validation_labels.csv.
    """
    all_rows = []
    for qid, qtext in queries.items():
        print(f"Building labels for Query {qid}: '{qtext}'")

        # Docs that satisfy AND condition
        and_docs = and_conjunctive_lookup(qtext)
        and_pids = set(df.loc[list(and_docs), 'pid'].astype(str).str.strip().str.upper())

        # Retrieve top_k ranked docs (full TF-IDF)
        results = search_query(qtext, top_k=top_k)
        results['pid'] = results['pid'].astype(str).str.strip().str.upper()
        results['query_id'] = qid
        results['query_text'] = qtext

        # Assign label = 1 if in AND docs, else 0
        results['label'] = results['pid'].apply(lambda x: 1 if x in and_pids else 0)

        all_rows.append(results[['query_id', 'pid', 'label']])

    labeled_df = pd.concat(all_rows, ignore_index=True)
    print("\n‚úÖ Created manual validation-like labels.")
    display(labeled_df.head(10))
    return labeled_df

queries_custom = {
    1: "women blue cotton tshirt",
    2: "men black jeans slim fit",
    3: "cotton round neck sweatshirt",
    4: "women red dress long sleeve",
    5: "men leather jacket brown"
}

manual_val_df = build_manual_validation(queries_custom, top_k=100)

# Optionally save it
manual_val_df.to_csv("manual_validation_labels.csv", index=False)



Building labels for Query 1: 'women blue cotton tshirt'

üîç Top 100 results for query: 'women blue cotton tshirt'
Building labels for Query 2: 'men black jeans slim fit'

üîç Top 100 results for query: 'men black jeans slim fit'
Building labels for Query 3: 'cotton round neck sweatshirt'

üîç Top 100 results for query: 'cotton round neck sweatshirt'
Building labels for Query 4: 'women red dress long sleeve'

üîç Top 100 results for query: 'women red dress long sleeve'
Building labels for Query 5: 'men leather jacket brown'

üîç Top 100 results for query: 'men leather jacket brown'

‚úÖ Created manual validation-like labels.


Unnamed: 0,query_id,pid,label
0,1,TSHFPCXCMQNAWNPV,0
1,1,TSHFUTG5JZGFTXDF,0
2,1,TSHFGF4FQQGQ8RPD,1
3,1,TSHFGF3SGGHCAMGK,1
4,1,TSHFGF3VUUSW9BJE,1
5,1,TSHFGF3VFDHJDQ7Z,1
6,1,TSHFNUHHUNJYBFDA,1
7,1,TSHFVGC84CZZFQHT,1
8,1,TSHFVGC8HKEBBHFS,1
9,1,TSHFVGC8BKN9PCQE,1


In [33]:
def evaluate_all_metrics_custom(query_text, query_id, labels_df, top_k=10, verbose=True):
    predicted = search_and_get_pids(query_text, top_k)
    relevant = labels_df[(labels_df['query_id'] == query_id) & (labels_df['label'] == 1)]['pid'].tolist()

    if verbose:
        print(f"\nEvaluating Query {query_id}: '{query_text}'")
        print(f"Relevant docs: {len(relevant)}")

    metrics = {
        "Precision@K": precision_at_k(predicted, relevant, top_k),
        "Recall@K": recall_at_k(predicted, relevant, top_k),
        "F1@K": f1_at_k(predicted, relevant, top_k),
        "AP@K": average_precision_at_k(predicted, relevant, top_k),
        "RR": reciprocal_rank(predicted, relevant),
        "NDCG@K": ndcg_at_k(predicted, relevant, top_k)
    }
    return metrics


In [36]:
# ============================================================
# üìä Step 2 ‚Äî Evaluate ranking against our AND-based ground truth
# ============================================================

results_manual = {}
for qid, qtext in queries_custom.items():
    results_manual[qid] = evaluate_all_metrics_custom(qtext, qid, manual_val_df, top_k=10, verbose=False)

map_manual = mean_average_precision([r["AP@K"] for r in results_manual.values()])

print(f"{'Query':<8} {'P@10':>8} {'R@10':>8} {'F1@10':>8} {'AP@10':>8} {'RR':>8} {'NDCG@10':>10}")
print("-"*60)
for qid, metrics in results_manual.items():
    print(f"{qid:<8} "
          f"{metrics['Precision@K']:.3f} "
          f"{metrics['Recall@K']:.3f} "
          f"{metrics['F1@K']:.3f} "
          f"{metrics['AP@K']:.3f} "
          f"{metrics['RR']:.3f} "
          f"{metrics['NDCG@K']:.3f}")
print("-"*60)
print(f"{'MAP':<8} {map_manual:.3f}")
print("‚úÖ Numeric results only ‚Äî commentary provided in report.")




üîç Top 10 results for query: 'women blue cotton tshirt'

üîç Top 10 results for query: 'men black jeans slim fit'

üîç Top 10 results for query: 'cotton round neck sweatshirt'

üîç Top 10 results for query: 'women red dress long sleeve'

üîç Top 10 results for query: 'men leather jacket brown'
Query        P@10     R@10    F1@10    AP@10       RR    NDCG@10
------------------------------------------------------------
1        0.800 0.129 0.222 0.643 0.333 0.641
2        0.000 0.000 0.000 0.000 0.000 0.000
3        0.900 0.265 0.409 0.989 1.000 0.934
4        0.000 0.000 0.000 0.000 0.000 0.000
5        0.200 0.667 0.308 0.417 0.333 0.437
------------------------------------------------------------
MAP      0.410
‚úÖ Numeric results only ‚Äî commentary provided in report.


In [37]:
# ============================================================
# üß© Debug function (Part 2.3): visualize ranking vs AND-based relevance
# ============================================================

def debug_query_ranking_custom(query_text, query_id, labels_df, top_k=15):
    print(f"\nüîé DEBUG for Query {query_id}: '{query_text}'")

    # Run retrieval using TF-IDF ranking (full dataset)
    results_df = search_query(query_text, top_k=top_k).copy()

    # Safety check
    if results_df.empty:
        print("‚ö†Ô∏è No results returned for this query.")
        return results_df

    # Normalize PIDs for matching
    results_df['pid'] = results_df['pid'].astype(str).str.strip().str.upper()

    # Get ground-truth relevant PIDs from manual labels
    relevant_set = set(
        labels_df[(labels_df['query_id'] == query_id) & (labels_df['label'] == 1)]['pid'].tolist()
    )

    # Add relevance flag
    results_df['is_relevant'] = results_df['pid'].apply(lambda x: 1 if x in relevant_set else 0)

    # Add rank column
    results_df.insert(0, 'rank', range(1, len(results_df) + 1))

    # Display summary
    print(f"\nüß† Ground truth relevant PIDs ({len(relevant_set)}):")
    print(list(relevant_set)[:10], "..." if len(relevant_set) > 10 else "")

    print("\nüìã Top ranked documents with relevance flag:")
    display(results_df[['rank', 'pid', 'title', 'similarity_score', 'is_relevant']])

    # Find first relevant
    relevant_rows = results_df[results_df['is_relevant'] == 1]
    if not relevant_rows.empty:
        first_rank = relevant_rows['rank'].iloc[0]
        print(f"\n‚úÖ First relevant item appears at rank {first_rank}")
    else:
        print("\n‚ùå No relevant items in the top_k shown.")

    return results_df


# ============================================================
# üß™ Run for all 5 custom queries (Part 2.3)
# ============================================================
for qid, qtext in queries_custom.items():
    _ = debug_query_ranking_custom(qtext, qid, manual_val_df, top_k=15)

print("‚úÖ Numeric results only ‚Äî commentary provided in report.")




üîé DEBUG for Query 1: 'women blue cotton tshirt'

üîç Top 15 results for query: 'women blue cotton tshirt'

üß† Ground truth relevant PIDs (62):
['TSHFGKYQ54PKZGST', 'TSHFZNRGEEZZ6BBF', 'TSHFGGYC6PHJQDMP', 'TSHFGGFCYB7HFUPY', 'TSHFGKYPPAJPNVAF', 'TSHFVGC8VJRXMVYD', 'TSHFGGFCG2MCAEFF', 'TSHFGF3VFDHJDQ7Z', 'TSHFGZKGUSA7HCFS', 'TSHFYSKCGUA99HPS'] ...

üìã Top ranked documents with relevance flag:


Unnamed: 0,rank,pid,title,similarity_score,is_relevant
17906,1,TSHFPCXCMQNAWNPV,Printed Women Round Neck Multicolor T-Shirt,0.666471,0
1173,2,TSHFUTG5JZGFTXDF,Color Block Men Round Neck Multicolor T-Shirt,0.582994,0
9401,3,TSHFGF4FQQGQ8RPD,Printed Women Round Neck Dark Blue T-Shirt,0.452769,1
9229,4,TSHFGF3SGGHCAMGK,Printed Women Round Neck Dark Blue T-Shirt,0.451795,1
9228,5,TSHFGF3VUUSW9BJE,Printed Women Round Neck Blue T-Shirt,0.451223,1
9209,6,TSHFGF3VFDHJDQ7Z,Printed Women Round Neck Blue T-Shirt,0.451223,1
21403,7,TSHFNUHHUNJYBFDA,Solid Women Round Neck Blue T-Shirt,0.438857,1
9040,8,TSHFVGC84CZZFQHT,Solid Women Round Neck Black T-Shirt¬†¬†(Pack of 2),0.425164,1
9041,9,TSHFVGC8HKEBBHFS,Solid Women Round Neck Black T-Shirt¬†¬†(Pack of 2),0.424238,1
9039,10,TSHFVGC8BKN9PCQE,Solid Women Round Neck Green T-Shirt¬†¬†(Pack of 2),0.421456,1



‚úÖ First relevant item appears at rank 3

üîé DEBUG for Query 2: 'men black jeans slim fit'

üîç Top 15 results for query: 'men black jeans slim fit'

üß† Ground truth relevant PIDs (0):
[] 

üìã Top ranked documents with relevance flag:


Unnamed: 0,rank,pid,title,similarity_score,is_relevant
5822,1,JEAFH7N5ZWSHZ2BQ,Slim Men Black Jeans,0.441987,0
11502,2,JEAFXUE2ZGDUHGRU,Skinny Men Black Jeans,0.430917,0
11654,3,JEAFUZXSZB3FZEKJ,Skinny Men Black Jeans,0.430917,0
12095,4,JEAFUZXSNGYAXHJM,Tapered Fit Men Black Jeans,0.430917,0
11688,5,JEAFUZXRTGKMXCNK,Super Skinny Men Black Jeans,0.412001,0
11687,6,JEAFUZXSDTXBFSVG,Super Skinny Men Blue Jeans,0.410746,0
11631,7,JEAFUZXSZFBQZFZU,Super Skinny Men Blue Jeans,0.410746,0
11611,8,JEAFUZXSVVFXQWTG,Tapered Fit Men Blue Jeans,0.410746,0
11747,9,JEAFUZXSGPAQ2A3A,Super Skinny Men Blue Jeans,0.410746,0
24526,10,JEAFJM2PHHPFTAH8,Slim Women Black Jeans,0.407483,0



‚ùå No relevant items in the top_k shown.

üîé DEBUG for Query 3: 'cotton round neck sweatshirt'

üîç Top 15 results for query: 'cotton round neck sweatshirt'

üß† Ground truth relevant PIDs (34):
['SWSFUFGWCHHUPTN4', 'SWSFNMS5N4TEXFSG', 'SWSFV5JNSVQX3SQA', 'SWSFNMS5FFRGFQTK', 'SWSFUY8ATUXFKJPZ', 'SWSFV5JNXGFZGB7S', 'SWSFNMS5HZGZK8QX', 'SWSFYFFYNUHYEQGD', 'SWSF9W3ZHHJFQFJF', 'SWSFMTNHZR59ZUXR'] ...

üìã Top ranked documents with relevance flag:


Unnamed: 0,rank,pid,title,similarity_score,is_relevant
19326,1,SWSFNMS57EXWPNFD,Full Sleeve Color Block Men Sweatshirt,0.485386,1
19239,2,SWSFNMS5QHGAJQ3A,Full Sleeve Color Block Men Sweatshirt,0.485386,1
19250,3,SWSFNMS5FFRGFQTK,Full Sleeve Color Block Women Sweatshirt,0.485386,1
19389,4,SWSFNMS5N4TEXFSG,Full Sleeve Color Block Women Sweatshirt,0.485386,1
19241,5,SWSFNMS5HZGZK8QX,Full Sleeve Color Block Men Sweatshirt,0.485386,1
21357,6,SWSFMTNHCG3SRHJR,Full Sleeve Solid Men Sweatshirt,0.459958,1
21521,7,SWSFMTNHZR59ZUXR,Full Sleeve Solid Men Sweatshirt,0.459372,1
25430,8,SWSFUY8ATUXFKJPZ,Full Sleeve Printed Women Sweatshirt,0.453483,1
9090,9,SWSFFVKBAZGKCQCX,Full Sleeve Graphic Print Women Sweatshirt,0.447058,0
25312,10,SWSFUFGRYFEH8HBG,Full Sleeve Printed Women Sweatshirt,0.446337,1



‚úÖ First relevant item appears at rank 1

üîé DEBUG for Query 4: 'women red dress long sleeve'

üîç Top 15 results for query: 'women red dress long sleeve'

üß† Ground truth relevant PIDs (0):
[] 

üìã Top ranked documents with relevance flag:


Unnamed: 0,rank,pid,title,similarity_score,is_relevant
5427,1,KTAFP4ZHCEMZWGSG,Women Solid Pure Cotton Ethnic Dress¬†¬†(Blue),0.39393,0
5421,2,KTAFP4ZQ86ZAMHK6,Women Solid Pure Cotton Ethnic Dress¬†¬†(Blue),0.39393,0
5446,3,KTAFP4ZHFSJWTPJQ,Women Solid Pure Cotton Ethnic Dress¬†¬†(Blue),0.39393,0
5436,4,KTAFP4ZHXPZACRCB,Women Solid Pure Cotton Ethnic Dress¬†¬†(Blue),0.39393,0
5441,5,KTAFP4ZHH5NBHZTJ,Women Solid Pure Cotton Ethnic Dress¬†¬†(Brown),0.380618,0
5502,6,KTAFP4ZQCZTU83XD,Women Solid Pure Cotton Ethnic Dress¬†¬†(Brown),0.380618,0
5505,7,KTAFP5Y2ZTHSYHWG,Women Solid Pure Cotton Ethnic Dress¬†¬†(Pink),0.364765,0
5447,8,KTAFP4ZHPEQTZBFT,Men Solid Pure Cotton Ethnic Dress¬†¬†(Brown),0.36009,0
5434,9,KTAFP4ZHFHQRGSYP,Men Solid Pure Cotton Ethnic Dress¬†¬†(Brown),0.36009,0
5430,10,KTAFP4ZHZSSNZRSR,Men Solid Pure Cotton Ethnic Dress¬†¬†(Brown),0.36009,0



‚ùå No relevant items in the top_k shown.

üîé DEBUG for Query 5: 'men leather jacket brown'

üîç Top 15 results for query: 'men leather jacket brown'

üß† Ground truth relevant PIDs (3):
['JCKFWZBYFHGWZ6RF', 'JCKFM76BTSH6YX4Z', 'JCKFWZBYHDRNMSZF'] 

üìã Top ranked documents with relevance flag:


Unnamed: 0,rank,pid,title,similarity_score,is_relevant
3992,1,JCKFXY6F9UFABRGP,Full Sleeve Solid Women Leather Jacket,0.729836,0
3993,2,JCKFXY6FHKFVSJEZ,Full Sleeve Solid Women Leather Jacket,0.729836,0
4002,3,JCKFWZBYFHGWZ6RF,Full Sleeve Solid Men Leather Jacket,0.714739,1
3996,4,JCKFWZBYHDRNMSZF,Full Sleeve Solid Men Leather Jacket,0.714739,1
4011,5,JCKFXY6FKF36GMNN,Full Sleeve Solid Men Leather Jacket,0.669001,0
3981,6,JCKFXY6FZMWUMTD4,Full Sleeve Solid Men Leather Jacket,0.66378,0
4012,7,JCKFXY6FPHVHGFG4,Full Sleeve Solid Women Leather Jacket,0.65359,0
4005,8,JCKFXY6FPFZTG5WN,Full Sleeve Solid Women Leather Jacket,0.652295,0
4009,9,JCKFXY6FC8Z6YENR,Full Sleeve Solid Men Leather Jacket,0.651976,0
3974,10,JCKFXY6FKFXW66DD,Full Sleeve Solid Women Leather Jacket,0.648491,0



‚úÖ First relevant item appears at rank 3
‚úÖ Numeric results only ‚Äî commentary provided in report.
