In [1]:
import os

import json
import pandas as pd
from utils.bedrock_functions import build_anthropic_request_body, invoke_bedrock_endpoint
from utils.prompts import gen_search_task_prompt

# Directory holding the JSON inputs. It can be overridden through the
# PWAB_DATA_DIR environment variable so the notebook is not tied to a single
# machine's layout; when the variable is unset the original path is used.
data_dir = os.environ.get(
    "PWAB_DATA_DIR",
    '/home/ec2-user/code_repos/PersonalizationAE/inputs/pwab_data',
)


In [2]:
def load_json(files, data_dir= data_dir):
    """
    Load one JSON file, or load several and merge them into a single object.

    Args:
        files (str | list): A file name, or a list of file names, relative to
            ``data_dir``.
        data_dir (str): Directory containing the files. Defaults to the value
            the module-level ``data_dir`` had when this function was defined.
    Returns:
        For a single file name: the parsed JSON content.
        For a list of names: one flat list when every file holds a list, or
        one merged dict when every file holds a dict (on duplicate keys the
        later file wins).
    Raises:
        ValueError: If ``files`` is neither a str nor a list, if the list is
            empty, or if the files are not all lists or all dicts.
    """
    def _read(name):
        # Parse one file and report how many top-level records it contains.
        with open(f"{data_dir}/{name}", "r", encoding="utf-8") as f:
            data = json.load(f)
        print(f"Loaded {len(data)} records from {name}")
        return data

    if isinstance(files, str):
        return _read(files)

    if not isinstance(files, list):
        raise ValueError("Argument must be either a single file path (string) or a list of file paths.")

    if not files:
        # Previously this surfaced as a bare IndexError on combined_data[0].
        raise ValueError("The list of file paths is empty; nothing to load.")

    parts = [_read(name) for name in files]

    # Check every part, not just the first: mixing lists and dicts would
    # otherwise be merged incorrectly or fail with an unrelated error.
    if all(isinstance(part, list) for part in parts):
        return [item for part in parts for item in part]

    if all(isinstance(part, dict) for part in parts):
        merged = {}
        for part in parts:
            merged.update(part)
        return merged

    raise ValueError("Data must be a list of lists or a list of dicts.")


In [3]:
# List the data directory once; both shard lists below are derived from it.
filenames = os.listdir(data_dir)

# 'all_products' part files, sorted by name so the load order is stable.
all_products_files = sorted(
    name for name in filenames
    if name.startswith("all_products_part_") and name.endswith(".json")
)

# 'user_history' part files, sorted the same way.
user_history_files = sorted(
    name for name in filenames
    if name.startswith("user_history_part_") and name.endswith(".json")
)

In [4]:
# Sharded datasets: each list of part files is merged into one object by load_json.
all_products = load_json(all_products_files)
user_history = load_json(user_history_files)
# Single-file datasets, read from data_dir by file name.
user_instructions = load_json("user_instructions.json")
user_profiles = load_json("user_profiles.json")

Loaded 17886 records from all_products_part_1.json
Loaded 17886 records from all_products_part_2.json
Loaded 334 records from user_history_part_1.json
Loaded 334 records from user_history_part_2.json
Loaded 332 records from user_history_part_3.json
Loaded 2 records from user_instructions.json
Loaded 1000 records from user_profiles.json


In [5]:
user_instructions['train'][0]['target']['product_info']#['user_id']

{'main_category': 'Home_and_Kitchen',
 'title': 'Electropan Replacement Vacuum Filter Brush Kit for Robotic Vacuum ILIFE V3, V3s, V3s pro, V5, V5s, V5s pro ILIFE Robot Vacuum Replacement Parts Ilife Vacuum Filters V3s',
 'average_rating': 4.3,
 'rating_number': 2534,
 'features': ['Vaccum Parts works with Ilife V3, Ilife V3s pro, Ilife V5, Ilife V5s, Ilife V5s Pro Robotic Vacuum Cleaner. Good quality ilife v3s pro Robotic Vacuum Replacement Parts',
  'Robot Parts Cleaner Accessory kit can keep your vacuum cleaner at peak performance, Suggest to replace ILIFE Vacuum Filters & Robot Vacuum Brush Replacement every 2-3 months',
  'Vacuum Cleaner Parts/ robot Vacuum Accessories for Ilife robot helps to keep your house clean and save effort for housework',
  'Ilife Vacuum Replacement Parts is very easy to install, and very easy to wash these Ilife vacuum parts (including Ilife robot vacuum hepa filters, Ilife vacuum primary filter, Ilife robot vacuum side brushes)',
  '100% Compatible with I

In [6]:
# One profile dict per training instruction, looked up through that instruction's user_id.
user_prof_list = [
    user_profiles[instruction['user_id']]['user_profile']
    for instruction in user_instructions['train']
]


In [7]:
df = pd.DataFrame(user_prof_list)

In [8]:
df

Unnamed: 0,Gender,Age,Occupation,Price Sensitivity,Shopping Interest,Brand Preference,Diversity Preference,Interaction Complexity,Tone and Style,Item Reference,Focus Aspect
0,Female,35-44,Writer,Medium,"Electronics, Home & Kitchen, Fashion, Health &...","Aibocn, DACHEE, IBENZER, Columbia, Nutrition E...",High,High,"Enthusiastic, Detailed, Positive",Specific products and brands mentioned frequently,"Quality, Brand, Price"
1,Female,35-44,Writer,Medium,"Electronics, Home & Kitchen, Fashion, Health &...","Aibocn, DACHEE, IBENZER, Columbia, Nutrition E...",High,High,"Enthusiastic, Detailed, Positive",Specific products and brands mentioned frequently,"Quality, Brand, Price"
2,Female,35-44,Writer,Medium,"Electronics, Home & Kitchen, Fashion, Health &...","Aibocn, DACHEE, IBENZER, Columbia, Nutrition E...",High,High,"Enthusiastic, Detailed, Positive",Specific products and brands mentioned frequently,"Quality, Brand, Price"
3,Female,35-44,Writer,Medium,"Electronics, Home & Kitchen, Fashion, Health &...","Aibocn, DACHEE, IBENZER, Columbia, Nutrition E...",High,High,"Enthusiastic, Detailed, Positive",Specific products and brands mentioned frequently,"Quality, Brand, Price"
4,Female,35-44,Writer,Medium,"Electronics, Home & Kitchen, Fashion, Health &...","Aibocn, DACHEE, IBENZER, Columbia, Nutrition E...",High,High,"Enthusiastic, Detailed, Positive",Specific products and brands mentioned frequently,"Quality, Brand, Price"
...,...,...,...,...,...,...,...,...,...,...,...
6891,Female,35-44,Writer,Medium,"Home & Kitchen, Electronics, Fashion","Anker, Sleepwish, DOWAN, Under Armour, Canon, ...",Medium,Medium,"Detailed, practical, and occasionally humorous",Specific products and brands mentioned frequently,"Average Rating, Price, Brand"
6892,Female,35-44,Writer,Medium,"Home & Kitchen, Electronics, Fashion","Anker, Sleepwish, DOWAN, Under Armour, Canon, ...",Medium,Medium,"Detailed, practical, and occasionally humorous",Specific products and brands mentioned frequently,"Average Rating, Price, Brand"
6893,Female,25-34,Other,Low,"Electronics, Home & Kitchen, Health & Househol...","Amazon, Fantasia, Hypnotic Gems, Cuarto Astral...",High,High,"Positive, Enthusiastic, Detailed",Specific products and brands mentioned in reviews,"Average Rating, Number of Ratings, Quality"
6894,Female,25-34,Other,Medium,"Electronics, Home Goods, Clothing, Personal Care","ASURION, MR.SIGA, WYUZE, Hanes, SONORO KATE, D...",Medium,Medium,"Positive, Enthusiastic, Practical","Specific products, Purchase history, Recommend...",Average Rating


In [9]:
# Map every dataframe column to the distinct values observed in it.
unique_vals = {col: df[col].unique() for col in df.columns}

In [21]:
unique_vals#.keys()

{'Gender': array(['Female', 'Male'], dtype=object),
 'Age': array(['35-44', '45-49', '25-34', '56+', '50-55', '18-24'], dtype=object),
 'Occupation': array(['Writer', 'Other', 'Homemaker', 'Programmer', 'Retired',
        'Self-employed', 'Customer service', 'Veteran', 'Driver',
        'College/grad student', 'Pharmacist', 'Technician/Engineer',
        'Social worker', 'Artist', 'Doctor/health care', 'DJ/Producer',
        'Nurse', 'Baker', 'Photographer', 'Chef', 'Musician'], dtype=object),
 'Price Sensitivity': array(['Medium', 'Low', 'High'], dtype=object),
 'Shopping Interest': array(['Electronics, Home & Kitchen, Fashion, Health & Beauty',
        'Home Decor, Kitchenware, Fashion, Health & Beauty',
        'Home & Kitchen, Electronics, Health & Personal Care',
        'Electronics, Kitchenware, Home Organization',
        'Clothing, Home Decor, Kitchen Supplies, Health & Beauty',
        'Home & Kitchen, Electronics, Personal Care',
        'Home and Kitchen, Health and Persona

In [11]:
# Build the search prompt for one training instruction: its task text plus the
# profile stored under that instruction's user_id.
# NOTE(review): when the notebook runs top to bottom, the notebook's own
# gen_search_task_prompt is defined further down, so this call resolves to the
# one imported from utils.prompts — confirm that is the intended version.
idx = 0
prompt = gen_search_task_prompt(user_instructions['train'][idx]['task'], user_profiles[user_instructions['train'][idx]['user_id']]['user_profile'])

In [12]:
print(prompt)

You will act as an online shopper.
Your Profile:
Gender: Female
Age: 35-44
Occupation: Writer
Price Sensitivity: Medium
Shopping Interest: Electronics, Home & Kitchen, Fashion, Health & Beauty
Brand Preference: Aibocn, DACHEE, IBENZER, Columbia, Nutrition Essentials, Stauffers, KitchenAid, Fossil, ILIFE, simplehuman, Utopia Bedding, HC COLLECTION, Linenspa, CINEYO, DearMy, Basic Brands, Roku, Anne Klein, Hanes, ZESICA, Madison Park
Diversity Preference: High
Interaction Complexity: High
Tone and Style: Enthusiastic, Detailed, Positive
Item Reference: Specific products and brands mentioned frequently
Focus Aspect: Quality, Brand, Price


You are required to generate a search phrase to perform the following search task:
Task: Hey there! I'm super excited to find some high-quality replacement parts for my robotic vacuum cleaner. Looking for a reputable brand with great filters and brushes that'll keep my floors sparkling clean. Any awesome recommendations around $20?

Generate a search ph

In [30]:
all_products['B08GC2KTG6']

{'main_category': 'Home_and_Kitchen',
 'title': 'Electropan Replacement Vacuum Filter Brush Kit for Robotic Vacuum ILIFE V3, V3s, V3s pro, V5, V5s, V5s pro ILIFE Robot Vacuum Replacement Parts Ilife Vacuum Filters V3s',
 'average_rating': 4.3,
 'rating_number': 2534,
 'features': ['Vaccum Parts works with Ilife V3, Ilife V3s pro, Ilife V5, Ilife V5s, Ilife V5s Pro Robotic Vacuum Cleaner. Good quality ilife v3s pro Robotic Vacuum Replacement Parts',
  'Robot Parts Cleaner Accessory kit can keep your vacuum cleaner at peak performance, Suggest to replace ILIFE Vacuum Filters & Robot Vacuum Brush Replacement every 2-3 months',
  'Vacuum Cleaner Parts/ robot Vacuum Accessories for Ilife robot helps to keep your house clean and save effort for housework',
  'Ilife Vacuum Replacement Parts is very easy to install, and very easy to wash these Ilife vacuum parts (including Ilife robot vacuum hepa filters, Ilife vacuum primary filter, Ilife robot vacuum side brushes)',
  '100% Compatible with I

In [13]:
idx = 0
user_instructions['train'][idx]['task']
user_profiles[user_instructions['train'][idx]['user_id']]['user_profile']

{'Gender': 'Female',
 'Age': '35-44',
 'Occupation': 'Writer',
 'Price Sensitivity': 'Medium',
 'Shopping Interest': 'Electronics, Home & Kitchen, Fashion, Health & Beauty',
 'Brand Preference': 'Aibocn, DACHEE, IBENZER, Columbia, Nutrition Essentials, Stauffers, KitchenAid, Fossil, ILIFE, simplehuman, Utopia Bedding, HC COLLECTION, Linenspa, CINEYO, DearMy, Basic Brands, Roku, Anne Klein, Hanes, ZESICA, Madison Park',
 'Diversity Preference': 'High',
 'Interaction Complexity': 'High',
 'Tone and Style': 'Enthusiastic, Detailed, Positive',
 'Item Reference': 'Specific products and brands mentioned frequently',
 'Focus Aspect': 'Quality, Brand, Price'}

In [29]:
user_instructions['train'][0]['target']['product_info']#.keys()

{'main_category': 'Home_and_Kitchen',
 'title': 'Electropan Replacement Vacuum Filter Brush Kit for Robotic Vacuum ILIFE V3, V3s, V3s pro, V5, V5s, V5s pro ILIFE Robot Vacuum Replacement Parts Ilife Vacuum Filters V3s',
 'average_rating': 4.3,
 'rating_number': 2534,
 'features': ['Vaccum Parts works with Ilife V3, Ilife V3s pro, Ilife V5, Ilife V5s, Ilife V5s Pro Robotic Vacuum Cleaner. Good quality ilife v3s pro Robotic Vacuum Replacement Parts',
  'Robot Parts Cleaner Accessory kit can keep your vacuum cleaner at peak performance, Suggest to replace ILIFE Vacuum Filters & Robot Vacuum Brush Replacement every 2-3 months',
  'Vacuum Cleaner Parts/ robot Vacuum Accessories for Ilife robot helps to keep your house clean and save effort for housework',
  'Ilife Vacuum Replacement Parts is very easy to install, and very easy to wash these Ilife vacuum parts (including Ilife robot vacuum hepa filters, Ilife vacuum primary filter, Ilife robot vacuum side brushes)',
  '100% Compatible with I

In [44]:
import random

def gen_search_task_prompt(task, profile_dict, max_words=10):
    """
    Render the shopper-persona prompt that asks for a search phrase.

    The prompt lists the eleven profile fields in a fixed order, states the
    task, and ends with the rules for the phrase (including the word limit).

    Args:
        task (str): The search task to be performed.
        profile_dict (dict): User profile; must contain every profile field
            named below, otherwise a KeyError is raised.
        max_words (int): Word limit quoted in the final rule.
    Returns:
        str: The complete prompt text, terminated by a newline.
    """
    field_order = (
        "Gender",
        "Age",
        "Occupation",
        "Price Sensitivity",
        "Shopping Interest",
        "Brand Preference",
        "Diversity Preference",
        "Interaction Complexity",
        "Tone and Style",
        "Item Reference",
        "Focus Aspect",
    )
    profile_lines = [f"{name}: {profile_dict[name]}" for name in field_order]

    prompt_lines = [
        "You will act as an online shopper.",
        "Your Profile:",
        *profile_lines,
        "",
        "",
        "You are required to generate a search phrase to perform the following search task:",
        f"Task: {task}",
        "",
        "Generate a search phrase that is somewhat vague, reflecting your preferences and personalities without revealing the complete details",
        "of the target product.",
        "Rules:",
        "• You pay more attention to \"Focus Aspect\" of products, make sure to include some of them in the search phrase.",
        f"• Ensure the search phrase aligns with your overall tone and style: {profile_dict['Tone and Style']}.",
        "• Try to make the phrase as natural as possible and stick to the personalities in your profile.",
        "• Do not include any additional information or explanations and stay grounded.",
        "• Do not hallucinate information that is not provided.",
        f"• No more than {max_words} words.",
    ]
    # The original template ends with a line break after the last rule.
    return "\n".join(prompt_lines) + "\n"

def calculate_res_acc(rank, top_k=10):
    """
    Calculate the retrieval accuracy (Res Acc) from the rank of the target product.

    The score falls linearly from 1.0 at rank 1 by ``1 / top_k`` per position
    and is 0.0 for any rank beyond ``top_k``.

    Args:
        rank (int): 1-based position of the target product in the retrieved list.
        top_k (int): Number of leading positions that earn a non-zero score.
            Defaults to 10, the cutoff that used to be hard-coded.
    Returns:
        float: The Res Acc score, between 0.0 and 1.0.
    Raises:
        ValueError: If ``rank`` or ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError(f"top_k must be at least 1, got {top_k}")
    if rank < 1:
        # Without this guard rank 0 would score 1.1, above the maximum of 1.0.
        raise ValueError(f"rank must be a 1-based position (>= 1), got {rank}")
    if rank > top_k:
        return 0.0
    return 1 - (rank - 1) / top_k

def generate_random_counterfactuals(task, valid_values, ground_truth, repetitions, llm_system):
    """
    Score retrieval for counterfactual user profiles, one (field, value) pair at a time.

    For every field in ``valid_values`` and every value of that field, this
    builds ``repetitions`` profiles with the field pinned to that value, turns
    each profile into a search prompt, asks ``llm_system`` for a ranked list
    and scores the position of ``ground_truth`` in it.

    Args:
        task (str): The search task to be performed.
        valid_values (dict): Maps each profile field to an indexable sequence
            of valid values for that field.
        ground_truth: The item expected to appear in the retrieved list.
        repetitions (int): Number of profiles generated per (field, value) pair.
        llm_system (object): Object with a ``retrieve(prompt)`` method that
            returns a ranked list.
    Returns:
        tuple: ``(results, prompts)``. ``results`` is ``[(task, task_results)]``
        where ``task_results`` is a list of ``(field, value, avg_score)``
        tuples; ``prompts`` is every generated prompt in generation order.
    """
    results = []
    prompts = []
    task_results = []

    for field, values in valid_values.items():
        for value in values:
            # Scores are collected per (field, value) pair so that each average
            # describes exactly one counterfactual value, not the whole field.
            value_scores = []

            for j in range(repetitions):
                if j == 0:
                    # The first repetition is not randomized: every other
                    # field takes its first valid value.
                    profile = {k: v[0] if k != field else value
                               for k, v in valid_values.items()}
                else:
                    # Randomize every other field while `field` stays at `value`.
                    profile = {k: random.choice(v) if k != field else value
                               for k, v in valid_values.items()}

                # Generate search prompt
                prompt = gen_search_task_prompt(task, profile)
                prompts.append(prompt)

                # Retrieve results from the LLM system
                ranked_list = llm_system.retrieve(prompt)

                # Evaluate results
                if ground_truth in ranked_list:
                    rank = ranked_list.index(ground_truth) + 1  # 1-based rank
                    score = calculate_res_acc(rank)
                else:
                    score = 0  # Ground truth not retrieved
                value_scores.append(score)

            # With repetitions <= 0 there is nothing to average; skip the pair
            # instead of dividing by zero.
            if value_scores:
                avg_score = sum(value_scores) / len(value_scores)
                task_results.append((field, value, avg_score))

    results.append((task, task_results))

    return results, prompts

# Example usage
class MockLLMSystem:
    """
    Stand-in for a real LLM retrieval system.

    Every call returns the same ten placeholder item ids in a fresh random
    order; the prompt is accepted but not used.
    """
    def retrieve(self, prompt):
        shuffled_items = [f"item{n}" for n in range(1, 11)]
        # Shuffle in place with the module-level random generator.
        random.shuffle(shuffled_items)
        return shuffled_items



In [70]:
import random

def gen_search_task_prompt(task, profile_dict, max_words=10):
    """
    Render the shopper-persona prompt that asks for a search phrase.

    The prompt lists the eleven profile fields in a fixed order, states the
    task, and ends with the rules for the phrase (including the word limit).

    Args:
        task (str): The search task to be performed.
        profile_dict (dict): User profile; must contain every profile field
            named below, otherwise a KeyError is raised.
        max_words (int): Word limit quoted in the final rule.
    Returns:
        str: The complete prompt text, terminated by a newline.
    """
    field_order = (
        "Gender",
        "Age",
        "Occupation",
        "Price Sensitivity",
        "Shopping Interest",
        "Brand Preference",
        "Diversity Preference",
        "Interaction Complexity",
        "Tone and Style",
        "Item Reference",
        "Focus Aspect",
    )
    profile_lines = [f"{name}: {profile_dict[name]}" for name in field_order]

    prompt_lines = [
        "You will act as an online shopper.",
        "Your Profile:",
        *profile_lines,
        "",
        "",
        "You are required to generate a search phrase to perform the following search task:",
        f"Task: {task}",
        "",
        "Generate a search phrase that is somewhat vague, reflecting your preferences and personalities without revealing the complete details",
        "of the target product.",
        "Rules:",
        "• You pay more attention to \"Focus Aspect\" of products, make sure to include some of them in the search phrase.",
        f"• Ensure the search phrase aligns with your overall tone and style: {profile_dict['Tone and Style']}.",
        "• Try to make the phrase as natural as possible and stick to the personalities in your profile.",
        "• Do not include any additional information or explanations and stay grounded.",
        "• Do not hallucinate information that is not provided.",
        f"• No more than {max_words} words.",
    ]
    # The original template ends with a line break after the last rule.
    return "\n".join(prompt_lines) + "\n"

def calculate_res_acc(rank, top_k=10):
    """
    Calculate the retrieval accuracy (Res Acc) from the rank of the target product.

    The score falls linearly from 1.0 at rank 1 by ``1 / top_k`` per position
    and is 0.0 for any rank beyond ``top_k``.

    Args:
        rank (int): 1-based position of the target product in the retrieved list.
        top_k (int): Number of leading positions that earn a non-zero score.
            Defaults to 10, the cutoff that used to be hard-coded.
    Returns:
        float: The Res Acc score, between 0.0 and 1.0.
    Raises:
        ValueError: If ``rank`` or ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError(f"top_k must be at least 1, got {top_k}")
    if rank < 1:
        # Without this guard rank 0 would score 1.1, above the maximum of 1.0.
        raise ValueError(f"rank must be a 1-based position (>= 1), got {rank}")
    if rank > top_k:
        return 0.0
    return 1 - (rank - 1) / top_k

def generate_random_counterfactuals(task, valid_values, user_profile, repetitions, llm_system):
    """
    Build search prompts for counterfactual profiles, one (field, value) pair at a time.

    For every field in ``valid_values`` and every value of that field,
    ``repetitions`` profiles are generated with the field pinned to that value
    and each profile is rendered into a search prompt.

    NOTE(review): a second definition with the same name follows in this cell
    and replaces this one at run time; decide which variant should survive.

    Args:
        task (str): The search task the prompts are generated for.
        valid_values (dict): Maps each profile field to an indexable sequence
            of valid values for that field.
        user_profile (dict): Accepted but not used by this variant.
        repetitions (int): Number of profiles generated per (field, value) pair.
        llm_system (object): Accepted but not used by this variant.
    Returns:
        list: Every generated prompt, in generation order.
    """
    prompts = []

    for field, values in valid_values.items():
        for value in values:
            for j in range(repetitions):
                if j == 0:
                    # The first repetition is not randomized: every other
                    # field takes its first valid value.
                    profile = {k: v[0] if k != field else value
                               for k, v in valid_values.items()}
                else:
                    # Randomize every other field while `field` stays at `value`.
                    profile = {k: random.choice(v) if k != field else value
                               for k, v in valid_values.items()}

                # Generate search prompt
                prompts.append(gen_search_task_prompt(task, profile))

    # The prompts used to be built and then discarded (implicit None return).
    return prompts
                
def generate_random_counterfactuals(task, valid_values, user_profile, repetitions):
    """
    Build search prompts for the original user profile and randomized variants of it.

    Args:
        task (str): The specific task for which prompts are generated.
        valid_values (dict): Maps profile fields to the sequence of values each
            field may be resampled from.
        user_profile (dict): Original user profile, used unchanged for the
            first prompt and as the base for every randomized profile.
        repetitions (int): Total number of prompts to generate.
    Returns:
        list: ``repetitions`` prompts. The first is built from the unmodified
        ``user_profile``; each later one from a freshly randomized profile.
        Empty when ``repetitions`` is zero or negative.
    """
    prompts = []

    for i in range(repetitions):
        # Work on a copy so the caller's profile is never mutated.
        profile = user_profile.copy()
        if i > 0:
            # Resample every field that has a pool of valid values. Fields of
            # the user profile that are absent from valid_values keep their
            # original value instead of being dropped.
            profile.update({
                field: random.choice(values)
                for field, values in valid_values.items()
            })

        # Generate a search prompt using the current profile
        prompts.append(gen_search_task_prompt(task, profile))

    return prompts

# Retrieve results from the LLM system
                # ranked_list = llm_system.retrieve(prompt)
# # Evaluate results
#                 if ground_truth in ranked_list:
#                     rank = ranked_list.index(ground_truth) + 1  # Get the 1-based rank
#                     score = calculate_res_acc(rank)
#                 else:
#                     score = 0  # Ground truth not in top 10
#                 field_results.append(score)

#         # Average score for the current field-value pair
#         avg_score = sum(field_results) / len(field_results)
#         task_results.append((field, value, avg_score))

#     results.append((task, task_results))


# Example usage
class MockLLMSystem:
    """
    Stand-in for a real LLM retrieval system.

    Every call returns the same ten placeholder item ids in a fresh random
    order; the prompt is accepted but not used.
    """
    def retrieve(self, prompt):
        shuffled_items = [f"item{n}" for n in range(1, 11)]
        # Shuffle in place with the module-level random generator.
        random.shuffle(shuffled_items)
        return shuffled_items



In [67]:

# Example parameters
task = "Find the best laptop for work"

# Small hand-written value pools used only for this mock run.
valid_values = {
    "Gender": ["Female", "Male"],
    "Age": ["18-24", "25-34", "35-44", "45-49", "50-55", "56+"],
    "Occupation": ["Student", "Professional", "Retired"],
    "Price Sensitivity": ["Low", "Medium", "High"],
    "Shopping Interest": ["Electronics", "Clothing", "Sports Gear"],
    "Brand Preference": ["Brand A", "Brand B", "Brand C"],
    "Diversity Preference": ["High", "Low"],
    "Interaction Complexity": ["Simple", "Detailed"],
    "Tone and Style": ["Casual", "Professional"],
    "Item Reference": ["Laptop", "Shoe"],
    "Focus Aspect": ["Price", "Durability", "Design"]
}

repetitions = 10
mock_llm = MockLLMSystem()

# Run the evaluation
# NOTE(review): evaluate_personalized_search is not defined or imported anywhere
# in the visible notebook, so this line raises NameError on a fresh kernel. The
# argument list (task, value pools, ground-truth item, repetitions, LLM) and the
# two-value result look like the five-argument generate_random_counterfactuals
# that returns (results, prompts) — confirm and update the call.
results, prompts = evaluate_personalized_search(task, valid_values, 'item3', repetitions, mock_llm)

# Print the results
for task_result in results:
    # This unpacking rebinds the module-level name `task` used above.
    task, field_results = task_result
    print(f"Task: {task}")
    for field, value, avg_score in field_results:
        print(f"  Field: {field}, Value: {value}, Avg. Score: {avg_score:.2f}")

Task: Find the best laptop for work
  Field: Gender, Value: Male, Avg. Score: 0.55
  Field: Age, Value: 56+, Avg. Score: 0.60
  Field: Occupation, Value: Retired, Avg. Score: 0.60
  Field: Price Sensitivity, Value: High, Avg. Score: 0.41
  Field: Shopping Interest, Value: Sports Gear, Avg. Score: 0.63
  Field: Brand Preference, Value: Brand C, Avg. Score: 0.48
  Field: Diversity Preference, Value: Low, Avg. Score: 0.55
  Field: Interaction Complexity, Value: Detailed, Avg. Score: 0.47
  Field: Tone and Style, Value: Professional, Avg. Score: 0.50
  Field: Item Reference, Value: Shoe, Avg. Score: 0.45
  Field: Focus Aspect, Value: Design, Avg. Score: 0.58


In [52]:
len(set(prompts))

298

In [23]:
user_instructions['train'][idx]['target']['product_info']['parent_asin']

'B08GC2KTG6'

In [71]:
idx = 0

# Task text and profile of the selected training instruction.
user_task = user_instructions['train'][idx]['task']
user_profile = user_profiles[user_instructions['train'][idx]['user_id']]['user_profile']
# Sample from the values actually observed in the dataset for each profile field.
valid_values = unique_vals

# Generate the prompts for this user's own task. The call used to pass the bare
# name `task`, which is left over from the mock example cell, so `user_task`
# was computed and then never used.
prompts = generate_random_counterfactuals(user_task, valid_values, user_profile, repetitions)


In [75]:
user_profile

{'Gender': 'Female',
 'Age': '35-44',
 'Occupation': 'Writer',
 'Price Sensitivity': 'Medium',
 'Shopping Interest': 'Electronics, Home & Kitchen, Fashion, Health & Beauty',
 'Brand Preference': 'Aibocn, DACHEE, IBENZER, Columbia, Nutrition Essentials, Stauffers, KitchenAid, Fossil, ILIFE, simplehuman, Utopia Bedding, HC COLLECTION, Linenspa, CINEYO, DearMy, Basic Brands, Roku, Anne Klein, Hanes, ZESICA, Madison Park',
 'Diversity Preference': 'High',
 'Interaction Complexity': 'High',
 'Tone and Style': 'Enthusiastic, Detailed, Positive',
 'Item Reference': 'Specific products and brands mentioned frequently',
 'Focus Aspect': 'Quality, Brand, Price'}

In [79]:
print(prompts[5])

You will act as an online shopper.
Your Profile:
Gender: Female
Age: 56+
Occupation: College/grad student
Price Sensitivity: Low
Shopping Interest: Outdoor gear, clothing, electronics, home improvement
Brand Preference: Neewer, Patriot Memory, Belkin, Satechi, Ray-Ban, Distil Union, Jaybird, Evelots, OLIVERS, ONEHOO, Injinji, Outdoor Research, Kunodi, Troadlop, Goodthreads, U MUST HAVE, Love-KANKEI, Utopia Bedding, PUR, Muscle Pharm, DBR Tech, NY Threads, MIRA, BALEAF, Crave Beverages, Amazon, PANGTON VILLA, Trademark Fine Art, Wallniture, SUPERIOR, Unique Loom, JS NOVA JUNS, Memorygou, Donerton, MILPROX, BULKSUPPLEMENTS.COM, MOSISO, RAINYEAR make life easier, JORMATT, THE NORTH FACE, MARMOT, Mottee&Zconia, Bedsure, Genteele, Kingole, QCQHDU, TEKAMON, Stupell Industries, Safavieh, Deco Brothers, Apple, Valuetoner, HIWARE, TP-Link, Winsome, BeeGreeny, Genesis, Jadaol, Kurt S. Adler, Dandy Decor by Ko & Co, BLACK+DECKER, EKASN, HCB, Cuisinart, KWUY, Oirlv, Southpole, Alpine Swiss, Sigvet

In [53]:
user_profile

NameError: name 'user_profile' is not defined

In [38]:
print(prompts[0][9])

You will act as an online shopper.
Your Profile:
Gender: Female
Age: 18-24
Occupation: Retired
Price Sensitivity: Low
Shopping Interest: Electronics
Brand Preference: Brand C
Diversity Preference: Low
Interaction Complexity: Detailed
Tone and Style: Casual
Item Reference: Shoe
Focus Aspect: Price


You are required to generate a search phrase to perform the following search task:
Task: Find the best laptop for work

Generate a search phrase that is somewhat vague, reflecting your preferences and personalities without revealing the complete details
of the target product.
Rules:
• You pay more attention to "Focus Aspect" of products, make sure to include some of them in the search phrase.
• Ensure the search phrase aligns with your overall tone and style: Casual.
• Try to make the phrase as natural as possible and stick to the personalities in your profile.
• Do not include any additional information or explanations and stay grounded.
• Do not hallucinate information that is not provided

In [None]:
from utils.search_product_by_query import search_product_by_query

In [None]:
res = 

In [None]:
from pyserini.search.lucene import LuceneSearcher

# Smoke test against a prebuilt index
searcher = LuceneSearcher.from_prebuilt_index('robust04')
hits = searcher.search('hello world')

print(f'Total hits: {len(hits)}')
# Report each hit with its 1-based position, document id and score.
for position, hit in enumerate(hits, start=1):
    print(f"{position}: {hit.docid}, {hit.score}")
