In [None]:
# Import python packages
from snowflake.snowpark.functions import col
from snowflake.snowpark.functions import col, udf
from snowflake.snowpark.types import StringType, ArrayType, StructType, StructField, FloatType
from snowflake.snowpark.context import get_active_session

from copy import deepcopy

# Reuse the Snowpark session that is already active in this environment; the SQL
# statements and DataFrame operations further down all go through this handle.
session = get_active_session()


In [None]:
# Prompt template for the LLM relevance judge. It is rendered with str.format and
# expects these fields:
#   - example2_0, example2_1, example1_0, example1_1, example0_0, example3_0:
#     few-shot examples (named by rating and slot index) shown as Examples 5-10;
#   - query, passage: the query / search-result pair that should be rated.
# The model is asked to answer with a "Reasoning:" line followed by a "Rating:" line.
# NOTE(review): the text says "three point rating" but defines four levels (0-3), and
# Example 2 describes the product as "exactly what the user is looking for" yet rates
# it 2. The wording is left untouched here because any change to this string changes
# what the judge sees -- confirm whether these are intentional.
STRUCTURED_QUERY_PROMPT = """You are an assitant helping a user searching for a product on amazon, given the user query and product information you have to give a rating. The user is flexible with product requirements which doesn't match the exact query. Specifically you should give a three point rating which is defined as:
0 - Product doesn't seem related to the use query. 
1 - Product is related to the query and might vaguely satisfy the intent behind the query even though it might not match all the requirements present in the query and/or there are significant differences in sepcifications provided in the query (while still being related to the query).
2 - Product is broadly what the user is searching for. However there might still be some minor differences in specifications, such as brand, or exact details of the product.
3 - Product is completely what the user is searching for, matching all description in query. 

You should think step by step about the user query and the search result and rate the search result. You should also provide a reasoning for your rating.
    
Use the following format:
Reasoning: Example Reasoning
Rating: Example Rating

### Examples
Example 1:
INPUT:
Query: 4.2 Lt Freezerless Mini Fridge
PRODUCT: 
TITLE: 4 Lt. Refrigerator Description: Energy Star Apartment Freezerless Fridge, Stainless Steel, E-Star with LED Lighting, Reversible Door, Adjustable Temperature, Quiet, for Dorm, Office, Home Kitchen
OUTPUT:
Reasoning: In this case the product is a 4 Lt. Refrigerator which is close to the 4.2 Lt Freezerless Mini Fridge and all other requirements match. Hence the product is a close match to the user query and is broadly what the user is looking for. Therefore it is rated 2.
Rating: 2

Example 2:
INPUT:
Query: Lead pencil without plastic grip
PRODUCT: 
TITLE: Pencil Description: A pack of 10 lead pencils. The pencils are 2B and are perfect for writing and drawing. Erasers are provided for free with the 10 pencil pack.
OUTPUT:
Reasoning: The query mentions a lead pencil without a plastic grip. The product is a pack of 10 lead pencils which don't mention a plastic grip, so we can assume they don't have a plastic grip. Therefore the product is exactly what the user is looking for. Hence the rating is 2.
Rating: 2

Example 3:
INPUT:
Query: US immigration test pass gift cup
PRODUCT: 
TITLE: Immigration Gift Jacket Description: A perfect gift for someone who recently pased the immigration test. US Flag is on the back of the jacket. 
OUTPUT:
Reasoning: Although the query doesn't request for a jacket, it is about a gift for someone who passed immigration test. Hence the product vaguely satisfies the intent behind the query. Therefore the rating is 1.
Rating: 1

Example 4:
INPUT:
Query: Bomber Jacket with chinese collar
PRODUCT: 
TITLE: Bomber Jacket Description: A stylish dark brown bomber jacket with a zipper and pockets with a fur collar. Will keep you warm in the winter while looking stylish. Needs dry cleaning.
OUTPUT: 
Reasoning: The query is looking for a bomber jacket with chinese collars. The product is a bomber jacket with a fur collar. Which is a similar product to what the query is searching for. Therefore the rating is 1.
Rating: 1

Example 5:
{example2_0}

Example 6:
{example2_1}

Example 7:
{example1_0}

Example 8:
{example1_1}

Example 9:
{example0_0}

Example 10:
{example3_0}
###

Now given the user query and search result below, rate the search result based on its relevance to the user query and provide a reasoning for your rating.
INPUT:
User Query: {query}
Search Result: {passage}
OUTPUT:
"""

# Fallback few-shot examples, keyed by rating ("0"-"3"). Each value is a list of
# ready-to-insert example strings; the list lengths (1, 2, 2, 1) correspond to the
# example0_*, example1_*, example2_* and example3_* fields of the prompt template.
# Every example must carry the rating of the key it is stored under, otherwise the
# judge is shown a mislabelled demonstration.
RATINGS_TO_DEFAULT_EXAMPLES = {
    "0": [
"""Query: Daiwa Liberty Club Short Swing
PRODUCT: YONEX AC1025P Tennis Badminton Grip. DESCRIPTION: Product Description Muscle Power locates the string on rounded archways that eliminate stress-load and fatigue through contact friction. 
OUTPUT:
Rating: 0
Reasoning: The query is looking for a fishing club short swing from Daiwa. But the product is a tennis badminton grip. It's completely unrelated.
""",
    ],
    "1": [
"""INPUT:
Query: YONEX AC1025P Tennis Badminton Grip
PRODUCT: TITLE: Yonex Badminton Racquet Voltric 200 Taufik Series - 80Gms DESCRIPTION: Product Description YONEX's head-light series, NANORAY provides a fast and controlled swing with enhanced repulsion via the New Aero Frame. 
OUTPUT:
Rating: 1
Reasoning: The query is looking for badminton grip, but product is badminton racquet instead. The product isn't what user is looking for.
""",
"""INPUT:
User Query: 2010 dodge nitro crossbar
PRODUCT: BLACK HORSE Armour Roll Bar Compatible with 2000 to 2022 Ram Chevrolet Ford GMC Toyota 3500 2500 Silverado F-150 Sierra Tundra 1500 2500 3500 Black Steel RB-AR1B
OUTPUT:
Rating: 1
Reasoning: The query is looking for crossbar, but product is roll bar, which isn't same product. Even if dodge 2010 is mentioned, it's not a complete match given product doesn't align.
""",
    ],
    "2": [
"""INPUT:
Query: Dogfish 500GB Msata Internal SSD
PRODUCT: Crucial MX500 500GB 3D NAND SATA M.2 Internal SSD, up to 560MB/s & Seagate Barracuda 2TB Internal Hard Drive HDD – 3.5 Inch SATA 6Gb/s 7200 RPM 256MB Cache 3.5-Inch – Frustration Free Packaging
OUTPUT:
Rating: 2
Reasoning: Query is looking for 500GB SSD, which product satisfies. However, brand mentioned in query, Dogfish, doesn't match brand in product.
""",
"""INPUT:
Query: YONEX AC1025P Tennis Badminton Grip
PRODUCT: WILSON Pro Overgrip-Comfort DESCRIPTION: Product Description Will fit tennis, racquetball, badminton, and squash handles. Product Description Will fit tennis, racquetball, badminton, and squash handles.
OUTPUT:
Rating: 2
Reasoning: The query is looking for badminton grip, which is aligned with product. But the product has minor details like brand that doens't fit query description.
""",
    ],
    "3": [
        # Fixed: this example used to say "Rating: 2" although it is the only
        # demonstration of a complete match (rating 3).
"""INPUT:
Query: center console organizer
PRODUCT: MX Auto Center Console Organizer| Compatible with Ford Trucks & SUVs – Accessories for F150, F250, F350, Raptor, Expedition|2015, 16, 17, 18, 19, 20, 21| Must-Have Bucket Seats|SEE COMPATIBILITY BELOW
OUTPUT:
Rating: 3
Reasoning: Query and product are both center console organizer, complete match.
""",
    ],
}

def generate_llm_label(
    input_query: str,
    intermediate_columns: list[str],
    output_select_columns: list[str],
    output_table: str,
) -> None:
    """Build a relevance-judging prompt for every input row and label it with an LLM.

    The function
      1. runs ``input_query`` through the active session,
      2. adds a ``PROMPT`` column computed by a UDF that fills
         ``STRUCTURED_QUERY_PROMPT`` with the row's query, passage and few-shot
         examples,
      3. writes ``output_select_columns`` to ``{output_table}_INTERMEDIATE``
         (overwriting it), and
      4. creates or replaces ``output_table`` from that intermediate table, adding
         ``LLM_JUDGE`` (the Cortex completion) and ``LLM_RELEVANCE`` (the digit
         captured after ``Rating: `` in the completion).

    Args:
        input_query: SQL text producing the rows to judge. Its result must contain
            every column named in ``intermediate_columns`` and a ``GOLDEN_DOCS``
            column, which is passed to the UDF as an array of objects with
            ``score``, ``doc_text`` and ``doc_id`` fields.
        intermediate_columns: Names of the string columns handed to the prompt
            builder, in order. The builder takes exactly two of them: the query
            first, then the passage.
        output_select_columns: Columns kept in the intermediate/output tables.
            Must include ``PROMPT``, because the final SQL reads ``prompt``.
        output_table: Name of the table to create. It is interpolated verbatim
            into SQL, so only pass trusted identifiers.
    """
    table_raw = session.sql(input_query)

    def generate_prompt(query: str, passage: str, golden_docs: list):
        """Render the judge prompt, preferring this query's golden docs as examples."""
        query_ratings = deepcopy(RATINGS_TO_DEFAULT_EXAMPLES)
        # Number of default example slots per rating that may still be replaced.
        remaining = {
            rating: len(examples) for rating, examples in query_ratings.items()
        }
        # A NULL GOLDEN_DOCS value arrives as None; treat it as "no golden docs".
        for gd in golden_docs or []:
            # The score is declared as a float, so 3.0 has to map to the key "3";
            # missing or non-numeric scores are skipped, as are fractional ones.
            try:
                score = float(gd["score"])
            except (TypeError, ValueError):
                continue
            if not score.is_integer():
                continue
            rating = str(int(score))
            if remaining.get(rating, 0) <= 0:
                # Unknown rating, or every slot of this rating is already taken.
                continue
            # Consume one slot (filled from the last slot towards the first) so
            # that several golden docs with the same rating do not overwrite each
            # other.
            remaining[rating] -= 1
            doc_text = gd["doc_text"]
            # No "Reasoning:" line is emitted because golden docs carry none.
            query_ratings[rating][remaining[rating]] = f"""INPUT:
Query: {query}
PRODUCT: {doc_text}
OUTPUT:
Rating: {rating}
"""
        return STRUCTURED_QUERY_PROMPT.format(
            query=query,
            passage=passage,
            example2_0=query_ratings["2"][0],
            example2_1=query_ratings["2"][1],
            example1_0=query_ratings["1"][0],
            example1_1=query_ratings["1"][1],
            example0_0=query_ratings["0"][0],
            example3_0=query_ratings["3"][0],
        )

    # Register generate_prompt as a UDF: one string input per intermediate column
    # followed by the array of golden-doc objects.
    golden_doc_struct = StructType([
        StructField("score", FloatType()),
        StructField("doc_text", StringType()),
        StructField("doc_id", StringType()),
    ])
    input_cols = [StringType() for _ in intermediate_columns]
    input_cols.append(ArrayType(golden_doc_struct))
    generate_prompt_udf = udf(
        generate_prompt,
        return_type=StringType(),
        input_types=input_cols,
        packages=['snowflake-snowpark-python'],
        max_batch_size=100,
    )

    # Apply the UDF to produce the PROMPT column.
    table_with_prompt = table_raw.with_column(
        "PROMPT",
        generate_prompt_udf(
            *[col(colname) for colname in intermediate_columns],
            col("GOLDEN_DOCS"),
        ),
    )

    # Keep only the requested output columns.
    table_with_prompt = table_with_prompt.select(output_select_columns)

    # Persist the prompts so the completion step below can run as plain SQL.
    table_with_prompt.write.save_as_table(f"{output_table}_INTERMEDIATE", mode="overwrite")

    # LLM_RELEVANCE refers to the LLM_JUDGE alias defined in the same SELECT.
    session.sql(f"""CREATE OR REPLACE TABLE {output_table} AS
SELECT
  *,
  SNOWFLAKE.CORTEX.COMPLETE(
      'llama3.1-405b',
      [{{'role': 'user', 'content': prompt}}],
      {{'temperature': 0,'top_p': 1}}
  )['choices'][0]['messages']::VARCHAR AS LLM_JUDGE,
  REGEXP_SUBSTR(LLM_JUDGE, 'Rating: ([0-9])', 1, 1, 'e', 1) AS LLM_RELEVANCE
FROM {output_table}_INTERMEDIATE""").collect()
    


In [None]:
# Judge the rows of TREC23_WITH_SUBTABLE_BASE_RESULTS.
# GOLDEN_DOCS is selected as well because generate_llm_label passes
# col("GOLDEN_DOCS") to its prompt UDF; without that column in the query result the
# PROMPT column cannot be computed.
# NOTE(review): assumes the source table exposes a GOLDEN_DOCS column -- confirm.
input_query = """SELECT QUERY_ID, TEXT, QUERY, RANK, GOLDEN_DOCS FROM CORTEX_SEARCH_DB.GOLDEN.TREC23_WITH_SUBTABLE_BASE_RESULTS"""

generate_llm_label(
    input_query=input_query,
    intermediate_columns=["QUERY", "TEXT"],
    output_select_columns=["QUERY_ID", "QUERY", "TEXT", "PROMPT", "RANK"],
    output_table="CORTEX_SEARCH_DB.GOLDEN.TREC23_WITH_SUBTABLE_BASE_RESULTS_LLM_JUDGE",
)