In [5]:
from enum import Enum

from langchain_community.document_loaders import PolarsDataFrameLoader
from langchain.chat_models import init_chat_model
import polars as pl
from pydantic import BaseModel, Field
from thefuzz import process

from mfg_capabilities.config import config
from mfg_capabilities.utils import get_sqlalchemy_engine


# Catalog and schema handed to the engine factory; all SQL in this notebook
# runs through the resulting engine.
CATALOG = "manufacturing_dev"
SCHEMA = "work_agent_barney"

engine = get_sqlalchemy_engine(catalog=CATALOG, schema=SCHEMA)

# Polars display options (uncomment the ones you need while exploring).
# pl.Config.set_tbl_cols(10)         # number of columns displayed
# pl.Config.set_fmt_str_lengths(50)  # max string length displayed
pl.Config.set_tbl_rows(20)  # number of rows displayed

polars.config.Config

## Load line and product data

In [6]:
# The central-planning Excel sheets were cleaned by hand and consolidated
# into this single workbook.
PRODUCTS_PATH = config.data_dir / "from_central_planning" / "products_from_capacity.xlsx"

# infer_schema_length=0 disables dtype inference so every column is read as a
# string; the sheet is a concatenation of exports and may contain extra header
# rows that would otherwise confuse inference.
products_raw = pl.read_excel(
    PRODUCTS_PATH,
    sheet_name="Sheet1",
    infer_schema_length=0,
    has_header=True,
)

# display(products_raw)

In [7]:
# For plant 102, prepend "B" to any product string that does not already
# start with it; every other plant's product is passed through unchanged.
product_col = (
    pl.when(
        pl.col("Plant Number").eq("102")
        & pl.col("Product").str.starts_with("B").not_()
    )
    .then(pl.concat_str(pl.lit("B"), pl.col("Product")))
    .otherwise(pl.col("Product"))
    .alias("Product")
)

# Drop placeholder rows, keep the four identifying columns, de-duplicate, and
# sort so the displayed table is stable between runs.
# NOTE(review): the filter removes any product containing "none" anywhere
# (case-insensitive), not only products that are exactly "None" - confirm
# no real product name is caught by this.
products_cleaned = (
    products_raw
    .filter(pl.col("Product").str.contains("(?i)none").not_())
    .select("Plant Number", "Plant Name", "Line Name", product_col)
    .unique()
    .sort(["Plant Number", "Plant Name", "Line Name", "Product"])
)

display(products_cleaned)

Plant Number,Plant Name,Line Name,Product
str,str,str,str
"""049""","""State Avenue Foods""","""670 SPICE""","""K00106 HT ONION POWDER 4OZ."""
"""049""","""State Avenue Foods""","""670 SPICE""","""K48302 PS POPPY SEED"""
"""049""","""State Avenue Foods""","""670 SPICE""","""K48307 PS GROUND CLOVES"""
"""049""","""State Avenue Foods""","""670 SPICE""","""K48309 PS WHOLE CLOVES"""
"""049""","""State Avenue Foods""","""670 SPICE""","""K48463 PS GROUND CUMIN"""
"""049""","""State Avenue Foods""","""670 SPICE""","""K48464 PS THYME LEAVES"""
"""049""","""State Avenue Foods""","""670 SPICE""","""K48466 PS GROUND CINNAMON"""
"""049""","""State Avenue Foods""","""670 SPICE""","""K48468 PS GROUND RED PEPPER"""
"""049""","""State Avenue Foods""","""670 SPICE""","""K48470 PS GROUND GINGER"""
"""049""","""State Avenue Foods""","""670 SPICE""","""K48471 PS GROUND NUTMEG"""


## Infer RCK line numbers

In [8]:
# Restrict to plant 714 (displayed as "RCK Foods" in the output below).
rck_products = products_cleaned.filter(pl.col("Plant Number").eq("714"))

display(rck_products)

Plant Number,Plant Name,Line Name,Product
str,str,str,str
"""714""","""RCK Foods""","""Deli Modern""","""Buffalo Chicken Dip 10 oz."""
"""714""","""RCK Foods""","""Deli Modern""","""Candied Jalapeno Bacon Dip 12 …"
"""714""","""RCK Foods""","""Deli Modern""","""Margherita Pizza Dip 12 oz."""
"""714""","""RCK Foods""","""Deli Modern""","""Sweet and Sour Dressing"""
"""714""","""RCK Foods""","""Deli Multivac 1""","""Albacore Tuna Salad"""
"""714""","""RCK Foods""","""Deli Multivac 1""","""American Potato Salad"""
"""714""","""RCK Foods""","""Deli Multivac 1""","""Artisan Mac N Cheese"""
"""714""","""RCK Foods""","""Deli Multivac 1""","""BLT Pasta Salad"""
"""714""","""RCK Foods""","""Deli Multivac 1""","""Bistro Bow Tie Pasta Salad Bas…"
"""714""","""RCK Foods""","""Deli Multivac 1""","""Calico Bean Salad"""


In [9]:
# Item master rows for plant 714.
query = """
SELECT
    plant_num,
    plant_name,
    bsc_fp_num,
    line_desc_finished_product,
    dept_name_production_plant,
    dept_name_sales_plant,
    line_market_desc
FROM
    financials_item_info_silver
WHERE
    plant_num = '714'
"""

# Strip leading/trailing whitespace from every selected column. A
# multi-column pl.col(...) keeps each column's own name, so no aliases are
# needed.
rck_bsc_items = pl.read_database(query, engine).select(
    pl.col(
        "plant_num",
        "plant_name",
        "bsc_fp_num",
        "line_desc_finished_product",
        "dept_name_production_plant",
        "dept_name_sales_plant",
        "line_market_desc",
    ).str.strip_chars()
)
display(rck_bsc_items)

[WARN] Parameter '_user_agent_entry' is deprecated; use 'user_agent_entry' instead. This parameter will be removed in the upcoming releases.


plant_num,plant_name,bsc_fp_num,line_desc_finished_product,dept_name_production_plant,dept_name_sales_plant,line_market_desc
str,str,str,str,str,str,str
"""714""","""RCK Foods""","""D03189""","""6/3LB KRO SOTHRN POTATO SALAD""","""Deli MFG Salads""","""Deli MFG Salads""","""KRO SOTHRN POTATO SALAD"""
"""714""","""RCK Foods""","""D03218""","""12/1LB KRO SOUTHRN POTATO SLD""","""Deli MFG Salads""","""Deli MFG Salads""","""KROGER SOUTHERN POTATO SALAD"""
"""714""","""RCK Foods""","""D03219""","""12/1LB KRO MUSTRD POTATO SALAD""","""Deli MFG Salads""","""Deli MFG Salads""","""KROGER MUSTARD POTATO SALAD"""
"""714""","""RCK Foods""","""D03271""","""12/1LB KRO HMSTYL BAKED BEANS""","""Deli MFG Salads""","""Deli MFG Salads""","""KROGER HOMESTYLE BAKED BEANS"""
"""714""","""RCK Foods""","""D03322""","""6/3LB KRO MSTRD POTATO SALAD""","""Deli MFG Salads""","""Deli MFG Salads""","""KROGER MUSTARD POTATO SALAD"""
"""714""","""RCK Foods""","""D03373""","""6/3LB KRO HMSTYL BAKED BEANS""","""Deli MFG Salads""","""Deli MFG Salads""","""KROGER HOMESTYLE BAKED BEANS"""
"""714""","""RCK Foods""","""D03734""","""WHOLESOME@HOME CRANBERRY CELEB""","""Deli MFG Salads""","""Deli MFG Salads""","""WHOLESOME@HOME CRANBERRY CELEB"""
"""714""","""RCK Foods""","""D08314""","""2/5LBS KROGER HOMESTYLE COCKTA""","""Deli MFG Salads""","""Deli MFG Salads""","""KROGER HOMESTYLE COCKTAIL SAUC"""
"""714""","""RCK Foods""","""D21090""","""12/15OZ BFR MINI SUGAR CKY 36C""","""FROZEN MFT COOKIES""","""FROZEN MFT COOKIES""","""BFR MINI SUGAR CKY 36CT"""
"""714""","""RCK Foods""","""D21091""","""12/15OZ BFR MINI CHOC CH CKY 3""","""FROZEN MFT COOKIES""","""FROZEN MFT COOKIES""","""BFR MINI CHOC CH CKY 36CT"""


In [10]:
# Candidate strings for fuzzy matching: the market description of every row
# in rck_bsc_items (duplicates, if any, are kept).
choices = rck_bsc_items.get_column("line_market_desc").to_list()

# Helper used with map_elements to fuzzy-match a single product name
def get_best_match_and_score(product_name):
    """Return the closest entry in the module-level ``choices`` list.

    Args:
        product_name: The product name to look up.

    Returns:
        A dict with keys ``"match"`` (the best-matching choice) and
        ``"score"`` (its score). Both values are ``None`` when
        ``process.extractOne`` returns nothing.
    """
    hit = process.extractOne(product_name, choices, score_cutoff=0)
    if not hit:
        return {"match": None, "score": None}
    return {"match": hit[0], "score": hit[1]}

# Fuzzy-match every RCK product name, then flatten the returned struct into
# two plain columns (best_match_desc, match_score).
rck_products_with_match = (
    rck_products
    .with_columns(
        match_struct=pl.col("Product").map_elements(
            get_best_match_and_score,
            return_dtype=pl.Struct({"match": pl.Utf8, "score": pl.Int64}),
        )
    )
    .with_columns(
        best_match_desc=pl.col("match_struct").struct.field("match"),
        match_score=pl.col("match_struct").struct.field("score"),
    )
    .drop("match_struct")
)

# Look up the BSC finished-product number of the matched description and
# build "<bsc_fp_num> <Product>" as the updated product name.
# NOTE(review): line_market_desc is not unique in rck_bsc_items (the same
# description appears under more than one bsc_fp_num in the displayed sample),
# so this left join can emit several rows per product - confirm that is
# intended.
rck_products_joined = (
    rck_products_with_match
    .join(
        rck_bsc_items.select(["bsc_fp_num", "line_market_desc"]),
        how="left",
        left_on="best_match_desc",
        right_on="line_market_desc",
    )
    .select(
        "Plant Number",
        "Plant Name",
        "Line Name",
        "Product",
        pl.lit(True).alias("Fuzzy Matched"),
        pl.col("best_match_desc").alias("Best Fuzzy Match"),
        pl.col("match_score").alias("Fuzzy Match Score"),
        pl.col("bsc_fp_num").alias("BSC Finished Product Number"),
        pl.concat_str(
            [pl.col("bsc_fp_num"), pl.col("Product")],
            separator=" ",
        ).alias("Updated Product"),
    )
)

display(rck_products_joined)

Plant Number,Plant Name,Line Name,Product,Fuzzy Matched,Best Fuzzy Match,Fuzzy Match Score,BSC Finished Product Number,Updated Product
str,str,str,str,bool,str,i64,str,str
"""714""","""RCK Foods""","""Deli Modern""","""Buffalo Chicken Dip 10 oz.""",true,"""CHICKEN MILANESE""",86,"""D65981""","""D65981 Buffalo Chicken Dip 10 …"
"""714""","""RCK Foods""","""Deli Modern""","""Candied Jalapeno Bacon Dip 12 …",true,"""PRVT SEL CAN JAL BCN DIP""",58,"""D95191""","""D95191 Candied Jalapeno Bacon …"
"""714""","""RCK Foods""","""Deli Modern""","""Margherita Pizza Dip 12 oz.""",true,"""PRVT SEL PZZA DP""",56,"""D95192""","""D95192 Margherita Pizza Dip 12…"
"""714""","""RCK Foods""","""Deli Modern""","""Sweet and Sour Dressing""",true,"""RNDY SWEET SOUR COLE SLAW""",59,"""D66163""","""D66163 Sweet and Sour Dressing"""
"""714""","""RCK Foods""","""Deli Multivac 1""","""Albacore Tuna Salad""",true,"""ALBACORE TUNA SALAD""",100,"""D66010""","""D66010 Albacore Tuna Salad"""
"""714""","""RCK Foods""","""Deli Multivac 1""","""American Potato Salad""",true,"""AMERICAN POTATO SALAD""",100,"""D66033""","""D66033 American Potato Salad"""
"""714""","""RCK Foods""","""Deli Multivac 1""","""Artisan Mac N Cheese""",true,"""RCK ARTISAN MAC N CHEESE""",95,"""D66151""","""D66151 Artisan Mac N Cheese"""
"""714""","""RCK Foods""","""Deli Multivac 1""","""BLT Pasta Salad""",true,"""2/5LB BGS RCK BLT PASTA SALAD""",90,"""D66155""","""D66155 BLT Pasta Salad"""
"""714""","""RCK Foods""","""Deli Multivac 1""","""Bistro Bow Tie Pasta Salad Bas…",true,"""BISTRO BOW TIE PASTA SA""",88,"""D66026""","""D66026 Bistro Bow Tie Pasta Sa…"
"""714""","""RCK Foods""","""Deli Multivac 1""","""Calico Bean Salad""",true,"""CALICO BEAN SALAD""",100,"""D66042""","""D66042 Calico Bean Salad"""


### Use LLM to vet the fuzzy matches

In [11]:
# Only matches scoring below 90 are sent to the LLM for review.
# "Page Content" duplicates the line name because the loader needs one column
# to use as each document's page content; the remaining columns are passed
# to the loader alongside it.
matches_to_eval = (
    rck_products_joined
    .filter(pl.col("Fuzzy Match Score").lt(90))
    .select(
        [
            pl.col("Line Name").alias("Page Content"),
            pl.col("Line Name"),
            pl.col("Product"),
            pl.col("Best Fuzzy Match").alias("Matched Product"),
        ]
    )
)

loader = PolarsDataFrameLoader(
    matches_to_eval,
    page_content_column="Page Content",
)
docs_to_eval = loader.load()


class LikertConfidence(int, Enum):
    """Seven-point confidence scale: 1 is the lowest confidence, 7 the highest."""

    # Mixing in ``int`` makes every member an int as well as an enum member.
    not_at_all_confident = 1
    slightly_confident = 2
    somewhat_confident = 3
    moderately_confident = 4
    quite_confident = 5
    very_confident = 6
    extremely_confident = 7


class ScoredMatch(BaseModel):
    """
    Represents a food production line, product, and that product's best fuzzy match
    from another dataset.
    """
    # The first positional argument of Field() is the field's *default*, not
    # its description. Passing the text positionally made every field optional
    # with the help text as its default value (a str default even for the enum
    # field) and left the generated schema without descriptions. Using
    # description= keeps the fields required and exposes the text in the schema.
    line_name: str = Field(description="The production line on which the product is produced")
    product: str = Field(description="The product being produced on the line")
    best_fuzzy_match: str = Field(description="The best fuzzy match for the product from another dataset")
    likert_confidence: LikertConfidence = Field(description="Your confidence in the match being correct on a scale of 1 to 7, where 1 is not at all confident and 7 is extremely confident")
    reasoning: str = Field(description="Your reasoning for the match being correct or not")


class ScoredMatches(BaseModel):
    """
    Represents a list of scored matches for evaluation.
    """
    # Field()'s first positional argument is the default value, so the text
    # must be passed as description=. Passed positionally, the "list" field
    # would silently default to a plain string when the key is missing.
    matches: list[ScoredMatch] = Field(description="A list of scored matches.")


# Instructions for the reviewer model; {matches} is filled with the loaded
# documents at call time.
prompt_template = """
You'll be given a list of food production lines, products, and their best fuzzy matches from another dataset.
Your task is to evaluate the matches and provide a confidence score for each one. Just having similar keywords
is not enough to be confident in the match. Make sure the products are of the same type, i.e. salad is not the
same thing as dresssing even if they are of a similar style. For each match, please provide your confidence in
the match being correct on a scale of 1 to 7, where 1 is not at all confident and 7 is extremely confident.
Please provide your confidence in the match being correct for each of the following matches:
{matches}
"""

# Chat model wrapped so that its reply is parsed into a ScoredMatches object.
llm = init_chat_model("gpt-4o", model_provider="openai")
structured_llm = llm.with_structured_output(ScoredMatches)

# Single request covering every low-scoring match.
response = structured_llm.invoke(
    prompt_template.format(matches=docs_to_eval)
)

In [12]:
# Turn the structured LLM reply into a frame whose column names line up with
# rck_products_joined so the two can be joined later.
matches_llm_eval = pl.DataFrame(response.matches).select(
    [
        pl.col(field_name).alias(column_label)
        for field_name, column_label in {
            "line_name": "Line Name",
            "product": "Product",
            "best_fuzzy_match": "Best Fuzzy Match",
            "likert_confidence": "Likert Score",
            "reasoning": "Reasoning",
        }.items()
    ]
)

display(matches_llm_eval)

Line Name,Product,Best Fuzzy Match,Likert Score,Reasoning
str,str,str,i64,str
"""Deli Modern""","""Buffalo Chicken Dip 10 oz.""","""CHICKEN MILANESE""",1,"""The product 'Buffalo Chicken D…"
"""Deli Modern""","""Candied Jalapeno Bacon Dip 12 …","""PRVT SEL CAN JAL BCN DIP""",7,"""The products both describe the…"
"""Deli Modern""","""Margherita Pizza Dip 12 oz.""","""PRVT SEL PZZA DP""",6,"""Both products appear to be piz…"
"""Deli Modern""","""Sweet and Sour Dressing""","""RNDY SWEET SOUR COLE SLAW""",2,"""Sweet and Sour Dressing and Sw…"
"""Deli Multivac 1""","""Bistro Bow Tie Pasta Salad Bas…","""BISTRO BOW TIE PASTA SA""",6,"""Both products describe a bow t…"
"""Deli Multivac 1""","""Creamy Coleslaw Dressing""","""CREAMY COLE SLAW KIT""",3,"""The dressing and kit could be …"
"""Deli Multivac 1""","""Presto Pasta Salad""","""KROGER SOUTHERN POTATO SALAD""",2,"""Presto Pasta Salad and Souther…"
"""Deli Multivac 1""","""Raw Beet Red Cabbage Salad""","""TUNA SALAD SPREAD""",1,"""Beet and cabbage salad is very…"
"""Deli Multivac 1""","""Red Cabbage Cranberry""","""CRNBRY WALNUT RED CABBAGE""",5,"""Both products describe red cab…"
"""Deli Multivac 1""","""Rotisserie Chicken Salad""","""CHICKEN MILANESE""",1,"""Chicken Milanese is a dish, no…"


In [13]:
# Product name to report: the "<bsc_fp_num> <name>" form for fuzzy-matched
# rows, the original product string otherwise (a null flag falls through to
# the otherwise branch).
updated_product_col = (
    pl.when(pl.col("Fuzzy Matched"))
    .then(pl.col("Updated Product"))
    .otherwise(pl.col("Product"))
)

# Original name, kept only for fuzzy-matched rows; null everywhere else.
original_product_col = pl.when(pl.col("Fuzzy Matched")).then(pl.col("Product"))

# TODO: the original author flagged this expression with "modify this".
# Rows that found no partner in the join have a null flag; report it as False.
fuzzy_matched_col = pl.col("Fuzzy Matched").fill_null(False)

# Enrich the full product list with the fuzzy-match result and the LLM review,
# then project onto snake_case column names.
# NOTE(review): the second join relies on the LLM echoing line, product and
# match strings back verbatim; any altered string simply leaves the LLM
# columns null for that row.
products_final = (
    products_cleaned
    .join(
        rck_products_joined,
        how="left",
        on=["Plant Number", "Plant Name", "Line Name", "Product"],
    )
    .join(
        matches_llm_eval,
        how="left",
        on=["Line Name", "Product", "Best Fuzzy Match"],
    )
    .with_columns(product=updated_product_col)
    .select(
        plant_num=pl.col("Plant Number"),
        plant_name=pl.col("Plant Name"),
        line_name=pl.col("Line Name"),
        # Leading alphanumeric run of the product string.
        bsc_fp_num=pl.col("product").str.extract(r"^[A-Za-z0-9]+", 0),
        product=pl.col("product"),
        fuzzy_matched=fuzzy_matched_col,
        fuzzy_match_score=pl.col("Fuzzy Match Score"),
        original_product=original_product_col,
        best_fuzzy_match=pl.col("Best Fuzzy Match"),
        llm_confidence=pl.col("Likert Score"),
        llm_reasoning=pl.col("Reasoning"),
    )
)

display(products_final)

plant_num,plant_name,line_name,bsc_fp_num,product,fuzzy_matched,fuzzy_match_score,original_product,best_fuzzy_match,llm_confidence,llm_reasoning
str,str,str,str,str,bool,i64,str,str,i64,str
"""049""","""State Avenue Foods""","""670 SPICE""","""K00106""","""K00106 HT ONION POWDER 4OZ.""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48302""","""K48302 PS POPPY SEED""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48307""","""K48307 PS GROUND CLOVES""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48309""","""K48309 PS WHOLE CLOVES""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48463""","""K48463 PS GROUND CUMIN""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48464""","""K48464 PS THYME LEAVES""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48466""","""K48466 PS GROUND CINNAMON""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48468""","""K48468 PS GROUND RED PEPPER""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48470""","""K48470 PS GROUND GINGER""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48471""","""K48471 PS GROUND NUTMEG""",false,,,,,


In [14]:
# Accepted matches: fuzzy score of at least 90, or an LLM confidence of at
# least 5. Rows where both values are null are dropped by the filter.
good_matches = products_final.filter(
    pl.col("fuzzy_match_score").ge(90) | pl.col("llm_confidence").ge(5)
)
display(good_matches)

plant_num,plant_name,line_name,bsc_fp_num,product,fuzzy_matched,fuzzy_match_score,original_product,best_fuzzy_match,llm_confidence,llm_reasoning
str,str,str,str,str,bool,i64,str,str,i64,str
"""714""","""RCK Foods""","""Deli Modern""","""D95191""","""D95191 Candied Jalapeno Bacon …",true,58,"""Candied Jalapeno Bacon Dip 12 …","""PRVT SEL CAN JAL BCN DIP""",7,"""The products both describe the…"
"""714""","""RCK Foods""","""Deli Modern""","""D95192""","""D95192 Margherita Pizza Dip 12…",true,56,"""Margherita Pizza Dip 12 oz.""","""PRVT SEL PZZA DP""",6,"""Both products appear to be piz…"
"""714""","""RCK Foods""","""Deli Multivac 1""","""D66010""","""D66010 Albacore Tuna Salad""",true,100,"""Albacore Tuna Salad""","""ALBACORE TUNA SALAD""",,
"""714""","""RCK Foods""","""Deli Multivac 1""","""D66033""","""D66033 American Potato Salad""",true,100,"""American Potato Salad""","""AMERICAN POTATO SALAD""",,
"""714""","""RCK Foods""","""Deli Multivac 1""","""D66151""","""D66151 Artisan Mac N Cheese""",true,95,"""Artisan Mac N Cheese""","""RCK ARTISAN MAC N CHEESE""",,
"""714""","""RCK Foods""","""Deli Multivac 1""","""D66155""","""D66155 BLT Pasta Salad""",true,90,"""BLT Pasta Salad""","""2/5LB BGS RCK BLT PASTA SALAD""",,
"""714""","""RCK Foods""","""Deli Multivac 1""","""D66026""","""D66026 Bistro Bow Tie Pasta Sa…",true,88,"""Bistro Bow Tie Pasta Salad Bas…","""BISTRO BOW TIE PASTA SA""",6,"""Both products describe a bow t…"
"""714""","""RCK Foods""","""Deli Multivac 1""","""D66042""","""D66042 Calico Bean Salad""",true,100,"""Calico Bean Salad""","""CALICO BEAN SALAD""",,
"""714""","""RCK Foods""","""Deli Multivac 1""","""D66063""","""D66063 Cheddar Mac N Cheese""",true,95,"""Cheddar Mac N Cheese""","""RNDY CHEDDAR MAC N CHEESE""",,
"""714""","""RCK Foods""","""Deli Multivac 1""","""D66147""","""D66147 Classic Egg Salad""",true,95,"""Classic Egg Salad""","""RCK CLASSIC EGG SALAD""",,


In [15]:
# Rejected matches: reviewed by the LLM and rated below 5.
bad_matches = products_final.filter(pl.col("llm_confidence").lt(5))
display(bad_matches)

plant_num,plant_name,line_name,bsc_fp_num,product,fuzzy_matched,fuzzy_match_score,original_product,best_fuzzy_match,llm_confidence,llm_reasoning
str,str,str,str,str,bool,i64,str,str,i64,str
"""714""","""RCK Foods""","""Deli Modern""","""D65981""","""D65981 Buffalo Chicken Dip 10 …",true,86,"""Buffalo Chicken Dip 10 oz.""","""CHICKEN MILANESE""",1,"""The product 'Buffalo Chicken D…"
"""714""","""RCK Foods""","""Deli Modern""","""D66163""","""D66163 Sweet and Sour Dressing""",true,59,"""Sweet and Sour Dressing""","""RNDY SWEET SOUR COLE SLAW""",2,"""Sweet and Sour Dressing and Sw…"
"""714""","""RCK Foods""","""Deli Multivac 1""","""D66012""","""D66012 Creamy Coleslaw Dressin…",true,77,"""Creamy Coleslaw Dressing""","""CREAMY COLE SLAW KIT""",3,"""The dressing and kit could be …"
"""714""","""RCK Foods""","""Deli Multivac 1""","""D03218""","""D03218 Presto Pasta Salad""",true,86,"""Presto Pasta Salad""","""KROGER SOUTHERN POTATO SALAD""",2,"""Presto Pasta Salad and Souther…"
"""714""","""RCK Foods""","""Deli Multivac 1""","""D66009""","""D66009 Raw Beet Red Cabbage Sa…",true,86,"""Raw Beet Red Cabbage Salad""","""TUNA SALAD SPREAD""",1,"""Beet and cabbage salad is very…"
"""714""","""RCK Foods""","""Deli Multivac 1""","""D65981""","""D65981 Rotisserie Chicken Sala…",true,86,"""Rotisserie Chicken Salad""","""CHICKEN MILANESE""",1,"""Chicken Milanese is a dish, no…"
"""714""","""RCK Foods""","""Deli Multivac 1""","""D65985""","""D65985 Stk Hse Potato Salad""",true,86,"""Stk Hse Potato Salad""","""TWICE BAKED POTATO BACON & CHE""",3,"""These are both potato products…"
"""714""","""RCK Foods""","""Deli Multivac 1""","""D66017""","""D66017 Sweet Bowtie Pasta Sala…",true,86,"""Sweet Bowtie Pasta Salad""","""MACARONI SALAD""",3,"""Sweet Bowtie Pasta Salad and M…"
"""714""","""RCK Foods""","""Deli Multivac 2""","""D66012""","""D66012 Creamy Coleslaw Dressin…",true,77,"""Creamy Coleslaw Dressing""","""CREAMY COLE SLAW KIT""",3,"""The dressing and kit could be …"
"""714""","""RCK Foods""","""Deli Multivac 2""","""D03218""","""D03218 Presto Pasta Salad""",true,86,"""Presto Pasta Salad""","""KROGER SOUTHERN POTATO SALAD""",2,"""Presto Pasta Salad and Souther…"


## Join with Formulations

In [17]:
# Formulation -> output material -> trade spec, one row per producing plant.
# The status filter is deliberately permissive (a row passes if ANY of the
# three status checks passes) until more current data is available.
query = """
WITH exploded AS (
  SELECT
    *,
    explode(produced_by_plant_num) AS plant_num
  FROM
    plm_spec_map_trade_output_formulation_silver
)
SELECT
  plant_num,
  formulation_spec_number,
  formulation_spec_name,
  output_material_spec_number,
  output_material_name,
  trade_spec_number
FROM
  exploded
WHERE
  plant_num IN ('049', '102', '714')
  AND output_type = 'Referenced'
  AND (
    (material_spec_status IN ('Approved', 'Review')) 
    OR (trade_spec_status IN ('Approved', 'Draft', 'In Process - CFT'))
    OR (formulation_spec_status IN ('Approved', 'Review', 'Plant Trial', 'Draft'))
  );
"""
formula_to_trade_map = pl.read_database(query=query, connection=engine)
display(formula_to_trade_map)

plant_num,formulation_spec_number,formulation_spec_name,output_material_spec_number,output_material_name,trade_spec_number
str,str,str,str,str,str
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5107943-002"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5003745-002"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5003986-004"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5004051-002"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5003986-002"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5081676-001"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5081676-002"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5125824-002"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5150418-001"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5125824-001"""


In [18]:
# Trade spec -> BSC finished-product number (rows without a BSC number are
# excluded by the query itself).
query = """
SELECT
  spec_number AS trade_spec_number,
  bsc_num,
  spec_type_derived
FROM
  plm_spec_cross_references_silver
WHERE
  spec_type = 'Trade Specification'
  AND bsc_num IS NOT NULL
ORDER BY spec_number;
"""
trade_to_bsc_map = pl.read_database(query=query, connection=engine)
display(trade_to_bsc_map)

trade_spec_number,bsc_num,spec_type_derived
str,str,str
"""5003672-001""","""K48638""","""fp_consumer"""
"""5003673-001""","""K48639""","""fp_consumer"""
"""5003674-001""","""K48651""","""fp_consumer"""
"""5003674-002""","""K48651""","""fp_consumer"""
"""5003675-001""","""K48693""","""fp_consumer"""
"""5003675-002""","""K48693""","""fp_consumer"""
"""5003676-001""","""K48696""","""fp_consumer"""
"""5003676-002""","""K48696""","""fp_consumer"""
"""5003677-001""","""K48697""","""fp_consumer"""
"""5003677-002""","""K48697""","""fp_consumer"""


In [19]:
# Chain the two maps: formulation -> trade spec -> BSC finished product.
# Trade specs with no BSC number come out of the left join as nulls and are
# removed by the final filter.
plm_to_bsc_map = (
    formula_to_trade_map
    .join(trade_to_bsc_map, how="left", on="trade_spec_number")
    .select(
        "plant_num",
        "formulation_spec_number",
        "formulation_spec_name",
        "output_material_spec_number",
        "output_material_name",
        "trade_spec_number",
        pl.col("bsc_num").alias("bsc_fp_num"),
    )
    .filter(pl.col("bsc_fp_num").is_not_null())
)

display(plm_to_bsc_map)

plant_num,formulation_spec_number,formulation_spec_name,output_material_spec_number,output_material_name,trade_spec_number,bsc_fp_num
str,str,str,str,str,str,str
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5107943-002""","""K98588"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5003745-002""","""K76327"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5003986-004""","""K99661"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5004051-002""","""K47855"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5003986-002""","""K06912"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5081676-001""","""K97586"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5081676-002""","""K97586"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5125824-002""","""K96776"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5150418-001""","""K99001"""
"""049""","""5000441-005""","""75104001 - RANCH DRESSING - FI…","""5000445-004""","""75104001 - RANCH DRESSING - FI…","""5125824-001""","""K98821"""


In [20]:
# Attach PLM spec information to every product via (plant, BSC number).
# A product appears once per matching trade spec (several rows per product in
# the displayed sample); products with no match keep a single row with null
# PLM columns.
products_with_specs = (
    products_final
    .join(plm_to_bsc_map, how="left", on=["plant_num", "bsc_fp_num"])
    .select(
        "plant_num",
        "plant_name",
        "line_name",
        "product",
        "bsc_fp_num",
        # PLM columns get a "plm_" prefix to mark their origin.
        *[
            pl.col(plm_column).alias(f"plm_{plm_column}")
            for plm_column in (
                "formulation_spec_number",
                "formulation_spec_name",
                "output_material_spec_number",
                "output_material_name",
                "trade_spec_number",
            )
        ],
        "fuzzy_matched",
        "fuzzy_match_score",
        "original_product",
        "best_fuzzy_match",
        "llm_confidence",
        "llm_reasoning",
    )
)

display(products_with_specs)

plant_num,plant_name,line_name,product,bsc_fp_num,plm_formulation_spec_number,plm_formulation_spec_name,plm_output_material_spec_number,plm_output_material_name,plm_trade_spec_number,fuzzy_matched,fuzzy_match_score,original_product,best_fuzzy_match,llm_confidence,llm_reasoning
str,str,str,str,str,str,str,str,str,str,bool,i64,str,str,i64,str
"""049""","""State Avenue Foods""","""670 SPICE""","""K00106 HT ONION POWDER 4OZ.""","""K00106""",,,,,,false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48302 PS POPPY SEED""","""K48302""","""5006583-001""","""83510873 - POPPY SEED - PREMIU…","""5006584-001""","""83510873 - POPPY SEED - PREMIU…","""5010540-001""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48302 PS POPPY SEED""","""K48302""","""5006583-001""","""83510873 - POPPY SEED - PREMIU…","""5006584-001""","""83510873 - POPPY SEED - PREMIU…","""5010540-002""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48307 PS GROUND CLOVES""","""K48307""","""5006595-001""","""83510881 - GROUND CLOVE - PREM…","""5006596-002""","""83510881 - GROUND CLOVE - PREM…","""5010545-002""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48309 PS WHOLE CLOVES""","""K48309""","""5006585-001""","""83510874 - WHOLE CLOVES - PREM…","""5006586-001""","""83510874 - WHOLE CLOVES, WHOLE…","""5010546-002""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48309 PS WHOLE CLOVES""","""K48309""","""5006585-001""","""83510874 - WHOLE CLOVES - PREM…","""5006586-001""","""83510874 - WHOLE CLOVES, WHOLE…","""5010546-001""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48463 PS GROUND CUMIN""","""K48463""","""5006587-001""","""83510875 - GROUND CUMIN - PREM…","""5006588-001""","""83510875 - GROUND CUMIN - PREM…","""5010548-001""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48463 PS GROUND CUMIN""","""K48463""","""5006587-001""","""83510875 - GROUND CUMIN - PREM…","""5006588-001""","""83510875 - GROUND CUMIN - PREM…","""5010548-002""",false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48464 PS THYME LEAVES""","""K48464""",,,,,,false,,,,,
"""049""","""State Avenue Foods""","""670 SPICE""","""K48466 PS GROUND CINNAMON""","""K48466""","""5006393-001""","""83510005 - GROUND CINNAMON - P…","""5006394-001""","""83510005 - GROUND CINNAMON - P…","""5010551-001""",false,,,,,


In [41]:
# Keep trustworthy rows that have a formulation, then retain only the highest
# issue of each formulation spec.
# Spec numbers in the displayed sample look like "5000441-005": the first 7
# characters are taken as the base spec and the last 3 as the issue number.
# NOTE(review): assumes every spec number follows that layout - confirm.
products_with_specs_good = (
    products_with_specs
    .filter(
        # Not fuzzy matched at all, or a match that passed either threshold.
        pl.col("fuzzy_matched").not_()
        | pl.col("fuzzy_match_score").ge(90)
        | pl.col("llm_confidence").ge(5)
    )
    .filter(pl.col("plm_formulation_spec_number").is_not_null())
    .with_columns(
        base_spec=pl.col("plm_formulation_spec_number").str.slice(0, 7),
        issue_number=pl.col("plm_formulation_spec_number").str.slice(-3, 3).cast(pl.Int32),
    )
    # The maximum is taken per base spec across the whole frame, not per
    # plant, line or product.
    .with_columns(max_issue_number=pl.col("issue_number").max().over("base_spec"))
    .filter(pl.col("issue_number") == pl.col("max_issue_number"))
    .select(
        [
            "plant_num",
            "plant_name",
            "line_name",
            "product",
            "bsc_fp_num",
            "plm_formulation_spec_number",
            "plm_formulation_spec_name",
            "plm_output_material_spec_number",
            "plm_output_material_name",
        ]
    )
    .unique()
    .sort(["plant_num", "line_name", "plm_formulation_spec_number", "product"])
)

display(products_with_specs_good)

plant_num,plant_name,line_name,product,bsc_fp_num,plm_formulation_spec_number,plm_formulation_spec_name,plm_output_material_spec_number,plm_output_material_name
str,str,str,str,str,str,str,str,str
"""049""","""State Avenue Foods""","""670 SPICE""","""K98126 5.25oz KRO CSB GARLIC P…","""K98126""","""5006303-003""","""83250001 - GARLIC POWDER CALIF…","""5006307-001""","""83250001 - GARLIC POWDER CALIF…"
"""049""","""State Avenue Foods""","""670 SPICE""","""K88999 KRO SEASONED MEAT TEND""","""K88999""","""5006334-002""","""83255001 - SEASONED MEAT TENDE…","""5006338-002""","""83255001 - SEASONED MEAT TENDE…"
"""049""","""State Avenue Foods""","""670 SPICE""","""K48479 PS PARSLEY FLAKES""","""K48479""","""5006371-001""","""83300530 - PARSLEY FLAKES - PR…","""5006376-001""","""83300530 - PARSLEY FLAKES - PR…"
"""049""","""State Avenue Foods""","""670 SPICE""","""K48466 PS GROUND CINNAMON""","""K48466""","""5006393-001""","""83510005 - GROUND CINNAMON - P…","""5006394-001""","""83510005 - GROUND CINNAMON - P…"
"""049""","""State Avenue Foods""","""670 SPICE""","""K49365 PS FENNEL SEED""","""K49365""","""5006473-001""","""83510152 - FENNEL SEED - PREMI…","""5006474-001""","""83510152 - FENNEL SEED - PREMI…"
"""049""","""State Avenue Foods""","""670 SPICE""","""K48471 PS GROUND NUTMEG""","""K48471""","""5006553-001""","""83510799 - GROUND NUTMEG - PRE…","""5006554-001""","""83510799 - GROUND NUTMEG - PRE…"
"""049""","""State Avenue Foods""","""670 SPICE""","""K48475 PS ITALIAN SEASONING""","""K48475""","""5006555-001""","""83510859 - ITALIAN SEASONING -…","""5006556-001""","""83510859 - ITALIAN SEASONING -…"
"""049""","""State Avenue Foods""","""670 SPICE""","""K96213 PS SWEET BASIL LEAVES""","""K96213""","""5006567-001""","""83510865 - BASIL LEAVES - PREM…","""5006568-001""","""83510865 - BASIL LEAVES - PREM…"
"""049""","""State Avenue Foods""","""670 SPICE""","""K99074 P.S. ROSEMARY LEAVES""","""K99074""","""5006569-001""","""83510866 - ROSEMARY LEAVES - P…","""5006570-001""","""83510866 - ROSEMARY LEAVES - P…"
"""049""","""State Avenue Foods""","""670 SPICE""","""K99086 PS MEDIT OREGANO""","""K99086""","""5006571-001""","""83510867 - MEDITERRANEAN OREGA…","""5006572-001""","""83510867 - MEDITERRANEAN OREGA…"


In [42]:
# Number of distinct formulation specs on each production line.
line_product_agg = (
    products_with_specs_good
    .group_by(["plant_num", "plant_name", "line_name"])
    .agg(unique_formulation_specs=pl.col("plm_formulation_spec_number").n_unique())
    .sort(["plant_num", "plant_name", "line_name"])
)

# Distinct formulation specs across all lines; a spec that appears on several
# lines is counted once here.
grand_total_unique_specs = (
    products_with_specs_good
    .get_column("plm_formulation_spec_number")
    .n_unique()
)

print("Grand total of unique specs:", grand_total_unique_specs)
display(line_product_agg)

Grand total of unique specs: 287


plant_num,plant_name,line_name,unique_formulation_specs
str,str,str,u32
"""049""","""State Avenue Foods""","""670 SPICE""",28
"""049""","""State Avenue Foods""","""750 Liquid Line""",29
"""049""","""State Avenue Foods""","""752 Semi-Solid Line""",11
"""049""","""State Avenue Foods""","""770 Preserves Line""",34
"""049""","""State Avenue Foods""","""771 Bakery Filling""",12
"""049""","""State Avenue Foods""","""772 Vinegar Line""",2
"""049""","""State Avenue Foods""","""790 Red Sauce Line""",19
"""049""","""State Avenue Foods""","""800 Canned Foods""",5
"""049""","""State Avenue Foods""","""820 BBQ/Syrup Line""",19
"""102""","""Country Oven Bakery""","""Bread Line""",2
