In [None]:
import polars as pl

from mfg_capabilities.config import config

from mfg_capabilities.utils import get_sqlalchemy_engine

from thefuzz import process

CATALOG = "manufacturing_dev"
SCHEMA = "work_agent_barney"


engine = get_sqlalchemy_engine(
    catalog=CATALOG,
    schema=SCHEMA
)


pl.Config.set_tbl_rows(20)  # to set the number of rows displayed)
# pl.Config.set_tbl_cols(10) # to set the number of columns
# pl.Config.set_fmt_str_lengths(50) # to set the max string length displayed

polars.config.Config

## Load line and product data

In [26]:
# I manually cleaned the Excel sheets from central planning
# and stored in a single excel file
PRODUCTS_PATH = config.data_dir / "from_central_planning" / "products_from_capacity.xlsx"

# Read all columns as strings first to handle concatenated CSVs with extra headers
products_raw = pl.read_excel(
    PRODUCTS_PATH,
    sheet_name="Sheet1",
    has_header=True,
    infer_schema_length=0  # Treat all columns as strings
)

# display(products_raw)

In [23]:
products_cleaned = (
    products_raw
    .filter(~pl.col("Product").str.contains("(?i)none"))
    .select("Plant Number", "Plant Name", "Line Name", "Product")
    .unique()
    .sort("Plant Number", "Plant Name", "Line Name", "Product")
)

# display(products_cleaned)

## Infer RCK line numbers

In [22]:
rck_products = (
    products_cleaned
    .filter(pl.col("Plant Number") == "714")
)

display(rck_products)

Plant Number,Plant Name,Line Name,Product
str,str,str,str
"""714""","""RCK Foods""","""Deli Modern""","""Buffalo Chicken Dip 10 oz."""
"""714""","""RCK Foods""","""Deli Modern""","""Candied Jalapeno Bacon Dip 12 …"
"""714""","""RCK Foods""","""Deli Modern""","""Margherita Pizza Dip 12 oz."""
"""714""","""RCK Foods""","""Deli Modern""","""Sweet and Sour Dressing"""
"""714""","""RCK Foods""","""Deli Multivac 1""","""Albacore Tuna Salad"""
"""714""","""RCK Foods""","""Deli Multivac 1""","""American Potato Salad"""
"""714""","""RCK Foods""","""Deli Multivac 1""","""Artisan Mac N Cheese"""
"""714""","""RCK Foods""","""Deli Multivac 1""","""BLT Pasta Salad"""
"""714""","""RCK Foods""","""Deli Multivac 1""","""Bistro Bow Tie Pasta Salad Bas…"
"""714""","""RCK Foods""","""Deli Multivac 1""","""Calico Bean Salad"""


In [29]:
query = """
SELECT
    plant_num,
    plant_name,
    bsc_fp_num,
    line_desc_finished_product,
    dept_name_production_plant,
    dept_name_sales_plant,
    line_market_desc
FROM
    financials_item_info_silver
WHERE
    plant_num = '714'
"""
rck_bsc_items = pl.read_database(query, engine)
display(rck_bsc_items)

plant_num,plant_name,bsc_fp_num,line_desc_finished_product,dept_name_production_plant,dept_name_sales_plant,line_market_desc
str,str,str,str,str,str,str
"""714""","""RCK Foods ""","""D03189""","""6/3LB KRO SOTHRN POTATO SALAD ""","""Deli MFG Salads ""","""Deli MFG Salads ""","""KRO SOTHRN POTATO SALAD """
"""714""","""RCK Foods ""","""D03218""","""12/1LB KRO SOUTHRN POTATO SLD ""","""Deli MFG Salads ""","""Deli MFG Salads ""","""KROGER SOUTHERN POTATO SALAD """
"""714""","""RCK Foods ""","""D03219""","""12/1LB KRO MUSTRD POTATO SALAD""","""Deli MFG Salads ""","""Deli MFG Salads ""","""KROGER MUSTARD POTATO SALAD """
"""714""","""RCK Foods ""","""D03271""","""12/1LB KRO HMSTYL BAKED BEANS ""","""Deli MFG Salads ""","""Deli MFG Salads ""","""KROGER HOMESTYLE BAKED BEANS """
"""714""","""RCK Foods ""","""D03322""","""6/3LB KRO MSTRD POTATO SALAD ""","""Deli MFG Salads ""","""Deli MFG Salads ""","""KROGER MUSTARD POTATO SALAD """
"""714""","""RCK Foods ""","""D03373""","""6/3LB KRO HMSTYL BAKED BEANS ""","""Deli MFG Salads ""","""Deli MFG Salads ""","""KROGER HOMESTYLE BAKED BEANS """
"""714""","""RCK Foods ""","""D03734""","""WHOLESOME@HOME CRANBERRY CELEB""","""Deli MFG Salads ""","""Deli MFG Salads ""","""WHOLESOME@HOME CRANBERRY CELEB"""
"""714""","""RCK Foods ""","""D08314""","""2/5LBS KROGER HOMESTYLE COCKTA""","""Deli MFG Salads ""","""Deli MFG Salads ""","""KROGER HOMESTYLE COCKTAIL SAUC"""
"""714""","""RCK Foods ""","""D21090""","""12/15OZ BFR MINI SUGAR CKY 36C""","""FROZEN MFT COOKIES ""","""FROZEN MFT COOKIES ""","""BFR MINI SUGAR CKY 36CT """
"""714""","""RCK Foods ""","""D21091""","""12/15OZ BFR MINI CHOC CH CKY 3""","""FROZEN MFT COOKIES ""","""FROZEN MFT COOKIES ""","""BFR MINI CHOC CH CKY 36CT """


In [43]:
from thefuzz import process

# Get the list of choices from rck_bsc_items
choices = rck_bsc_items["line_market_desc"].to_list()

# Function to get the best match
def get_best_match(product_name):
    """Finds the best match for a product name from a list of choices."""
    # Using score_cutoff within extractOne is more efficient
    best_match = process.extractOne(product_name, choices, score_cutoff=0)
    if best_match:
        return best_match[0]  # Return the matched string
    else:
        return None  # Return None if no match is found

# Apply the function to create a new column with the best match
rck_products_with_match = (
    rck_products
    .with_columns(
        pl.col("Product").map_elements(get_best_match, return_dtype=pl.Utf8).alias("best_match_desc")
    )
)

display(rck_products_with_match)

# # Join with rck_bsc_items
# rck_products_joined = rck_products_with_match.join(
#     rck_bsc_items,
#     left_on="best_match_desc",
#     right_on="line_market_desc",
#     how="left"
# )

# display(rck_products_joined)

Plant Number,Plant Name,Line Name,Product,best_match_desc
str,str,str,str,str
"""714""","""RCK Foods""","""Deli Modern""","""Buffalo Chicken Dip 10 oz.""","""CHICKEN MILANESE """
"""714""","""RCK Foods""","""Deli Modern""","""Candied Jalapeno Bacon Dip 12 …","""PRVT SEL CAN JAL BCN DIP """
"""714""","""RCK Foods""","""Deli Modern""","""Margherita Pizza Dip 12 oz.""","""PRVT SEL PZZA DP """
"""714""","""RCK Foods""","""Deli Modern""","""Sweet and Sour Dressing""","""RNDY SWEET SOUR COLE SLAW """
"""714""","""RCK Foods""","""Deli Multivac 1""","""Albacore Tuna Salad""","""ALBACORE TUNA SALAD """
"""714""","""RCK Foods""","""Deli Multivac 1""","""American Potato Salad""","""AMERICAN POTATO SALAD """
"""714""","""RCK Foods""","""Deli Multivac 1""","""Artisan Mac N Cheese""","""RCK ARTISAN MAC N CHEESE """
"""714""","""RCK Foods""","""Deli Multivac 1""","""BLT Pasta Salad""","""2/5LB BGS RCK BLT PASTA SALAD """
"""714""","""RCK Foods""","""Deli Multivac 1""","""Bistro Bow Tie Pasta Salad Bas…","""BISTRO BOW TIE PASTA SA """
"""714""","""RCK Foods""","""Deli Multivac 1""","""Calico Bean Salad""","""CALICO BEAN SALAD """


In [24]:

query = "SELECT * FROM plm_spec_cross_references_silver"
plm_spec_cross_refs = pl.read_database(query, engine)

[WARN] Parameter '_user_agent_entry' is deprecated; use 'user_agent_entry' instead. This parameter will be removed in the upcoming releases.


In [6]:
display(plm_spec_cross_refs)

spec_number,spec_type,spec_status,formula_management_formula,formula_management_formula_output,bsc_fp_num_consumer,bsc_fp_num_case,bsc_fp_num_pallet,bsc_raw_mat_num,bsc_num,spec_type_derived
str,str,str,str,str,str,str,str,str,str,str
"""5000072-004""","""Ingredient Specification""","""Deleted""",,,,,,"""666666""","""666666""","""raw_material"""
"""5000072-005""","""Ingredient Specification""","""Draft""",,,,,,"""786123""","""786123""","""raw_material"""
"""5000072-007""","""Ingredient Specification""","""Deleted""",,,,,,"""300176""","""300176""","""raw_material"""
"""5000072-011""","""Ingredient Specification""","""Retired""",,,,,,"""300176""","""300176""","""raw_material"""
"""5000072-012""","""Ingredient Specification""","""Draft""",,,,,,"""300176""","""300176""","""raw_material"""
"""5000073-002""","""Ingredient Specification""","""Retired""",,,,,,"""300182""","""300182""","""raw_material"""
"""5000074-001""","""Ingredient Specification""","""Retired""",,,,,,"""300193""","""300193""","""raw_material"""
"""5000075-001""","""Ingredient Specification""","""Retired""",,,,,,"""300202""","""300202""","""raw_material"""
"""5000076-001""","""Ingredient Specification""","""Deleted""",,,,,,"""300248""","""300248""","""raw_material"""
"""5000077-001""","""Ingredient Specification""","""Retired""",,,,,,"""300260""","""300260""","""raw_material"""


In [21]:

query = """
SELECT
    output_material_spec_number,
    trade_spec_number,
    trade_spec_status,
    material_spec_status,
FROM
    plm_spec_map_trade_output_formulation_silver
WHERE
    array_contains(produced_by_plant_num, "049")
    OR array_contains(produced_by_plant_num, "102")
    OR array_contains(produced_by_plant_num, "714")
"""

plm_spec_map = pl.read_database(query, engine)

DatabaseError: (databricks.sql.exc.ServerOperationError) [TRAILING_COMMA_IN_SELECT] Trailing comma detected in SELECT clause. Remove the trailing comma before the FROM clause. SQLSTATE: 42601; line 7 pos 0
[SQL: 
SELECT
    output_material_spec_number,
    trade_spec_number,
    trade_spec_status,
    material_spec_status,
FROM
    plm_spec_map_trade_output_formulation_silver
WHERE
    array_contains(produced_by_plant_num, "049")
    OR array_contains(produced_by_plant_num, "102")
    OR array_contains(produced_by_plant_num, "714")
]
(Background on this error at: https://sqlalche.me/e/20/4xp6)

In [20]:
display(plm_spec_map)

trade_spec_number
str
"""5003986-001"""
"""5003745-001"""
"""5004051-001"""
"""5004052-001"""
"""5004056-001"""
"""5004062-001"""
"""5003986-001"""
"""5003745-001"""
"""5004051-001"""
"""5004052-001"""
