## Training

In [1]:
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_openai import AzureOpenAIEmbeddings
import json
import os

In [2]:
# --- Helpers ---
def _require_env(var: str) -> str:
    value = os.getenv(var)
    if not value:
        raise EnvironmentError(f"Missing required environment variable: {var}")
    return value

In [3]:
# --- Environment Vars ---
AZURE_ENDPOINT = _require_env("AZURE_OPENAI_ENDPOINT")
AZURE_API_KEY = _require_env("AZURE_OPENAI_API_KEY")
AZURE_API_VERSION = _require_env("AZURE_OPENAI_API_VERSION")
DEFAULT_EMBED_MODEL = _require_env("AZURE_OPENAI_EMBEDDING_MODEL")

In [4]:
# --- Embeddings ---
embeddings = AzureOpenAIEmbeddings(
    azure_deployment=DEFAULT_EMBED_MODEL,
    openai_api_key=AZURE_API_KEY,
    azure_endpoint=AZURE_ENDPOINT,
    openai_api_version=AZURE_API_VERSION,
)

In [5]:
# --- Setup Chroma store ---
collection_name = "product_catalog"
vector_db_dir = r"C:\Production\Langraph Master Bot\VectorStore\product_catalog"
vectorstore = Chroma(
    collection_name=collection_name,
    persist_directory=vector_db_dir,
    embedding_function=embeddings,
)

In [6]:
product_catalog_path = r"C:\Production\Langraph Master Bot\query_router\product_catalog.json"

In [7]:
with open(product_catalog_path, "r") as f:
    catalog = json.load(f)

In [8]:
# --- Prepare documents ---
documents = []
for solution in catalog:
    for module in solution["modules"]:
        entry_text = (
            f"{module['title']} — {module['description']} "
            f"Keywords: {', '.join(module['keywords'])} "
            f"Capabilities: {', '.join(module['capabilities'])}"
        )
        metadata = {
            "solution": solution["solution"],
            "title": module["title"],
            "slug": module["slug"],
            "url_pattern": module["url_pattern"],
            "industries": ", ".join(module["industries"]),  # flatten lists
            "industry_slugs": ", ".join(module["industry_slugs"])
        }
        documents.append(Document(page_content=entry_text, metadata=metadata))

In [9]:
# --- Add to vector store ---
vectorstore.add_documents(documents)

['10b69574-d052-4a57-b489-9176e51b3ff1',
 'e02e14bd-4203-4fb7-904f-5b9f4a1239e3',
 'cbcc7641-78b2-4479-af30-0dfd64c9a348',
 '26ca1f92-7c08-40ca-aa41-5aa07fb24f08',
 '1fb3d434-600d-41d4-beb8-6122291064ca',
 'a89524b6-3a46-4441-825c-138cbe2a90c2',
 '1db1fdb7-5a7e-450e-aa89-f8024fdc8b28',
 'e89355c4-e0a1-4294-b105-a749d965fa63',
 '8afee901-7c65-410e-8363-41e291083d82',
 '96a73ac9-039e-4360-91ec-01568f85d139',
 '214a1cbf-370c-47ca-a8e5-43d931dcd8c8',
 '17a12e46-e7ed-4731-9e66-8794d04f2090',
 'bd175685-6d93-412e-9c69-f15e0c12ed2c',
 '710d6121-7bbf-422a-a87b-169e6283a45a',
 '811f95fa-4354-4055-b8bf-89449bafcaca',
 '7f02f661-d5e3-4380-a28e-54ea4c75da5a',
 '15c7a6e3-36e8-401a-a71e-3023b0e53981',
 '7da14464-8958-45aa-a6cc-3995f9847344',
 'c3d2ed48-45fe-49e9-bcde-29534372df2f',
 '6bb57576-0897-4e0d-ba62-9292ec98ef4c',
 '056f9afe-aa0c-4eca-a675-fba5b2c9279d',
 'e3c31c85-39aa-44cf-bb5b-dedf5adc937d',
 'df33a3f6-de2e-42b8-b6e5-f126936098bb',
 '5216c6d5-2497-42d4-bcf1-71a5636f3504',
 'f17bd926-756f-

## Testing

In [10]:
vectorstore = Chroma(
    collection_name=collection_name,
    persist_directory=vector_db_dir,
    embedding_function=embeddings
)

query = "Which brokers currently have the most high-priority cases?"
results = vectorstore.similarity_search(query, k=3)

for r in results:
    print(r.metadata["title"], r.metadata["url_pattern"], r.page_content)

Priority Bin Dashboard https://www.spinnakerhub.com/lead-prioritizer/{industry-slug}/priority-bin-dashboard Priority Bin Dashboard — Interactive pipeline and priority segmentation dashboard that categorizes applications into priority bins (High Priority, Normal, Low, DTQ) and tracks case counts, premiums, and conversion metrics for each bin. Displays key KPIs including total cases, sold count, % sold, total premium, and expected premium. Visualizations include application mix by priority bin, total premium by bin, expected premium by bin, average case score by bin, and expected sold ratio by bin, with pipeline averages highlighted for comparison. Users can filter by product, application type, underwriter, broker, state, and income bin to evaluate performance drivers and prioritize sales activities for the most promising cases. Keywords: priority bin, high priority cases, normal priority, low priority, DTQ cases, pipeline value, expected premium, total premium, case score analysis, expe