# Core


In [None]:
# | default_exp core


In [None]:
# | export

from invoice_parser.imports import *
from invoice_parser.utils import *

In [None]:
#| hide

%load_ext autoreload
%autoreload 2
%reload_ext autoreload

## Is Invoice


In [None]:
# | export


def page0_text(pdf):
    """Return the text extracted from the first page of `pdf`.

    `pdf` is passed unchanged to `PdfReader`; indexing page 0 raises if the
    document has no pages.
    """
    first_page = PdfReader(pdf).pages[0]
    return first_page.extract_text()


def is_invoice_text(text, model):
    """Return True when `model` maps `text` to the scalar label 0.

    `model` is called with the raw text; its result must support
    `.detach().cpu().item()`.
    """
    prediction = model(text)
    label = prediction.detach().cpu().item()
    return label == 0


def is_invoice(pdf, model, device=None):
    """Classify a PDF as invoice / not-invoice from its first page.

    When `model` is None a model is obtained from
    `load_invoice_model(device=device)`; otherwise `device` is unused here.
    """
    classifier = load_invoice_model(device=device) if model is None else model
    first_page_text = page0_text(pdf)
    return is_invoice_text(first_page_text, classifier)


def is_invoice_chain(
    model,
    device=None,
    input_variables=["pdf"],
    output_variables=["is_invoice"],
    verbose=False,
):
    """Wrap `is_invoice` in a chain built by `transform_chain`.

    The first entry of `input_variables` is mapped onto the `pdf` argument of
    `is_invoice`; `model` and `device` are forwarded as fixed keyword
    arguments.
    """
    pdf_key = input_variables[0]
    chain_options = dict(
        transform_kwargs=dict(model=model, device=device),
        vars_kwargs_mapping={pdf_key: "pdf"},
        input_variables=input_variables,
        output_variables=output_variables,
        verbose=verbose,
    )
    return transform_chain(is_invoice, **chain_options)


## Parsing


In [None]:
# | export

# Table-detection presets. The keys look like pdfplumber table-settings
# options — TODO confirm against the code that consumes these dicts.

# Vertical boundaries from drawn lines, horizontal boundaries from text.
vline_settings = {
    "horizontal_strategy": "text",
    "vertical_strategy": "lines",
    "intersection_x_tolerance": 5,
    "snap_y_tolerance": 5,
    "join_x_tolerance": 5,
    "join_y_tolerance": 5,
}
# Horizontal boundaries from drawn lines, vertical boundaries from text.
hline_settings = {
    "horizontal_strategy": "lines",
    "vertical_strategy": "text",
    "intersection_x_tolerance": 5,
    "snap_y_tolerance": 5,
    "join_x_tolerance": 5,
    "join_y_tolerance": 5,
}
# Both directions from drawn lines.
line_settings = {
    "horizontal_strategy": "lines",
    "vertical_strategy": "lines",
    "intersection_x_tolerance": 5,
    "snap_y_tolerance": 5,
    "join_x_tolerance": 5,
    "join_y_tolerance": 5,
}
# Both directions from text.
# NOTE(review): this binding is immediately overwritten by the second
# `text_settings` assignment below, so this dict is never the effective value.
text_settings = {
    "horizontal_strategy": "text",
    "vertical_strategy": "text",
    "intersection_x_tolerance": 5,
    "snap_y_tolerance": 5,
    "join_x_tolerance": 5,
    "join_y_tolerance": 5,
}
# Effective value of `text_settings`: only "text_layout" is set; the tolerance
# options are intentionally left commented out.
text_settings = {
    # "intersection_x_tolerance": 5,
    # "snap_y_tolerance": 5,
    # "join_x_tolerance": 5,
    # "join_y_tolerance": 5,
    "text_layout": True
}


def get_fullest_row(table):
    """Return `(row, index)` for the longest row of `table`.

    Only rows accepted by `full_row` compete; if none qualifies, every row of
    the table is considered. The index is the row's first position in `table`.
    """
    candidates = list(filter(full_row, table))
    if not candidates:
        candidates = table
    widest = max(candidates, key=len)
    return widest, table.index(widest)


def num_full_parts(row):
    """Count the parts of `row` that `empty_part` does not flag as empty."""
    return sum(1 for part in row if not empty_part(part))


def get_table_items(table):
    """Group the rows of an extracted table into item dicts keyed by column name.

    The row returned by `get_fullest_row` is used as the header. Rows after
    the header are folded into items: a new item starts whenever a row has the
    same number of non-empty parts as the current reference count, or is
    empty; other rows are appended to the item in progress.

    Parameters:
        table (list): Rows of the table, each row a sequence of cell values.

    Returns:
        list: One dict per item mapping column name to the concatenated cell
        text. An empty list when `table` is None or empty.
    """
    if table is None or len(table) == 0:
        return []

    cols, cols_idx = get_fullest_row(table)
    # Give blank header cells a positional name.
    # NOTE(review): `cols` is the row object taken from `table`, so this
    # rewrites the caller's table in place.
    for i, c in enumerate(cols):
        if empty_part(c):
            cols[i] = f"col_{i}"

    # let's assume that the first full row after the cols row is the first item
    # The helpers are given the slice after the header; element [1] of their
    # result is used as an index relative to that slice (None when nothing is
    # found) — presumably; verify against the helper definitions.
    first_order_row_idx = get_first_full_row(table[cols_idx + 1 :])[1]
    if first_order_row_idx is None:
        first_order_row_idx = get_first_non_empty_row(table[cols_idx + 1 :])[1]
    if first_order_row_idx is None:
        first_order_row_idx = 0
    # Convert the slice-relative index back to an index into `table`.
    first_order_row_idx += cols_idx + 1

    items = []
    item = {c: "" for c in cols}
    # Clamp so that `order_table` is never empty; when the header is the last
    # row this makes the header itself the only "order" row.
    first_order_row_idx = min(first_order_row_idx, len(table) - 1)
    order_table = table[first_order_row_idx:]
    # Reference count of non-empty parts that marks the start of an item.
    curr_row_len = num_full_parts(order_table[0])
    for row in order_table:
        # NOTE(review): the first row always matches `curr_row_len`, so the
        # still-blank initial `item` is appended before any data is added.
        if ((num_full_parts(row) == curr_row_len) or empty_row(row)) and len(item) > 0:
            items.append(item)
            item = {c: "" for c in cols}
            if not empty_row(row):
                curr_row_len = num_full_parts(row)
        # NOTE(review): `row[i]` raises IndexError if a row is shorter than
        # the header row.
        for i, c in enumerate(cols):
            row_part = row[i]
            if not empty_part(row_part):
                # Flatten multi-line cells onto one line.
                row_part = " ".join(row[i].split("\n"))
                item[c] += row_part + " "
    # Flush the item that was still being built when the rows ran out.
    items.append(item)
    return items


In [None]:
# | export


def row_check(row, target_list, target_thresh=2):
    """
    Tell whether enough of the target strings occur in a row.

    Matching is a case-insensitive substring test against the stripped row.

    Parameters:
        row (str): The row text to inspect.
        target_list (list): Strings to look for.
        target_thresh (int): Minimum number of targets that must be found.

    Returns:
        bool: True when at least `target_thresh` targets occur in the row.
    """
    hits = 0
    for target in target_list:
        if target.lower() in row.strip().lower():
            hits += 1
    return hits >= target_thresh


def extract_sub_text(
    text,
    top_cols,
    bottom_cols,
    top_thresh=2,
    bottom_thresh=1,
    alt_top_index=0,
    alt_bottom_index=0,
):
    """
    Slice out the lines between a top marker row and a bottom marker row.

    The top marker is searched from the start of `text`; the bottom marker is
    searched from the end (on the reversed list). Blank lines are dropped from
    the returned slice.

    Returns:
        tuple: (non-blank lines of text[top_idx : bottom_idx + 1], top_idx,
        bottom_idx), where bottom_idx is len(text) minus the reversed index.
    """
    top_idx = find_target_index(
        text, top_cols, target_thresh=top_thresh, alt_index=alt_top_index
    )
    reversed_hit = find_target_index(
        text[::-1], bottom_cols, target_thresh=bottom_thresh, alt_index=alt_bottom_index
    )
    # Distance from the end converted to a position counted from the start.
    bottom_idx = len(text) - reversed_hit
    window = text[top_idx : bottom_idx + 1]
    kept_lines = [line for line in window if line.strip()]
    return kept_lines, top_idx, bottom_idx


def find_target_index(data, target_list, target_thresh=2, alt_index=0):
    """
    Locate the first row of `data` that passes `row_check`.

    Parameters:
        data (list): Rows (strings) to scan in order.
        target_list (list): Strings to look for in each row.
        target_thresh (int): Minimum number of targets a row must contain.
        alt_index (int): Value returned when no row qualifies.

    Returns:
        int: Index of the first qualifying row, or `alt_index` (after emitting
        a warning) when there is none.
    """
    for position, line in enumerate(data):
        if row_check(line, target_list, target_thresh):
            return position
    msg.warn(f"No target found in data. Setting it to {alt_index}.", spaced=True)
    return alt_index


In [None]:
# | export


def json_str(x):
    """Heuristically massage text containing a JSON-like object into one line.

    Keeps the span from the first "{" to the last "}", merges continuation
    lines, then applies an ordered series of string/regex rewrites. The
    rewrites depend on each other's output, so their order matters.

    NOTE(review): when `x` contains no braced span the slice is empty and
    `x[0]` raises IndexError.

    Parameters:
        x (str): Raw text that contains a JSON-like object.

    Returns:
        str: The rewritten single-line string (not guaranteed to be valid JSON).
    """
    # Keep only the outermost {...} span.
    x = x[x.find("{") : x.rfind("}") + 1]
    x = x.splitlines()
    jstr = [x[0]]
    for s in x[1:]:
        if ":" not in s and s.strip() != "}":
            # A line without a key separator is treated as a continuation of
            # the previous line: drop that line's last two characters and this
            # line's first character, then join them with a space.
            jstr[-1] = jstr[-1][:-2] + " " + s[1:]
        else:
            jstr.append(s)
    jstr = " ".join(jstr).strip()
    # Drop a trailing "," or ";".
    if jstr[-1] in [",", ";"]:
        jstr = jstr[:-1].strip()
    jstr = jstr.replace(",}", "}")
    # Collapse every whitespace run to a single space.
    jstr = re.sub(r"\s+", " ", jstr)
    # Remove the space that follows or precedes a double quote.
    jstr = jstr.replace('" ', '"')
    jstr = jstr.replace(' "', '"')
    jstr = jstr.replace(", }", "}")
    # NOTE(review): the alphanumeric character before "}" is replaced by the
    # quote, i.e. that character is dropped from the output.
    jstr = re.sub(r"[a-zA-Z0-9]\}", '"}', jstr)
    # Insert an opening quote before a word character that follows ":".
    jstr = re.sub(r":\s*(\w)", r': "\1', jstr)
    # Strip leading zeros from standalone digit runs.
    jstr = re.sub(r"\b0+(\d+)\b", r"\1", jstr)
    # jstr = re.sub(r"\s*([{}])\s*", r"\1", jstr)
    # Normalise spacing around ":" and "," to exactly one space after.
    jstr = re.sub(r"\s*([:,])\s*", r"\1 ", jstr)
    jstr = re.sub(r",}", '"}', jstr)
    # Collapse repeated quotes before "}" to a single quote.
    jstr = re.sub(r"\"+}", '"}', jstr)
    # Two adjacent quotes are taken as a missing comma between two strings.
    jstr = jstr.replace('""', '","')
    return jstr


def str_to_json(x, max_try=10):
    """Parse the JSON object embedded in `x`, escaping stray inner quotes.

    The span from the first "{" to the last "}" is extracted and parsed. On
    each decode error the double quote just before the reported error position
    and the next double quote after it are escaped, then parsing is retried.

    Parameters:
        x (str): Text containing a JSON object.
        max_try (int): Maximum number of parse attempts.

    Returns:
        tuple: `(jstr, json_dict)` — the last candidate string that was tried
        and the parsed result (`{}` when parsing never succeeded).
    """
    # jstr = json_str(x)
    jstr = x[x.find("{") : x.rfind("}") + 1]
    # A value that is a lone opening quote becomes an empty string.
    jstr = jstr.replace(': ",', ': "",')
    json_dict = {}
    tries = 0
    while tries < max_try:
        try:
            json_dict = json.loads(jstr)
            break
        except json.JSONDecodeError as err:
            # `err.pos` is the same offset the "(char N)" message reports; use
            # it directly instead of parsing the message text.
            unesc = jstr.rfind('"', 0, err.pos)
            if unesc == -1:
                # No quote to escape: slicing with -1 would only corrupt the
                # string, so stop retrying.
                break
            candidate = jstr[:unesc] + r"\"" + jstr[unesc + 1 :]
            closg = candidate.find('"', unesc + 2)
            if closg == -1:
                break
            jstr = candidate[:closg] + r"\"" + candidate[closg + 1 :]
            tries += 1
    return jstr, json_dict

## Docs


In [None]:
# | export


def extract_text(path):
    """Return the layout-preserving text of the first page of a PDF as lines.

    Parameters:
        path: Location of the PDF, passed to `pdfplumber.open`.

    Returns:
        list: The first page's text split into lines.

    Raises:
        IndexError: If the PDF has no pages.
    """
    # Open in a `with` block so the file handle is released, and extract only
    # the page that is actually returned.
    with pdfplumber.open(path) as pdf:
        return pdf.pages[0].extract_text(layout=True).splitlines()


def extract_order_docs(
    text,
    header_cols=["item", "description", "price", "quantity", "amount", "total", "qty"],
    get_parts=False,
    splitter=None,
    chunk_size=4000,
    chunk_overlap=0,
):
    """Turn the lines of an order table into documents via a text splitter.

    The first and last lines of `text` are always kept unchanged. Each line in
    between is either skipped, kept as a header row, accumulated into a
    pending description, or kept as an order row (optionally tagged with a
    part number).

    Parameters:
        text (list): Lines of the order table; must be non-empty.
        header_cols (list): Words that identify a header row (two or more hits).
        get_parts (bool): When True, append " part_number: N" to order rows in
            which a 5-digit (else 4-digit) number is found.
        splitter: Object providing `create_documents`; when None a
            `RecursiveCharacterTextSplitter` is built from `chunk_size` and
            `chunk_overlap`.
        chunk_size (int): Chunk size for the default splitter.
        chunk_overlap (int): Chunk overlap for the default splitter.

    Returns:
        The result of `splitter.create_documents(order_text, metadatas=...)`.
    """
    if splitter is None:
        splitter = RecursiveCharacterTextSplitter(
            separators=["\n\n"], chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
    # Despite the name this is the value returned by `mode` for the line
    # lengths (presumably the most common length — confirm which `mode` is
    # imported).
    avg_len = mode([len(t) for t in text])
    order_text = [text[0]]
    order_metadatas = [{}]
    desc = ""
    # for i, txt in enumerate(text[1:-1], start=1):
    for txt in text[1:-1]:
        # Lines at least twice the reference length are dropped entirely.
        if not len(txt) >= avg_len * 2:
            txt = txt.replace('"', "").replace("'", "").strip()
            if row_check(txt, header_cols, 2):
                # Header-like row: kept with empty metadata.
                order_text.append(txt)
                order_metadatas.append({})
            elif len(txt) < avg_len * 0.75 and not row_check(
                order_text[-1], header_cols, 2
            ):
                # Short line after a non-header entry: accumulate it as
                # description text instead of emitting it as its own row.
                desc += " " + txt
            else:
                # Flush any pending description as a metadata entry.
                # NOTE(review): no metadata entry is appended when `desc` is
                # empty, so `order_metadatas` positions may not line up with
                # `order_text` positions — confirm this is intended.
                if len(desc) > 0:
                    order_metadatas.append({"desc": desc.strip()})
                    desc = ""
                if get_parts:
                    # Prefer 5-digit runs, fall back to 4-digit runs; runs
                    # starting with "00" are ignored.
                    part_nums = [
                        x
                        for x in re.findall(r"\d{5}", txt)
                        if not x.startswith("00") and "." not in x
                    ]
                    if len(part_nums) == 0:
                        part_nums = [
                            x
                            for x in re.findall(r"\d{4}", txt)
                            if not x.startswith("00") and "." not in x
                        ]
                    if len(part_nums) > 0:
                        part_num = part_nums[0]
                        txt += f" part_number: {part_num}"
                order_text.append(txt)
    if len(desc) > 0:
        order_metadatas.append({"desc": desc.strip()})
    order_text.append(text[-1])
    # Pad with empty metadata so both lists have the same length when
    # `order_metadatas` is the shorter one.
    order_metadatas += [{} for _ in range(len(order_text) - len(order_metadatas))]
    return splitter.create_documents(order_text, metadatas=order_metadatas)


def info_order_docs(
    text,
    header_cols=["item", "description", "price", "quantity", "amount", "total", "qty"],
    total_cols=["total", "subtotal", "tax"],
    chunk_size=4000,
    chunk_overlap=0,
    get_parts=False,
):
    """Split page lines into "info" documents and "order" documents.

    The order table is the span found by `extract_sub_text` between a header
    row and a totals row; the lines before `top_idx` and from `bottom_idx`
    onwards form the info text.

    Returns:
        dict: `info_docs` and `order_docs`, both produced with the same
        splitter.
    """

    def _clean(lines):
        # Remove quote characters and surrounding whitespace; skip blank lines.
        return [
            ln.replace('"', "").replace("'", "").strip()
            for ln in lines
            if len(ln.strip()) > 0
        ]

    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n"], chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    table_text, top_idx, bottom_idx = extract_sub_text(
        text,
        top_cols=header_cols,
        bottom_cols=total_cols,
        top_thresh=2,
        bottom_thresh=1,
        alt_top_index=0,
        alt_bottom_index=0,
    )

    info_text = _clean(text[:top_idx]) + _clean(text[bottom_idx:])
    info_docs = splitter.create_documents(info_text)
    order_docs = extract_order_docs(
        table_text, header_cols=header_cols, get_parts=get_parts, splitter=splitter
    )
    return {"info_docs": info_docs, "order_docs": order_docs}


def pdf_to_info_order_docs(
    path,
    header_cols=["item", "description", "price", "quantity", "amount", "total", "qty"],
    total_cols=["total", "subtotal", "tax"],
    chunk_size=4000,
    chunk_overlap=0,
    get_parts=False,
):
    """Read a PDF with `extract_text` and pass its lines to `info_order_docs`.

    All keyword options are forwarded unchanged.
    """
    page_lines = extract_text(path)
    options = dict(
        header_cols=header_cols,
        total_cols=total_cols,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        get_parts=get_parts,
    )
    return info_order_docs(page_lines, **options)


## LLM


In [None]:
# device = default_device()

# embeddings = SentenceTransformerEmbeddings(
#     model_name="sentence-transformers/all-mpnet-base-v2",
#     model_kwargs={"device": default_device()},
# )
# model = "tiiuae/falcon-7b"

In [None]:
# | export


def qa_llm_chain(model="meta-llama/Llama-2-7b-chat-hf", token=None):
    """Build a "stuff" question-answering chain on a Hugging Face causal LM.

    Parameters:
        model (str): Model identifier passed to `from_pretrained`.
        token (str | None): Hugging Face access token. When None, the
            `HF_TOKEN` environment variable is used if set; otherwise None is
            passed through to `from_pretrained`.

    Returns:
        The chain returned by `load_qa_chain(llm, "stuff")`.
    """
    import os

    # Credentials must not be committed to source: take the token from the
    # caller or from the environment instead of a hard-coded literal.
    if token is None:
        token = os.environ.get("HF_TOKEN")
    tokenizer = AutoTokenizer.from_pretrained(model, token=token)
    lm = AutoModelForCausalLM.from_pretrained(
        model, device_map="auto", torch_dtype=torch.float16, token=token
    )
    # NOTE(review): the model is loaded as float16 while the pipeline is given
    # bfloat16, and `do_sample=True` is combined with temperature 0 below —
    # left unchanged, confirm the intended settings.
    pipe = pipeline(
        "text-generation",
        model=lm,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
        max_new_tokens=1024,
        do_sample=True,
        top_k=5,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )
    llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={"temperature": 0})
    return load_qa_chain(llm, "stuff")


## QUERY


In [None]:
# | export


def fix_json(text):
    """Apply brace-balancing and quote heuristics to a JSON-like string.

    Steps, in order: pad with "{" / "}" until the brace counts match
    (inserting "," between "}{" whenever padding happened), remove newlines,
    unwrap one level of doubled outer braces, then rewrite '\"\"\"' to '\"'
    and '\"\"' to '\",\"'.

    Returns the rewritten string; the input is not modified.
    """

    def _separate(s):
        # Adjacent objects "}{" become "},{".
        return s.replace("}{", "},{")

    fixed = text
    opens, closes = fixed.count("{"), fixed.count("}")
    if opens != closes:
        # First make each brace count even.
        if opens % 2:
            fixed = _separate("{" + fixed)
        if fixed.count("}") % 2:
            fixed = _separate(fixed + "}")

    # Then add whatever is still missing on the short side.
    missing_open = fixed.count("}") - fixed.count("{")
    if missing_open > 0:
        fixed = _separate("{" * missing_open + fixed)
    elif missing_open < 0:
        fixed = _separate(fixed + "}" * -missing_open)

    fixed = fixed.replace("\n", "")
    if fixed.startswith("{{") and fixed.endswith("}}"):
        fixed = fixed[1:-1]
    return fixed.replace('"""', '"').replace('""', '","')


def json_response(chain, docs, query, max_tries=6):
    """Query the chain until it returns a braced span, then parse it as JSON.

    Parameters:
        chain: Callable taking a dict with `input_documents` and `question`
            and returning a mapping with an `output_text` entry.
        docs: Documents passed as `input_documents`.
        query (str): Question passed to the chain.
        max_tries (int): Maximum number of chain calls while the extracted
            span is empty.

    Returns:
        dict: `json_str` is the span from the first "{" to the last "}" of the
        last answer (may be ""), and `json` is the parsed value, or `{}` when
        parsing fails.
    """
    attempts = 0
    res = ""
    while res == "" and attempts < max_tries:
        res = chain(dict(input_documents=docs, question=query))
        res = res["output_text"].strip()
        res = res[res.find("{") : res.rfind("}") + 1]
        attempts += 1
    # Parsing is deterministic, so one attempt is enough. Only decode failures
    # are treated as "no JSON"; a bare `except` would also swallow
    # KeyboardInterrupt and SystemExit.
    try:
        parsed = json.loads(fix_json(res))
    except (ValueError, RecursionError):
        parsed = {}
    return dict(json_str=res, json=parsed)


def info_json(chain, info_docs, max_tries=6):
    """Ask the chain for the order-level information as JSON.

    Builds the fixed extraction prompt and delegates to `json_response`.
    """
    prompt_parts = (
        """Extract the order information like the numbers, dates, shipping address and total amount. Include the quote number too if found.""",
        "\nReturn the text in JSON format. It must be compatible with json.loads.",
        "\nDon't tell me how to do it, just do it. Don't add any disclaimer.",
    )
    return json_response(chain, info_docs, "".join(prompt_parts), max_tries)


def order_json(
    chain,
    order_docs,
    max_tries=6,
    get_parts=False,
    chunk_size=4000,
    chunk_overlap=0,
):
    """Extract the order items with the chain and return them as JSON.

    Works in two passes: the chain is first asked to list the order items
    from `order_docs`; its free-text answer is then wrapped in documents and
    sent back through `json_response` with a "return JSON" prompt.

    Parameters:
        chain: Callable taking a dict with `input_documents` and `question`
            and returning a mapping with an `output_text` entry.
        order_docs: Documents describing the order table.
        max_tries (int): Forwarded to `json_response`.
        get_parts (bool): When True, both prompts also ask for part numbers.
        chunk_size (int): Chunk size of the splitter used for the second pass.
        chunk_overlap (int): Chunk overlap of that splitter.

    Returns:
        dict: The result of `json_response` for the second pass.
    """
    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n"], chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    json_query = "\nReturn the text in JSON format. It must be compatible with json.loads."
    # Must be a plain string: a trailing comma here would make it a 1-tuple
    # and `query += suffix` would raise TypeError.
    suffix = "\nDon't tell me how to do it, just do it. Don't add any disclaimer."
    part_query = "Include the part numbers if defined."
    query = "Extract the order items with full details and descriptions and prices."
    if get_parts:
        query += " " + part_query
    query += suffix
    query = query.strip()
    items = chain(dict(input_documents=order_docs, question=query))["output_text"].strip()

    # Second pass: convert the free-text item list into JSON.
    item_query = json_query
    if get_parts:
        item_query += " " + part_query
    item_query += suffix
    item_query = item_query.strip()

    item_docs = splitter.create_documents([items])
    return json_response(chain, item_docs, item_query, max_tries=max_tries)


def pdf_to_info_order_json(path, chain, max_tries=6, get_parts=False):
    """Run the full PDF pipeline: documents first, then both JSON extractions.

    Returns:
        dict: `info` holds the `info_json` result and `order` holds the
        `order_json` result.
    """
    docs = pdf_to_info_order_docs(path, get_parts=get_parts)
    info_part = info_json(
        chain=chain, info_docs=docs["info_docs"], max_tries=max_tries
    )
    order_part = order_json(
        chain=chain,
        order_docs=docs["order_docs"],
        max_tries=max_tries,
        get_parts=get_parts,
    )
    return dict(info=info_part, order=order_part)

In [None]:
# | hide
# | eval: false

# Build the QA chain with the default model of `qa_llm_chain`.
llm_chain = qa_llm_chain()


In [None]:
# | hide
# | eval: false

# Pick the sample PDF used by the demo cells below.
# NOTE(review): the first `data_path` assignment is overwritten by the second
# one, so only the ".../pdf" directory is used.
data_path = Path("/home/hamza/demo_files/ap/")
data_path = Path("/home/hamza/demo_files/pdf")
file_name = "wt13.pdf"
pdf = data_path / file_name


In [None]:
# | hide
# | eval: false

# Run the whole pipeline on the sample PDF; the result is a dict with "info"
# and "order" entries.
info_order_json = pdf_to_info_order_json(pdf, llm_chain)


In [None]:
# json_path = Path("/home/hamza/demo/json/")
# os.makedirs(json_path, exist_ok=True)
# with open(Path(json_path / Path(pdf).stem).with_suffix(".json"), "w") as f:
#     f.write(json.dumps({"info": info_json, "items": items_json}, indent=4))
#     # f.write(json.dumps({"info": str(info_json), "items": items_res}, indent=4))
#     # f.write(json.dumps({"info": str(info_res), "items": items_json}, indent=4))

In [None]:
# good = {
#     "wt4.pdf": {
#         "info": '{"OrderNumber": "51020", "OrderDate": "7/11/2023", "ShipVia": "UPS GROUND V70010", "Terms": "NET 30", "VendorCode": "WILSON TOOL", "VendorPhone": "651-286-6125", "PurchasingAgent": "VANDERPOOL, BRYAN", "FOB": "ST PAUL, MN", "QuoteNumber": "21652926", "Quantity": "306.75", "OrderTotal": "$306.75"}',
#         "items": 'Sure, here are the order items with full details and descriptions and prices:\n\nOrder Item Description                   Quantity    Unit Price    Total\n\n12961 12961 B [1-1/4"] Station Thick Slug Hugger, 2 Die Shape SD, 0.9590 IN, 0.0600 IN, RADIUS=0.0600 IN, $93.50 EA, $93.50\n2964 12964  Station Thick Slug Hugger, 2 Die Shape SD, 1.3010 IN, 1.2220 IN, RADIUS=0.0600 IN, $213.25 EA, $213.25\n\nTotal: $306.75',
#     },
#     "wt6.pdf": {
#         "info": "{'PurchaseOrderID': 'PO94095', 'PurchaseOrderDate': '7/17/2023', 'ShippingAddress': {'Street': '1369 Cox Avenue', 'City': 'Erlanger', 'State': 'KY', 'PostalCode': '41018', 'Country': 'USA'}}",
#         "items": "Order Items:\n\n1. American Precision, Punch Push\n\t* Part Number: 50074\n\t* Taxable: Yes\n\t* U.O.M.: 10.0000\n\t* Revision: 7/20/2023\n\t* Price: $8.5000\n\t* Extended Price: $85.0000\n2. American Precision, Punch Push\n\t* Part Number: 6604\n\t* Taxable: Yes\n\t* U.O.M.: 1.0000\n\t* Revision: 7/20/2023\n\t* Price: $466.0000\n\t* Extended Price: $466.0000\n",
#     },
#     "wt7.pdf": {
#         "info": "{'Purchase Order Number': 'PO12722', 'Order Date': '7/11/2023', 'Buyer': 'dspooner', 'Vendor': 'WILSON TOOL INTERNATIONAL INC', 'Ship To': '4 Commerce Way Arden NC 28704', 'Vendor Phone': '(612) 426-1384 Ext. 0000', 'Order Total': '$137.50'}",
#         "items": "Order Items:\n\n1. STATION THICK POSITIVE DIE\nDescription: STATION THICK POSITIVE DIE\nFOB: Each\nUnit Price: $68.75\nExtension Price: $68.75\n\n2. STATION THICK POSITIVE DIE\nDescription: STATION THICK POSITIVE DIE\nFOB: Each\nUnit Price: $68.75\nExtension Price: $68.75",
#     },
#     "wt13.pdf": {
#         "info": "{'orderNumber': '21653677', 'customerNumber': '1016197', 'shipToAddress': {'street': '12912 Farnham Avenue', 'city': 'White Bear Lake', 'state': 'Minnesota', 'postalCode': '55110', 'country': 'U.S.A.'}}",
#         "items": "Order Items:\n\n1. STATION THICK POSITIVE DIE\nDescription: STATION THICK POSITIVE DIE\nFOB: Each\nUnit Price: $68.75\nExtension Price: $68.75\n\n2. STATION THICK POSITIVE DIE\nDescription: STATION THICK POSITIVE DIE\nFOB: Each\nUnit Price: $68.75\nExtension Price: $68.75",
#     },
#     "wt25.pdf": {
#         "info": "{'PurchaseOrderNumber': '155500', 'SupplierNumber': 'PONo', 'OrderDate': '6/29/2023', 'Tel': '262-343-8690', 'Fax': '262-343-8689', 'ShippingAddress': {'StreetAddress': '624 TowerDrive', 'City': 'Fredonia', 'State': 'WI', 'PostalCode': '53021', 'Country': 'USA'}}",
#         "items": "| Order Item | Part No | Description | Quantity | Price |\n| --- | --- | --- | --- | --- |\n| 1 | P5367-IMPAXVARIABLE | P5367-IMPAXVARIABLEINCH | 2 | 0.00 |\n| 2 | P5367-IMPAXVARIABLE | P5367-IMPAXVARIABLEINCH | 4 | 0.00 |\n| 3 | P5367-IMPAXVARIABLE | P5367-IMPAXVARIABLEINCH | 6 | 0.00 |\n| Grand Total | 0.00 |\n\nNote: The answer is in a table format, with the order items listed in the first column, their part numbers listed in the second column, the descriptions listed in the third column, the quantities listed in the fourth column, and the prices listed in the fifth column. The grand total is listed at the bottom of the table.",
#     },
#     "wt26.pdf": {
#         "info": "{'orderNumber': '0000193617', 'purchaseDate': '6/29/2023', 'shipToAddress': {'street': '4328 S York Hwy', 'city': 'Saint Paul', 'state': 'MN', 'postalCode': '55170-7676', 'country': 'USA'}}",
#         "items": "Order Items:\n\n1. MISC (50075)\n* Description:\n* Ordered: 20.000\n* Received Back: 0.000\n* UOM: Each\n* Unit Cost: 8.750\n* Amount: 175.00\n2. MISC (8141)\n* Description:\n* Ordered: 20.000\n* Received Back: 0.000\n* UOM: Each\n* Unit Cost: 4.750\n* Amount: 95.00\n3. MISC (51083)\n* Description:\n* Ordered: 20.000\n* Received Back: 0.000\n* UOM: Each\n* Unit Cost: 16.500\n* Amount: 330.00\n4. MISC (50079)\n* Description:\n* Ordered: 20.000\n* Received Back: 1.000\n* UOM: Each\n* Unit Cost: 1.000\n* Amount: 20.00\n\nFreight: 0.00\n\nTotal: 620.00",
#     },
#     "wt27.pdf": {
#         "info": "{'Purchase Order ID': '140916', 'Purchase Order Date': '7/13/23', 'Ship To Address': {'Street Address': '3855 64th Avenue SE', 'City': 'Calgary', 'Province': 'AB', 'Postal Code': 'T2C 2V5', 'Country': 'Canada'}}",
#         "items": "Order Items:\n\n1. Trumpf Size 1 Flat Punch Shape 2.36mm\nDescription: Flat punch shape\nQuantity: 2\nUnit Price: $108.50\n\n2. Trumpf Size 1 Cupped GL Die Shape RT\nDescription: Cupped GL die shape RT\nQuantity: 2\nUnit Price: $110.75\n\nTotal PO Value: $438.50",
#     },
#     "wt41.pdf": {
#         "info": "{'PO Number': '58900', 'Vendor Name': 'KATHY LIMANEN', 'Ship To Address': {'Street': '5657 Prospect Street', 'City': 'High Point', 'State': 'NC', 'Postal Code': '27263', 'Country': 'USA'}}",
#         "items": "Order Items:\n\nLine Part Number/Rev/Description     Order Qty         Unit Price   Ext Price\n\n1  25771                             2.00 EA         100.50000 /1  201.00\n\n* Trumpf 241 Flat Punch Shape 2.36mm-30.00mm RT 0.5000 IN X 0.1250 INFLAT,ULTIMA PREMIUM PUNCH STEEL\n\n\nOrder Quantity: 2.00 EA\nUnit Price: 100.50000 / 1\nExtension Price: 201.00\n\n\n\n2  26740                             2.00 EA         110.75000 /1  221.50\n\n* Trumpf Size 1 Cupped GL Die Shape RT 0.5000 IN X 0.1250 IN 0.0120 IN, CLEARANCESHAPE 1=0.0000 DEG\n\n\nOrder Quantity: 2.00 EA\nUnit Price: 110.75000 / 1\nExtension Price: 221.50\n\n\nNote:\nThe order items with full details and descriptions and prices are:\n* Trumpf 241 Flat Punch Shape 2.36mm-30.00mm RT 0.5000 IN X 0.1250 INFLAT,ULTIMA PREMIUM PUNCH STEEL (Order Quantity: 2.00 EA, Unit Price: 100.50000 / 1, Extension Price: 201.00)\n* Trumpf Size 1 Cupped GL Die Shape RT 0.5000 IN X 0.1250 IN 0.0120 IN, CLEARANCESHAPE 1=0.0000 DEG (Order Quantity: 2.00 EA, Unit Price: 110.75000 / 1, Extension Price: 221.50)\n\nPlease let me know if you need further assistance.",
#     },
#     "wt45.pdf": {
#         "info": "{'Order Number': '17512', 'Order Date': '7/13/2023', 'Vendor Phone': '800-328-9646', 'Purchasing Agent': 'Joe Astarita', 'Vendor FAX': '123456789', 'FOB': ',', 'Quantity Part Number': '123456', 'Date': '7/13/2023', 'Unit': '183.17', 'Ship Via': 'United Parcel Service'}",
#         "items": "Order Items:\n\n* P/N 2385 A - TOOLING - CUSTOM - $39.5200 EA - $39.52\n* P/N 13045 A - TOOLING - CUSTOM - $10.4100 EA - $10.41\n* P/N 2907 A - TOOLING - CUSTOM - $133.2400 EA - $133.24\nTotal Order Amount: $183.17",
#     },
#     # "wt49.pdf": {
#     #     "info": "{'PO': 17938, 'DATE': '24/06/2023', 'BILL TO': 'TROQUELADOS Y LAMINADOS MONTERREY', 'RFC': 'TLM85122335A', 'PROVIDER': 'PI0133 Mariano Escobedo, 3587', 'SHIP TO': {'STREET_ADDRESS': 'BLVD. DIAZ ORDAZ', 'CITY': 'SANTA MARIA', 'POSTAL_CODE': '64500', 'COUNTRY': 'MX'}}",
#     #     "items": 'Order Items:\n1. GF6048204900 - 1882 E [4-1/2"] STATION THICK STRIPPER\nDescription: 1882 E [4-1/2"] STATION THICK STRIPPER\nQuantity: 1\nPrice: $118.80\n\n2. GF6048204900 - SHIPPING & IMPORT\n\nDescription: SHIPPING & IMPORT\n\nQuantity: 1\n\nPrice: $120.00\n\n\nTotal: $248.80\n\n\nPlease provide me with the exact details of each order item along with its description, quantity, and price.',
#     # },
#     "wt61.pdf": {
#         "info": "{'order': {'purchase_order_number': '863959', 'purchase_date': '6/29/2023', 'shipping_address': {'street_address': '845 Corporate Way', 'city': 'Sumter', 'state': 'US', 'postal_code': '29154', 'country': 'US'}}}",
#         "items": "Order Items:\n\n1. MAINT_SUPPLIES\nDescription: Trumpf Size 1 To Size 2 Trumpf Die Adapter For\nQuantity: 12\nUnit Cost: 244.93\nTotal Cost: 2,939.16\n\nPlease let me know if you want me to do anything else.",
#     },
# }

In [None]:
# | hide
import nbdev

# Export the cells marked `# | export` from the notebook(s) to the package.
nbdev.nbdev_export()