In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keyring
import os 
import snowflake.connector as sf_connector # ( https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-connect)
from snowflake.connector.pandas_tools import write_pandas # (https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#write_pandas)
import pdfplumber
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PDFPlumberLoader
from langchain.evaluation import load_evaluator
from collections import defaultdict

import numpy as np
from tqdm import tqdm
import time
import re
import json

from io import BytesIO
import fitz 
from shapely.geometry import box
from shapely.ops import unary_union
from PIL import Image, ImageDraw
import cv2
from openai import OpenAI


# Set max rows to display in pandas DataFrame 200
pd.set_option('display.max_rows', 200)

In [4]:
open_ai_api_key = keyring.get_password("openai api", "api_key")
client = OpenAI(api_key = open_ai_api_key)

account_identifier = keyring.get_password('NC_Snowflake_Trial_Account_Name', 'account_identifier')
user_name = "EMHALDEMO1"
password = keyring.get_password('NC_Snowflake_Trial_User_Password', user_name)
database = "WASHING_MACHINE_MANUALS"
schema = "PUBLIC"

print("Account Identifier: ", account_identifier)
print("User Name: ", user_name)
print("Database: ", database)
print("Schema: ", schema)

try:
    connection_parameters = {
        "account_identifier": account_identifier,
        "user": user_name,
        "password": password,
        "role": "ACCOUNTADMIN",
        "warehouse": "COMPUTE_WH",
        "database": database,
        "schema": schema
    }
except:
        connection_parameters = {
        "account_identifier": account_identifier,
        "user": user_name,
        "password": password,
        "role": "ACCOUNTADMIN",
        "warehouse": "COMPUTE_WH",
        "database": "SNOWFLAKE",
        "schema": "CORTEX"
    }


# Connect to Snowflake
conn = sf_connector.connect(
    user=connection_parameters['user'],
    password=connection_parameters['password'],
    account=connection_parameters['account_identifier'],
    warehouse=connection_parameters['warehouse'],
    database=connection_parameters['database'],
    schema=connection_parameters['schema'],
    role=connection_parameters['role']
)

cursor = conn.cursor()
cursor.execute(f" CREATE DATABASE IF NOT EXISTS {database}; ")
cursor.execute(f" CREATE SCHEMA IF NOT EXISTS {database}.{schema}; ")
cursor.execute(f" USE DATABASE {database}; ")
cursor.execute(f" USE SCHEMA {schema}; ")


Account Identifier:  EPTJRCA-HWB83214
User Name:  EMHALDEMO1
Database:  WASHING_MACHINE_MANUALS
Schema:  PUBLIC


<snowflake.connector.cursor.SnowflakeCursor at 0x21d07bbd280>

In [15]:
# Lets see the table
cursor.execute("""
    SELECT * 
    FROM DOCUMENTS;
""")

documents_df = cursor.fetch_pandas_all()
documents_df

Unnamed: 0,DOCUMENT_ID,DOCUMENT_NAME,DOC_VERSION,FILE_PATH,FILE_SIZE,CREATED_AT
0,1,WAV28KH3GB.pdf,,.\Washer_Manuals\WAV28KH3GB.pdf,5686613,2025-04-22 01:52:03.390000-07:00
1,2,WGA1420SIN.pdf,,.\Washer_Manuals\WGA1420SIN.pdf,3247850,2025-04-22 01:52:03.390000-07:00
2,3,WGG254Z0GB.pdf,,.\Washer_Manuals\WGG254Z0GB.pdf,3291555,2025-04-22 01:52:03.390000-07:00
3,101,WAV28KH3GB.pdf,,.\Washer_Manuals\WAV28KH3GB.pdf,5686613,2025-04-25 04:44:31.675000-07:00
4,102,WGA1420SIN.pdf,,.\Washer_Manuals\WGA1420SIN.pdf,3247850,2025-04-25 04:44:31.675000-07:00
5,103,WGG254Z0GB.pdf,,.\Washer_Manuals\WGG254Z0GB.pdf,3291555,2025-04-25 04:44:31.675000-07:00


In [5]:
# Lets see the table
cursor.execute("""
    SELECT * 
    FROM CHUNKS_LARGE;
""")

large_chunks_df = cursor.fetch_pandas_all()
large_chunks_df.head()

Unnamed: 0,CHUNK_ID,DOCUMENT_ID,PAGE_START_NUMBER,PAGE_END_NUMBER,CHUNK_ORDER,CHUNK_TEXT,EMBEDDING,CREATED_AT
0,1,1,0,3,0,Register your b M o ge s y n c t B e h f o r w...,"[0.048339844, 0.0158844, -0.015388489, -0.0038...",2025-04-22 01:53:38.352000-07:00
1,2,1,3,7,1,t. ¡ Up to an altitude of max. 4000 m above se...,"[0.038604736, 0.08459473, -0.023513794, -0.014...",2025-04-22 01:53:38.352000-07:00
2,3,1,7,10,2,on or lean against the appliance door. ▶ Do no...,"[0.074279785, 0.068359375, -0.0036392212, 0.02...",2025-04-22 01:53:38.352000-07:00
3,4,1,10,15,3,"ese instructions, your creases energy and wate...","[0.008338928, 0.00027441978, 0.044067383, 0.00...",2025-04-22 01:53:38.352000-07:00
4,5,1,15,22,4,ht. Water outlet connection types 4.6 Aligning...,"[0.052764893, 0.074157715, -0.033294678, 0.011...",2025-04-22 01:53:38.352000-07:00


In [7]:

prompt = """ 
    I will provide a long string of text in which i want you to identify all of the error codes and return them in a JSON format.
    It is likely that it doesn't contain any error codes
    The JSON format should be as follows:
    {
        "Error Codes": [
            {
                "Code": "<error code>",
                "Description": "<description of the error code>"
            },
            ...
        ]
    }

    Here is the text:
    
"""

idx = 7

response = client.responses.create(
    model="gpt-4o",
    instructions= prompt,
    input=large_chunks_df.loc[idx,"CHUNK_TEXT"],
)

print("Chunk: \n", large_chunks_df.loc[idx,"CHUNK_TEXT"])
print("")
print(response.output_text)

Chunk: 
 ing that has ¡ Laundry has absorbed been worn for a just a few odours hours Normal Dirt or light stains are vis- ¡ T-shirts, shirts or blouses ible are sweat-soaked or have been worn a few times ¡ Towels or bedding that has been used for up to one week Heavy Dirt or stains are clearly Tea towels, baby items or visible work clothing 11.4 Care symbols on the care labels Washing care symbols Symbol Washing process Recommended pro- gramme ⁠ ⁠ Normal Cotton ⁠ Gentle Easy-Care ⁠ Particularly gentle Delicates/Silk for washing by hand Hand wash Wool 35 en Detergents and care products Symbol Washing process Recommended pro- gramme Not suitable for machine-washing – ¡ Do not mix different liquid deter- D1etergents and care products 2 Detergents and care gents products ¡ Do not mix detergent and fabric softener Detergents and care products ¡ Do not use products that have ex- Note: Please read the information in pired or are highly congealed the Safety → Page 4 and Preventing ¡ Do not use

In [None]:
idx = 11
# This loop costs money, so be careful to spend the entire $0.06 price tag. No i will not share my api key with you.

for i in tqdm(range(0, 20), total =20, desc="Burning through the cash"):

    response = client.responses.create(
        model="gpt-4o",
        instructions= prompt,
        input=large_chunks_df.loc[idx,"CHUNK_TEXT"],
    )

    print("Chunk: \n", large_chunks_df.loc[idx,"CHUNK_TEXT"])
    print(response.output_text)
    print("\n"*2)

Burning through the cash:   5%|▌         | 1/20 [00:02<00:45,  2.41s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked. Ensure that the drain pipe and water outlet hose are not kinked or trapped. Check that the drain pump is not blocked and that the water outlet hose is not installed too high. Also, check that the pump cover is correctly assembled. Adjust detergent dosage if necessary."
        }
    ]
}
```





Burning through the cash:  10%|█         | 2/20 [00:04<00:40,  2.26s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked. Ensure that the drain pipe, water outlet hose, and drain pump are not kinked, trapped, or blocked."
        }
    ]
}
```





Burning through the cash:  15%|█▌        | 3/20 [00:06<00:34,  2.01s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Unapproved extension installed on the water drain hose."
        }
    ]
}
```





Burning through the cash:  20%|██        | 4/20 [00:09<00:39,  2.45s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked. Clean the drain pipe and the water outlet hose. Ensure that the drain pipe and water drain hose are not kinked or trapped. Drain pump is blocked. Install the water outlet hose at a maximum height of 1 metre. Correctly assemble the pump cover. Detergent dosage is too high. Remove any unapproved extensions from the water hose."
        }
    ]
}
```





Burning through the cash:  25%|██▌       | 5/20 [00:11<00:36,  2.42s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked or has other issues related to installation and assembly."
        }
    ]
}
```





Burning through the cash:  30%|███       | 6/20 [00:14<00:36,  2.62s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked; Ensure the drain pipe and water drain hose are not kinked or trapped; Drain pump is blocked; Water outlet hose is installed too high; Pump cover is not correctly assembled; Detergent dosage is too high; Unapproved extension on the water drain hose."
        }
    ]
}
```





Burning through the cash:  35%|███▌      | 7/20 [00:17<00:33,  2.56s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Unapproved extension installed on the water drain hose."
        }
    ]
}
```





Burning through the cash:  40%|████      | 8/20 [00:22<00:39,  3.30s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked. Clean the drain pipe and the water outlet hose."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water drain hose is caught or jammed. Ensure that the drain pipe and water drain hose are not kinked or trapped."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Drain pump is blocked. See 'Cleaning the drain pump', Page 50."
        },
        {
            "Code": "E:30 / -80",
            "Description": "The water outlet hose is connected too high. Install the water outlet hose at a maximum height of 1 metre."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Pump cover is not correctly assembled. Correctly assemble the pump cover."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Deterg

Burning through the cash:  45%|████▌     | 9/20 [00:24<00:33,  3.01s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked. Clean the drain pipe and water outlet hose. Ensure that the pipes are not kinked or jammed. The pump cover may not be correctly assembled or detergent dosage might be too high. Remove any water drain hose extensions."
        }
    ]
}
```





Burning through the cash:  50%|█████     | 10/20 [00:28<00:32,  3.21s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked. Clean the drain pipe and the water outlet hose. Drain pipe or water drain hose is caught or jammed. Ensure that the drain pipe and water drain hose are not kinked or trapped. Drain pump is blocked. Cleaning the drain pump. The water outlet hose is connected too high. Install the water outlet hose at a maximum height of 1 metre. Pump cover is not correctly assembled. Correctly assemble the pump cover. Detergent dosage is too high. Immediate measure: Mix one tablespoon of fabric softener with 0.5 litres of water and pour the mixture into the compartment for manual dosing. If intelligent dosing is activated, reduce the basic dosage. If you dose manually, reduce the amount of detergent for the next washing cycle with the same load."
        }
    ]
}
```





Burning through the cash:  55%|█████▌    | 11/20 [00:33<00:34,  3.83s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Unapproved extension installed on the water drain hose."
        }
    ]
}
```





Burning through the cash:  60%|██████    | 12/20 [00:36<00:28,  3.58s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked. Clean the drain pipe and the water outlet hose. Ensure that the drain pipe and water drain hose are not kinked or trapped. The drain pump is blocked. Install the water outlet hose at a maximum height of 1 metre. The pump cover is not correctly assembled. Detergent dosage is too high. Remove any unapproved extensions from the water hose."
        }
    ]
}
```





Burning through the cash:  65%|██████▌   | 13/20 [00:39<00:23,  3.30s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Possible blockages or issues with the drain pipe or water outlet hose. Check for blockages, kinks, or installations too high. Also, check for blocked drain pump or incorrectly assembled pump cover."
        }
    ]
}
```





Burning through the cash:  70%|███████   | 14/20 [00:43<00:21,  3.52s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked. Clean the drain pipe and the water outlet hose. Drain pipe or water drain hose is caught or jammed. Ensure that the drain pipe and water drain hose are not kinked or trapped. Drain pump is blocked. Install the water outlet hose at a maximum height of 1 metre. Pump cover is not correctly assembled. Correctly assemble the pump cover. Detergent dosage is too high. Remove any unapproved extensions from the water hose."
        }
    ]
}
```





Burning through the cash:  75%|███████▌  | 15/20 [00:45<00:16,  3.29s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked. Clean the drain pipe and the water outlet hose. Ensure that the drain pipe and water drain hose are not kinked or trapped. Drain pump is blocked. The water outlet hose is connected too high. Install the water outlet hose at a maximum height of 1 metre. Pump cover is not correctly assembled. Correctly assemble the pump cover. Detergent dosage is too high. Unapproved extension installed on the water drain hose. Remove any unapproved extensions from the water hose."
        }
    ]
}
```





Burning through the cash:  80%|████████  | 16/20 [00:48<00:12,  3.21s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked. Clean the drain pipe and the water outlet hose. Ensure that the drain pipe and water drain hose are not kinked or trapped. Drain pump is blocked. Install the water outlet hose at a maximum height of 1 metre. Pump cover is not correctly assembled. Detergent dosage is too high. Unapproved extension installed on the water drain hose."
        }
    ]
}
```





Burning through the cash:  85%|████████▌ | 17/20 [00:51<00:09,  3.15s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water drain hose is caught or jammed."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Drain pump is blocked."
        },
        {
            "Code": "E:30 / -80",
            "Description": "The water outlet hose is connected too high."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Pump cover is not correctly assembled."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Detergent dosage is too high."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Unapproved extension installed on the water drain hose."
        }
    ]
}
```





Burning through the cash:  90%|█████████ | 18/20 [00:54<00:06,  3.14s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked. Clean the drain pipe and the water outlet hose. Drain pipe or water drain hose is caught or jammed. Ensure that the drain pipe and water drain hose are not kinked or trapped. Drain pump is blocked. Install the water outlet hose at a maximum height of 1 metre. Pump cover is not correctly assembled. Detergent dosage is too high. Unapproved extension installed on the water drain hose."
        }
    ]
}
```





Burning through the cash:  95%|█████████▌| 19/20 [00:57<00:02,  2.84s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked. Ensure that the drain pipe and water drain hose are not kinked or trapped. The drain pump is blocked, or the water outlet hose is connected too high."
        }
    ]
}
```





Burning through the cash: 100%|██████████| 20/20 [01:02<00:00,  3.12s/it]

Chunk: 
```json
{
    "Error Codes": [
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water outlet hose is blocked."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Drain pipe or water drain hose is caught or jammed."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Drain pump is blocked."
        },
        {
            "Code": "E:30 / -80",
            "Description": "The water outlet hose is connected too high."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Pump cover is not correctly assembled."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Detergent dosage is too high."
        },
        {
            "Code": "E:30 / -80",
            "Description": "Unapproved extension installed on the water drain hose."
        }
    ]
}
```






