In [3]:
# Install the notebook's dependencies. `%pip` installs into the environment of the
# running kernel, whereas `!pip` runs whichever `pip` is first on the shell PATH.
%pip install -r requirements.txt



In [4]:
import warnings
# Ignore every warning for the rest of the session: no category, message or module
# restriction is passed, so the filter applies to all of them.
warnings.filterwarnings('ignore')

In [5]:

import os
import sys

from langchain.llms.bedrock import Bedrock

module_path = ".."
sys.path.append(os.path.abspath(module_path))
from modules.utils import bedrock

# ---- ⚠️ Un-comment and edit the lines below as needed for your AWS setup ⚠️ ----
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
# os.environ["AWS_PROFILE"] = ""
# os.environ["BEDROCK_ASSUME_ROLE"] = ""  # E.g. "arn:aws:..."

# Client requested with runtime=False (kept separately from the runtime client below).
boto3_bedrock = bedrock.get_bedrock_client(
    assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE"),
    region=os.environ.get("AWS_DEFAULT_REGION"),
    runtime=False,
)

# Client requested with the helper's default `runtime` setting; this is the one
# handed to the LLM wrapper below.
bedrock_runtime = bedrock.get_bedrock_client(
    assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE"),
    region=os.environ.get("AWS_DEFAULT_REGION"),
)

# Inference parameters forwarded to the model through `model_kwargs`.
model_parameter = {
    "temperature": 0.0,
    "top_p": 0.5,
    "top_k": 250,
    "max_tokens_to_sample": 2000,
    "stop_sequences": ["\n\n Human: bye"],
}

llm = Bedrock(
    client=bedrock_runtime,
    model_id="anthropic.claude-v2",
    model_kwargs=model_parameter,
)

Create new client
  Using region: us-east-1
boto3 Bedrock client successfully created!
bedrock(https://bedrock.us-east-1.amazonaws.com)
Create new client
  Using region: us-east-1
boto3 Bedrock client successfully created!
bedrock-runtime(https://bedrock-runtime.us-east-1.amazonaws.com)


In [6]:
# Load the product catalog that the vector index is built from.
import pandas as pd
parquet_file_path = "processed/grainger_products.parquet"
print("Attempting to load file from:", parquet_file_path)

# Now attempt to load the file
try:
    df = pd.read_parquet(parquet_file_path)
except FileNotFoundError as e:
    # Report the problem, then stop the cell: without `df` every later cell would
    # fail with an unrelated NameError instead of the real cause.
    print("Error loading file:", e)
    raise
else:
    print("File loaded successfully!")

print(df.head())


Attempting to load file from: processed/grainger_products.parquet
File loaded successfully!
                          Brand    Code  \
0  LION FIRE BOOTS BY THOROGOOD   3XRG7   
1           GLOWEAR BY ERGODYNE   1CXK5   
2                      CARHARTT  491V68   
3                      TRIPLETT  794UC5   
4                        DEWALT  492U19   

                                                Name  \
0  Insulated Firefighter Boots: Insulated, Steel,...   
1  GLOWEAR BY ERGODYNE Baseball Cap: Orange, Univ...   
2  CARHARTT Bib Overalls: Men's, XL ( 42 in x 32 ...   
3  TRIPLETT Combustible Gas Detector: Audible/Vib...   
4  DEWALT Heated Jacket: Men's, S, Black, Up to 9...   

                                       PictureUrl600    Price  \
0  https://static.grainger.com/rp/s/is/image/Grai...  $197.55   
1  https://static.grainger.com/rp/s/is/image/Grai...   $13.93   
2  https://static.grainger.com/rp/s/is/image/Grai...   $95.79   
3  https://static.grainger.com/rp/s/is/image/Grai...

In [7]:
# AS A DOCUMENT
# Automates the process and optimizes for large and changing data sets.
from langchain.embeddings import BedrockEmbeddings
from langchain.vectorstores import FAISS
import pandas as pd
import numpy as np


class Document:
    """Plain container pairing a piece of text with its metadata.

    Both constructor arguments are stored unchanged as the attributes
    ``page_content`` and ``metadata``.
    """

    def __init__(self, page_content, metadata):
        self.page_content, self.metadata = page_content, metadata
        
# Initialize the Titan Embeddings Model
print("Initializing Titan Embeddings Model...")
bedrock_embeddings = BedrockEmbeddings(
    model_id="amazon.titan-embed-text-v1",
    client=bedrock_runtime,
)
print("Titan Embeddings Model initialized.")

# Build one Document per catalog row. The searchable text is
# "<Code> <Name> <Brand> <Description>", with a missing description rendered as an
# empty string; the metadata keeps the raw column values.
documents = []
for _, row in df.iterrows():
    page_content = f"{row['Code']} {row['Name']} {row['Brand']} {row['Description'] if pd.notna(row['Description']) else ''}"
    metadata = {
        field: row[field]
        for field in ('Brand', 'Code', 'Name', 'Description', 'Price')
    }
    documents.append(Document(page_content=page_content, metadata=metadata))


# Preview the first five documents (at most 200 characters of text each).
print("Structured documents created:")
for idx, doc in enumerate(documents[:5], start=1):
    print(f"Document {idx} of {len(documents)}:")
    print(doc.page_content[:200])
    print()

# Embed every document and index the vectors in FAISS.
print("Creating FAISS vector store from structured documents...")
vectorstore_faiss_doc = FAISS.from_documents(documents, bedrock_embeddings)
print("FAISS vector store created.")



Initializing Titan Embeddings Model...
Titan Embeddings Model initialized.
Structured documents created:
Document 1 of 1736:
3XRG7 Insulated Firefighter Boots: Insulated, Steel, 10-1/2, M, Structural, 1 PR LION FIRE BOOTS BY THOROGOOD <p>Thorogood® 807-6003 Insulated Boots are designed for use by firefighters in demanding a

Document 2 of 1736:
1CXK5 GLOWEAR BY ERGODYNE Baseball Cap: Orange, Universal, Baseball Hat Hat, Polyester, Gen Purpose GLOWEAR BY ERGODYNE <p>Baseball hats have a curved brim that shades the eyes from sun to reduce eye 

Document 3 of 1736:
491V68 CARHARTT Bib Overalls: Men's, XL ( 42 in x 32 in ), Navy, Cotton, Zipper, Zipper, 9 Pockets CARHARTT <p>Overalls (sometimes called bib overalls) are sleeveless garments that cover the torso and

Document 4 of 1736:
794UC5 TRIPLETT Combustible Gas Detector: Audible/Vibration/Visual Indicator, Rechargeable Li-Po Battery TRIPLETT 

Document 5 of 1736:
492U19 DEWALT Heated Jacket: Men's, S, Black, Up to 9 hr, 36 in Max Chest

In [8]:
# ENTER INITIAL INPUT HERE
# Free-text customer request; it is embedded with `bedrock_embeddings.embed_query`
# in the following cell.

customer_input = "I am looking for waterproof insulated boots for my men working on my commercial deep sea fishing boat in the arctic. Must have large sizes"


In [9]:

# Embed the customer request with the Titan embeddings model.
query_embedding_doc = bedrock_embeddings.embed_query(customer_input)
print("Customer input processed.")

# Wrap the embedding in a numpy array.
np_array_query_embedding_doc = np.array(query_embedding_doc)
print("Query embedding converted to numpy array.")

# Show the resulting vector on the line after its label.
print("Resulting query embedding:", np_array_query_embedding_doc, sep="\n")


Customer input processed.
Query embedding converted to numpy array.
Resulting query embedding:
[-0.9609375  -0.13574219 -0.06787109 ...  0.16796875 -0.65234375
 -0.62890625]


In [10]:
# Rebinds `customer_input` to a different request (sun hats); the boots request
# assigned earlier in the notebook is no longer available under this name.
customer_input = "I am looking for hats to protect my men from the sun while working out in road construction in Arizona heat. I have a large company and need a solution that I can buy in bulk."

In [20]:
import re


def extract_customer_attributes(customer_input, model=None):
    """Ask the LLM to extract customer attributes from a free-text request.

    The prompt asks for industry, size, sustainability focus, inventory-manager
    status and location, returned as JSON inside ``<attributes></attributes>`` tags.

    Args:
        customer_input: The customer's free-text request.
        model: Optional callable that takes the prompt string and returns the
            completion text. Defaults to the notebook-level ``llm``.

    Returns:
        The parsed JSON content of the ``<attributes>`` block, or ``{}`` when the
        completion has no such block or its content is not valid JSON.
    """
    # Imported here so the function does not depend on a later cell's `import json`.
    import json

    # Define the NER prompt with placeholders for the customer input
    ner_prompt = """Human: Find industry, size, Sustainability Focus, Inventory Manager, and the location in the customer input.
    Instructions:
    The industry can be one of the following: Manufacturing, Warehousing, Government and Public Safety, Education, Food and Beverage Distribution, Hospitality, Property Management, Retail, or Other
    The size can be one of the following: Small Businesses (Smaller companies might prioritize cost-effective solutions and fast shipping options), or Large Enterprises (Larger organizations may require more comprehensive solutions, including strategic services like inventory management and safety consulting), Womens, Other
    The Sustainability Focused true or false meaning Environmentally Conscious Buyers: Customers interested in sustainability solutions, looking for products that focus on energy management, water conservation, waste reduction, and air quality improvement, or NOT Environmentally Conscious Buyers,
    The Inventory Manager true or false meaning a purchaser in large amounts to supply an organizational group, versus an individual user purchasing for personal use,
    The output must be in JSON format inside the tags <attributes></attributes>

    If the information of an entity is not available in the input then don't include that entity in the JSON output

    Begin!

    Customer input: {customer_input}
    Assistant:""".format(customer_input=customer_input)

    # Process the customer input with the NER model
    complete = llm if model is None else model
    entity_extraction_result = complete(ner_prompt).strip()

    # Extract the attributes from the processed result
    result = re.search('<attributes>(.*?)</attributes>', entity_extraction_result, re.DOTALL)
    if not result:
        return {}

    try:
        # Convert the attributes string to JSON
        return json.loads(result.group(1))
    except json.JSONDecodeError as e:
        # A malformed block is treated like a missing one so callers still get a
        # usable (empty) attribute set instead of an exception.
        print(f"Error decoding JSON: {str(e)}")
        return {}


In [21]:
# ## GET LIST OF PRODUCTS AND CODES
# from langchain.chains import RetrievalQA
# from langchain.prompts import PromptTemplate
# prompt_template2 = """Human: Extract list of upto 5 products and their respective physical IDs from catalog that answer the user question. 
# The catalog of products is provided under <catalog></catalog> tags below.
# <catalog>
# {context}
# </catalog>
# Question: {question}
# 
# The output should be a json of the form <products>[{{"product": <description of the product from the catalog>, "code":<code of the product from the catalog>}}, ...]</products>
# Skip the preamble and always return valid json.
# Assistant: """
# PROMPT = PromptTemplate(
#     template=prompt_template2, input_variables=["context", "question"]
# )
# 
# # Use RetrievalQA customizations for improving Q&A experience
# search_index_get_answer_from_llm = RetrievalQA.from_chain_type(
#     llm=llm,
#     chain_type="stuff",
#     retriever=vectorstore_faiss_doc.as_retriever(
#         search_type="similarity", search_kwargs={"k": 6}
#     ),
#     return_source_documents=False,
#     chain_type_kwargs={"prompt": PROMPT},
# )


In [40]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Prompt for the retrieval chain. `{context}` and `{question}` are the two template
# variables declared on PROMPT below; the doubled braces (`{{` / `}}`) are escapes
# that render as literal braces in the final prompt text.
# The model is asked for two tagged sections: a <products> JSON list and a
# <response> message.
prompt_template3 = """Human: Extract list of upto 5 products and their respective physical IDs from catalog that answer the user question. 
The catalog of products is provided under <catalog></catalog> tags below.
<catalog>
{context}
</catalog>
Question: {question}

The output should be a json of the form <products>[{{"product": <description of the product from the catalog>, "code":<code of the product from the catalog>}}, ...]</products> for me to process.
Also, provide a user-readable message responding in full to the question with all the of the information to display to the user in the form <response>{{message}}</response>.
Skip the preamble and always return valid json.
Assistant: """

PROMPT = PromptTemplate(
    template=prompt_template3, input_variables=["context", "question"]
)

# Use RetrievalQA customizations for improving Q&A experience
# The retriever runs a similarity search over the FAISS store with k=6, and the
# chain is built with the "stuff" chain type and the custom PROMPT above.
search_index_get_answer_from_llm = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore_faiss_doc.as_retriever(
        search_type="similarity", search_kwargs={"k": 6}
    ),
    return_source_documents=False,
    chain_type_kwargs={"prompt": PROMPT},
)

In [22]:
# Call for reviews:
# TODO: review lookup is not implemented. While this stays None,
# process_chat_question does not append any reviews to chat_history.
reviews_dict = None

In [23]:
import json

def response_to_json(recs_response):
    """Parse a ``<products>[...]</products>`` model reply into a products dict.

    Args:
        recs_response: Model reply. After surrounding whitespace is stripped it
            must start with ``<products>`` and end with ``</products>``.

    Returns:
        ``{"products": [{"product": ..., "code": ...}, ...]}`` on success; a
        missing ``product`` or ``code`` key defaults to ``""``.
        ``None`` (after printing the reason) when the tags are missing, the payload
        is not valid JSON, the payload is not a list, or a list entry is not a
        JSON object.
    """
    recs_response = recs_response.strip()  # Remove leading/trailing whitespace

    # Check if the response starts and ends with expected JSON markers
    if not (recs_response.startswith("<products>") and recs_response.endswith("</products>")):
        print("Error: Unexpected format of recs_response")
        return None

    json_content = recs_response[len("<products>") : -len("</products>")].strip()

    try:
        parsed_response = json.loads(json_content)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {str(e)}")
        return None

    # Every entry must be a JSON object; otherwise `.get` below would raise
    # AttributeError instead of reporting a format problem.
    if not isinstance(parsed_response, list) or not all(
        isinstance(product_info, dict) for product_info in parsed_response
    ):
        print("Error: Unexpected format of parsed response")
        return None

    products_list = [
        {
            "product": product_info.get("product", ""),
            "code": product_info.get("code", ""),
        }
        for product_info in parsed_response
    ]
    return {"products": products_list}

In [42]:
import json
import re

def split_process_and_message_from_response(recs_response):
    """Split a model reply into its user-facing message and its product list.

    The reply is expected to contain a ``<products>[...]</products>`` JSON list and,
    optionally, a ``<response>...</response>`` message.

    Args:
        recs_response: Raw model reply text.

    Returns:
        ``(message, {"products": [{"product": ..., "code": ...}, ...]})`` on
        success. ``message`` is the stripped ``<response>`` content, or ``None``
        when the reply has no such block; a missing ``product`` or ``code`` key
        defaults to ``""``.
        ``(None, None)`` (after printing the reason) when there is no
        ``<products>`` block, its content is not valid JSON, it is not a list, or a
        list entry is not a JSON object.
    """
    recs_response = recs_response.strip()  # Remove leading/trailing whitespace

    # Extract the message
    message_match = re.search('<response>(.*?)</response>', recs_response, re.DOTALL)
    message = message_match.group(1).strip() if message_match else None

    # Extract the products. The regex pairs the opening tag with the nearest closing
    # tag after it, so a stray "</products>" earlier in the reply cannot produce an
    # empty slice.
    products_match = re.search('<products>(.*?)</products>', recs_response, re.DOTALL)
    if products_match is None:
        print("Error: Unexpected format of recs_response")
        return None, None

    try:
        parsed_response = json.loads(products_match.group(1).strip())
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {str(e)}")
        return None, None

    # Every entry must be a JSON object; otherwise `.get` below would raise
    # AttributeError instead of reporting a format problem.
    if not isinstance(parsed_response, list) or not all(
        isinstance(product_info, dict) for product_info in parsed_response
    ):
        print("Error: Unexpected format of parsed response")
        return None, None

    products_list = [
        {
            "product": product_info.get("product", ""),
            "code": product_info.get("code", ""),
        }
        for product_info in parsed_response
    ]
    return message, {"products": products_list}

In [24]:
# Module-level conversation log that process_chat_question appends to (and clears
# on request). It starts with a single " " placeholder entry.
chat_history = [" "]

In [44]:
from langchain.chains import conversation


def process_chat_question(question, clear_history=False):
    """Answer one customer question with the retrieval chain and log the exchange.

    Steps: optionally reset the module-level ``chat_history``, record the question,
    enrich the question with the attributes from ``extract_customer_attributes``,
    run ``search_index_get_answer_from_llm`` on the enriched text, print the raw
    reply, and split it with ``split_process_and_message_from_response``.

    Args:
        question: The customer's free-text question.
        clear_history: When true, ``chat_history`` is emptied before the question
            is recorded.

    Returns:
        The ``(message, products)`` tuple produced by
        ``split_process_and_message_from_response``; both items are ``None`` when
        the reply could not be parsed.

    Raises:
        ValueError: Re-raised unchanged unless its text contains
            "AccessDeniedException". In that case a local ``StopExecution``
            subclass of ``ValueError`` with a no-op ``_render_traceback_`` is
            raised instead (presumably to keep the notebook output free of a
            traceback).

    NOTE(review): ``chat_history`` is passed to the chain as an extra input, but
    the prompt configured for the chain in this notebook only declares ``context``
    and ``question`` - confirm whether the history is meant to reach the model.
    """
    try:
        if clear_history:
            chat_history.clear()

        # The question is logged as a one-element list.
        chat_history.append([question])

        # Enrich the question with the extracted attributes, appended as text.
        attributes = extract_customer_attributes(question)
        enriched_query = "{} {}".format(question, attributes)

        # OBTAIN RESPONSE
        raw_reply = search_index_get_answer_from_llm.run(
            query=enriched_query,
            chat_history=chat_history,
        )
        print(raw_reply)
        message, product_list_as_json = split_process_and_message_from_response(raw_reply)

        # UPDATE HISTORY
        if product_list_as_json is not None:
            chat_history.append(product_list_as_json['products'])
        if reviews_dict is not None:
            chat_history.append(reviews_dict)

        return message, product_list_as_json

    except ValueError as error:
        if "AccessDeniedException" not in str(error):
            raise error

        class StopExecution(ValueError):
            def _render_traceback_(self):
                pass

        raise StopExecution
        
# Ask the boots question, then show the parsed product list followed by the message.
customer_input2 = "I am looking for waterproof insulated boots for my men working for me on a commercial fishing boat in the Arctic cold."
message, response_as_json = process_chat_question(customer_input2)
print(response_as_json)
print(message)


 <products>
[
  {"product": "These half rubber-dipped neoprene boots are flexible and form-fitting for a snug, secure fit. They have a waterproof rubber coating that covers the bottom half of the outside of the boot. Knee-height boots protect the foot, lower leg, and calf to just below the knee. These insulated boots are worn when users are exposed to cold, wet conditions and below-freezing temperatures.", "code": "21A652"},
  {"product": "These half rubber-dipped neoprene boots are flexible and form-fitting for a snug, secure fit. They have a waterproof rubber coating that covers the bottom half of the outside of the boot. Knee-height boots protect the foot, lower leg, and calf to just below the knee. These insulated boots are worn when users are exposed to cold, wet conditions and below-freezing temperatures.", "code": "21A650"}
]
</products>

<response>
Based on your need for waterproof insulated boots for commercial fishing in the Arctic cold, I recommend the following 2 products f

In [26]:
# # HERE IS THE CONVERSATION
# from langchain.chains import ConversationalRetrievalChain
# from langchain.memory import ConversationBufferMemory
# from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
# 
# chat_history = [" "]
# memory_chain = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
# conversation = ConversationalRetrievalChain.from_llm(
#     llm=llm, 
#     retriever=vectorstore_faiss_doc.as_retriever(), 
#     memory=memory_chain,
#     condense_question_prompt=CONDENSE_QUESTION_PROMPT,
#     chain_type='stuff',  # 'refine',
# )
# 
# # Define a function to process the chat question
# def process_chat_question(question, clear_history=False):
#     try:
#         if clear_history:
#             chat_history.clear()  # Clear chat history if specified
# 
#         context = {
#             'question': question,
#             'chat_history': chat_history
#         }
# 
#         # Run conversation with provided context synchronously
#         chat_res = search_index_get_answer_from_llm.run(**context)
# 
#         # Append the chat prompt and result to history
#         chat_history.append([question, chat_res])
# 
#         # Optionally add response_json['products'] and reviews_dict to chat history
#         if response_json:
#             chat_history.append(response_json['products'])
# 
#         if reviews_dict:
#             chat_history.append(reviews_dict)
# 
#         return str(chat_res)  # Return chat response as a string
# 
#     except ValueError as error:
#         if "AccessDeniedException" in str(error):
#             class StopExecution(ValueError):
#                 def _render_traceback_(self):
#                     pass
#             raise StopExecution
#         else:
#             raise error


In [32]:
customer_input2 = "I am looking for waterproof insulated boots for my men working for me on a commercial fishing boat in the Arctic cold."
customer_attributes = extract_customer_attributes(customer_input2)

# Build the enriched query from this cell's own question. The previous version
# formatted `customer_input` (a different request) and re-assigned it to itself, so
# the text grew on every re-run and the enriched query was never used.
customer_input_with_attributes = "{} {}".format(customer_input2, str(customer_attributes))

# Seed the chat history with a first retrieval for the enriched query.
retrieved_data = search_index_get_answer_from_llm({"query": customer_input_with_attributes})['result']
chat_history.append(retrieved_data)

# `response` is the (message, products) tuple returned by process_chat_question.
response = process_chat_question(question=customer_input2, clear_history=False)
print(response)

 <products>
[
  {"product": "These half rubber-dipped neoprene boots are flexible and form-fitting for a snug, secure fit. They have a waterproof rubber coating that covers the bottom half of the outside of the boot. Knee-height boots protect the foot, lower leg, and calf to just below the knee. These insulated boots are worn when users are exposed to cold, wet conditions and below-freezing temperatures.", "code":"21A652"},
  {"product": "These half rubber-dipped neoprene boots are flexible and form-fitting for a snug, secure fit. They have a waterproof rubber coating that covers the bottom half of the outside of the boot. Knee-height boots protect the foot, lower leg, and calf to just below the knee. These insulated boots are worn when users are exposed to cold, wet conditions and below-freezing temperatures.", "code":"21A645"},
  {"product": "These half rubber-dipped neoprene boots are flexible and form-fitting for a snug, secure fit. They have a waterproof rubber coating that covers

In [None]:
# Example usage: follow-up question that keeps the existing chat history.
question = "What boots do you have in size 14 that are water proof?"
response = process_chat_question(question, clear_history=False)
print(response)

In [35]:

# Example usage: new topic, so the chat history is cleared first.
question = "I am looking for hats to protect my men from the sun while working out in road construction in Arizona heat. "
response = process_chat_question(question, clear_history=True)
print(response)

 <products>
[
  {"product": "CHILL-ITS BY ERGODYNE Cooling Hat: Green, Universal, Cooling Hat with Neck Shade Hat, Cooling CHILL-ITS BY ERGODYNE Baseball hats have a curved brim that shades the eyes from sun to reduce eye strain and improve visibility.", "code":"2EMK4"},
  {"product": "CHILL-ITS BY ERGODYNE Cooling Hat: Orange, Universal, Cooling Hat with Neck Shade Hat, Cooling CHILL-ITS BY ERGODYNE Baseball hats have a curved brim that shades the eyes from sun to reduce eye strain and improve visibility.", "code":"2EMK5"}
]
</products>
