## Installations

In [98]:
!pip install pandas langchain-openai qdrant-client langchain langchain-community langchain-qdrant



## Load Envs

In [3]:
from decouple import config
import os

## Read CSV File

In [9]:
import pandas as pd
import ast

In [10]:
df = pd.read_csv("./data/product_ticket_description.csv")
df.head()

Unnamed: 0,ID,product_purchased,ticket_description
0,0,GoPro Hero,I'm having an issue with the GoPro Hero. Pleas...
1,1,LG Smart TV,I'm having an issue with the LG Smart TV. Plea...
2,2,Dell XPS,I'm facing a problem with my Dell XPS. The Del...
3,3,Microsoft Office,I'm having an issue with the Microsoft Office....
4,4,Autodesk AutoCAD,I'm having an issue with the Autodesk AutoCAD....


In [11]:
from langchain_core.documents import Document

In [12]:
documents = []

for index, row in df[:250].iterrows():
    document = Document(
        page_content=row["ticket_description"],
        metadata={"product_name": row["product_purchased"]}
    )
    documents.append(document)

In [13]:
print(documents[0].page_content)

I'm having an issue with the GoPro Hero. Please assist.Your billing zip code is: 71701.We appreciate that you have requested a website address.Please double check your email address. I've tried troubleshooting steps mentioned in the user manual, but the issue persists.


In [14]:
len(documents)

250

## Document UUID

In [15]:
from uuid import uuid4

In [16]:
uuids = [str(uuid4()) for _ in range(len(documents))]

## Connect To Qdrant Cloud

In [17]:
from qdrant_client import QdrantClient


In [18]:
qdrant_client = QdrantClient(
    api_key=config("QDRANT_API_KEY"),
    url=config("QDRANT_URL")
)

In [21]:
# List availabe collections
qdrant_client.get_collections()

CollectionsResponse(collections=[])

## Create Collection

In [25]:
from qdrant_client.http.models import Distance, VectorParams

In [26]:
COLLECTION_NAME ="customer_support_tickets"

[OpenAI Embedding Models Dimensions](https://qdrant.tech/documentation/embeddings/openai/)

In [27]:
qdrant_client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=VectorParams(
        size=os.getenv("QDRANT_VECTOR_DIMENSION", 1536), 
        distance=Distance.COSINE),
)

True

## Connect To Vector Store

In [32]:
from langchain_qdrant import QdrantVectorStore
from langchain_openai import OpenAIEmbeddings

embedding_model = OpenAIEmbeddings(
    model=os.getenv("EMBEDDING_MODEL", 
                    default="text-embedding-3-small"
                )
    )

In [41]:
from langchain_qdrant import RetrievalMode

QdrantVectorStore supports 3 modes for similarity searches. They can be configured using the retrieval_mode parameter when setting up the class.

- Dense Vector Search(Default)
- Sparse Vector Search
- Hybrid Search

In [33]:
vector_store = QdrantVectorStore(
    client=qdrant_client,
    collection_name=COLLECTION_NAME,
    embedding=embedding_model,
    # retrieval_mode=RetrievalMode.DENSE
)

## Insert Records / Documents

In [37]:
vector_store.add_documents(documents=documents, ids=uuids)

['d397286a-bce1-4816-94f0-020038d9b124',
 '99972c0b-b4ff-43a3-ae3e-62e605438c16',
 '9791387d-3018-4732-bbba-d0478a3d3d7c',
 '54b6594d-0744-419b-bb74-d2f7e95ad805',
 'b05996b2-a1e2-480d-80d7-71fed26d9c88',
 'a8c5a0b2-a07b-4c95-8cf2-a4cdf2153134',
 '766ec312-bec8-4096-aa3f-9b421f8c2a37',
 'bc27c179-3ee2-4db7-afb8-851e64fe18a8',
 '78a7ea97-faa2-49e0-bab8-a84864010b83',
 'b2f5677e-b2b2-435e-b7d8-c6d4e0247939',
 '56c6b27a-9993-4ecb-b3b0-550c27fdda85',
 'b7fcc082-23c9-4d12-85fd-6de90c6efb7e',
 '593b34fe-caa2-4d16-8cce-2ea406fd53f1',
 '3ea5b87f-7c76-4738-b5bb-aa86e2ae1916',
 '62610df8-3144-4927-b79b-d3ee03355b83',
 'e0fb2144-18c3-450d-8db7-c90c1f493ce3',
 '997d3210-56ed-4891-951b-d4f9173a1115',
 'b0e62418-f03d-4bf8-9ebf-9bc561142af3',
 'bcaf4192-f4f4-4290-a237-0926a0601244',
 'af07b782-bb57-4c88-84bf-e5c59bba0dca',
 'e40cb8b1-08ef-4619-ac38-38b58af2d5e7',
 '67a7cad8-9af0-4db7-aa70-6c43ca8e0880',
 'ad270d20-2c58-46c6-ba71-0f0725193b19',
 'b63e0a0c-fbd4-4d92-94d5-8314c42cf3aa',
 '30bac209-4fe7-

## Query Vector DB

In [42]:
query = "What are the common complaints regarding the GoPro Hero?"

In [44]:
results = vector_store.similarity_search(
    query, k=2
)

By default, your document is going to be stored in the following payload structure, example:

```json
{
    "page_content": "I'm having an issue with the GoPro Hero. Please assist. I'm experiencing this issue on multiple devices of the same model, so it seems to be a widespread problem.",
    "metadata": {
        "product_name": "GoPro Hero", 
        "_id": "d36c329f-761a-4ce9-afc6-114cc7662f2e", 
        "_collection_name": "customer_support_tickets"
    }
}
```

In [45]:
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* I'm having an issue with the GoPro Hero. Please assist. I'm experiencing this issue on multiple devices of the same model, so it seems to be a widespread problem. [{'product_name': 'GoPro Hero', '_id': 'd36c329f-761a-4ce9-afc6-114cc7662f2e', '_collection_name': 'customer_support_tickets'}]
* I'm having an issue with the GoPro Hero. Please assist. We have nothing to do with it. Please try to have an informed discussion about this issue with the seller. Thank you for your time!3/28 The issue I'm facing is intermittent. Sometimes it works fine, but other times it acts up unexpectedly. [{'product_name': 'GoPro Hero', '_id': 'ac33a33b-7ac8-4487-a4c2-81c8e4536ce1', '_collection_name': 'customer_support_tickets'}]


### Similarity Search With Score

In [48]:
results = vector_store.similarity_search_with_score(
    query=query, k=2
)

In [49]:

for doc, score in results:
    print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")

* [SIM=0.609396] I'm having an issue with the GoPro Hero. Please assist. I'm experiencing this issue on multiple devices of the same model, so it seems to be a widespread problem. [{'product_name': 'GoPro Hero', '_id': 'd36c329f-761a-4ce9-afc6-114cc7662f2e', '_collection_name': 'customer_support_tickets'}]
* [SIM=0.600445] I'm having an issue with the GoPro Hero. Please assist. We have nothing to do with it. Please try to have an informed discussion about this issue with the seller. Thank you for your time!3/28 The issue I'm facing is intermittent. Sometimes it works fine, but other times it acts up unexpectedly. [{'product_name': 'GoPro Hero', '_id': 'ac33a33b-7ac8-4487-a4c2-81c8e4536ce1', '_collection_name': 'customer_support_tickets'}]


### Metadata Filtering

In [51]:
from qdrant_client.http import models

In [52]:
results = vector_store.similarity_search(
    query=query,
    k=2,
    filter=models.Filter(
        should=[
            models.FieldCondition(
                key="metadata.product_name",
                match=models.MatchValue(
                    value="Microsoft Office"
                ),
            ),
        ]
    ),
)

In [53]:
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")

* I'm having an issue with the Microsoft Office. Please assist.If you have a problem you're interested in and I'd love to see this happen, please check out the Feedback. I've already contacted customer support multiple times, but the issue remains unresolved. [{'product_name': 'Microsoft Office', '_id': '54b6594d-0744-419b-bb74-d2f7e95ad805', '_collection_name': 'customer_support_tickets'}]
* I'm having an issue with the Microsoft Office. Please assist. I'm unable to find any results I've tried using different cables, adapters, or peripherals with my Microsoft Office, but the issue persists. [{'product_name': 'Microsoft Office', '_id': '505702b7-7cd8-46fc-8449-7bd503cb0a1c', '_collection_name': 'customer_support_tickets'}]


## Building ChatBot

In [58]:
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from typing import List

In [56]:
llm = ChatOpenAI(api_key=config("OPENAI_API_KEY"))

In [59]:
class Entities(BaseModel):
    """Identifying information about Products."""

    names: List[str] = Field(
        ...,
        description="All product names that appear in the text"
    )

In [61]:
product_extraction_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting product names from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)

product_extraction_chain = product_extraction_prompt | llm.with_structured_output(Entities)


In [63]:
product_extraction_chain.invoke("I like my GoPro Hero 10 camera.")

Entities(names=['GoPro Hero 10 camera'])

### Actual Bot

In [76]:
def information_retriever(question: str) -> str:
    """
    Search and retrieve information from the Qdrant vector store.
    """
    
    # Get product name from the question
    product_name = product_extraction_chain.invoke(question)
    
    # Filter the search results based on the product name
    unstructured_data_results = vector_store.similarity_search(
        query=query,
        k=2,
        filter=models.Filter(
            should=[
                models.FieldCondition(
                    key="metadata.product_name",
                    match=models.MatchValue(
                        value=product_name.names[0]
                    ),
                ),
            ]
        ),
    )
    
    return unstructured_data_results

In [77]:
from langchain_core.runnables import (
    RunnableParallel,
    RunnablePassthrough,
)

In [78]:
bot_chat_template = """Answer the question based only on the following context:
            {context}

            Question: {question}
            Use natural language and be concise.
            Answer:
            Context Used:
            You are also required to return the context used to answer the question. The context should be the whole context provided 
            to you, do not modify it. If no context was provided do not generate any context, just say no context was provided:
        """      

In [79]:
class ResponseFormat(BaseModel):
    """Identifying information about Products."""

    answer: str = Field(
        ...,
        description="Your response to the user query."
    )
    sources: List[str] = Field(
        ...,
        description="The sources, contexts used to generate the response."
    )

In [80]:
bot_chat_prompt = ChatPromptTemplate.from_template(bot_chat_template)
chat_chain = (
    RunnableParallel(
        {
            "context": information_retriever,
            "question": RunnablePassthrough(),
        }
    )
    | bot_chat_prompt
    | llm.with_structured_output(ResponseFormat)
)  

In [83]:
response = chat_chain.invoke("What are the common complaints regarding the GoPro Hero?")

In [85]:
response

ResponseFormat(answer='The common complaints regarding the GoPro Hero include widespread issues experienced on multiple devices of the same model and intermittent performance problems.', sources=['Document(metadata={\'product_name\': \'GoPro Hero\', \'_id\': \'d36c329f-761a-4ce9-afc6-114cc7662f2e\', \'_collection_name\': \'customer_support_tickets\'}, page_content="I\'m having an issue with the GoPro Hero. Please assist. I\'m experiencing this issue on multiple devices of the same model, so it seems to be a widespread problem."), Document(metadata={\'product_name\': \'GoPro Hero\', \'_id\': \'ac33a33b-7ac8-4487-a4c2-81c8e4536ce1\', \'_collection_name\': \'customer_support_tickets\'}, page_content="I\'m having an issue with the GoPro Hero. Please assist. We have nothing to do with it. Please try to have an informed discussion about this issue with the seller. Thank you for your time!3/28 The issue I\'m facing is intermittent. Sometimes it works fine, but other times it acts up unexpecte

In [84]:
print(response)

answer='The common complaints regarding the GoPro Hero include widespread issues experienced on multiple devices of the same model and intermittent performance problems.' sources=['Document(metadata={\'product_name\': \'GoPro Hero\', \'_id\': \'d36c329f-761a-4ce9-afc6-114cc7662f2e\', \'_collection_name\': \'customer_support_tickets\'}, page_content="I\'m having an issue with the GoPro Hero. Please assist. I\'m experiencing this issue on multiple devices of the same model, so it seems to be a widespread problem."), Document(metadata={\'product_name\': \'GoPro Hero\', \'_id\': \'ac33a33b-7ac8-4487-a4c2-81c8e4536ce1\', \'_collection_name\': \'customer_support_tickets\'}, page_content="I\'m having an issue with the GoPro Hero. Please assist. We have nothing to do with it. Please try to have an informed discussion about this issue with the seller. Thank you for your time!3/28 The issue I\'m facing is intermittent. Sometimes it works fine, but other times it acts up unexpectedly.")']


## Custom Ouput Parse

In [95]:
import json
import ast
from typing import Dict, List
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers import BaseOutputParser

class ResponseFormatParser(BaseOutputParser[Dict]):
    """Custom parser for ResponseFormat exception output."""

    def parse(self, text: str) -> Dict:
        try:
            # Parse the outer JSON structure
            error_json = json.loads(text)
            
            # Extract the error message
            error_message = error_json["message"]
            
            # Find the start of the actual ResponseFormat content
            start_index = error_message.index('{"answer"')
            response_format_str = error_message[start_index:].strip()
            
            # Parse the ResponseFormat content
            response_format = json.loads(response_format_str)
            
            return {
                "answer": response_format["answer"],
                "sources": self._parse_sources(response_format["sources"])
            }
        except Exception as e:
            raise OutputParserException(f"Failed to parse the output: {e}")

    def _parse_sources(self, sources: List[str]) -> List[Dict]:
        parsed_sources = []
        for source in sources:
            # Extract metadata and page_content
            metadata_str, content = source.split(", page_content=")
            metadata_str = metadata_str.split("metadata=")[1]
            
            # Safely evaluate metadata and content
            metadata = ast.literal_eval(metadata_str)
            content = ast.literal_eval(content.rstrip(")"))
            
            parsed_sources.append({
                "metadata": metadata,
                "page_content": content
            })
        return parsed_sources

    @property
    def _type(self) -> str:
        return "response_format_exception_parser"

In [96]:
bot_chat_prompt = ChatPromptTemplate.from_template(bot_chat_template)
chat_chain_custom_parser = (
    RunnableParallel(
        {
            "context": information_retriever,
            "question": RunnablePassthrough(),
        }
    )
    | bot_chat_prompt
    | llm.with_structured_output(ResponseFormat)
    | ResponseFormatParser()
)  

In [97]:
response_custom_parser = chat_chain_custom_parser.invoke("What are the common complaints regarding the GoPro Hero?")

ValidationError: 1 validation error for Generation
text
  str type expected (type=type_error.str)

In [None]:
print(response_custom_parser)

- [Langchain PromptTemplates](https://python.langchain.com/v0.1/docs/modules/model_io/prompts/quick_start/)

- [LangChain Qdrant Official Docs](https://python.langchain.com/v0.2/docs/integrations/vectorstores/qdrant/)
