In [2]:
!pip install -U langchain-community
!pip install langchain sentence_transformers faiss-cpu

Collecting langchain-community
  Downloading langchain_community-0.3.9-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.8 (from langchain-community)
  Downloading langchain-0.3.9-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.21 (from langchain-community)
  Downloading langchain_core-0.3.21-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.6.1-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.23.1-py3-none-any.whl.metadata (7.5 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-jso

In [42]:
import os

from sentence_transformers import SentenceTransformer
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub

# 1. Embedding Generation Function
def embeddings_generation(text, model_name="all-mpnet-base-v2"):
    """Build a FAISS vector store with one document per input string.

    Args:
        text: Either a single string or an iterable of strings. Each string
            becomes one ``Document``.
        model_name: Name of the sentence-transformers model passed to
            ``SentenceTransformerEmbeddings``.

    Returns:
        The vector store produced by ``FAISS.from_documents``.

    Raises:
        ValueError: If ``text`` contains no strings at all.
    """
    # A bare string is one document; iterating it directly would create one
    # Document per character.
    texts = [text] if isinstance(text, str) else list(text)

    # Reject empty input up front with a clear message instead of letting it
    # fail somewhere inside the vector-store construction.
    if not texts:
        raise ValueError("embeddings_generation() needs at least one text to index")

    documents = [Document(page_content=t) for t in texts]
    embeddings = SentenceTransformerEmbeddings(model_name=model_name)
    return FAISS.from_documents(documents, embeddings)

# 2. Initialize the RAG model (HuggingFace)
def initialize_rag(repo_id="google/flan-t5-base", api_token=None):
    """Create the Hugging Face Hub LLM wrapper used to answer queries.

    The API token is a secret and must never be written into the notebook.
    It is taken from ``api_token`` when given, otherwise from the
    ``HUGGINGFACEHUB_API_TOKEN`` environment variable.

    Args:
        repo_id: Hugging Face Hub model repository to query.
        api_token: Optional token overriding the environment variable.

    Returns:
        A configured ``HuggingFaceHub`` instance.

    Raises:
        ValueError: If no token is supplied and the environment variable is
            unset or empty.
    """
    # NOTE(review): any token that was ever committed in this notebook is
    # compromised and should be revoked in the Hugging Face account settings.
    token = api_token or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    if not token:
        raise ValueError(
            "No Hugging Face API token found: set the HUGGINGFACEHUB_API_TOKEN "
            "environment variable or pass api_token=..."
        )

    return HuggingFaceHub(
        repo_id=repo_id,
        model_kwargs={"temperature": 0.5, "max_length": 512},
        huggingfacehub_api_token=token,
    )

# 3. Query Execution Function (RAG-based)
def run_query(db, query, rag_model):
    """Answer ``query`` with a RetrievalQA "stuff" chain over ``db``.

    Args:
        db: Vector store; only its ``as_retriever()`` method is used here.
        query: The full question/prompt text sent to the chain.
        rag_model: LLM object handed to ``RetrievalQA.from_chain_type``.

    Returns:
        The chain's answer. It is also printed, prefixed with
        ``"Query Result:"``.
    """
    retriever = db.as_retriever()  # expose the vector store as a retriever
    qa_chain = RetrievalQA.from_chain_type(
        llm=rag_model, chain_type="stuff", retriever=retriever
    )

    # Chain.run() is deprecated; invoke() takes the question under the
    # "query" key and returns a dict whose "result" entry holds the answer.
    result = qa_chain.invoke({"query": query})["result"]
    print("Query Result:", result)
    return result

# 4. Extract Text & Query the Model
def extract_text(text, query, rag_model):
    """Index ``text`` into a new vector store and run ``query`` against it.

    The store is built by ``embeddings_generation`` on every call and then
    handed straight to ``run_query``, which prints the answer. Nothing is
    returned.
    """
    run_query(embeddings_generation(text), query, rag_model)

# Build the LLM wrapper once; the same object is passed to every query below.
model = initialize_rag()

# Customer comments to categorize. Each list entry is one comment.
text = [
    "Bought a crocs for my kid. The rubber was broken within a month , poor quality and high price !"

]

# Prompt template for the categorization request. `{comment}` is the only
# placeholder and is filled in per comment with str.format(), so any literal
# braces added to this text later must be doubled ({{ }}).
# NOTE(review): instruction 2 treats "missing items" as a product issue while
# instruction 6 lists them among logistical issues — confirm which category
# is intended for such comments.
query ="""Please categorize the following customer feedback into one of the following categories:

Categories:
- Wrong Article
- Missing Article
- Damaged Article
- Fitting Issue
- Quality Issue
- Staff Behaviour
- OTP Issue
- Order Cancel
- No Order
- Delay Order
- Delay Punch
- Wrong Amount

Here is the comment to categorize:
{comment}

Instructions:
1. Only return the category name.
2. If the comment refers to an issue with the product (e.g., wrong or missing items), categorize it under the appropriate product-related category.
3. If the comment mentions problems like fitting or size, classify it under **Fitting Issue**.
4. If the comment refers to quality concerns (e.g., bad condition, poor quality), classify it under **Quality Issue**.
5. If the comment mentions issues with customer service or staff, categorize it under **Staff Behaviour**.
6. If the comment refers to order delays, missing items, or other logistical issues, categorize it under the corresponding logistical issue category (e.g., **Delay Order**).

Please provide only the category name. Do not add explanations or other details.
"""

# The comment list does not change inside the loop, so build the vector store
# once and share it across all queries. Rebuilding it per comment would reload
# the embedding model and re-embed every comment on each iteration.
# NOTE(review): every query therefore retrieves context from ALL comments, as
# before — confirm that is intended for per-comment categorization.
comments_db = embeddings_generation(text)

# Fill the prompt template with each comment in turn and query the model.
for c in text:
    print(c)
    current_query = query.format(comment=c)
    run_query(comments_db, current_query, model)


Bought a crocs for my kid. The rubber was broken within a month , poor quality and high price !
Query Result: Quality Issue
