## Text Classification with RAG

#### Creating a Knowledge Base Text File

In [284]:
# Knowledge base: one sentence per support category, rendered as
# "Category <n> - <label> - <description>".
_CATEGORY_DESCRIPTIONS = (
    ("Login Issues", "Login issues often occur due to incorrect passwords or account lockouts."),
    ("App Functionality", "App crashes can be caused by outdated software or device incompatibility."),
    ("Billing", "Billing discrepancies may result from processing errors or duplicate transactions."),
    ("Account Management", "Account management includes tasks such as changing profile information, linking social media accounts, and managing privacy settings."),
    ("Performance Issues", "Performance issues can be related to device specifications, network connectivity, or app optimization."),
)

knowledge_base = [
    f"Category {number} - {label} - {description}"
    for number, (label, description) in enumerate(_CATEGORY_DESCRIPTIONS, start=1)
]

In [285]:
from pathlib import Path

file_path = "Data/knowledge_base.txt"

# open(..., "w") does not create missing folders, so make sure the parent
# directory exists before writing (a fresh checkout may not have it yet).
Path(file_path).parent.mkdir(parents=True, exist_ok=True)

# Save the knowledge base to the file, one entry per line
with open(file_path, "w") as file:
    file.writelines(f"{entry}\n" for entry in knowledge_base)

#### Using GroqCloud and the Llama 3.1 Model with Hugging Face Embeddings

In [286]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if there is one) into the process environment.
load_dotenv()

# The Groq key is required by the chat model created below. Fail early with a
# readable message: assigning the None returned by os.getenv() to os.environ
# would otherwise raise an opaque "TypeError: str expected, not NoneType".
_groq_api_key = os.getenv("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it or add it to a .env file before running this notebook."
    )
os.environ['GROQ_API_KEY'] = _groq_api_key

# NOTE(review): HF_TOKEN is treated as optional here - presumably the public
# embedding model used below can be downloaded without it; confirm if a gated
# model is ever substituted.
_hf_token = os.getenv("HF_TOKEN")
if _hf_token:
    os.environ['HF_TOKEN'] = _hf_token

In [287]:
from langchain_groq import ChatGroq

# Classification should return the same label for the same ticket on every run,
# so sampling randomness is switched off with temperature=0.
model = ChatGroq(model="llama-3.1-8b-instant", temperature=0)
model

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000002D3FCD74370>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000002D3FCD76D40>, model_name='llama-3.1-8b-instant', groq_api_key=SecretStr('**********'))

In [288]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model handed to the FAISS vector store further down.
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)



#### Creating a Retrieval-Augmented Generation Chain

In [289]:
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

##### Data Ingestion and Text Splitting

In [290]:
# Read the knowledge-base text file back in as LangChain documents and show them.
loader = TextLoader(file_path=file_path)
docs = loader.load()
docs

[Document(metadata={'source': 'Data/knowledge_base.txt'}, page_content='Category 1 - Login Issues - Login issues often occur due to incorrect passwords or account lockouts.\nCategory 2 - App Functionality - App crashes can be caused by outdated software or device incompatibility.\nCategory 3 - Billing - Billing discrepancies may result from processing errors or duplicate transactions.\nCategory 4 - Account Management - Account management includes tasks such as changing profile information, linking social media accounts, and managing privacy settings.\nCategory 5 - Performance Issues - Performance issues can be related to device specifications, network connectivity, or app optimization.\n')]

In [291]:
# Split the file into chunks of at most 200 characters.
# chunk_overlap is set explicitly: when omitted, the library default (200) would
# equal chunk_size, which makes neighbouring pieces of any entry longer than
# 200 characters overlap almost completely. The knowledge-base entries are
# independent sentences, so no overlap is wanted.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=0)
splits = text_splitter.split_documents(docs)

##### Creating a Retriever

In [292]:
# Create a Vector Store: embed every chunk and index it with FAISS
vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)

# The number of hits has to be passed via `search_kwargs`. A bare `k=3` keyword
# is not a search setting of the retriever, so it was not applied to the search
# (the retriever repr showed no search_kwargs at all).
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})  # Using top 3 results
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002D3FCD75D80>)

##### Check how the similarity search works

In [306]:
# Query the vector store directly: fetch the three chunks most similar to a sample ticket.
query = "The app crashes every time I try to upload a photo"
result = vectorstore.similarity_search(query=query, k=3)

In [307]:
# Show the documents returned by the similarity search above.
result

[Document(metadata={'source': 'Data/knowledge_base.txt'}, page_content='Category 2 - App Functionality - App crashes can be caused by outdated software or device incompatibility.'),
 Document(metadata={'source': 'Data/knowledge_base.txt'}, page_content='Category 5 - Performance Issues - Performance issues can be related to device specifications, network connectivity, or app optimization.'),
 Document(metadata={'source': 'Data/knowledge_base.txt'}, page_content='Category 1 - Login Issues - Login issues often occur due to incorrect passwords or account lockouts.')]

##### Creating Retrieval Chain using Retriever, Prompt, and Model 

In [295]:
# System message for the classifier. `{context}` is the template placeholder
# that receives the retrieved knowledge-base text; the model is told to answer
# with exactly one of the five labels, or 'I don't know' for unrelated input.
system_prompt = """
            ### Instruction
            Context:
            {context}

            Understand the context containing labels and their description and then,

            Classify the input text below into one of the following labels based on the context:
            Category 1 - Login Issues
            Category 2 - App Functionality
            Category 3 - Billing 
            Category 4 - Account Management
            Category 5 - Performance Issues

            If you dont know the answer or the input is completely irrelevant to the context just say 'I don't know'. 
            
            Just output the label and nothing else. 
        """

In [296]:
# Chat template with two messages: the system instructions (with their
# {context} placeholder) followed by the user's ticket text as {input}.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

In [297]:
# Step 1: chain that feeds documents plus the prompt to the model.
question_answer_chain = create_stuff_documents_chain(llm=model, prompt=qa_prompt)

# Step 2: put the retriever in front of it, so each input is first matched
# against the knowledge base and then classified.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

##### Trying the Support Tickets

In [298]:
# Five sample tickets; each one is wrapped as a {"text": ...} record.
_ticket_texts = (
    "My account login is not working. I've tried resetting my password twice.",
    "The app crashes every time I try to upload a photo.",
    "I was charged twice for my last subscription payment.",
    "I can't find the option to change my profile picture.",
    "The video playback is very laggy on my device.",
)

support_tickets = [{"text": message} for message in _ticket_texts]

In [299]:
# Run every sample ticket through the RAG chain and print the ticket next to
# the label the model returned, with a blank line between tickets.
for ticket in support_tickets:
    ticket_text = ticket["text"]
    print(f"Ticket: {ticket_text}")
    response = rag_chain.invoke({"input": ticket_text})
    print(f"Model Response: {response['answer']}", end="\n\n")

Ticket: My account login is not working. I've tried resetting my password twice.
Model Response: Category 1 - Login Issues

Ticket: The app crashes every time I try to upload a photo.
Model Response: Category 2 - App Functionality

Ticket: I was charged twice for my last subscription payment.
Model Response: Category 3 - Billing

Ticket: I can't find the option to change my profile picture.
Model Response: Category 4 - Account Management

Ticket: The video playback is very laggy on my device.
Model Response: Category 5 - Performance Issues



##### Using Different Support Tickets

In [300]:
# Classify a ticket that was not in the sample list and show only the label.
response = rag_chain.invoke(
    {"input": "App is running very slowly"}
)
response["answer"]

'Category 5 - Performance Issues'

In [301]:
# Account-deletion request: show only the predicted label.
response = rag_chain.invoke(
    {"input": "Want to delete my account"}
)
response["answer"]

'Category 4 - Account Management'

In [302]:
# Refund complaint: show only the predicted label.
response = rag_chain.invoke(
    {"input": "Refund not processed"}
)
response["answer"]

'Category 3 - Billing'

In [303]:
# Crash report: show only the predicted label.
response = rag_chain.invoke(
    {"input": "The app crashes very often"}
)
response["answer"]

'Category 2 - App Functionality'

In [304]:
# Access problem: show only the predicted label.
response = rag_chain.invoke(
    {"input": "Account access denied"}
)
response["answer"]

'Category 1 - Login Issues'

In [305]:
# Input unrelated to any support category, to exercise the prompt's
# 'I don't know' fallback instruction.
response = rag_chain.invoke(
    {"input": "How are you?"}
)
response["answer"]

"I don't know."