<a href="https://colab.research.google.com/github/JessuSriyank/chatbot/blob/main/Task3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install langchain
!pip install langchain-community
!pip install langchain_groq
!pip install langchain_core
!pip install langchain_google_genai
!pip install pypdf
!pip install faiss-cpu
!pip install langchain_huggingface
!pip install chromadb

!pip install --upgrade transformers
!pip install --upgrade torch torchvision torchaudio
!pip install python-docx
!pip install chroma-hnswlib==0.7.6
!pip install chromadb==0.6.3
!pip install sentence-transformers
!pip install --upgrade chromadb

Collecting transformers>=4.39.0 (from langchain_huggingface)
  Using cached transformers-4.48.2-py3-none-any.whl.metadata (44 kB)
Using cached transformers-4.48.2-py3-none-any.whl (9.7 MB)
[0mInstalling collected packages: transformers
Successfully installed transformers-4.48.2
Collecting torch
  Using cached torch-2.6.0-cp311-cp311-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchvision
  Using cached torchvision-0.21.0-cp311-cp311-manylinux1_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio
  Using cached torchaudio-2.6.0-cp311-cp311-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting nvidia-cusparselt-cu12==0.6.2 (from torch)
  Using cached nvidia_cusparselt_cu12-0.6.2-py3-none-manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting triton==3.2.0 (from torch)
  Using cached triton-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Using cached torch-2.6.0-cp311-cp311-manylinux1_x86_64.whl (766.7 MB)
Downloading nvidia_cusparselt_cu12-0.6.2-py3-no

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.vectorstores import Chroma
import time

# Load environment variables
groq_api_key = "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"

# Initialize the language model
print("Initializing ChatGroq model...")
llm = ChatGroq(groq_api_key=groq_api_key, model_name="mixtral-8x7b-32768")
print("ChatGroq model initialized.")

# Initialize embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Define Chroma vector store path
vector_store_path = "chroma_db"



def process_document(file_path):
    """
    Processes a document (PDF, TXT, DOC) and creates a ChromaDB vector store.

    Args:
        file_path: Path to the file.

    Returns:
        The retrieval chain, or None if any errors occur during processing.
    """
    try:
        # Determine document loader based on file extension
        if file_path.endswith(".pdf"):
            loader = PyPDFLoader(file_path)
        elif file_path.endswith(".txt"):
            loader = TextLoader(file_path)
        elif file_path.endswith(".doc") or file_path.endswith(".docx"):
            # You might need to install a library like python-docx for .doc/.docx support
            # loader = Docx2txtLoader(file_path)
            raise NotImplementedError("Support for .doc/.docx files is not yet implemented.")
        else:
            raise ValueError(f"Unsupported file type: {file_path}")

        print("Loading and processing the document...")
        docs = loader.load()
        print(f"Loaded {len(docs)} documents from file.")

        # Split documents
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
        split_documents = text_splitter.split_documents(docs)
        print(f"Split documents into {len(split_documents)} chunks.")

        # Create ChromaDB vector store
        print("Creating ChromaDB vector store...")
        vector_store = Chroma.from_documents(
            split_documents,
            embeddings,
            persist_directory=vector_store_path
        )
        print("ChromaDB vector store created successfully.")

        print("Setting up retriever...")
        retriever = vector_store.as_retriever()
        print("Retriever setup complete.")

        # Define retrieval prompt template
        print("Defining prompt template...")
        prompt_template = ChatPromptTemplate.from_template(
    """
    You are a Quality Analyst reviewing a sales call conversation between a salesperson and a customer.
    Your task is to evaluate the salesperson's performance based on the conversation provided below.
    Please answer the query below, ensuring your response is clear, concise, and well-justified.
    Provide an objective evaluation for each query, without combining responses for different queries.

    **Conversation Transcript:**
    {context}

    **Evaluation:**
    Respond directly to the following query without including introductory statements like "based on the provided context."
    Provide a precise evaluation for the query:

    - **Query**: {input}
    - **Your response**:
    """
)

        print("Prompt template defined.")

        # Set up document chain and retrieval chain
        print("Setting up chains...")
        document_chain = create_stuff_documents_chain(llm, prompt_template)
        retrieval_chain = create_retrieval_chain(retriever, document_chain)
        print("Chains set up successfully.")

        return retrieval_chain

    except Exception as e:
        print(f"Error during document processing: {e}")
        return None

# Load the document to be analyzed
document_path = "/content/Call Center Conversation #03_transcript.txt"  # Replace with the actual path
retrieval_chain = process_document(document_path)

if retrieval_chain:
    # Load queries from a file
    with open("/content/salesqueries.txt", "r", encoding="utf-8") as f:
        queries = f.readlines()
    queries = [query.strip() for query in queries]

    for i, query in enumerate(queries):
        print(f"User query {i+1}: {query}")
        start = time.process_time()
        try:
            response = retrieval_chain.invoke({"input": query})
            end_time = time.process_time() - start
            print(f"Response received in {end_time:.2f} seconds.")
            print(f"**Response for Query {i+1}:**")
            print(response['answer'])
        except Exception as e:
            print(f"Error during query processing: {e}")
else:
    print("Failed to process the document. Please check the file and error messages.")

Initializing ChatGroq model...
ChatGroq model initialized.
Loading and processing the document...
Loaded 1 documents from file.
Split documents into 5 chunks.
Creating ChromaDB vector store...
ChromaDB vector store created successfully.
Setting up retriever...
Retriever setup complete.
Defining prompt template...
Prompt template defined.
Setting up chains...
Chains set up successfully.
User query 1: Was the call answered promptly and polietly?
Response received in 0.07 seconds.
**Response for Query 1:**
No, the call was not answered promptly. The customer support agent used the same greeting and request for the order number in every response, indicating that they may have been using a script and did not immediately address the customer's issue. However, the agent was polite throughout the conversation.
User query 2: Did the agent greet the customer appropriately?
Response received in 0.06 seconds.
**Response for Query 2:**
No, the salesperson did not greet the customer appropriately. T

In [2]:
!git clone https://github.com/JessuSriyank/chatbot

Cloning into 'chatbot'...


In [3]:
!mv /content/Task3/* /content/chatbot/


mv: cannot stat '/content/Task3/*': No such file or directory
