In [1]:
# Install required packages
!pip install groq pinecone-client langchain sentence-transformers

import os
from pinecone import Pinecone, ServerlessSpec
from groq import Groq
from sentence_transformers import SentenceTransformer
from typing import List, Dict
import numpy as np
import time


Defaulting to user installation because normal site-packages is not writeable



In [6]:
class Config:
    """Static settings shared by the vector store and the QA bot.

    Credentials and the Pinecone region are read from environment variables
    so that real secrets never have to be typed into (and saved with) the
    notebook. When a variable is unset the original placeholder value is
    used, so existing code that overrides these attributes keeps working.
    """

    # API credentials: export GROQ_API_KEY / PINECONE_API_KEY before starting
    # the kernel instead of editing the placeholders in place.
    GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "your-key")
    PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "your-key")

    # Name of the Pinecone index that is created on first use and reused after.
    INDEX_NAME = "rag-bot"

    # Vector size passed to Pinecone at index creation. It has to equal the
    # length of the embeddings produced by the encoder used elsewhere in this
    # notebook ('all-MiniLM-L6-v2'; documented as 384-dimensional).
    DIMENSION = 384

    # Serverless deployment target for a newly created index.
    CLOUD = "aws"
    REGION = os.environ.get("PINECONE_REGION", "ur-region")

In [7]:
class DocumentProcessor:
    """Turns raw text documents into records ready to be upserted.

    Each record is a dict with the keys 'id', 'text' and 'embedding'.
    """

    def __init__(self):
        # Loads the sentence-transformers model used to embed documents.
        self.encoder = SentenceTransformer('all-MiniLM-L6-v2')

    def process_documents(self, documents: List[str], id_prefix: str = "") -> List[Dict]:
        """Embed every document and wrap it in an upsert-ready record.

        Args:
            documents: Texts to embed, in the order their ids are assigned.
            id_prefix: Optional string prepended to each positional id.
                Ids are otherwise just "0", "1", ... and therefore collide
                (and overwrite each other in the vector store) when several
                batches are processed separately; a distinct prefix per
                batch avoids that. Defaults to "" (original behaviour).

        Returns:
            One dict per document: {'id': str, 'text': str,
            'embedding': list of floats}. Empty input yields [].
        """
        documents = list(documents)
        if not documents:
            # Nothing to embed; skip the model call entirely.
            return []

        # One batched call instead of one model invocation per document.
        embeddings = self.encoder.encode(documents)

        return [
            {
                'id': f"{id_prefix}{position}",
                'text': text,
                'embedding': vector.tolist()
            }
            for position, (text, vector) in enumerate(zip(documents, embeddings))
        ]

In [8]:
class VectorStore:
    """Wrapper around a single Pinecone index plus the query-side encoder.

    On construction it connects to Pinecone, creates the index named by
    Config.INDEX_NAME if it does not exist yet, and loads the embedding
    model used to encode search queries.
    """

    # Polling limits (seconds) for the two waits below.
    _READY_TIMEOUT_S = 120
    _VISIBILITY_TIMEOUT_S = 30
    _POLL_INTERVAL_S = 1

    def __init__(self):
        """Connect to Pinecone and open (creating if necessary) the index.

        Raises:
            Exception: if any step of the setup fails; the original error
                is attached as __cause__.
        """
        try:
            self.pc = Pinecone(api_key=Config.PINECONE_API_KEY)
            print("Connected to Pinecone")

            # Check if index exists
            if Config.INDEX_NAME not in self.pc.list_indexes().names():
                print(f"Creating new index: {Config.INDEX_NAME}")
                self.pc.create_index(
                    name=Config.INDEX_NAME,
                    dimension=Config.DIMENSION,
                    metric='cosine',
                    spec=ServerlessSpec(
                        cloud=Config.CLOUD,
                        region=Config.REGION
                    )
                )
                print("Waiting for index to initialize...")
                # Poll the reported status instead of sleeping a fixed time,
                # which is either too long or not long enough.
                self._wait_until_ready()

            self.index = self.pc.Index(Config.INDEX_NAME)
            print("Successfully connected to index")
            self.encoder = SentenceTransformer('all-MiniLM-L6-v2')

        except Exception as e:
            raise Exception(f"Pinecone initialization failed: {str(e)}") from e

    def _wait_until_ready(self):
        """Block until Pinecone reports the index as ready, or time out."""
        deadline = time.monotonic() + self._READY_TIMEOUT_S
        while not self.pc.describe_index(Config.INDEX_NAME).status['ready']:
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"index '{Config.INDEX_NAME}' not ready after "
                    f"{self._READY_TIMEOUT_S}s"
                )
            time.sleep(self._POLL_INTERVAL_S)

    def _wait_until_visible(self, expected_count: int):
        """Best-effort wait until the index reports at least expected_count vectors.

        Pinecone documents its indexes as eventually consistent, so vectors
        that were just upserted may not be returned by an immediate query.
        This only compares the total vector count, so it cannot detect
        freshness when the upsert overwrote ids that already existed.
        Failure to confirm is reported as a warning, never as an error,
        because the upsert itself has already succeeded.
        """
        deadline = time.monotonic() + self._VISIBILITY_TIMEOUT_S
        try:
            while self.index.describe_index_stats().total_vector_count < expected_count:
                if time.monotonic() >= deadline:
                    print("Warning: stored vectors are not yet visible to queries")
                    return
                time.sleep(self._POLL_INTERVAL_S)
        except Exception as e:
            print(f"Warning: could not confirm vector visibility: {str(e)}")

    def store_documents(self, processed_docs: List[Dict], batch_size: int = 100):
        """Upsert processed documents into the index.

        Args:
            processed_docs: Dicts with the keys 'id', 'embedding' and 'text';
                the text is stored as metadata next to the vector.
            batch_size: Maximum number of vectors sent per upsert request.

        Raises:
            Exception: if building or upserting the vectors fails; the
                original error is attached as __cause__.
        """
        try:
            if batch_size < 1:
                raise ValueError("batch_size must be at least 1")

            vectors = [(doc['id'], doc['embedding'], {'text': doc['text']})
                      for doc in processed_docs]
            if not vectors:
                # Avoid sending an empty upsert request.
                print("No documents to store")
                return

            for start in range(0, len(vectors), batch_size):
                self.index.upsert(vectors=vectors[start:start + batch_size])

            self._wait_until_visible(len(vectors))
            print(f"Successfully stored {len(vectors)} documents")
        except Exception as e:
            raise Exception(f"Failed to store documents: {str(e)}") from e

    def search(self, query: str, top_k: int = 3) -> List[Dict]:
        """Return the stored texts most similar to the query.

        Args:
            query: Natural-language query; embedded with the store's encoder.
            top_k: Maximum number of matches requested from the index.

        Returns:
            A list of {'text': ..., 'score': ...} dicts in the order the
            index returned them. Matches that carry no 'text' metadata
            (e.g. vectors written by other code) are skipped.

        Raises:
            Exception: if encoding or querying fails; the original error is
                attached as __cause__.
        """
        try:
            query_embedding = self.encoder.encode(query).tolist()
            results = self.index.query(
                vector=query_embedding,
                top_k=top_k,
                include_metadata=True
            )
            hits = []
            for match in results.matches:
                metadata = match.metadata or {}
                if 'text' not in metadata:
                    continue
                hits.append({'text': metadata['text'], 'score': match.score})
            return hits
        except Exception as e:
            raise Exception(f"Search failed: {str(e)}") from e


In [9]:
class QABot:
    """Retrieval-augmented question answering over the vector store.

    A question is answered by fetching the most similar stored texts and
    asking a Groq-hosted chat model to answer from that context only.
    """

    # Reply the model is instructed to give when the context has no answer;
    # also returned directly when retrieval finds nothing at all.
    NO_ANSWER = "I cannot answer this question based on the available information."

    # Default chat model id.
    # NOTE(review): hosted model ids get retired over time - confirm this one
    # is still served by Groq, or pass a current id to the constructor.
    MODEL = "mixtral-8x7b-32768"

    def __init__(self, model: str = MODEL):
        """Create the Groq client and the vector store.

        Args:
            model: Chat model id used for completions.

        Raises:
            Exception: if either client cannot be initialized; the original
                error is attached as __cause__.
        """
        try:
            self.model = model
            print("Initializing Groq client...")
            self.groq_client = Groq(api_key=Config.GROQ_API_KEY)
            print("Initializing Vector Store...")
            self.vector_store = VectorStore()
        except Exception as e:
            raise Exception(f"QABot initialization failed: {str(e)}") from e

    @staticmethod
    def _build_prompt(question: str, relevant_docs: List[Dict]) -> str:
        """Assemble the prompt from the numbered context texts and the question."""
        context = "\n".join(
            f"Context {number}: {doc['text']}"
            for number, doc in enumerate(relevant_docs, start=1)
        )
        # Joined line by line so the source indentation of this method does
        # not leak into the text sent to the model.
        return "\n".join([
            "Based on the following context, please answer the question.",
            f'If the answer cannot be found in the context, say "{QABot.NO_ANSWER}"',
            "",
            context,
            "",
            f"Question: {question}",
            "Answer:",
        ])

    def answer_question(self, question: str) -> str:
        """Answer a question from the stored documents.

        Args:
            question: The user's question.

        Returns:
            The model's answer; NO_ANSWER when retrieval returns no
            documents (no model call is made in that case); or a string
            starting with "Error generating answer:" if retrieval or the
            model call raised. This method does not raise.
        """
        try:
            relevant_docs = self.vector_store.search(question)
            if not relevant_docs:
                # Without context the model could only be asked to decline.
                return self.NO_ANSWER

            prompt = self._build_prompt(question, relevant_docs)

            response = self.groq_client.chat.completions.create(
                messages=[{"role": "user", "content": prompt}],
                model=self.model,
                temperature=0.1,
                max_tokens=300
            )

            return response.choices[0].message.content
        except Exception as e:
            return f"Error generating answer: {str(e)}"

def main():
    """Run the end-to-end demo: embed sample documents, store them, ask questions.

    Progress is printed step by step. Any exception raised along the way is
    caught and reported together with a short list of troubleshooting tips;
    nothing is returned.
    """
    troubleshooting_tips = (
        "1. Verify your API keys are correct",
        "2. Check if you have access to the specified Pinecone region",
        "3. Ensure you have sufficient credits/permissions",
        "4. Verify your internet connection",
    )

    try:
        # Sample business documents
        knowledge_base = [
            "Our company's return policy allows returns within 30 days of purchase with original receipt.",
            "We offer free shipping on orders over $50 within the continental United States.",
            "Customer support is available 24/7 through our helpline at 1-800-HELP.",
            "Our premium membership costs $99 per year and includes exclusive discounts.",
            "All products come with a 1-year manufacturer warranty."
        ]

        print("\n1. Initializing document processor...")
        processor = DocumentProcessor()

        print("\n2. Initializing QA bot...")
        bot = QABot()

        print("\n3. Processing documents...")
        records = processor.process_documents(knowledge_base)

        print("\n4. Storing documents in vector database...")
        bot.vector_store.store_documents(records)

        print("\n5. Testing QA bot...")
        sample_questions = [
            "What is the return policy?",
            "How much does shipping cost?",
            "What are the membership benefits?"
        ]

        for sample in sample_questions:
            print(f"\nQ: {sample}")
            reply = bot.answer_question(sample)
            print(f"A: {reply}")

    except Exception as err:
        print(f"\nERROR: {err!s}")
        print("\nTroubleshooting tips:")
        for tip in troubleshooting_tips:
            print(tip)


# Run the demo when this code is executed as the main module.
if __name__ == "__main__":
    main()


1. Initializing document processor...

2. Initializing QA bot...
Initializing Groq client...
Initializing Vector Store...
Connected to Pinecone
Successfully connected to index

3. Processing documents...

4. Storing documents in vector database...
Successfully stored 5 documents

5. Testing QA bot...

Q: What is the return policy?
A: I cannot answer this question based on the available information. The context provided does not include information about the return policy.

Q: How much does shipping cost?
A: The context provided does not include information about the cost of shipping. Therefore, I cannot answer this question based on the available information.

Q: What are the membership benefits?
A: The context does not provide information on the membership benefits. Therefore, I cannot answer this question based on the available information.
