In [45]:
!pip install langchain pinecone-client langchain-google-genai cohere langchain_community langchain-groq pypdf



In [46]:
import os
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pinecone import ServerlessSpec
from langchain_community.vectorstores import Pinecone
import pinecone
import cohere
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_groq import ChatGroq
from langchain.chains.question_answering import load_qa_chain

In [47]:
# API keys: taken from the environment when the variable is set, so real keys
# never have to be written into the notebook. The quoted strings are only
# placeholders used when the variable is absent -- replace them (or, better,
# export the variables) before running the cells below.
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY', 'your-google-api-key')
GROQ_API_KEY = os.environ.get('GROQ_API_KEY', 'your-groq-api-key')
COHERE_API_KEY = os.environ.get('COHERE_API_KEY', 'your-cohere-api-key')
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', 'your-pinecone-api-key')
# Published back to the environment; create_vector_store() below is never
# handed the key directly, so presumably the vector store client reads it
# from here -- TODO confirm against the installed langchain version.
os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY
# Directory holding the PDFs to index: create it and point this path at it.
folder_path = '/content/Sample Pdfs'

In [48]:
def load_documents(pdf_dir):
  '''
  Loads the PDF files from the directory as documents.
  Input: Path to the directory containing PDF files.
  Output: List of documents. Always a list: it is empty when the path is not
          an existing directory, when nothing was loaded from it, or when
          loading fails (the error is printed, not raised).
  '''
  try:
      # Reject a bad path explicitly so a typo is reported as an error
      # instead of risking a "success" message with nothing loaded.
      if not os.path.isdir(pdf_dir):
          raise NotADirectoryError(f"'{pdf_dir}' is not an existing directory")
      file_loader = PyPDFDirectoryLoader(pdf_dir)
      documents = file_loader.load()
  except Exception as e:
      print(f"Error loading documents: {e}")
      # An empty list keeps the documented return type for downstream cells.
      return []

  if not documents:
      print(f"No documents were loaded from '{pdf_dir}'.")
      return []

  print("All documents loaded successfully.")
  return documents

In [49]:
def chunk_documents(docs, chunk_size=1500, chunk_overlap=150):
  '''
  Splits the documents into smaller chunks for better processing.
  Input: List of documents, chunk_size, and chunk_overlap parameters.
  Output: List of document chunks. Always a list: it is empty when there is
          nothing to split (docs is None or empty) or when splitting fails
          (the error is printed, not raised).
  '''
  # Covers both an empty list and the None that a failed load step may hand over.
  if not docs:
      print("No documents to split.")
      return []

  try:
      text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
      chunks = text_splitter.split_documents(docs)
      print("Documents split into chunks successfully.")
      return chunks
  except Exception as e:
      print(f"Error splitting documents: {e}")
      # An empty list keeps the documented return type for downstream cells.
      return []

In [50]:
def initialize_pinecone(api_key, index_name, dimension=768, metric='cosine', cloud='aws', region='us-east-1'):
  '''
  Creates a fresh serverless Pinecone index.
  If an index called index_name already exists it is deleted first, so any
  vectors stored in it are lost.
  Input: API key, index name, vector dimension, distance metric, and the
         cloud/region for the serverless spec.
  Output: None. Any failure is printed rather than raised.
  '''
  try:
      client = pinecone.Pinecone(api_key=api_key)

      # Drop a same-named index so the create call below starts from scratch.
      existing_names = client.list_indexes().names()
      if index_name in existing_names:
          client.delete_index(index_name)

      serverless = ServerlessSpec(cloud=cloud, region=region)
      client.create_index(name=index_name, dimension=dimension, metric=metric, spec=serverless)
      print(f"Pinecone index '{index_name}' created successfully.")
  except Exception as err:
      print(f"Error initializing Pinecone: {err}")

In [51]:
def create_vector_store(documents, embeddings, index_name):
  '''
  Builds a Pinecone vector store from the given document chunks.
  Input: List of document chunks, the embeddings object, and the name of the
         Pinecone index to use.
  Output: The Pinecone vector store.
  Raises: Whatever the underlying call raises; the error is printed first and
          then re-raised unchanged.
  '''
  try:
      store = Pinecone.from_documents(
          documents,
          embeddings,
          index_name=index_name,
      )
      print("Vector store created successfully.")
      return store
  except Exception as err:
      print(f"Error creating vector store: {err}")
      raise

In [52]:
# Embedding model, plus a freshly created "langcha" index.
# Note: initialize_pinecone deletes an existing index of the same name first.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)
initialize_pinecone(index_name="langcha", api_key=PINECONE_API_KEY)

Pinecone index 'langcha' created successfully.


In [53]:
# Read the PDFs under folder_path, then split them into chunks
# (default chunk size and overlap from chunk_documents).
documents = load_documents(pdf_dir=folder_path)
chunks = chunk_documents(docs=documents)

All documents loaded successfully.
Documents split into chunks successfully.


In [54]:
# Build the vector store over the chunks, backed by the "langcha" index
vector_store = create_vector_store(
    documents=chunks,
    embeddings=embeddings,
    index_name="langcha",
)

Vector store created successfully.


In [55]:
def rerank_results_with_cohere(query, search_results, cohere_api_key,
                               rerank_model='rerank-english-v3.0',
                               relevance_threshold=0.001):
  '''
  Reranks search results based on relevance using Cohere's model.
  Input: Query, search results (objects exposing .page_content), Cohere API
         key, rerank model, and relevance threshold.
  Output: List of the search results whose relevance score is at least
          relevance_threshold, most relevant first. An empty input gives an
          empty list without calling the API. If reranking fails, the error
          is printed and the results are returned in their original order.
  '''
  # Nothing to rank, so skip the API round trip.
  if not search_results:
      return []

  try:
      co = cohere.Client(api_key=cohere_api_key)
      documents = [doc.page_content for doc in search_results]
      rerank_response = co.rerank(query=query, documents=documents, model=rerank_model)

      # Each rerank result names the document it scores through `.index`
      # (position in the submitted list). The response is not in input order,
      # so documents must be looked up by that index rather than zipped.
      scored = [
          (search_results[result.index], result.relevance_score)
          for result in rerank_response.results
          if result.relevance_score >= relevance_threshold  # Apply the relevance score threshold
      ]
      scored.sort(key=lambda pair: pair[1], reverse=True)  # Highest relevance first

      print("Results successfully reranked using Cohere.")
      return [doc for doc, _score in scored]
  except Exception as e:
      print(f"Error reranking results: {e}")
      # Degrade gracefully: unranked results are still usable by the caller.
      return list(search_results)

In [56]:
# Lightweight document record, used as a placeholder when a search returns nothing
class Document:
    """Plain holder pairing a piece of text with its metadata."""

    def __init__(self, metadata, page_content):
        # Both values are stored exactly as given; no validation or copying.
        self.metadata, self.page_content = metadata, page_content

In [57]:
def retrieve_answers(index, chain, query, cohere_api_key, k=8):
  '''
  Retrieves the answer using the question-answering chain and reranks results using Cohere.
  Input: Index (vector store), chain, query, Cohere API key, and k, the
         number of nearest chunks to fetch before reranking (default 8).
  Output: The final answer, or None if any step raises (the error is printed).
  '''
  try:
      search_results = index.similarity_search(query, k=k)
      if len(search_results) == 0:
          # No hits: give the chain a single placeholder document and skip the
          # reranker, which has nothing meaningful to order. metadata is an
          # empty dict because document metadata is expected to be a mapping;
          # a bare string there can break code that unpacks it.
          context_docs = [Document(metadata={}, page_content='There is not relevant content')]
      else:
          context_docs = rerank_results_with_cohere(query, search_results, cohere_api_key)
          if context_docs is None:
              # The reranker signalled failure; answer from the unranked hits
              # instead of handing None to the chain.
              context_docs = search_results
      response = chain.run(input_documents=context_docs, question=query)
      return response
  except Exception as e:
      print(f"Error retrieving answers: {e}")
      return None

In [58]:
# Groq chat model wrapped in a "stuff"-type question-answering chain
llm = ChatGroq(
    model_name="Llama3-8b-8192",
    groq_api_key=GROQ_API_KEY,
)
qa_chain = load_qa_chain(llm=llm, chain_type="stuff")

In [59]:
# Query 1: ask for code contained in the indexed PDFs
query = "code for simple calculator"
answer = retrieve_answers(
    index=vector_store,
    chain=qa_chain,
    query=query,
    cohere_api_key=COHERE_API_KEY,
)
print(answer)

Results successfully reranked using Cohere.
The code for a simple calculator program is provided below. This program allows the user to select an operation (addition, subtraction, multiplication, or division) and then enter two numbers. The program will then perform the selected operation on the two numbers and display the result.

```python
# Program make a simple calculator that can add, subtract, multiply and divide 
using functions  
# define functions  
def add(x, y):  
    """This function adds two numbers"""          
    return x + y  
def subtract(x, y):  
    ""This function subtracts two numbers"""         
    return x - y  
def multiply(x, y):  
    """This function multiplies two numbers"""   
    return x * y  
def divide(x, y):  
    """This function divides two numbers"""  
    return x / y  

# take input from the user 
print("Select operation.") 
print("1.Add") 
print("2.Subtract") 
print("3.Multiply")
print("4.Divide")  
choice = input("Enter choice(1/2/3/4):")  
nu

In [60]:
# Query 2: ask about the task requirements
query = "Which are requirements in Rag QA-Bot task ?"
answer = retrieve_answers(
    index=vector_store,
    chain=qa_chain,
    query=query,
    cohere_api_key=COHERE_API_KEY,
)
print(answer)

Results successfully reranked using Cohere.
According to the provided context, the requirements for the RAG (Retrieval-Augmented Generation) QA-Bot task are:

1. Implement a RAG-based model that can handle questions related to a provided document or dataset.
2. Use a vector database (such as Pinecone) to store and retrieve document embeddings efficiently.
3. Test the model with several queries and show how well it retrieves and generates accurate answers from the document.

These requirements are specified in the "Task Requirements" section of the Part 1: Retrieval-Augmented Generation Model for QA Bot problem statement.


In [61]:
# Query 3: ask for viva questions and the page numbers they appear on
query = "Which questions can be asked for viva on Depth First Search practical and on which page number they are available ?"
answer = retrieve_answers(
    index=vector_store,
    chain=qa_chain,
    query=query,
    cohere_api_key=COHERE_API_KEY,
)
print(answer)

Results successfully reranked using Cohere.
Based on the provided context, the following questions can be asked for viva on Depth First Search practical:

1. What is Depth -First -Search. (Available on page 25)
2. Differences between BFS and DFS. (Available on page 28)
3. What is DFS. (Available on page 27)

These questions can be asked for viva on Depth First Search practical, which is available on page 27-30.


In [62]:
# Query 4: ask for code together with its expected output
query = "write code for Breadth First Search and also show how output looks like"
answer = retrieve_answers(
    index=vector_store,
    chain=qa_chain,
    query=query,
    cohere_api_key=COHERE_API_KEY,
)
print(answer)

Results successfully reranked using Cohere.
Here is the code for Breadth-First Search (BFS):

```
graph = {  
'5' : ['3','7'],  
'3' : ['2',  '4'], 
'7' : ['8'],  
'2' : [],  
'4' : ['8'],  
'8' : []  
} 
 
visited = [] # List for visited  nodes. 
queue  = [] #Initialize a  queue  
 
def bfs(visited, graph, node): #function for BFS 
    visited.append(node)  
    queue.append(node)  
 
    while  queue:  # Creating loop to visit each node 
        m = queue.pop(0)  
        print (m, end = " ")  
 
        for neighbour in graph[m]:  
            if neighbour not in visited: 
                visited.append(neighbour) 
                queue.append(neighbour)  
 
# Driver Code  
print("Following is the Breadth -First Search") 
bfs(visited,  graph,  '5') # function  calling 
```

Output:

```
Following is the Breadth -First Search 
5 3 7 2 4 8
```

Explanation: The BFS algorithm starts by visiting the given node (5) and then all its adjacent nodes (3 and 7). Then it visits the nodes that 

In [63]:
# Query 5: short factual question
query = "What is name of the project ?"
answer = retrieve_answers(
    index=vector_store,
    chain=qa_chain,
    query=query,
    cohere_api_key=COHERE_API_KEY,
)
print(answer)

Results successfully reranked using Cohere.
The project is "Interactive QA Bot Interface" and it's a program to implement a simple chatbot.


In [64]:
# Query 6: broad question about viva question types
query = "Which types of questions can be asked in viva ? "
answer = retrieve_answers(
    index=vector_store,
    chain=qa_chain,
    query=query,
    cohere_api_key=COHERE_API_KEY,
)
print(answer)

Results successfully reranked using Cohere.
Based on the provided context, it appears that the types of questions that can be asked in a viva (oral examination) in the context of Artificial Intelligence Lab, Department of CSE, include:

1. Conceptual questions: These questions test the candidate's understanding of various AI concepts, such as NLP, chatbots, search algorithms, and knowledge representation.
2. Definition-based questions: These questions ask the candidate to define specific terms or concepts, such as AO* Search, Knowledge Representation Issues, or Breadth-First Search.
3. Comparison-based questions: These questions ask the candidate to compare and contrast different concepts, such as informed and uninformed search or various search algorithms.
4. Example-based questions: These questions ask the candidate to provide examples or illustrate concepts using real-world scenarios or code snippets, such as string functions in Python or list indexing and slicing.
5. Method-based q

In [65]:
# Query 7: keyword-style query rather than a full question
query = "System requirements for lab"
answer = retrieve_answers(
    index=vector_store,
    chain=qa_chain,
    query=query,
    cohere_api_key=COHERE_API_KEY,
)
print(answer)

Results successfully reranked using Cohere.
According to the provided context, the system requirements for the lab are:

* Processors: Intel Atom or Intel Core i3 processor
* Disk space: 1 GB
* Operating systems: Windows 7 or later, macOS, and Linux
* Python versions: 2.7.X, 3.6.X, and 3.8.X
