# Install Required Packages

In [None]:
!pip install langchain openai pinecone-client fastapi uvicorn python-dotenv
!pip install langchain-community
!pip install --upgrade pinecone-client
!pip install huggingface_hub
!pip install datasets
!pip install tiktoken

In [None]:
import os
from fastapi import FastAPI, HTTPException
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from datasets import load_dataset
from dotenv import load_dotenv
from pinecone import pinecone, ServerlessSpec

# Set up the API keys used to connect to the vector database and to create embedding vectors.

In [None]:
# Template for the .env file. The values are placeholders only: edit the
# generated .env file with your real keys instead of putting them in the
# notebook.
env_content = """
OPENAI_API_KEY="Your OPEN API KEY"
PINECONE_API_KEY="Your API KEY"
PINECONE_ENV=" Your PINECONE ENV"
"""


def write_env_template(path, content):
    """Create ``path`` containing ``content`` unless the file already exists.

    :param path: Location of the file to create.
    :param content: Text written to the newly created file.
    :return: True if the file was created, False if an existing file was
        left untouched.
    """
    try:
        # Mode 'x' (exclusive create) fails instead of truncating, so a .env
        # that already holds real keys is never overwritten with placeholders
        # when the notebook is re-run.
        with open(path, 'x') as f:
            f.write(content)
    except FileExistsError:
        return False
    return True


# Create the .env file on first run only.
if not write_env_template('.env', env_content):
    print(".env already exists; leaving it unchanged.")


In [None]:
# NOTE(review): load_dotenv is already imported in the imports cell near the
# top of the notebook; this re-import is redundant but harmless.
from dotenv import load_dotenv
# Loads environment variables from the .env file so the os.getenv() calls in
# the following cells can read the API keys. The bare call is the last
# expression of the cell, so its return value is shown as the cell output.
load_dotenv()  # Loads environment variables from .env file




True

# Load environment variables

In [None]:

# Pinecone API key, read from the process environment.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")  # Add your Pinecone API key
if not PINECONE_API_KEY:
    # Name the missing key explicitly (matching the OpenAI key check) so the
    # error tells the user which variable to set.
    raise ValueError("Pinecone API key is missing. Please set it in your environment variables.")

# Pinecone environment; may be None because it is read without validation.
PINECONE_ENV = os.getenv("PINECONE_ENV")          # Add your Pinecone environment

# Name of the Pinecone index that stores the document embeddings.
INDEX_NAME = "agenticrag"

# Initialize FastAPI and load the OpenAI API key

In [None]:
# Create the FastAPI application object that the route decorators attach to.
app = FastAPI()

# Fetch the OpenAI credential from the process environment and fail fast when
# it is absent or empty.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if OPENAI_API_KEY is None or OPENAI_API_KEY == "":
    raise ValueError("OpenAI API key is missing. Please set it in your environment variables.")


# Initialize embeddings and vector store

In [None]:

# Embedding client, authenticated with the OpenAI key loaded earlier.
embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
)
# Attach to the Pinecone index named by INDEX_NAME, using the embedding
# client above.
vector_store = Pinecone.from_existing_index(
    embedding=embeddings,
    index_name=INDEX_NAME,
)


# Initialize LLM and memory

In [None]:

# Chat model used to answer questions; temperature=0 is passed to the model.
llm = ChatOpenAI(model="gpt-4", temperature=0, openai_api_key=OPENAI_API_KEY)

# Conversation memory exposed to the chain under the "chat_history" key.
# output_key="answer" tells the memory which chain output to store: the chain
# in this notebook is built with return_source_documents=True and therefore
# returns both "answer" and "source_documents". Without an explicit
# output_key the memory cannot pick one and raises
# "One output key expected" when it saves the conversation turn.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer",
)

# Build the retrieval-augmented chain


In [None]:
# Retriever view over the vector store.
retriever = vector_store.as_retriever()

# Conversational retrieval chain wiring together the LLM, the retriever and
# the conversation memory; the retrieved source documents are returned
# alongside the answer.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    return_source_documents=True,
)

In [None]:
@app.post("/query/")
async def query_agent(query: str):
    """
    Endpoint to interact with the Agentic RAG system.

    :param query: User's legal query.
    :return: ``{"response": <answer text>}``.
    :raises HTTPException: 400 when the query is blank; 500 when the chain
        fails (the underlying error message is passed through as the detail).
    """
    # Validate before the try block: an HTTPException raised inside it would
    # be caught by the generic handler and re-raised as a 500.
    if not query or not query.strip():
        raise HTTPException(status_code=400, detail="Query must not be empty.")

    try:
        # Chain.run() only supports chains with exactly one output key. This
        # chain is built with return_source_documents=True, so it returns
        # both "answer" and "source_documents"; invoke it with an explicit
        # input dict and pick the answer out of the result.
        result = qa_chain.invoke({"question": query})
        return {"response": result["answer"]}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# Root endpoint
@app.get("/")
def read_root():
    """Return the static welcome payload served at the API root."""
    welcome = {"message": "Welcome to the Agentic RAG Legal Assistant!"}
    return welcome

# Load the US Congress dataset (c4lliope/us-congress)

In [None]:

# Identifier of the dataset to index; replace with the dataset you have.
dataset_id = "c4lliope/us-congress"
dataset = load_dataset(dataset_id)

# Print the loaded dataset object as a quick sanity check.
print(dataset)


# Initialize the Pinecone client, the index handle, the embeddings and the vector store

In [None]:

# Import the Pinecone client under an alias. The bare name `Pinecone` is
# already bound to LangChain's vector-store class by the imports cell, and
# re-importing the client under the same name would shadow that class.
from pinecone import Pinecone as PineconeClient

pc = PineconeClient(api_key=os.getenv("PINECONE_API_KEY"))

# Handle to the index, used by the raw upsert/query cells further down.
# INDEX_NAME is deliberately left as the plain index-name string instead of
# being rebound to the index object.
index = pc.Index(INDEX_NAME)

# Initialize OpenAI embeddings
embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))

# Set up the vector store with LangChain's Pinecone wrapper (not the client
# class), attached to the existing index by name.
vector_store = Pinecone.from_existing_index(index_name=INDEX_NAME, embedding=embeddings)

# Ensure 'chunks' is a list of strings (text data)

In [None]:

# Take the 'text' field of the training split (replace with the relevant text
# field of your dataset) and coerce every entry to str in a single pass, so
# that chunks is a list of strings.
chunks = list(map(str, dataset['train']['text']))


# Generate embeddings for the text chunks (documents) using embed_documents


In [None]:
# Embed every text chunk with the embeddings client. The result is indexed in
# parallel with `chunks`: embedding_vectors[i] is paired with chunks[i] when
# the upsert payload is built.
# NOTE(review): this embeds the whole dataset in one call — presumably slow
# and costly for a large dataset; confirm the dataset size before running.
embedding_vectors = embeddings.embed_documents(chunks)


# Prepare data for upsert (Pinecone expects tuples of (ID, vector, metadata))

In [None]:

# One (id, vector, metadata) record per chunk: the id is the chunk's position
# as a string, and the original text is kept in the metadata under "text".
pinecone_data = [
    (str(position), embedding_vectors[position], {"text": chunk_text})
    for position, chunk_text in enumerate(chunks)
]


# Upsert the embeddings into Pinecone

In [None]:

# Upsert in fixed-size batches rather than sending the whole dataset in a
# single request: Pinecone limits the size of one upsert call, so a large
# payload would be rejected. An empty payload results in no request at all.
UPSERT_BATCH_SIZE = 100

for batch_start in range(0, len(pinecone_data), UPSERT_BATCH_SIZE):
    index.upsert(vectors=pinecone_data[batch_start:batch_start + UPSERT_BATCH_SIZE])

In [None]:
# Example Query to find similar documents
query = "What is the preamble of the USA Constitution?"

# Generate the query embedding. embed_query() embeds a single string and
# returns one vector; the embeddings client has no embed() method.
query_embedding = embeddings.embed_query(query)

# Query Pinecone for the most similar documents. The vector is passed flat
# (not wrapped in a list) and by keyword, as the Pinecone client's query()
# expects keyword arguments.
results = index.query(vector=query_embedding, top_k=3, include_metadata=True)

# Print the results (showing the most relevant documents)
for match in results['matches']:
    print(f"Score: {match['score']}, Text: {match['metadata']['text']}")