In [18]:

import os
from dotenv import load_dotenv
from langchain_anthropic import AnthropicLLM
from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyPDFLoader
load_dotenv()
from langchain.chains import create_retrieval_chain
from langchain.prompts import ChatPromptTemplate
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# API credentials read from the process environment (load_dotenv() is called
# above). os.getenv returns None for any variable that is not set.
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")
nvidia_api_key = os.getenv("NVIDIA_API_KEY")

# Turn LangSmith tracing on only when a key is actually available.
# os.environ accepts str values only, so assigning a missing (None) key would
# raise TypeError and abort the script before any model is created.
if langchain_api_key:
    os.environ["LANGSMITH_TRACING"] = "true"
    os.environ["LANGSMITH_API_KEY"] = langchain_api_key

# Chat model used to generate answers: Groq-hosted "llama-3.1-8b-instant",
# authenticated with the key read from GROQ_API_KEY.
llm = ChatGroq(model="llama-3.1-8b-instant", api_key=groq_api_key)

# Load the PDF from the working directory; `docs` is whatever
# PyPDFLoader.load() returns (presumably one Document per page -- confirm).
loader = PyPDFLoader("data.pdf")
docs = loader.load()

# Break the raw text of the first 10 loaded documents into chunks of up to
# 1000 characters with a 200-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = [page.page_content for page in docs]
txt = [
    chunk
    for page_text in texts[:10]
    for chunk in text_splitter.split_text(page_text)
]

# Fail early with a clear message when there is nothing to index (e.g. a
# scanned PDF with no extractable text); otherwise the failure surfaces later
# inside FAISS.from_texts as a much less helpful error.
if not txt:
    raise ValueError("No text chunks to index: no text was extracted from the PDF.")

# Embedding model for the chunk index.
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Embed every chunk and build the FAISS vector store from the results.
vectordb = FAISS.from_texts(txt, embedder)


# Retriever over the vector store, configured to return 3 results per query.
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

# Single human-turn prompt with two placeholders: {context} and {input}.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "human",
            "Answer the following Question based on the following {context} Question:{input}",
        ),
    ]
)

# Combine the LLM and prompt into a document chain, then put the retriever in
# front of it to form the end-to-end question-answering chain.
doc_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
qa_chain = create_retrieval_chain(retriever=retriever, combine_docs_chain=doc_chain)

# Run one question through the chain and print the generated answer.
question = "What does source explains about?"
result = qa_chain.invoke({"input": question})
print(result["answer"])


Based on the provided text, the Open Source Driven feature explains that the Data Portal is developed completely using an Open Source Stack. This means that:

1. Software costs are saved, as no licenses are required.
2. There is provision for community participation in further developing the product, specifically in areas such as:
   * Data visualization
   * Data consumption
   * APIs to access datasets

This suggests that the Data Portal is an open and collaborative platform that encourages community involvement and contribution to its development, with the goal of making it a more robust and useful tool for data management and analysis.
