In [7]:
!pip -q install openai python-dotenv langchain_nvidia_ai_endpoints langchain_community faiss-cpu pypdf

[0m

In [None]:
from openai import OpenAI
from dotenv import load_dotenv
import os
# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Fail fast with a readable message when the key is missing: assigning None
# into os.environ would otherwise raise an opaque TypeError.
nvidia_api_key = os.getenv('NVIDIA_API_KEY')
if not nvidia_api_key:
    raise RuntimeError(
        "NVIDIA_API_KEY is not set; export it or add it to your .env file."
    )
os.environ['NVIDIA_API_KEY'] = nvidia_api_key

# OpenAI-compatible client pointed at the NVIDIA endpoint.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=nvidia_api_key,
)

# Request a streamed completion so text can be printed as it is produced.
completion = client.chat.completions.create(
    model="deepseek-ai/deepseek-r1",
    messages=[{"role": "user", "content": "Provide me an essay on ML"}],
    temperature=0.6,
    top_p=1,
    max_tokens=4096,
    stream=True,
)

# Echo each streamed text fragment. Chunks that carry no choices or no text
# content are skipped instead of raising IndexError or printing "None".
for chunk in completion:
    if not chunk.choices:
        continue
    piece = chunk.choices[0].delta.content
    if piece is not None:
        print(piece, end="")



In [6]:
%%writefile  RAG_NVIDIA_NIM.py
import streamlit as st
import os 
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Load the Nvidia API key. Stop the app with a visible message when it is
# missing: assigning None into os.environ would raise an opaque TypeError.
nvidia_api_key = os.getenv('NVIDIA_API_KEY')
if not nvidia_api_key:
    st.error("NVIDIA_API_KEY is not set; export it or add it to your .env file.")
    st.stop()
os.environ['NVIDIA_API_KEY'] = nvidia_api_key

# ChatNVIDIA selects the model through its `model` argument; the previous
# `model_name=` keyword is not the documented parameter, so the requested
# model may not have been the one actually used.
llm = ChatNVIDIA(model='meta/llama3-70b-instruct')

def vector_embedding():
    """Build the FAISS vector store once per Streamlit session.

    Does nothing when ``st.session_state`` already holds a ``vectors`` entry.
    Otherwise it loads the PDFs under ``./us_census``, splits them into
    chunks (chunk_size=700, chunk_overlap=100), embeds them with
    ``NVIDIAEmbeddings`` and stores every intermediate object, as well as the
    resulting FAISS store, on ``st.session_state``.
    """
    state = st.session_state

    # Guard clause: the index has already been built for this session.
    if "vectors" in state:
        return

    state.embeddings = NVIDIAEmbeddings()
    state.loader = PyPDFDirectoryLoader('./us_census')
    state.docs = state.loader.load()
    state.text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=700,
        chunk_overlap=100,
    )
    state.final_documents = state.text_splitter.split_documents(state.docs)
    state.vectors = FAISS.from_documents(
        state.final_documents,
        state.embeddings,
    )

# Prompt template with two placeholders: {context} and {input}. The context
# section is now terminated with a proper closing tag (</context>); the
# original repeated the opening tag, leaving the delimiter unclosed.
prompt=ChatPromptTemplate.from_template(
"""
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question
<context>
{context}
</context>
Questions:{input}

"""
)

# Free-text question to answer from the indexed documents.
prompt1 = st.text_input("Enter your question from documents")

# Building the index is triggered explicitly by the user.
if st.button("Document Embedding"):
    vector_embedding()
    st.write(" FAISS Vector Store DB is ready with Nvidia Embeddings")

if prompt1:
    if "vectors" not in st.session_state:
        # The retriever needs the FAISS store, which only exists after
        # vector_embedding() has run; without this guard the lookup below
        # fails with an AttributeError.
        st.warning('Click "Document Embedding" to build the vector store before asking a question.')
    else:
        document_chain = create_stuff_documents_chain(llm, prompt)
        retriever = st.session_state.vectors.as_retriever()
        retrieval_chain = create_retrieval_chain(retriever, document_chain)
        # Fixed typo: the method is `invoke` (was `invole`, an AttributeError).
        response = retrieval_chain.invoke({'input': prompt1})
        st.write(response['answer'])

        # streamlit expander listing the text of each document in the response context
        with st.expander("Document Similarity Search"):
            for doc in response["context"]:
                st.write(doc.page_content)
                st.write("------------")
    
            

Overwriting RAG_NVIDIA_NIM.py
