In [1]:
import os
from dotenv import load_dotenv
from langchain_google_genai import GoogleGenerativeAI
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# loading the llm
load_dotenv()
llm = GoogleGenerativeAI(model="models/text-bison-001", google_api_key=os.environ['GOOGLE_API_KEY'],temperature = 0.2)

In [3]:
# Loading the csv file into a variable
loader = CSVLoader('demo.csv',source_column = 'prompt')
data = loader.load()

In [4]:
# Loading the embeddings
instructor_embeddings = HuggingFaceInstructEmbeddings(
    query_instruction="Represent the query for retrieval: "
)


load INSTRUCTOR_Transformer
max_seq_length  512


In [9]:
# Create a FAISS instance for vector database from 'data'
vectordb = FAISS.from_documents(documents=data,
                                 embedding=instructor_embeddings)

In [12]:
retriever = vectordb.as_retriever()
# example query
# rdocs = retriever.get_relevant_documents("How long is the course valid")

In [15]:
prompt_template = """Given the following context and a question, generate an answer based on this context only.
In the answer try to provide as much text as possible from "response" section in the source document context without making much changes.
If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer.

CONTEXT: {context}

QUESTION: {question}"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs = {"prompt": prompt}

In [16]:
# Use the retreivar QA class for the final retreival from the database
# return source documents will tell it whether to return the rows which it used for retrieving or not
chain = RetrievalQA.from_chain_type(llm=llm,
                            chain_type="stuff",
                            retriever=retriever,
                            input_key="query",
                            return_source_documents=True,
                            chain_type_kwargs=chain_type_kwargs)

# chain("Do you have a js course")
# in queries such as these teh llm halucinates and gives answer according to its own knowledge, hence we need to tell the llm that answer only on the basis of the database

In [17]:
chain("Do you know current year")

  warn_deprecated(


{'query': 'Do you know current year',
 'result': "? I don't know.",
 'source_documents': [Document(page_content='prompt: Why is the year 2018 missing or disappeared?\nresponse: Check this reference:\nhttps://discordapp.com/channels/1090613684163850280/1111545547426369637/1111563527753318430', metadata={'source': 'Why is the year 2018 missing or disappeared?', 'row': 67}),
  Document(page_content='prompt: The year column is missing in the P&L check. How can I resolve this issue and obtain the year column?\nresponse: Check this reference:\n https://discord.com/channels/1090613684163850280/1111101322406658098/1111137901816848494', metadata={'source': 'The year column is missing in the P&L check. How can I resolve this issue and obtain the year column?', 'row': 64}),
  Document(page_content="prompt: Can you state the distinction among the SAMEPERIODLASTYEAR(), PARALLELPERIOD(), and DATEADD() function in Power BI?\nresponse: Here is the brief explanation.\n\nSAMEPERIODLASTYEAR() compares th