In [25]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import ReadTheDocsLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains.retrieval import create_retrieval_chain
from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_groq import ChatGroq
from dotenv import load_dotenv
import os 
# Load variables from a local .env file (if present) into the process
# environment, then read the Groq API key (None when the variable is unset).
load_dotenv()
groq_api_key = os.environ.get("GROQ_API_KEY")


In [6]:
def ingest_docs(docs_path="langchain-docs/api.python.langchain.com/en/latest"):
    """Load documents from a directory using ``ReadTheDocsLoader``.

    Args:
        docs_path: Directory handed to ``ReadTheDocsLoader``. Defaults to the
            LangChain API docs dump this notebook was originally written for,
            so calling ``ingest_docs()`` with no arguments behaves as before.

    Returns:
        Whatever ``ReadTheDocsLoader(docs_path).load()`` returns
        (presumably a list of LangChain ``Document`` objects).
    """
    loader = ReadTheDocsLoader(docs_path)
    return loader.load()

In [11]:
# Read the interview-questions PDF into LangChain Document objects.
loader = PyPDFLoader(file_path="SQL Interview Questions.pdf")
docs = loader.load()

  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4


In [12]:
# Preview only the first two loaded Documents instead of echoing the whole
# list, which floods the notebook output and bloats the saved file.
docs[:2]

[Document(metadata={'source': 'SQL Interview Questions.pdf', 'page': 0}, page_content=' \n \n \n \n800+ SQL SERVER         \nINTERVIEW       \nQUESTION   \nANSWERS PDF  \nwww.interviewquestionspd f.com  \n \n \n \n \n '),
 Document(metadata={'source': 'SQL Interview Questions.pdf', 'page': 1}, page_content='SQL SERVER INTERVIEW QUESTION  \nANSWERS  PDF (MORE THAN 800+ QUESTION FREE PDF \nDOWNLOAD)   \nBY Vikas Ahlawat ( www.interviewquestionspdf.com ) \nFor latest interview questions must visit www.interviewquestionspdf.com  \n \n \n \n \nSQL SERVER INTERVIEW QUESTIONS ANSWERS  \n \nDescription  Link \nBasic SQL Interview Q.(for 0 -1 year exp)      Sql Server Basic Interview Query Set -1  \nString Related Basic Queries(for 0 -1 year exp)  Sql Server Basic Interview Query Set -2  \nDate Time related Queries(for 0 -1 year exp)  Sql Server Date -Time Interview Query SET -3  \nSalary Related Queries  (for 0 -2 year exp)  Sql Server Salary Interview Query SET -4  \nGroup By Related Queries(

In [14]:
# Splitter configured with a chunk size of 1000 and an overlap of 200
# between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)

In [15]:
# Break the loaded PDF documents into chunks for embedding.
documents = text_splitter.split_documents(documents=docs)

In [16]:
# Preview only the first two chunks instead of echoing every chunk into the
# notebook output.
documents[:2]

[Document(metadata={'source': 'SQL Interview Questions.pdf', 'page': 0}, page_content='800+ SQL SERVER         \nINTERVIEW       \nQUESTION   \nANSWERS PDF  \nwww.interviewquestionspd f.com'),
 Document(metadata={'source': 'SQL Interview Questions.pdf', 'page': 1}, page_content='SQL SERVER INTERVIEW QUESTION  \nANSWERS  PDF (MORE THAN 800+ QUESTION FREE PDF \nDOWNLOAD)   \nBY Vikas Ahlawat ( www.interviewquestionspdf.com ) \nFor latest interview questions must visit www.interviewquestionspdf.com  \n \n \n \n \nSQL SERVER INTERVIEW QUESTIONS ANSWERS  \n \nDescription  Link \nBasic SQL Interview Q.(for 0 -1 year exp)      Sql Server Basic Interview Query Set -1  \nString Related Basic Queries(for 0 -1 year exp)  Sql Server Basic Interview Query Set -2  \nDate Time related Queries(for 0 -1 year exp)  Sql Server Date -Time Interview Query SET -3  \nSalary Related Queries  (for 0 -2 year exp)  Sql Server Salary Interview Query SET -4  \nGroup By Related Queries(for 0 -2 year exp)  Sql Serve

In [18]:
# Name the embedding model explicitly rather than relying on the library
# default. "sentence-transformers/all-mpnet-base-v2" is the documented default
# of HuggingFaceEmbeddings, so behaviour is unchanged; pinning it matters
# because the FAISS index saved below must be reloaded with the same model.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

  from tqdm.autonotebook import tqdm, trange


In [19]:
# Embed every chunk and build a FAISS vector store from the results.
db = FAISS.from_documents(
    documents=documents,
    embedding=embeddings,
)

In [20]:
# Persist the vector store to the "my_faiss_index" folder for later reuse.
db.save_local(folder_path="my_faiss_index")

In [22]:
# Fetch the shared retrieval-QA chat prompt from LangChain Hub.
retrieval_qa_chat_prompt = hub.pull(
    owner_repo_commit="langchain-ai/retrieval-qa-chat"
)

Please use the `langsmith sdk` instead:
  pip install langsmith
Use the `pull_prompt` method.
  res_dict = client.pull_repo(owner_repo_commit)


In [26]:


# Groq-hosted chat model used to generate answers.
llm = ChatGroq(
    model_name="gemma2-9b-It",
    temperature=0.5,
    groq_api_key=groq_api_key,
)


In [27]:
# Chain that hands the retrieved documents to the LLM together with the
# hub prompt.
stuff_document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=retrieval_qa_chat_prompt,
)

In [28]:
# Full pipeline: FAISS retriever feeding the document-combining chain.
qa = create_retrieval_chain(
    combine_docs_chain=stuff_document_chain,
    retriever=db.as_retriever(),
)

In [29]:
# Sample question passed to the retrieval chain below
# (the leading space is part of the string as written).
query = " Write a query to get all employee detail from 'EmployeeDetail' table"

In [30]:
# Run the chain; the question is supplied under the "input" key.
result = qa.invoke({"input": query})

In [32]:
# Show just the generated answer text from the chain's result.
result["answer"]

'```sql\nSELECT * FROM EmployeeDetail;\n``` \n\nThis query will select all columns (`*`) from the table named `EmployeeDetail`. \n'

In [34]:
# Reload the index written by save_local() above, using the same embeddings.
# NOTE(review): allow_dangerous_deserialization=True opts in to pickle-based
# loading; only use it on index folders this notebook created itself.
new_vector_store = FAISS.load_local(
    folder_path="my_faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

In [None]:
def run_llm(query, retriever=None, llm=None, prompt=None):
    """Answer ``query`` with the retrieval-augmented QA chain.

    Args:
        query: The question to ask; must be a non-empty string.
        retriever: Optional retriever to search with. Defaults to
            ``db.as_retriever()`` on the notebook-level ``db`` vector store.
        llm: Optional chat model. Defaults to a fresh ``ChatGroq`` instance
            ("gemma2-9b-It", temperature 0.5) using ``groq_api_key``.
        prompt: Optional prompt for the document-combining chain. Defaults to
            pulling "langchain-ai/retrieval-qa-chat" from LangChain Hub.

    Returns:
        The value stored under the ``"answer"`` key of the chain's result.

    Raises:
        ValueError: If ``query`` is not a string or is blank.
    """
    # Reject unusable queries before any network call is made.
    if not isinstance(query, str) or not query.strip():
        raise ValueError("query must be a non-empty string")

    # Each dependency is built only when the caller did not supply one, so
    # repeated calls can reuse an existing model / prompt / retriever instead
    # of re-creating them (and re-pulling the prompt over the network).
    if llm is None:
        llm = ChatGroq(
            groq_api_key=groq_api_key,
            model_name="gemma2-9b-It",
            temperature=0.5,
        )
    if prompt is None:
        prompt = hub.pull("langchain-ai/retrieval-qa-chat")
    if retriever is None:
        retriever = db.as_retriever()

    stuff_document_chain = create_stuff_documents_chain(llm, prompt)
    qa = create_retrieval_chain(
        retriever=retriever,
        combine_docs_chain=stuff_document_chain,
    )
    result = qa.invoke(input={"input": query})
    return result["answer"]