In [63]:
import langchain
import langchain_ollama
import os

In [48]:
from langchain_ollama import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.prompts import FewShotPromptTemplate
from langchain_core.prompts import PromptTemplate

In [97]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_milvus import Milvus
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_huggingface import HuggingFaceEndpoint

from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA


In [89]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
# Shell escape: print the models available to the local Ollama installation.
!ollama list

NAME                     	ID          	SIZE  	MODIFIED     
deepseek-r1:8b           	28f8fd6cdc67	4.9 GB	25 hours ago	
llama3.2:1b-instruct-q4_0	53f2745c8077	770 MB	3 months ago	
llama3.2:1b              	baf6a787fdff	1.3 GB	3 months ago	
llama3.1:8b              	42182419e950	4.7 GB	4 months ago	
mistral:instruct         	f974a74358d6	4.1 GB	4 months ago	


In [59]:
# Ollama-served model that generates the answers in the retrieval chain;
# the low temperature reduces randomness in its output.
llm = OllamaLLM(model="llama3.1:8b", temperature=0.2)

In [70]:
# Directory (relative path) that is scanned for the PDF files to ingest.
folder_path = "./POC-LangChain/PDF_for_RAG-bot"

In [73]:
# Collect the PDF file names found directly inside folder_path.
# - os.listdir returns names in arbitrary order, so they are sorted to keep the
#   order of the loaded pages (and everything derived from them) reproducible.
# - The extension is compared case-insensitively so files named "*.PDF" are
#   not silently skipped.
pdf_files = sorted(
    name for name in os.listdir(folder_path) if name.lower().endswith(".pdf")
)

# Load every PDF and accumulate the Document objects returned by its loader.
documents = []
for pdf in pdf_files:
    pdf_path = os.path.join(folder_path, pdf)
    loader = PyPDFLoader(pdf_path)
    documents.extend(loader.load())

print(f"Loaded {len(documents)} pages from {len(pdf_files)} PDFs.")

Loaded 14 pages from 1 PDFs.


In [79]:
# Hugging Face sentence-embedding model; it vectorizes the document chunks
# and is passed again when the saved FAISS index is reloaded.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [91]:
# Splitter configuration; chunk_size and chunk_overlap are the knobs to tune.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)

# Break the loaded documents into smaller, overlapping chunks for embedding.
chunks = text_splitter.split_documents(documents)

In [92]:
# Embed every chunk with embedding_model and collect the vectors in a FAISS store.
vector_store = FAISS.from_documents(documents=chunks, embedding=embedding_model)

# Persist the store under "faiss_index" so a later cell can reload it from disk.
vector_store.save_local("faiss_index")

print("Embeddings generated and saved successfully.")

Embeddings generated and saved successfully.


In [121]:
# Reload the FAISS store that save_local("faiss_index") wrote to disk, using the
# same embedding model that built it.
#
# SECURITY: allow_dangerous_deserialization=True is NOT a safety feature. It
# opts in to pickle-based loading, and unpickling a file can execute arbitrary
# code. It is acceptable here only because "faiss_index" is produced by this
# notebook itself; never enable it for an index obtained from an untrusted source.
vector_store = FAISS.load_local(
    "faiss_index",
    embedding_model,
    allow_dangerous_deserialization=True,
)

# Expose the store as a retriever; search_kwargs k=5 requests five chunks per query.
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

print("FAISS vector store loaded successfully!")

FAISS vector store loaded successfully!


In [132]:
# Question that is passed to the retrieval chain as its "input" value.
query = "What is NL2SQL360?"

In [137]:
# System instruction: answer only from the retrieved context, which the
# documents chain substitutes into the {context} placeholder.
system_prompt = "".join(
    [
        "Use the given context only to answer the question. ",
        "If you don't know the answer, say you don't know. ",
        "Context: {context}",
    ]
)

# Two-message chat prompt: the system instruction, then the user's question,
# which is filled in through the {input} placeholder.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

# Combine the LLM and prompt into a documents chain, then put the retriever in
# front of it so each call fetches context before generating an answer.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(retriever, question_answer_chain)

In [138]:
# Run the retrieval chain on the query. As the cell's last expression, the
# returned dict (it includes the 'input' and the retrieved 'context' documents)
# is displayed as the cell output.
chain.invoke({"input": query})

{'input': 'What is NL2SQL360?',
 'context': [Document(id='917966a1-aaf9-4038-9fd9-e73c81609d4d', metadata={'source': './POC-LangChain/PDF_for_RAG-bot\\Li et al. - 2024 - The Dawn of Natural Language to SQL Are We Fully .pdf', 'page': 0, 'page_label': '1'}, page_content='databases. The emergence of Large Language Models has intro-\nduced a novel paradigm in nl2sql tasks, enhancing capabilities\ndramatically. However, this raises a critical question:Are we fully\nprepared to deploy nl2sql models in production?\nTo address the posed questions, we present a multi-anglenl2sql\nevaluation framework, NL2SQL360, to facilitate the design and test\nof new nl2sql methods for researchers. Through NL2SQL360, we\nconduct a detailed comparison of leading nl2sql methods across a'),
  Document(id='fe7cb7eb-d418-426f-b13d-bd4efcfc6c57', metadata={'source': './POC-LangChain/PDF_for_RAG-bot\\Li et al. - 2024 - The Dawn of Natural Language to SQL Are We Fully .pdf', 'page': 0, 'page_label': '1'}, page_cont