# 🧙 Gandalf Chatbot (Fully Local)
This notebook runs a LangChain RAG chatbot locally using `transformers`, `sentence-transformers`, and `FAISS`. No cloud APIs or internet access are needed once the models have been downloaded.

In [1]:
# Install Required Libraries
!pip install -q transformers sentence-transformers langchain faiss-cpu pypdf python-dotenv

In [2]:
# Install Accelerate
!pip install -U accelerate

Defaulting to user installation because normal site-packages is not writeable


In [3]:
# Load Environment Variables
# `os` is imported here but is not referenced inside this cell.
import os
from dotenv import load_dotenv
# Called with no arguments. As the last expression of the cell, its return
# value is what the notebook displays (the recorded output is `True`).
load_dotenv()

True

In [4]:
# Read the source PDF and cut it into overlapping text chunks for retrieval.
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

PDF_PATH = "Tolkien-J.-The-lord-of-the-rings-HarperCollins-ebooks-2010.pdf"  # Change to your PDF path
CHUNK_SIZE = 1000     # passed to the splitter as `chunk_size`
CHUNK_OVERLAP = 200   # passed to the splitter as `chunk_overlap`

# Load the PDF into a list of documents.
loader = PyPDFLoader(PDF_PATH)
pages = loader.load()

# Split the loaded documents; `docs` is consumed by the embedding cell.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
docs = splitter.split_documents(pages)

print(f"âœ… Loaded and split {len(docs)} chunks.")

âœ… Loaded and split 4246 chunks.


In [None]:
# Embed every chunk, index the vectors with FAISS, and persist the index to disk.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
INDEX_DIR = "gandalf_index"

# `docs` comes from the PDF loading/splitting cell.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
vectorstore = FAISS.from_documents(docs, embeddings)

# Written to a local folder so the index can be reloaded later with `load_local`.
vectorstore.save_local(INDEX_DIR)
print("âœ… Vectorstore saved.")

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


âœ… Vectorstore saved.


In [None]:
pip install -U accelerate


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


: 

In [None]:
# Load a local language model using transformers and wrap it for LangChain.
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline

model_id = "tiiuae/falcon-7b-instruct"  # Open-access model

print("Loading model... (may take a few minutes on first run)")

# Requires accelerate
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",   # <== Requires accelerate!
    # Keep the precision stored in the checkpoint instead of the float32
    # default, which roughly doubles the memory needed for a 7B model.
    torch_dtype="auto",
)

# Sampling text-generation pipeline; up to 512 newly generated tokens per call.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.7,
    do_sample=True
)

# `llm` is the handle the retrieval chain uses in the final cell.
llm = HuggingFacePipeline(pipeline=pipe)

print("Model loaded locally.")


Loading model... (may take a few minutes on first run)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Reload the persisted FAISS index and answer one question with retrieval-augmented generation.
from langchain.chains import RetrievalQA

# Relies on `FAISS`, `embeddings` and `llm` defined by earlier cells.
loaded_index = FAISS.load_local("gandalf_index", embeddings)
retriever = loaded_index.as_retriever()

# The chain is asked to return its source documents alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

question = "What happened in the mines of Moria?"
result = qa_chain.invoke({"query": question})

# Only the generated answer is printed; the rest of `result` is not shown.
answer_text = result['result']
print("ðŸ§™ Gandalf says:\n", answer_text)