In [1]:
# ===============================
# 📌 Step 2: Load and Split Documents
# ===============================
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

# Load your knowledge base (replace "knowledge.txt" with your file).
# The encoding is given explicitly so loading does not depend on the
# platform default, which can fail on non-ASCII text.
loader = TextLoader("knowledge.txt", encoding="utf-8")
docs = loader.load()

# Split into smaller chunks for embeddings.
# CharacterTextSplitter only cuts at its separator (default "\n\n"). A file
# whose lines are separated by single newlines would never be split, no
# matter how large it grows, so split on "\n" to let chunk_size take effect.
splitter = CharacterTextSplitter(separator="\n", chunk_size=500, chunk_overlap=50)
documents = splitter.split_documents(docs)

# Fail early with a clear message instead of an IndexError on documents[0].
if not documents:
    raise ValueError("No chunks were produced: 'knowledge.txt' appears to be empty.")

print("✅ Number of chunks:", len(documents))
print("📖 Sample chunk:", documents[0])


✅ Number of chunks: 1
📖 Sample chunk: page_content='Machine learning is a branch of artificial intelligence that focuses on the use of data and algorithms to imitate the way humans learn. 
Deep learning is a subset of machine learning that uses neural networks with many layers.
LangChain is a framework for developing applications powered by language models. 
Retrieval-Augmented Generation (RAG) combines retrieval from a knowledge base with generative models to improve accuracy.' metadata={'source': 'knowledge.txt'}


In [2]:
# ===============================
# 📌 Step 3: Create Vector Store (FAISS)
# ===============================
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding function: the all-MiniLM-L6-v2 sentence-transformers model,
# accessed through LangChain's HuggingFace embeddings wrapper
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Embed the chunks and build a FAISS index from them
db = FAISS.from_documents(documents=documents, embedding=embedding_model)

# Write the index to the local "vector_store" folder
db.save_local(folder_path="vector_store")

print("✅ Vector store created and saved!")


  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


✅ Vector store created and saved!


In [3]:
# ===============================
# 📌 Step 4: Load Local LLM (Flan-T5)
# ===============================
from transformers import pipeline
# Import from langchain_community directly: the `langchain.llms` path is a
# deprecated re-export shim for the same class.
from langchain_community.llms import HuggingFacePipeline

# Use a small HuggingFace model (works on CPU)
generator = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",   # change to "google/flan-t5-small" if slow
    max_length=512,
)

# Wrap the transformers pipeline so LangChain chains can use it as an LLM
llm = HuggingFacePipeline(pipeline=generator)

print("✅ Local model loaded")


Device set to use cpu


✅ Local model loaded


  llm = HuggingFacePipeline(pipeline=generator)


In [4]:
# ===============================
# 📌 Step 5: Build Conversational RAG Chain
# ===============================
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Reload the FAISS index saved in "vector_store".
# NOTE: allow_dangerous_deserialization=True opts in to pickle-based loading,
# which can execute arbitrary code. Only do this for index files you created
# yourself or otherwise trust.
db = FAISS.load_local(
    folder_path="vector_store",
    embeddings=embedding_model,
    allow_dangerous_deserialization=True,
)

# Buffer memory that stores the conversation under the "chat_history" key
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

# Conversational Retrieval Chain (RAG): LLM + FAISS retriever + chat memory
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=db.as_retriever(),
    memory=memory,
)

def ask_bot(query):
    """Run one question through the conversational chain and return the answer.

    The question is passed to ``qa.invoke`` under the ``"question"`` key and
    the value found under ``"answer"`` in the chain's result is returned.
    """
    return qa.invoke({"question": query})["answer"]

print("✅ Chatbot ready to use!")


✅ Chatbot ready to use!


  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


In [5]:
# ===============================
# 📌 Step 6: Test the Chatbot
# ===============================
MAX_TURNS = 10  # cap on prompts per run so the cell always terminates
EXIT_COMMANDS = {"exit", "quit", "bye"}

print("🤖 Chatbot is live! Type 'exit' to quit.\n")

for _ in range(MAX_TURNS):
    # Strip surrounding whitespace so inputs like "exit " are still recognised.
    query = input("You: ").strip()
    if not query:
        # Nothing was typed; don't send an empty question to the chain.
        continue
    if query.lower() in EXIT_COMMANDS:
        print("Bot: Goodbye! 👋")
        break
    # Reuse the helper defined with the chain instead of duplicating the call.
    print("Bot:", ask_bot(query))
else:
    # Runs only when the loop finished without `break`, i.e. the cap was hit.
    print(f"Bot: Turn limit ({MAX_TURNS}) reached. Re-run this cell to keep chatting.")


🤖 Chatbot is live! Type 'exit' to quit.

Bot: What is LangChain?
Bot: a framework for developing applications powered by language models
Bot: LangChain
Bot: Goodbye! 👋
