In [None]:
# Mudasir Naeem
# DevelopersHub co
# Task 4: Context-Aware-Chatbot-(RAG-with-LangChain)

In [5]:
!pip install langchain faiss-cpu langchain-community langchain-text-splitters sentence-transformers --quiet

from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import HuggingFaceHub

# Example text (simulate Wikipedia/article)
text = """
Artificial Intelligence (AI) is a branch of computer science focused on building smart machines.
Machine Learning (ML) is a subset of AI which enables machines to learn from data.
Deep Learning (DL) is a subset of ML which uses neural networks with many layers.
Transformers are a type of deep learning model that revolutionized NLP tasks.
LangChain is a framework for building applications powered by language models.
"""

# Save to file (simulate loading)
with open("ai.txt", "w") as f:
    f.write(text)

# Loading and spliting text
loader = TextLoader("ai.txt")
documents = loader.load()
splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=20)
docs = splitter.split_documents(documents)

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(docs, embeddings)

retriever = db.as_retriever()

# Dummy LLM (using HuggingFaceHub - needs token; or replace with simple string returner)
# For demo, we simulate a "chatbot" by retrieving context only
def chatbot(query, history=[]):
    context = retriever.get_relevant_documents(query)
    context_text = " ".join([d.page_content for d in context])
    response = f"Answer (based on context): {context_text[:200]}..."
    return response

# Example Q&A
print(chatbot("What is AI?"))
print(chatbot("Tell me about transformers."))


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m37.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m39.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m444.0/444.0 kB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires requests==2.32.4, but you have requests 2.32.5 which is incompatible.[0m[31m
[0m

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Answer (based on context): Artificial Intelligence (AI) is a branch of computer science focused on building smart machines.
Machine Learning (ML) is a subset of AI which enables machines to learn from data.
Deep Learning (DL) i...
Answer (based on context): Artificial Intelligence (AI) is a branch of computer science focused on building smart machines.
Machine Learning (ML) is a subset of AI which enables machines to learn from data.
Deep Learning (DL) i...


  context = retriever.get_relevant_documents(query)
