[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Sciform/fhnw-mini-rag-system/blob/main/rag2.ipynb)


In [None]:
# Install the packages this notebook uses: LangChain and its Hugging Face /
# community integrations, FAISS (CPU build), sentence-transformers,
# transformers and the Hugging Face Hub client.
# Optional: uncomment the next line to upgrade the packaging tools first.
#%pip install --upgrade pip setuptools wheel
%pip install langchain langchain-huggingface langchain-community faiss-cpu sentence-transformers transformers huggingface_hub

In [None]:
import os
from getpass import getpass

# Import from the split-out integration packages rather than the legacy
# `langchain.*` re-export paths (langchain-community is installed above).
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# Hugging Face Hub API token (free signup).
# Keep a token that is already present in the environment; otherwise ask for
# it interactively so the secret is never stored in the notebook itself.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face access token: ")

In [None]:
# 1. Load documents

# Download the sample text files
!wget https://raw.githubusercontent.com/sciform/fhnw-mini-rag-system/main/docs/sample1.txt -P docs/
!wget https://raw.githubusercontent.com/sciform/fhnw-mini-rag-system/main/docs/sample2.txt -P docs/
!wget https://raw.githubusercontent.com/sciform/fhnw-mini-rag-system/main/docs/sample3.txt -P docs/


loader1 = TextLoader("docs/sample1.txt")
loader2 = TextLoader("docs/sample2.txt")
loader3 = TextLoader("docs/sample3.txt")
documents = loader1.load() + loader2.load() + loader3.load()

# 2. Split into chunks
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=10)
docs = text_splitter.split_documents(documents)

In [None]:
# 3. Embed and store in FAISS
# Name of the sentence-transformers model used to embed every chunk.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

# Build the FAISS vector store from the chunked documents and the embedding model.
vector_store = FAISS.from_documents(documents=docs, embedding=embeddings)

In [None]:
# Report how many entries the FAISS index of the vector store currently holds.
indexed_entry_count = vector_store.index.ntotal
print(f"Number of documents in vector store: {indexed_entry_count}")

In [None]:
# Build a similarity retriever that returns only the single best match.
# The number of results has to be supplied through search_kwargs; a bare
# k=... keyword given to as_retriever() is not a retriever setting.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},
)
query = "What is a blue whale ?"
retrieved_docs = retriever.invoke(query)

# Print the retrieved documents
print("Documents retrieved:")
for doc in retrieved_docs:
    print(f"Document: {doc.page_content}")

In [None]:
# 4. Use a free LLM (small one)
from langchain_huggingface import HuggingFaceEndpoint

# Candidate Hugging Face model repositories; only model_falcon_instruct is
# used below, the other two are kept as alternatives to try.
# NOTE(review): model_falcon_base is named "base" but its repo id ends in
# "-instruct" — confirm that this id is the one intended.
model_falcon_base = "tiiuae/falcon-rw-1b-instruct"
model_falcon_instruct = "ericzzz/falcon-rw-1b-instruct-openorca"
model_mistral = "mistralai/Mistral-7B-v0.1"

# Generation settings: cap the answer at 75 new tokens and use a low
# temperature of 0.1.
generation_settings = {
    "max_new_tokens": 75,
    "temperature": 0.1,
}

# Wrap the selected model in LangChain's HuggingFaceEndpoint class.
llm = HuggingFaceEndpoint(
    repo_id=model_falcon_instruct,
    task="text-generation",
    **generation_settings,
)

In [None]:
# 5. Create RetrievalQA chain
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    """Answer using only this context:
    {context}

    Question: {input}"""
)

document_chain = create_stuff_documents_chain(
    llm,
    prompt,
    document_separator="\n\n")

retriever = vector_store.as_retriever(search_type="similarity", k=1)

qa_chain = create_retrieval_chain(
    retriever,
    document_chain)


In [None]:
# 6. Ask something
query = "Who climbs Mount Everest?"
context = "Use only the most relevant document, if unsure say 'I don't know'. Answer as short as possible. Do not confuse animals."
result = qa_chain.invoke({"input": query}, {"context": context})


In [None]:
# Show the question followed by the answer generated by the chain.
answer_text = result["answer"]
print("\nQuestion:", query)
print(answer_text)

In [None]:
# Display the complete output returned by qa_chain.invoke(), not just the
# "answer" entry printed above.
result