In [1]:
# Re-export Mini_Rag.ipynb in notebook format with the ClearOutputPreprocessor enabled.
# NOTE(review): the output recorded for this cell is nbconvert's usage/help text rather than
# a conversion log, so the command apparently converted nothing — presumably Mini_Rag.ipynb
# was not found in the working directory; confirm the path before relying on this cell.
!jupyter nbconvert --to notebook --ClearOutputPreprocessor.enabled=True Mini_Rag.ipynb


This application is used to convert notebook files (*.ipynb)
        to various other formats.


Options
The options below are convenience aliases to configurable class-options,
as listed in the "Equivalent to" description-line of the aliases.
To see all configurable class-options for some <cmd>, use:
    <cmd> --help-all

--debug
    set log level to logging.DEBUG (maximize logging output)
    Equivalent to: [--Application.log_level=10]
--show-config
    Show the application's configuration (human-readable format)
    Equivalent to: [--Application.show_config=True]
--show-config-json
    Show the application's configuration (json format)
    Equivalent to: [--Application.show_config_json=True]
--generate-config
    generate default config file
    Equivalent to: [--JupyterApp.generate_config=True]
-y
    Answer yes to any questions instead of prompting.
    Equivalent to: [--JupyterApp.answer_yes=True]
--execute
    Execute the notebook prior to export.
    Equivalent to: [--ExecutePr

# Assessment 1: Mini RAG Pipeline with ChromaDB on Colab

Instructions:
- Use Google Colab for the task.
- Take 3–5 short documents (you can hardcode them).
- Preprocess the text by converting to lowercase and removing punctuation.
- Embed the documents using any method you prefer (e.g., sentence-transformers, HuggingFace, or other embedding techniques).
- Store the embeddings in ChromaDB or FAISS (use ChromaDB for simplicity).
- Accept a user query and retrieve the most relevant document based on cosine similarity.
- Generate a simple response by outputting the most relevant document.

In [None]:
# Installing Liabraries
!pip install sentence-transformers chromadb


In [None]:
# Corpus: 3–5 short, hard-coded documents to index.
# Every punctuation mark must be followed by a space: the preprocessing step deletes
# punctuation outright, so "Learning.and" would collapse into the single garbage
# token "learningand" and distort that document's embedding.
documents = [
    "Artificial Intelligence enables machines to think.",
    "Machine Learning is a subset of Artificial Intelligence. It learns from the patterns of data.",
    "Deep Learning is a subset of Machine Learning, and it uses neural networks for learning.",
]

In [None]:
# Normalise every document: lower-case it, then drop each character that is
# neither a word character nor whitespace (i.e. the punctuation).
import re

clean_docs = [re.sub(r'[^\w\s]', '', text.lower()) for text in documents]


In [None]:
# Embed the cleaned documents with a pretrained sentence-transformers model.
from sentence_transformers import SentenceTransformer

EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# `model` is reused further down to embed the query, so documents and query
# are encoded by the same model.
model = SentenceTransformer(EMBEDDING_MODEL_NAME)
doc_embeddings = model.encode(clean_docs)

In [None]:
# Store the document embeddings in a Chroma collection.
import chromadb

client = chromadb.Client()

# get_or_create_collection (instead of create_collection) keeps this cell re-runnable:
# create_collection raises when "mini_rag" already exists in the running kernel.
# "hnsw:space": "cosine" makes the collection rank by cosine distance, which is what
# the assignment asks for; Chroma's default metric is squared L2.
collection = client.get_or_create_collection(
    name="mini_rag",
    metadata={"hnsw:space": "cosine"},
)

# Write all documents in a single batched call. upsert (rather than add) overwrites
# the same ids on a re-run instead of trying to insert duplicates.
# Each id is the document's position as a string, so ids, embeddings and documents
# line up index by index. Embeddings are converted to plain Python lists, the
# representation accepted by every Chroma version.
collection.upsert(
    ids=[str(i) for i in range(len(doc_embeddings))],
    embeddings=[embedding.tolist() for embedding in doc_embeddings],
    documents=clean_docs,
)


In [None]:
# Build the query and embed it.
# The query gets the same normalisation as the documents
# (lower-cased, punctuation stripped) before it is embedded.
raw_query = "What is machine learning"
query = re.sub(r'[^\w\s]', '', raw_query.lower())

# encode() receives a one-element list, so the result holds one embedding: the query's.
query_embedding = model.encode([query])


In [None]:
# Retrieve the single closest document for the query embedding.
# The ranking uses whichever distance metric the collection was created with.
TOP_K = 1
result = collection.query(n_results=TOP_K, query_embeddings=query_embedding)


In [None]:
# Respond with the text of the best match.
# result["documents"] is indexed per query first; only one query was sent, hence [0].
best_matches = result["documents"][0]
print("Answer:", best_matches[0])
