In [1]:
!pip install nbstripout
!nbstripout --clear smart_ai_chatbot.ipynb


usage: nbstripout [-h]
                  [--dry-run | --install | --uninstall | --is-installed | --status | --version]
                  [--verify] [--keep-count] [--keep-output] [--keep-id]
                  [--extra-keys EXTRA_KEYS]
                  [--keep-metadata-keys KEEP_METADATA_KEYS]
                  [--drop-empty-cells] [--drop-tagged-cells DROP_TAGGED_CELLS]
                  [--strip-init-cells] [--attributes FILEPATH]
                  [--global | --system | --python PATH] [--force]
                  [--max-size SIZE] [--mode {jupyter,zeppelin}] [--textconv]
                  [files ...]
nbstripout: error: unrecognized arguments: --clear


In [2]:
!pip install langchain==0.1.12
!pip install langchain-community
!pip install sentence-transformers
!pip install faiss-cpu
!pip install groq



In [3]:
import getpass
import os
from typing import Optional, List

from groq import Groq
from langchain.chains import RetrievalQA
from langchain.embeddings.base import Embeddings
from langchain.llms.base import LLM
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from sentence_transformers import SentenceTransformer

In [4]:
# Directory the campus text files are written to and later read back from.
DATA_PATH = "/content/data"
# Presumably where a FAISS index would be persisted.
# NOTE(review): no cell shown here references this constant — confirm it is still needed.
INDEX_PATH = "/content/faiss_index"


In [5]:
# Seed the knowledge base: one small plain-text file per campus topic.
os.makedirs(DATA_PATH, exist_ok=True)

# Maps file name -> file body. Each body is a handful of one-line facts.
files = {
    "academics.txt": """The Department of Computer Science and Engineering is located near the Admin Block.
ECE department is beside the main auditorium.
Mechanical Engineering block is near the workshop area.
The main academic buildings operate from 8:30 AM to 5:30 PM.""",

    "hostels.txt": """Hostel A to G are boys hostels.
Hostel H and I are girls hostels.
Hostel entry closes at 10 PM for all residents.""",

    "mess.txt": """Breakfast: 7:30 AM to 9 AM.
Lunch: 12 PM to 2 PM.
Snacks: 4:30 PM to 5:30 PM.
Dinner: 7 PM to 9 PM.
The mess is located beside Hostel C.""",

    "library.txt": """The central library is located behind the main academic block.
Library hours: 8 AM to 8 PM.
The library has 3 floors with study rooms and digital resources.""",

    "campus.txt": """Medical center operates 24/7.
ATM service available near the Admin Block.
Main canteen is located beside the auditorium.
Sports complex is near the west gate."""
}

for name, content in files.items():
    # Explicit UTF-8 so the files are written the same way regardless of the
    # platform's default locale encoding; os.path.join matches the loader.
    with open(os.path.join(DATA_PATH, name), "w", encoding="utf-8") as f:
        f.write(content)

print("Files created!")


Files created!


In [6]:
# Read the key without echoing it. `input()` echoes whatever is typed into the
# cell output, and that output is saved (and shared) with the notebook.
# strip() drops stray whitespace picked up when the key is pasted.
os.environ["GROQ_API_KEY"] = getpass.getpass("Enter Groq API key: ").strip()

Enter Groq API key: ··········


In [7]:
class GroqLLM(LLM):
    """LangChain LLM wrapper that sends a prompt to Groq's chat-completions API.

    Each call sends the prompt as a single user message with temperature 0 and
    returns the text of the first choice in the response.
    """

    # Groq model identifier sent with every request.
    model: str = "llama-3.3-70b-versatile"

    @property
    def _llm_type(self) -> str:
        """Return the identifier string for this LLM implementation."""
        return "groq_llm"

    def _call(self, prompt: str, stop: Optional[List[str]] = None, run_manager=None, **kwargs) -> str:
        """Send ``prompt`` to Groq and return the completion text.

        Args:
            prompt: Full prompt text, sent as one user message.
            stop: Optional stop sequences, forwarded to the API when given.
            run_manager: Accepted for compatibility with the base ``_call``
                signature; not used.
            **kwargs: Accepted for compatibility; ignored.

        Returns:
            The content of the first choice, or an empty string if the
            response carries no content.

        Raises:
            KeyError: If the ``GROQ_API_KEY`` environment variable is not set.
        """
        client = Groq(api_key=os.environ["GROQ_API_KEY"])
        request = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0,
        }
        # Only send `stop` when the caller supplied sequences; previously the
        # argument was accepted but silently dropped.
        if stop:
            request["stop"] = stop
        response = client.chat.completions.create(**request)
        # message.content may be None; callers expect a string.
        return response.choices[0].message.content or ""

In [8]:
class MyEmbeddings(Embeddings):
    """LangChain embeddings adapter backed by a SentenceTransformer model."""

    def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2"):
        """Load the SentenceTransformer model identified by ``model_name``."""
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of texts and return one vector per text.

        The encoder output is converted with ``tolist()`` so callers receive
        plain Python lists of floats rather than a NumPy array.
        """
        return self.model.encode(texts, show_progress_bar=True).tolist()

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string and return its vector as a list of floats."""
        # encode() takes a batch, so wrap the query and unwrap the single row.
        return self.model.encode([text])[0].tolist()


In [9]:
def load_documents():
    """Read every regular file directly under ``DATA_PATH`` and return their text.

    NOTE: a later cell defines ``load_documents`` again; when the notebook runs
    top to bottom, that later definition replaces this one.

    Returns:
        A list with one string per file, ordered by file name.
    """
    docs = []
    # Sorted so the document order does not depend on the OS listing order.
    for name in sorted(os.listdir(DATA_PATH)):
        file_path = os.path.join(DATA_PATH, name)
        # Skip anything that is not a regular file: open() on a directory raises.
        if not os.path.isfile(file_path):
            continue
        with open(file_path, "r", encoding="utf-8") as f:
            docs.append(f.read())
    return docs


In [10]:
def load_documents():
    """Return the text of each regular file found directly in ``DATA_PATH``.

    Sub-directories and other non-file entries are skipped. Files are decoded
    as UTF-8 and returned in file-name order, so repeated runs produce the
    same document sequence.

    Returns:
        A list of strings, one per file.
    """
    docs = []
    for name in sorted(os.listdir(DATA_PATH)):
        file_path = os.path.join(DATA_PATH, name)
        if os.path.isfile(file_path):  # directories cannot be opened as text
            # Explicit encoding: the default depends on the platform locale.
            with open(file_path, "r", encoding="utf-8") as f:
                docs.append(f.read())
    return docs

def build_vector_db(chunk_size=300, chunk_overlap=50):
    """Split the loaded documents into chunks and index them in FAISS.

    Args:
        chunk_size: Maximum chunk size passed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the splitter.

    Returns:
        The FAISS vector store built from the chunks.

    Raises:
        ValueError: If there is nothing to index (no documents, or only empty ones).
    """
    docs = load_documents()
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = splitter.create_documents(docs)

    # Fail with a clear message instead of an obscure error from inside the
    # index construction when there is no text to embed.
    if not chunks:
        raise ValueError(f"No document text found in {DATA_PATH!r}; cannot build the vector index.")

    return FAISS.from_documents(chunks, MyEmbeddings())


# Build the index once; `ask` reads this module-level `vector_db`.
vector_db = build_vector_db()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [11]:
# Prompt sent to the LLM. `ask` fills `{question}` and `{context}` via str.format.
# The instructions restrict the model to the supplied context and give it a
# fixed fallback sentence for questions the context does not answer.
prompt_template = """
You are a campus navigation assistant.

Answer ONLY from the provided context.
Give short, direct answers.
If answer is not in context, say: "I don't know based on the campus data."

Question: {question}

Context:
{context}

Answer:
"""

In [12]:
def ask(question, k=2):
    """Answer a campus question from the indexed documents and print the reply.

    Retrieves the ``k`` most similar chunks from ``vector_db``, joins them into
    the context slot of ``prompt_template``, sends the prompt to ``GroqLLM`` and
    prints the answer under a header line.

    Args:
        question: The user's question.
        k: Number of chunks to retrieve as context (default 2).

    Returns:
        None. The answer is printed rather than returned.
    """
    retriever = vector_db.as_retriever(search_kwargs={"k": k})
    # `invoke` replaces the deprecated `get_relevant_documents(...)` and
    # `llm(...)` call styles, which emit deprecation warnings.
    docs = retriever.invoke(question)
    context = "\n".join(d.page_content for d in docs)

    final_prompt = prompt_template.format(question=question, context=context)
    answer = GroqLLM().invoke(final_prompt)

    # "\U0001F4CC" is the pushpin emoji, written as an escape so it cannot be
    # corrupted if the file is re-saved in a different encoding.
    print("\n\U0001F4CC **Answer:**")
    print(answer)
    print("\n-----------------------")


In [13]:
# Schedule lookup: the dinner hours appear in mess.txt.
ask("When is dinner time?")

  warn_deprecated(
  warn_deprecated(



📌 **Answer:**
Dinner time is 7 PM to 9 PM.

-----------------------


In [14]:
# Category lookup: the hostel assignments appear in hostels.txt.
ask("Which hostels are for girls?")


📌 **Answer:**
Hostel H and I.

-----------------------


In [15]:
# Location lookup: the sports complex is mentioned in campus.txt.
ask("Where is the sports complex?")


📌 **Answer:**
Near the west gate.

-----------------------


In [16]:
# Location lookup: the library's position is stated in library.txt.
ask("Where is the library located?")


📌 **Answer:**
The central library is located behind the main academic block.

-----------------------


In [17]:
# Out-of-scope question: none of the data files mention student counts, so the
# prompt's fallback sentence is the expected reply.
ask("how many sudents are there in the campus")


📌 **Answer:**
I don't know based on the campus data.

-----------------------


In [18]:
# Multi-source question: both academics.txt and campus.txt mention the Admin Block.
ask("I am near the Admin Block. What facilities are close to me?")


📌 **Answer:**
Department of Computer Science and Engineering, and ATM service are close to you.

-----------------------
