In [1]:
# setting up the path
import sys
import os

sys.path.append(os.path.abspath(".."))

In [12]:
# importing necessary libraries
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

import pyttsx3


In [3]:
# load data
loader = UnstructuredHTMLLoader(file_path="../data/mg-zs-warning-messages.html")
car_docs = loader.load()

In [4]:
# split data into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=50)
splits = text_splitter.split_documents(car_docs)

In [5]:
# loaclly create embeddings and vectorstore
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = Chroma.from_documents(documents=splits,embedding=embeddings)



In [6]:
# setup retriever
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

In [7]:
# local LLM setup, runs on CPU, can be changed to a smaller model to run on a laptop
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=150,
    do_sample=False
)

llm = HuggingFacePipeline(pipeline=pipe)

  llm = HuggingFacePipeline(pipeline=pipe)


In [8]:
# RAG prompt template
prompt = ChatPromptTemplate.from_template(
    """You are an assistant for question-answering tasks.
Use the following context to answer the question.
If you don't know the answer, say you don't know.

Question: {question}
Context: {context}

Answer:"""
)


In [9]:
# build offline RAG chain
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
)


In [10]:
# question
question = "What does the brake system warning mean?"


In [21]:
# ask question
response = rag_chain.invoke(question)

# extract text safely
answer_text = response.content if hasattr(response, "content") else str(response)

# print text answer
print(f"Answer:\n{answer_text}.")

# initialize TTS engine
engine = pyttsx3.init()

# adjust voice speed (default ~200)
engine.setProperty("rate", 200)

# adjust volume (0.0 to 1.0)
engine.setProperty("volume", 1.0)

# set 0 or 1 for different voices
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[1].id)

# speak immediately
engine.say(answer_text)
engine.runAndWait()

Answer:
Brake Fault Indicates that the brake fluid could be low or a fault has been detected in the Electronic Brake-force Distribution (EBD) system.


#### **Note:** Prototype is done, implement further requirements, and build app for deployment.