## Import Libraries

In [1]:
import gradio as gr
import whisper   
from gtts import gTTS
import os
from tempfile import NamedTemporaryFile
from transformers import pipeline
from langchain.llms import Ollama
from langchain.embeddings import GPT4AllEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
import glob

## Load Models

In [2]:
# Load the "base" Whisper checkpoint once at module level; speech_to_text()
# reads this `model` global when it transcribes audio.
model = whisper.load_model("base")

In [3]:
# Named-entity pipeline; analyze_intent_and_entities() returns its raw output as "entities".
ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
# NOTE(review): facebook/bart-large-mnli is an NLI checkpoint, so the label read from this
# pipeline appears to be an NLI class rather than a user intent (the sample run in this
# notebook printed "Detected Intent: neutral"). Consider a zero-shot-classification
# pipeline with explicit candidate intents — TODO confirm.
intent_pipeline = pipeline("text-classification", model="facebook/bart-large-mnli")

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu
Device set to use cpu


## Create Vector Store

In [4]:
# Model name passed to the Ollama LLM when the conversation chain is built.
MODEL = "llama3.2"
# NOTE(review): db_name is not referenced by any cell shown here; kept in case later code uses it.
db_name = "vector_db"

# Each sub-directory of knowledge-base/ is treated as one document type.
# glob("knowledge-base/*") also matches plain files, which would otherwise be handed
# to DirectoryLoader as if they were folders, so keep directories only. Sorting makes
# the document (and therefore chunk) order reproducible across runs and machines.
folders = sorted(path for path in glob.glob("knowledge-base/*") if os.path.isdir(path))
text_loader_kwargs = {'encoding': 'utf-8'}

# Load every Markdown file and tag it with the name of the folder it came from.
documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
    folder_docs = loader.load()
    for doc in folder_docs:
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)

# Fail early with an actionable message instead of letting the vector-store
# construction below fail on an empty corpus.
if not documents:
    raise FileNotFoundError(
        "No Markdown documents found under 'knowledge-base/*/'. "
        "Run the notebook from the directory that contains the knowledge-base folder."
    )

# Split into overlapping chunks, embed them, and index the embeddings in FAISS.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)
embeddings = GPT4AllEmbeddings()
vectorstore = FAISS.from_documents(chunks, embedding=embeddings)

Created a chunk of size 1088, which is longer than the specified 1000


## Set LLM for Data Retrieval

In [5]:
# Chat model served by Ollama at the local base_url below; MODEL is set in the vector-store cell.
llm = Ollama(base_url="http://localhost:11434", model=MODEL)
# Retriever view over the FAISS index built above (default retriever settings).
retriever = vectorstore.as_retriever()
# Conversation history is stored under the 'chat_history' key and handed back as message objects.
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
# Chain that combines the LLM, the retriever and the memory; generate_response() calls its invoke().
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

  llm = Ollama(base_url="http://localhost:11434", model=MODEL)
  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)


## Manual Setup for ffmpeg

In [6]:
import os

# Directory that contains the ffmpeg executables. Set the FFMPEG_BIN_DIR environment
# variable to point at your own install; the fallback is the original author's local
# download location and will not exist on other machines.
FFMPEG_BIN_DIR = os.environ.get(
    "FFMPEG_BIN_DIR",
    r"C:/Users/bhudi/Downloads/ffm/ffmpeg-2025-02-20-git-bc1a3bfd2c-essentials_build/bin",
)

# Append the directory only if it is not already listed, so re-running this cell
# does not keep growing PATH. Also tolerates PATH being unset.
_current_path = os.environ.get("PATH", "")
if FFMPEG_BIN_DIR not in _current_path.split(os.pathsep):
    os.environ["PATH"] = os.pathsep.join(part for part in (_current_path, FFMPEG_BIN_DIR) if part)

In [7]:
def speech_to_text(audio_path):
    """Transcribe an audio file to text using the module-level Whisper ``model``.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The value stored under the ``"text"`` key of ``model.transcribe``'s result.

    Raises:
        FileNotFoundError: If ``audio_path`` does not exist.
        RuntimeError: If no ``ffmpeg`` executable can be found on PATH.
        Exception: Any other error is printed and then re-raised unchanged.
    """
    import shutil

    try:
        # Check if file exists
        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found at: {audio_path}")

        # Check if ffmpeg is available. A PATH lookup is enough here; there is no
        # need to spawn an ffmpeg process on every transcription just to test for it.
        if shutil.which("ffmpeg") is None:
            raise RuntimeError("ffmpeg not found. Please install ffmpeg and add it to PATH")

        # Now try transcription
        return model.transcribe(audio_path)["text"]

    except Exception as e:
        # Log for the notebook user, then let the caller decide how to handle it.
        print(f"Detailed error in speech_to_text: {str(e)}")
        raise

def verify_ffmpeg():
    """Verify ffmpeg installation.

    Runs ``ffmpeg -version`` and prints a one-line status message.

    Returns:
        bool: True if the command could be started and exited with status 0;
        False if it could not be started or reported a failure.
    """
    import subprocess

    try:
        completed = subprocess.run(['ffmpeg', '-version'], capture_output=True)
    except OSError:
        # Covers FileNotFoundError (not on PATH) as well as other launch failures
        # such as a file that exists but cannot be executed.
        print("ffmpeg is NOT installed or not in PATH")
        return False

    if completed.returncode != 0:
        # Something named ffmpeg was found, but it did not run successfully.
        print(f"ffmpeg was found but 'ffmpeg -version' exited with status {completed.returncode}")
        return False

    print("ffmpeg is installed and accessible")
    return True

# Stop early when ffmpeg is unavailable, since the audio steps below depend on it.
if not verify_ffmpeg():
    print("Please install ffmpeg before running this application")
    print("You can install it using: choco install ffmpeg")
    # Raise SystemExit directly instead of calling exit(): `exit` is a helper meant
    # for interactive sessions, whereas raising SystemExit(1) is what exit(1) does in
    # a plain interpreter and aborts this cell with a non-zero status.
    raise SystemExit(1)

ffmpeg is installed and accessible


## Function Definitions

In [8]:
def speech_to_text(audio_path):
    """Return the Whisper transcription of the audio file at ``audio_path``.

    NOTE(review): this redefinition shadows the earlier ``speech_to_text`` from the
    ffmpeg setup cell, so that version's file-existence and ffmpeg checks are not
    applied once this cell has run.
    """
    return model.transcribe(audio_path)["text"]

In [9]:
def analyze_intent_and_entities(text):
    """Run the NER and intent pipelines over ``text``.

    Args:
        text: The user utterance to analyze.

    Returns:
        tuple: ``(intent, entities)`` where ``intent`` is the ``'label'`` of the first
        prediction from ``intent_pipeline`` and ``entities`` is the unmodified output
        of ``ner_pipeline``.
    """
    ner_hits = ner_pipeline(text)
    top_prediction = intent_pipeline(text)[0]
    return top_prediction['label'], ner_hits

In [10]:
def generate_response(text):
    """Ask the retrieval conversation chain a question and return its answer.

    Args:
        text: The question, passed to the chain under the ``"question"`` key.

    Returns:
        The value stored under ``"answer"`` in the chain's result.
    """
    return conversation_chain.invoke({"question": text})["answer"]

In [11]:
def text_to_speech(text, lang="en", slow=False):
    """Synthesize ``text`` with gTTS and save it to a new temporary MP3 file.

    Args:
        text: The text to speak.
        lang: Language code forwarded to gTTS.
        slow: Forwarded to gTTS as its ``slow`` flag.

    Returns:
        str: Path of the saved ``.mp3`` file. The file is created with
        ``delete=False``, so it persists until the caller removes it.
    """
    tts = gTTS(text, lang=lang, slow=slow)
    # The temporary file is only used to reserve a unique path. Close our own handle
    # before gTTS writes to that path so the descriptor is not leaked and the file
    # is not written while it is still held open here.
    with NamedTemporaryFile(suffix=".mp3", delete=False) as output_audio:
        output_path = output_audio.name
    tts.save(output_path)
    return output_path

In [12]:
def chatbot_pipeline(audio_path=None, text_input=None):
    """Handle one chatbot turn from either recorded audio or typed text.

    When ``audio_path`` is given it is transcribed and the transcription replaces
    ``text_input``. The query is then analyzed for intent and entities, answered by
    the conversation chain, and the answer is converted to speech.

    Args:
        audio_path: Optional path of a recorded audio file.
        text_input: Optional typed query.

    Returns:
        tuple: ``(response_text, response_audio_path, intent, entities)``. On any
        failure the first element is an ``"Error: ..."`` message and the remaining
        three are ``None``.
    """
    no_results = (None, None, None)
    try:
        query = text_input

        # Audio, when supplied, takes precedence over the typed text.
        if audio_path:
            print(f"Received audio path: {audio_path}")
            if not os.path.exists(audio_path):
                return ("Error: Audio file not found",) + no_results
            query = speech_to_text(audio_path)
            print(f"Transcription: {query}")

        if not query:
            return ("Error: No input provided",) + no_results

        intent, entities = analyze_intent_and_entities(query)
        print(f"Detected Intent: {intent}")
        print(f"Extracted Entities: {entities}")

        reply = generate_response(query)
        print(f"Generated Response: {reply}")

        reply_audio = text_to_speech(reply)
        print(f"Response Audio Path: {reply_audio}")

        return reply, reply_audio, intent, entities
    except Exception as e:
        # Surface the failure in the UI's text box instead of crashing the handler.
        return (f"Error: {str(e)}",) + no_results

## Gradio User Interface

In [13]:
# Input components, in the same order as chatbot_pipeline's parameters
# (audio_path, text_input).
input_components = [
    gr.Audio(type="filepath", label="Speak"),
    gr.Textbox(label="Type your query"),
]

# Output components, in the same order as chatbot_pipeline's return tuple
# (response text, response audio path, intent, entities).
output_components = [
    gr.Textbox(label="Response Text"),
    gr.Audio(label="Response Audio"),
    gr.Textbox(label="Detected Intent"),
    gr.Textbox(label="Extracted Entities"),
]

iface = gr.Interface(
    fn=chatbot_pipeline,
    inputs=input_components,
    outputs=output_components,
    title="NLU-Enhanced RAG Chatbot with Voice & Text",
    allow_flagging='never',
)

# Start the local server; inbrowser=True asks Gradio to open the UI in a browser tab.
iface.launch(inbrowser=True)




* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




Detected Intent: neutral
Extracted Entities: []
Generated Response: Based on the provided context, here is an overview of Avery Lancaster:

Avery Lancaster has been a key figure in the growth and success of Insurellm, an insurance technology company. She has demonstrated resilience, adaptability, and leadership skills throughout her career, navigating challenges such as market competition, pandemic-related operational difficulties, and employee concerns.

Avery's tenure at Insurellm spans from 2015 to present, with notable achievements including:

* Launching two successful products that significantly increased market share in 2018
* Regaining market leadership through innovative approaches to personalized insurance solutions in 2023
* Fostering a commitment to diversity and inclusion, improving team representation since 2021
* Implementing flexible working conditions and regular check-ins to address work-life balance concerns

Avery's professional development has been significant, wit