## Deployment Using Gradio for Text and Audio files

## Step1: Import Libraries

In [1]:
!pip install -U langchain langchain-community langchain-openai sentence-transformers faiss-cpu gradio --quiet
!pip install openai-whisper --quiet

In [None]:
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_openai import OpenAI
from google.colab import drive
from google.colab import userdata
from pathlib import Path
from tqdm.notebook import tqdm
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader
from langchain_openai import ChatOpenAI
from pathlib import Path
from langchain.docstore.document import Document
import gradio as gr
from transformers import pipeline
from typing import Union, List, Dict, Tuple

In [3]:
# Attach Google Drive: the transcript folder and the FAISS store used below
# both live under /content/drive/MyDrive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Step 2: Split the Transcripts into Chunks

In [4]:
# Read every transcript (*.txt) from the Drive folder and split it into
# overlapping chunks; each chunk becomes a Document tagged with its subject.
transcript_folder = Path("/content/drive/MyDrive/ServiceNow_Audio_Transcripts")
transcript_folder.mkdir(parents=True, exist_ok=True)
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

# Materialise and sort the paths: a list gives tqdm a total for the progress
# bar (a bare glob generator shows "0it") and makes the chunk order reproducible.
transcript_paths = sorted(transcript_folder.glob("*.txt"))

documents: list[Document] = []
for txt_path in tqdm(transcript_paths, desc="Chunking transcripts"):
    raw_text = txt_path.read_text(encoding="utf-8")
    # Subject = first non-blank line of the transcript; fall back to the file
    # name when the file is empty or contains only whitespace.
    subject = next(
        (line.strip() for line in raw_text.splitlines() if line.strip()),
        txt_path.stem,
    )
    documents.extend(
        Document(page_content=chunk, metadata={"source": subject})
        for chunk in splitter.split_text(raw_text)
    )
print(f"✅ Loaded and chunked {len(documents)} document chunks from {transcript_folder}.")

Chunking transcripts: 0it [00:00, ?it/s]

✅ Loaded and chunked 656 document chunks from /content/drive/MyDrive/ServiceNow_Audio_Transcripts.


## Step 3: Embeddings & Vector Store

In [None]:
# Embed the chunks and obtain a FAISS index: reuse the index cached on Drive
# when it exists, otherwise build it from `documents` and cache it, so the
# notebook also works on a fresh run where no index has been saved yet.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
faiss_dir = Path("/content/drive/MyDrive/faiss_store")
faiss_dir.mkdir(parents=True, exist_ok=True)

if (faiss_dir / "index.faiss").exists():
    # NOTE: load_local unpickles the stored docstore. Keep
    # allow_dangerous_deserialization=True only for index files you created
    # yourself; never load an index from an untrusted source.
    faiss_index = FAISS.load_local(
        str(faiss_dir),
        embedding_model,
        allow_dangerous_deserialization=True
    )
else:
    if not documents:
        raise ValueError(
            f"No FAISS index found in {faiss_dir} and no document chunks are "
            "available to build one. Run the chunking step on a non-empty "
            "transcript folder first."
        )
    faiss_index = FAISS.from_documents(documents, embedding_model)
    faiss_index.save_local(str(faiss_dir))

# Retrieve the 5 most similar chunks for every question.
retriever = faiss_index.as_retriever(search_type="similarity", search_kwargs={"k": 5})

  embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


## Step 4: LLM & Retrieval Chain

In [None]:
# Deterministic (temperature 0) OpenAI completion model; the API key is read
# from the Colab secret named OPENAI_API_KEY.
llm = OpenAI(openai_api_key=userdata.get("OPENAI_API_KEY"), temperature=0)

# Retrieval-augmented QA chain over `retriever`; the retrieved documents are
# returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    return_source_documents=True,
    retriever=retriever,
    llm=llm,
)

## Step 5: Initialize the Whisper Speech-to-Text Pipeline

In [None]:
# Speech-recognition pipeline (openai/whisper-small) used to turn spoken
# questions into text.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",
)

def answer_question(text_input: str, audio_input: Union[str, dict, None], chat_history: List[Dict]) -> List[Dict]:
    """Answer a typed or spoken question and return the updated chat history.

    When ``audio_input`` is a non-empty file path it is transcribed with
    ``transcriber`` and the transcript is used as the question. If there is no
    audio, or the transcript is empty, the typed ``text_input`` is used
    instead. The question is sent to ``qa_chain``; the answer, followed by a
    short preview of each retrieved source chunk, is added to the conversation.

    Args:
        text_input: Question typed by the user; may be empty or None.
        audio_input: Path of the audio file holding the spoken question, or
            None. Non-string values are ignored.
        chat_history: Conversation so far, as a list of
            ``{"role": ..., "content": ...}`` dicts; may be empty or None.

    Returns:
        A new list with the previous messages plus the new ones. The list
        passed in is never modified.
    """
    # Work on a copy so every code path treats the caller's list the same way.
    history = list(chat_history or [])

    question = ""
    if isinstance(audio_input, str) and audio_input:
        try:
            result = transcriber(audio_input)
        except Exception as e:
            # An unreadable audio file should produce a chat message, not a crash.
            history.append({"role": "assistant", "content": f"❌ Error transcribing audio: {e}"})
            return history
        question = (result.get("text") or "").strip()
    # Fall back to the typed text when there is no audio or nothing was recognised.
    if not question and text_input:
        question = text_input.strip()

    if not question:
        history.append({"role": "assistant", "content": "❌ Please ask a question via text or audio."})
        return history

    try:
        response = qa_chain.invoke({"query": question})
        answer = response.get("result", "❌ No answer found.")

        sources = []
        for doc in response.get("source_documents", []):
            preview = doc.page_content[:200]
            source = doc.metadata.get("source", "unknown")
            sources.append(f"📄 {source}\n🔎 {preview}")

        # Show the retrieved sources under the answer (they were previously
        # collected but never displayed).
        full_answer = answer
        if sources:
            full_answer += "\n\n" + "\n\n".join(sources)

        history.append({"role": "user", "content": question})
        history.append({"role": "assistant", "content": full_answer})
        return history

    except Exception as e:
        history.append({"role": "assistant", "content": f"❌ Error from QA chain: {e}"})
        return history

Device set to use cuda:0


In [None]:
# Build the Gradio UI: a chat window plus a text box and an audio input, both
# wired to answer_question through a single "Ask" button.
with gr.Blocks() as demo:
    gr.HTML("<h1 style='text-align: center;'>ServiceNow QA Assistant</h1>")
    gr.Markdown("<center>Type or record your question below. The bot will provide you answer</center>")
    # Conversation view using role/content message dicts, seeded with a greeting.
    chatbot = gr.Chatbot(label="💬 ServiceNow Assistant", type="messages", value=[
        {"role": "assistant", "content": "👋 Hi! Ask me anything about ServiceNow (text or voice)."}
    ])
    with gr.Row():
        text_input = gr.Textbox(placeholder="Type your ServiceNow question here...", label="📝 Text")
        # Audio is requested as a file path, matching the string check in
        # answer_question. When both inputs are filled, answer_question uses the audio.
        audio_input = gr.Audio(type="filepath", label="🎙️ Upload Your Voice")
    submit_btn = gr.Button("🔍 Ask")

    # The current chat history goes in as the third argument and is replaced
    # by the list that answer_question returns.
    submit_btn.click(fn=answer_question, inputs=[text_input, audio_input, chatbot], outputs=chatbot)

# Start the app.
demo.launch()

Device set to use cuda:0


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5071bbae932a5f3001.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




## Step 6: Save the Code to a Python File for Deployment on Hugging Face

In [None]:
# Write a standalone app.py for deployment outside Colab.
#
# Why this differs from the notebook cells above:
# * The template is a RAW string (r'''...'''). In a normal string every "\n"
#   inside the template is turned into a real line break, which splits the
#   string literals of the generated file and makes app.py a SyntaxError.
# * The generated file must not depend on Colab: the API key is read from the
#   OPENAI_API_KEY environment variable (set it as a secret on the hosting
#   platform) and the FAISS index is looked up in a "faiss_store" folder next
#   to app.py, or in the folder named by the FAISS_DIR environment variable.
#   Upload the index folder together with app.py.
# * The file is written as UTF-8 because the template contains emoji.
code = r'''import os
from pathlib import Path
from typing import Union, List, Dict

import gradio as gr
from transformers import pipeline

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAI
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document

embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Location of the saved FAISS index (index.faiss / index.pkl).
faiss_dir = Path(os.environ.get("FAISS_DIR", str(Path(__file__).parent / "faiss_store")))
if not (faiss_dir / "index.faiss").exists():
    raise FileNotFoundError(
        f"❌ FAISS index not found in {faiss_dir}. Upload the faiss_store folder "
        "next to app.py or set the FAISS_DIR environment variable."
    )

# NOTE: load_local unpickles the stored docstore; only load index files you created yourself.
faiss_index = FAISS.load_local(
    str(faiss_dir),
    embedding_model,
    allow_dangerous_deserialization=True
)

retriever = faiss_index.as_retriever(search_type="similarity", search_kwargs={"k": 5})

openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("❌ Set your OPENAI_API_KEY in environment or secrets.")

llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)

transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-small")


def answer_question(text_input: str, audio_input: Union[str, dict, None], chat_history: List[Dict]) -> List[Dict]:
    """Answer a typed or spoken question and return the updated chat history.

    Audio (a file path) is transcribed and used as the question; when there is
    no audio or nothing was recognised, the typed text is used instead. The
    answer is followed by a short preview of each retrieved source chunk.
    The list passed in as chat_history is never modified.
    """
    history = list(chat_history or [])

    question = ""
    if isinstance(audio_input, str) and audio_input:
        try:
            result = transcriber(audio_input)
        except Exception as e:
            history.append({"role": "assistant", "content": f"❌ Error transcribing audio: {e}"})
            return history
        question = (result.get("text") or "").strip()
    if not question and text_input:
        question = text_input.strip()

    if not question:
        history.append({"role": "assistant", "content": "❌ Please ask a question via text or audio."})
        return history

    try:
        response = qa_chain.invoke({"query": question})
        answer = response.get("result", "❌ No answer found.")

        sources = []
        for doc in response.get("source_documents", []):
            preview = doc.page_content[:200]
            source = doc.metadata.get("source", "unknown")
            sources.append(f"📄 {source}\n🔎 {preview}")

        full_answer = answer
        if sources:
            full_answer += "\n\n" + "\n\n".join(sources)

        history.append({"role": "user", "content": question})
        history.append({"role": "assistant", "content": full_answer})
        return history

    except Exception as e:
        history.append({"role": "assistant", "content": f"❌ Error from QA chain: {e}"})
        return history


with gr.Blocks() as demo:
    gr.HTML("<h1 style='text-align: center;'>ServiceNow QA Assistant</h1>")
    gr.Markdown("<center>Type or record your question below. The bot will provide you answer</center>")
    chatbot = gr.Chatbot(label="💬 ServiceNow Assistant", type="messages", value=[
        {"role": "assistant", "content": "👋 Hi! Ask me anything about ServiceNow (text or voice)."}
    ])
    with gr.Row():
        text_input = gr.Textbox(placeholder="Type your ServiceNow question here...", label="📝 Text")
        audio_input = gr.Audio(type="filepath", label="🎙️ Upload Your Voice")
    submit_btn = gr.Button("🔍 Ask")

    submit_btn.click(fn=answer_question, inputs=[text_input, audio_input, chatbot], outputs=chatbot)

demo.launch()
'''
with open("app.py", "w", encoding="utf-8") as f:
    f.write(code)

In [None]:
# Dependency list for the deployed app, one package per line, written to
# requirements.txt in the current working directory.
requirements = "\n".join([
    "gradio",
    "langchain",
    "langchain-openai",
    "langchain-community",
    "langchain-core",
    "sentence-transformers",
    "transformers",
    "faiss-cpu",
    "openai",
    "yt-dlp",
    "python-dotenv",
]) + "\n"
with open("requirements.txt", "w") as req_file:
    req_file.write(requirements)

In [None]:
# Download the two generated deployment files from the Colab runtime.
from google.colab import files

for artifact_name in ("app.py", "requirements.txt"):
    files.download(artifact_name)