In [None]:
!pip install whisper python-docx Document gradio faiss-cpu PyPDF2 pikepdf

In [None]:
# 1. Install Dependencies
!pip install -q sentence-transformers faiss-cpu google-generativeai PyMuPDF

import os
import zipfile
import fitz  # PyMuPDF
import faiss
import logging
from sentence_transformers import SentenceTransformer
from google import genai
from textwrap import wrap

# 2. Configure Gemini API
# The key is read from the environment so it never has to be pasted into (and
# saved with) the notebook. When neither variable is set an empty string is
# passed through unchanged and the client decides how to report it.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY") or ""
client = genai.Client(api_key=GEMINI_API_KEY)
logging.basicConfig(level=logging.INFO)

# 3. Unzip Dataset
zip_path = "/content/dataset_NDE.zip"
extract_path = "/content/nde_data"
# Fail with an actionable message before creating any output directory.
if not os.path.isfile(zip_path):
    raise FileNotFoundError(
        f"Dataset archive not found at {zip_path}. Upload it before running this cell."
    )
os.makedirs(extract_path, exist_ok=True)
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# 4. Extract text from all PDFs
def extract_text_from_pdfs(folder):
    """Concatenate the text of every PDF found under ``folder``.

    The directory tree is walked recursively and files are matched on a
    case-insensitive ``.pdf`` extension. A file that cannot be opened or read
    is logged as a warning and skipped; text from pages that were read before
    the failure is kept.

    Args:
        folder: Root directory to search.

    Returns:
        One string holding the text of all pages in the order they were
        visited. Empty when no PDF yielded any text.
    """
    # Collect page texts and join once instead of growing a string with `+=`.
    page_texts = []
    for root, _, files in os.walk(folder):
        for file in files:
            if not file.lower().endswith(".pdf"):
                continue
            path = os.path.join(root, file)
            try:
                doc = fitz.open(path)
                try:
                    for page in doc:
                        page_texts.append(page.get_text())
                finally:
                    # Release the document even when a page fails mid-file.
                    doc.close()
            except Exception as e:
                logging.warning(f"Failed to extract {file}: {e}")
    return "".join(page_texts)

# Pull the text out of every PDF in the extracted dataset; abort if it is blank.
raw_text = extract_text_from_pdfs(extract_path)
if raw_text.strip() == "":
    raise ValueError("No text found in PDF files. Check dataset.")

# 5. Create Chunks & Embed
# The corpus is cut into pieces of at most 300 characters, and each piece is
# encoded into one embedding row.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
chunks = wrap(raw_text, width=300)
if len(chunks) == 0:
    raise ValueError("No chunks generated from extracted PDF text.")
embeddings = embed_model.encode(chunks, convert_to_numpy=True)

# 6. Build FAISS Index
# The index dimensionality is taken from the embedding matrix itself.
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings)

# 7. Chat Memory & Retrieval
# (question, answer) pairs are appended here by the chat functions below.
chat_history = list()

def retrieve_context(query, top_k=3):
    """Return the stored chunks closest to ``query`` in embedding space.

    Args:
        query: Text to search for.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` entries of ``chunks`` in the order produced by the
        index search. Fewer entries are returned when the index holds fewer
        vectors than requested, and an empty list when ``top_k`` is not
        positive.
    """
    if top_k <= 0:
        return []
    query_embedding = embed_model.encode([query], convert_to_numpy=True)
    _, neighbor_ids = index.search(query_embedding, top_k)
    # FAISS fills missing neighbours with -1; `chunks[-1]` would silently
    # return the last chunk, so those slots are dropped.
    return [chunks[i] for i in neighbor_ids[0] if i >= 0]

def ask_nde_bot(user_input, history_turns=5):
    """Answer a question with Gemini using retrieved context and chat memory.

    The prompt contains the chunks returned by ``retrieve_context`` and, when
    there are earlier turns, the most recent ``history_turns`` exchanges from
    ``chat_history`` so the model can actually refer back to them. With an
    empty history the prompt is identical to one without a history section.

    Args:
        user_input: The user's question.
        history_turns: How many previous (question, answer) pairs to include
            in the prompt. Zero or a negative value disables the history.

    Returns:
        The stripped answer text, or a fallback sentence when the response
        carries no text. The exchange is appended to ``chat_history`` and the
        answer is printed.
    """
    context_chunks = retrieve_context(user_input)
    context_text = "\n".join(context_chunks)

    recent_turns = chat_history[-history_turns:] if history_turns > 0 else []
    history_text = "\n".join(
        f"User: {question}\nAssistant: {reply}" for question, reply in recent_turns
    )
    # Empty string when there is no history, so the prompt layout is unchanged.
    history_section = f"\nPrevious conversation:\n{history_text}\n" if history_text else ""

    prompt = f"""
You are an expert NDE (Non-Destructive Evaluation) assistant chatbot. Be accurate, correct wrong statements, and recall previous conversation if asked.

Context:
{context_text}
{history_section}
Question:
{user_input}
"""
    response = client.models.generate_content(model="gemini-2.5-flash", contents=prompt)
    # `response.text` can be None (e.g. no text parts in the response); guard
    # before calling .strip().
    answer = (response.text or "").strip() or "Sorry, I could not generate an answer for that question."
    chat_history.append((user_input, answer))
    print("Bot:", answer)
    return answer

def recall_last_question():
    """Print the most recent question and its answer from ``chat_history``.

    Prints a notice instead when no exchange has been recorded yet.
    """
    if not chat_history:
        print("No previous questions found.")
        return
    latest = chat_history[-1]
    print("Last Question:", latest[0])
    print("Bot's Answer:", latest[1])

# 8. Start Chatting!
# Interactive loop: type a question, "recall" / "previous question" to see the
# last exchange, or "exit" / "quit" / "bye" to stop. Note that this cell blocks
# until the chat is ended.
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the cell or closing stdin ends the chat without a traceback.
        print("\nBot: Goodbye!")
        break

    if not user_input:
        # Nothing typed: do not send an empty question to the model.
        continue

    command = user_input.lower()
    if command in ["exit", "quit", "bye"]:
        print("Bot: Goodbye!")
        break
    elif "recall" in command or "previous question" in command:
        recall_last_question()
    else:
        try:
            ask_nde_bot(user_input)
        except Exception as e:
            # A failed request should not end the whole session.
            logging.error(f"Request failed: {e}")
            print("Bot: Sorry, something went wrong while answering. Please try again.")

In [None]:
# ✅ Voice Input–Enabled NDE Chatbot Using Whisper + Gemini

!pip install -q gradio openai-whisper google-generativeai

import whisper
import gradio as gr
from google import genai
import tempfile
import os

#  Set up Gemini client
# SECURITY: never hardcode the API key in this cell. A key saved in a notebook
# is readable by everyone who can open the file and must be treated as leaked
# (revoke and rotate it). The key is taken from the environment instead; when
# neither variable is set an empty string is passed and the client decides how
# to report it.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY") or ""
client = genai.Client(api_key=GEMINI_API_KEY)

#  Load Whisper model (base is fast + accurate)
model = whisper.load_model("base")

#  Function: Transcribe + Ask Gemini + Return Answer
def full_pipeline(audio_path):
    """Transcribe a recorded question with Whisper and answer it with Gemini.

    Args:
        audio_path: Path of the audio file to transcribe. May be ``None`` or
            empty, e.g. when the interface is submitted without a recording.

    Returns:
        A string with the transcript on a "You said:" line followed by the
        answer on a "Bot:" line. When no audio was supplied, no speech was
        transcribed, or any step raised, a string starting with "Error:" is
        returned instead; exceptions are not propagated to the caller.
    """
    if not audio_path:
        return "Error: no audio received. Please record a question first."

    try:
        # Transcribe with Whisper
        result = model.transcribe(audio_path)
        user_text = (result["text"] or "").strip()
        if not user_text:
            # Avoid sending an empty question to the model.
            return "Error: no speech was detected in the recording."

        # Ask Gemini
        prompt = f"You are an expert NDE (Non-Destructive Evaluation) assistant. Answer professionally: {user_text}"
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=prompt
        )
        # `response.text` can be None; guard before calling .strip().
        answer = (response.text or "").strip() or "Sorry, I could not generate an answer for that question."
        return f"You said: {user_text}\n\nBot: {answer}"

    except Exception as e:
        return f"Error: {e}"

#  Gradio Interface
# Audio input configured to hand the pipeline a file path.
question_audio = gr.Audio(type="filepath", label="🎙️ Speak your NDE question")

# Wire the audio input to the pipeline and show its string result as text.
interface_options = dict(
    fn=full_pipeline,
    inputs=question_audio,
    outputs="text",
    title="🔍 NDE Voice Assistant",
    description="Ask your NDE-related questions by voice.",
)
iface = gr.Interface(**interface_options)

# Start the app.
iface.launch(debug=True)
