In [12]:
import PyPDF2
import sounddevice as sd
from scipy.io.wavfile import write
import whisper
import pyttsx3
import os
import torch
from transformers import BertTokenizer, BertForQuestionAnswering
from langdetect import detect

In [13]:
# Step 1: Extract text from PDF
def extract_text_from_pdf(pdf_path):
    with open(pdf_path, 'rb') as f:
        reader = PyPDF2.PdfReader(f)
        text = ""
        for page in reader.pages:
            text += page.extract_text()
    return text

In [14]:
# Step 2: Record voice
def record_audio(filename="question.wav", duration=5, fs=44100):
    print("🎤 Recording your question (Speak now)...")
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1)
    sd.wait()
    write(filename, fs, recording)
    print("✅ Recording saved.")

In [15]:
# Step 3: Transcribe using Whisper
def get_voice_input_whisper():
    record_audio()
    model = whisper.load_model("base")
    result = model.transcribe("question.wav")
    question_text = result["text"]
    print("🗣 You asked:", question_text)
    return question_text

In [16]:
# Step 4: Detect language
def detect_language(text):
    lang = detect(text)
    if lang.startswith("ml"):  # langdetect may return 'ml'
        return "ml"
    elif lang.startswith("fr"):
        return "fr"
    else:
        return "en"

In [17]:
# Step 5: QA using BERT (English only for now)
def answer_with_bert(context, question):
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model = BertForQuestionAnswering.from_pretrained("bert-base-uncased")

    inputs = tokenizer(question, context, return_tensors="pt", truncation=True, max_length=512)
    input_ids = inputs["input_ids"].tolist()[0]

    with torch.no_grad():
        outputs = model(**inputs)
        answer_start = torch.argmax(outputs.start_logits)
        answer_end = torch.argmax(outputs.end_logits) + 1
        answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))
    return answer

In [18]:
# Step 6: Speak answer using TTS
def speak_text(text, lang_code="en"):
    engine = pyttsx3.init()

    voices = engine.getProperty('voices')
    lang_map = {
        'en': 'english',
        'fr': 'french',
        'ml': 'malayalam'
    }

    for voice in voices:
        if lang_map[lang_code].lower() in voice.name.lower():
            engine.setProperty('voice', voice.id)
            break

    engine.say(text)
    engine.runAndWait()
    engine.stop()

In [None]:
# Step 7: Multilingual answer handler
def main():
    # Load PDF (in any language)
    context = extract_text_from_pdf("english_doc.pdf")

    # Voice input
    question = get_voice_input_whisper()
    lang = detect_language(question)
    print(f"🌐 Detected language: {lang}")

    # Currently, BERT works best with English
    if lang != "en":
        print("⚠️ Warning: QA will still run in English model (for demo). Multilingual BERT can be added later.")

    # QA
    answer = answer_with_bert(context, question)
    print("✅ Answer:", answer)

    # Speak answer
    speak_text(answer, lang)

    os._exit(0)  # Forcefully terminate background threads if any

: 

In [None]:
# Run
if __name__ == "__main__":
    main()

🎤 Recording your question (Speak now)...
✅ Recording saved.




🗣 You asked:  Куча батала.
🌐 Detected language: en


Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Answer: , including machine learning and deep learning , which enable systems to learn from data and improve their performance over time . ai is revolutionizing diverse sectors , from healthcare where it assists in diagnoses and drug discovery , to ﬁnance where it helps with fraud
