# Step 4: Add Voice Input
Adding speech recognition to the AI Tutor using Whisper.
- Date: July 21, 2025
- Converts spoken questions to text, answered by DistilBERT.

In [2]:
import whisper
import pickle
from transformers import pipeline
import os

# Verify audio file
audio_file = "speech_test.wav" 
print("Full Path: ", audio_file)
print("File Exists: ", os.path.exists(audio_file))  # Should be True

# Load SQuAD dataset
with open('squad_dataset.pkl', 'rb') as file:
    dataset = pickle.load(file)

print("Dataset loaded from squad_dataset.pkl!")

# Load Whisper model
whisper_model = whisper.load_model("base")

# Load QA pipeline
text_qa = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

# Transcribe audio
result = whisper_model.transcribe(audio_file)
transcribed_text = result["text"]
print(f"Transcribed Question: {transcribed_text}")

# Answer using DistilBERT
context = "The capital of France is Paris."
result = text_qa(question=transcribed_text, context=context)
print(f"Answer: {result['answer']}")
print(f"Confidence: {result['score']:.2f}")

Full Path:  speech_test.wav
File Exists:  True
Dataset loaded from squad_dataset.pkl!


Device set to use cpu


Transcribed Question:  What is the capital of France?
Answer: Paris
Confidence: 0.99


## Observations
- Whisper accurately transcribed "What is the capital of France?" from speech_test.wav.
- DistilBERT answered "Paris" with high confidence (0.99) using a matching context.
- Fixed incorrect answer by updating context to align with the question.
- This adds voice input, making the AI Tutor multimodal.