**Requirements:**

In [None]:
!pip install pydub
!pip install googletrans==4.0.0-rc1
!pip install python-docx
!pip install SpeechRecognition
!pip install gtts

Collecting SpeechRecognition
  Using cached SpeechRecognition-3.11.0-py2.py3-none-any.whl.metadata (28 kB)
Using cached SpeechRecognition-3.11.0-py2.py3-none-any.whl (32.8 MB)
Installing collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.11.0


**Workflow Overview**
1. **Input:** Kannada audio file.
2. **Audio Processing:** Divide the audio into manageable chunks.
3. **Speech Recognition:** Transcribe each Kannada audio chunk to Kannada text, then translate that text into English.




In [None]:
import os

import speech_recognition as sr
from docx import Document
from googletrans import Translator
from pydub import AudioSegment

# Function to split audio file into chunks
def split_audio(file_path, chunk_length_ms=10000):
    """Split an audio file into consecutive fixed-length chunks.

    Args:
        file_path: Path of the audio file to load. The container format is
            left for pydub/ffmpeg to detect, so MP3 input keeps working and
            other formats (e.g. WAV) are accepted as well.
        chunk_length_ms: Length of each chunk in milliseconds (default 10
            seconds). Must be a positive integer.

    Returns:
        A list of audio segments in playback order. The last segment is
        shorter than ``chunk_length_ms`` when the total duration is not an
        exact multiple of it.

    Raises:
        ValueError: If ``chunk_length_ms`` is not greater than zero.
    """
    # Reject bad lengths up front: a negative step would silently yield no
    # chunks and zero would fail inside range() with an unrelated message.
    if chunk_length_ms <= 0:
        raise ValueError(
            f"chunk_length_ms must be a positive number of milliseconds, got {chunk_length_ms!r}"
        )

    audio = AudioSegment.from_file(file_path)

    # len(audio) is the duration in milliseconds; slicing uses the same unit.
    return [
        audio[start_ms:start_ms + chunk_length_ms]
        for start_ms in range(0, len(audio), chunk_length_ms)
    ]

# Function to save transcript to Word document with timing
def append_to_word_with_timing(text, doc, start_time, end_time):
    """Add ``text`` to ``doc`` as one paragraph prefixed with its time span.

    Args:
        text: Transcript text for the span.
        doc: Document-like object exposing ``add_paragraph``.
        start_time: Start of the span, rendered as ``<start_time>s``.
        end_time: End of the span, rendered as ``<end_time>s``.
    """
    prefix = "From {}s to {}s: ".format(start_time, end_time)
    paragraph_text = prefix + text
    doc.add_paragraph(paragraph_text)

# Initialize recognizer
recognizer = sr.Recognizer()

# Path of the input MP3 file
mp3_path = "/content/SandalWoodNewsStories_36.mp3"

# Chunk length in milliseconds. Defined once so the split and the timing
# labels written to the documents are always derived from the same value.
chunk_length_ms = 10000


def _ms_to_seconds(ms):
    """Convert milliseconds to seconds, as an int when the value is whole.

    Whole values stay ints so labels read "10s" rather than "10.0s".
    """
    seconds = ms / 1000
    return int(seconds) if seconds.is_integer() else seconds


# Split the audio file into chunks of chunk_length_ms
audio_chunks = split_audio(mp3_path, chunk_length_ms=chunk_length_ms)

# Initialize translator
translator = Translator()

# Prepare Word documents for saving Kannada and English transcripts
kannada_transcript_file = "kannada6_transcript.docx"
english_transcript_file = "english6_translation.docx"

# Create the Word documents at the start
kannada_doc = Document()
english_doc = Document()

# Process each chunk
for idx, chunk in enumerate(audio_chunks):
    # len(chunk) is the chunk duration in milliseconds, so the final (possibly
    # shorter) chunk is labelled with its real end time.
    start_ms = idx * chunk_length_ms
    chunk_start = _ms_to_seconds(start_ms)
    chunk_end = _ms_to_seconds(start_ms + len(chunk))

    print(f"Processing chunk {idx + 1} from {chunk_start}s to {chunk_end}s...")

    # Save the chunk as a temporary WAV file for speech recognition and
    # delete it as soon as the samples have been read into memory.
    chunk_path = f"temp_chunk_{idx + 1}.wav"
    chunk.export(chunk_path, format="wav")
    try:
        with sr.AudioFile(chunk_path) as audio_file:
            audio_data = recognizer.record(audio_file)
    finally:
        if os.path.exists(chunk_path):
            os.remove(chunk_path)

    try:
        # Recognize speech using Google Web Speech API (Kannada language)
        kannada_text = recognizer.recognize_google(audio_data, language='kn-IN')
    except sr.UnknownValueError:
        print(f"Could not understand audio in chunk {idx + 1}.")
        continue
    except sr.RequestError as e:
        print(f"Error with the speech recognition service in chunk {idx + 1}: {e}")
        continue

    print(f"Kannada Transcript: {kannada_text}")

    # Check if the Kannada text is not empty before saving/translating
    if not kannada_text.strip():
        print("Empty Kannada text, skipping translation.")
        continue

    # The transcript is saved before translating so that a translation
    # failure does not also discard the recognized Kannada text.
    append_to_word_with_timing(kannada_text, kannada_doc, chunk_start, chunk_end)

    try:
        translated_text = translator.translate(kannada_text, src='kn', dest='en').text
    except Exception as e:
        print(f"Error during translation: {e}")
        continue

    if translated_text:
        print(f"English Translation: {translated_text}")
        append_to_word_with_timing(translated_text, english_doc, chunk_start, chunk_end)
    else:
        print("Translation failed, skipping this chunk.")

# Save the documents after processing all chunks
kannada_doc.save(kannada_transcript_file)
english_doc.save(english_transcript_file)

print(f"Saved Kannada transcript to {kannada_transcript_file}")
print(f"Saved English translation to {english_transcript_file}")


Take input from the microphone. (The recording is used as input for the next code cell.)

In [None]:
# JavaScript injected into the cell output: adds Start/Stop buttons, records
# from the microphone with MediaRecorder, and on stop sends the recording to
# the Python callback registered as 'notebook.recordAudioCallback' as a
# base64 string.
RECORD_AUDIO_JS = """
let mediaRecorder;
let audioChunks = [];

async function startRecording() {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

    // Start from an empty buffer so a new recording does not contain the previous one.
    audioChunks = [];
    mediaRecorder = new MediaRecorder(stream);

    mediaRecorder.ondataavailable = (event) => {
        audioChunks.push(event.data);
    };

    mediaRecorder.start();
    console.log("Recording started...");
}

function stopRecording() {
    // Resolves only after the recording has been handed to the Python callback.
    return new Promise((resolve, reject) => {
        mediaRecorder.onstop = () => {
            // Release the microphone once the recorder has stopped.
            mediaRecorder.stream.getTracks().forEach((track) => track.stop());

            const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
            const reader = new FileReader();
            reader.onerror = () => reject(reader.error);
            reader.onload = () => {
                // Strip the "data:...;base64," prefix and keep only the payload.
                const base64data = reader.result.split(',')[1];
                Promise.resolve(
                    google.colab.kernel.invokeFunction(
                        'notebook.recordAudioCallback',
                        [base64data],
                        {}
                    )
                ).then(resolve, reject);
            };
            reader.readAsDataURL(audioBlob);
        };

        mediaRecorder.stop();
        console.log("Recording stopped...");
    });
}

// Create start and stop buttons
const startButton = document.createElement('button');
startButton.textContent = "Start Recording";

const stopButton = document.createElement('button');
stopButton.textContent = "Stop Recording";

// Initially disable stop button
stopButton.disabled = true;

// Disable the start button while recording; restore it if recording cannot start
startButton.onclick = async () => {
    startButton.disabled = true;
    stopButton.disabled = false;
    try {
        await startRecording();
    } catch (err) {
        console.error("Could not start recording:", err);
        startButton.disabled = false;
        stopButton.disabled = true;
    }
};

stopButton.onclick = async () => {
    stopButton.disabled = true;  // Disable the stop button
    try {
        await stopRecording();
    } catch (err) {
        console.error("Could not save recording:", err);
    } finally {
        startButton.disabled = false; // Re-enable the start button
    }
};

document.body.appendChild(startButton);
document.body.appendChild(stopButton);
"""

# Python callback function to handle the audio data
def record_audio_callback(audio_base64, output_path="recordedQ_audio.wav"):
    """Decode a base64 recording and write it to ``output_path``.

    Args:
        audio_base64: Base64-encoded audio bytes.
        output_path: File the decoded bytes are written to. Defaults to the
            name this notebook has always used, so a one-argument call
            behaves as before.

    NOTE(review): the bytes are written exactly as received. The browser's
    MediaRecorder picks the container, so the file may not be real WAV data
    despite its extension - confirm before passing it to a WAV-only reader.
    """
    # Imported locally because this cell has no import block of its own.
    from base64 import b64decode

    audio_data = b64decode(audio_base64)

    # Save the audio file locally
    with open(output_path, "wb") as f:
        f.write(audio_data)

    # Report the path that was actually written.
    print(f"Audio recording saved as '{output_path}'.")

# `output`, `Javascript` and `display` are imported here so that this cell
# also runs on a fresh kernel (no earlier cell imports them).
from google.colab import output
from IPython.display import Javascript, display

# Register the Python callback function under the name the JavaScript invokes
output.register_callback('notebook.recordAudioCallback', record_audio_callback)

# Execute the JavaScript code to add buttons and allow manual control
display(Javascript(RECORD_AUDIO_JS))


<IPython.core.display.Javascript object>

Audio recording saved as 'recorded_audio.wav'.


**Steps of the Workflow**
1. Audio Input and Transcription:

- Input: A Kannada audio question recorded with the code cell above.
- Output: Transcribed Kannada question (text) and its English translation.
2. Semantic Matching: Perform a similarity search on the QA dataset to find the best-matching answer.
3. Answer Retrieval:

- Retrieve the text answer in Kannada and English.
- Retrieve and play the corresponding audio segment from the dataset.
4. Response Generation:

- Play the Kannada audio answer.
- Display text in both Kannada and English.

In [None]:
import json
from sentence_transformers import SentenceTransformer, util
from gtts import gTTS
import os
import speech_recognition as sr
from googletrans import Translator
from pydub import AudioSegment
from IPython.display import Audio, display
import time

class QASystem:
    """Answer Kannada questions by semantic matching against a QA dataset.

    The dataset is a JSON list of objects with ``question`` and ``answer``
    keys and an optional ``audio`` key holding a path to source audio.
    Questions are embedded once at construction time; incoming Kannada
    questions are translated to English, embedded, and compared against
    those embeddings by cosine similarity.
    """

    # Kannada message returned alongside every English processing-error message.
    _PROCESSING_ERROR_KN = "ನಿಮ್ಮ ಪ್ರಶ್ನೆಯನ್ನು ಪ್ರಕ್ರಿಯೆಗೊಳಿಸುವಾಗ ದೋಷ ಸಂಭವಿಸಿದೆ."

    def __init__(self, json_path):
        """Load the QA dataset from ``json_path`` and pre-compute question embeddings."""
        # Load JSON data; the encoding is explicit so the file is read the same on every platform.
        with open(json_path, 'r', encoding='utf-8') as file:
            self.data = json.load(file)

        # Initialize components
        self.model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
        self.translator = Translator()

        # Prepare QA pairs
        self.qa_pairs = [(item['question'], item['answer']) for item in self.data]
        self.questions = [q for q, _ in self.qa_pairs]
        self.encoded_questions = self.model.encode(self.questions, convert_to_tensor=True)

    def text_to_kannada_audio(self, english_text, output_path="kannada_question_audio.mp3"):
        """Translate English text to Kannada and save it as speech.

        Args:
            english_text: Text to translate and speak.
            output_path: Where the generated MP3 is written.

        Returns:
            ``output_path`` on success, ``None`` if any step raised.
        """
        try:
            # Translate to Kannada
            translated = self.translator.translate(english_text, src='en', dest='kn')
            kannada_text = translated.text
            print("\n=== Question Translation ===")
            print("English Question:", english_text)
            print("Kannada Question:", kannada_text)

            # Convert to speech
            tts = gTTS(text=kannada_text, lang='kn')
            tts.save(output_path)

            # Render the audio using Colab's audio player
            print("\nPlaying Kannada question audio...")
            display(Audio(output_path))

            # Brief pause after rendering the player
            time.sleep(2)
            return output_path

        except Exception as e:
            print(f"Error in text_to_kannada_audio: {str(e)}")
            return None

    def audio_to_text(self, audio_path):
        """Transcribe a Kannada audio file to text.

        Args:
            audio_path: Path of the audio file. The format is detected by
                pydub/ffmpeg, so MP3 and other containers are accepted.

        Returns:
            The recognized Kannada text, or ``None`` if loading, conversion
            or recognition failed.
        """
        wav_path = "temp.wav"
        try:
            recognizer = sr.Recognizer()

            # Convert to WAV, the format handed to sr.AudioFile below
            audio = AudioSegment.from_file(audio_path)
            audio.export(wav_path, format="wav")

            # Transcribe
            with sr.AudioFile(wav_path) as source:
                audio_data = recognizer.record(source)
            return recognizer.recognize_google(audio_data, language="kn")

        except Exception as e:
            print(f"Error in audio_to_text: {str(e)}")
            return None

        finally:
            # Cleanup runs on failure too, so the temporary WAV never lingers.
            if os.path.exists(wav_path):
                os.remove(wav_path)

    def _present_answer_audio(self, best_answer_kn, original_audio_path):
        """Show the source audio (if present) and a synthesized Kannada answer.

        Failures are reported but not raised: audio is a presentation extra
        and must not turn an answer that was found into an error result.
        """
        try:
            # Show source audio if available
            if original_audio_path and os.path.exists(original_audio_path):
                print("\nOriginal audio found. Playing the source audio...")
                display(Audio(original_audio_path))
                time.sleep(2)  # Brief pause after rendering the player
            else:
                print("\nNo valid source audio found or the file is missing.")

            # Create and show Kannada answer audio
            tts = gTTS(text=best_answer_kn, lang='kn')
            audio_answer_path = "answer_kn.mp3"
            tts.save(audio_answer_path)

            print("\nPlaying answer audio...")
            if os.path.exists(audio_answer_path):
                display(Audio(audio_answer_path))
                time.sleep(2)  # Brief pause after rendering the player
            else:
                print("Error: Answer audio file was not created.")
        except Exception as e:
            print(f"Error while preparing answer audio: {str(e)}")

    def get_answer(self, question_text_kn, similarity_threshold=0.5):
        """Find the best-matching dataset answer for a Kannada question.

        Args:
            question_text_kn: The question as Kannada text.
            similarity_threshold: Minimum cosine similarity for a dataset
                question to count as a match.

        Returns:
            A ``(english, kannada)`` tuple: the matched answer and its
            Kannada translation, or a pair of "no match" / "error" messages.
        """
        try:
            # Translate to English
            question_text_en = self.translator.translate(question_text_kn, src='kn', dest='en').text
            print("\n=== Processing Question ===")
            print("Recognized Kannada:", question_text_kn)
            print("Translated English:", question_text_en)

            # Find similar question
            question_embedding = self.model.encode(question_text_en, convert_to_tensor=True)
            similarities = util.pytorch_cos_sim(question_embedding, self.encoded_questions)[0]
            # Convert to a plain int before indexing the Python lists below.
            best_match_index = int(similarities.argmax())
            best_match_score = similarities[best_match_index].item()

            if best_match_score < similarity_threshold:
                print("\nNo close match found for the question.")
                return "No close match found for the question.", "ನಿಮ್ಮ ಪ್ರಶ್ನೆಗೆ ಸೂಕ್ತ ಉತ್ತರ ಲಭ್ಯವಿಲ್ಲ."

            # Get answer and optional audio source
            best_answer_en = self.qa_pairs[best_match_index][1]
            question_data = self.data[best_match_index]  # Fetch original JSON data
            original_audio_path = question_data.get('audio', None)

            # Translate answer to Kannada
            best_answer_kn = self.translator.translate(best_answer_en, src='en', dest='kn').text
            print("\n=== Answer ===")
            print("English:", best_answer_en)
            print("Kannada:", best_answer_kn)
            print(f"Match confidence: {best_match_score:.2f}")

            self._present_answer_audio(best_answer_kn, original_audio_path)

            return best_answer_en, best_answer_kn

        except Exception as e:
            print(f"Error in get_answer: {str(e)}")
            return "An error occurred while processing your question.", self._PROCESSING_ERROR_KN

    def process_question(self, english_question, similarity_threshold=0.7):
        """Process complete flow from English question to Kannada answer.

        The question is spoken in Kannada, transcribed back to text, and
        then answered via ``get_answer``.

        Returns:
            Always a ``(english, kannada)`` tuple, including on failure, so
            callers can unpack the result unconditionally.
        """
        print("\nProcessing question:", english_question)

        # Convert question to Kannada audio
        audio_path = self.text_to_kannada_audio(english_question)
        if not audio_path:
            return "Error creating audio question.", self._PROCESSING_ERROR_KN

        # Convert audio back to text
        question_text_kn = self.audio_to_text(audio_path)
        if not question_text_kn:
            return "Error recognizing audio question.", self._PROCESSING_ERROR_KN

        # Get and return answer
        return self.get_answer(question_text_kn, similarity_threshold)
# Example usage
def main(json_path='/content/combined_qa_with_audio.json',
         kannada_audio_question_path="/content/download.mp3"):
    """Answer a recorded Kannada question and print the result.

    Args:
        json_path: Path of the QA dataset handed to ``QASystem``.
        kannada_audio_question_path: Path of the Kannada audio question.
            This file is treated as input and is never deleted.
    """
    # Initialize system
    qa_system = QASystem(json_path)

    # Convert audio to Kannada text
    question_text_kn = qa_system.audio_to_text(kannada_audio_question_path)
    if not question_text_kn:
        print("Error recognizing audio question.")
        return

    print("\nRecognized Kannada Question:", question_text_kn)

    # Get and return the answer for the recognized question
    answer_en, answer_kn = qa_system.get_answer(question_text_kn)

    print("\nEnglish Answer:", answer_en)
    print("Kannada Answer:", answer_kn)

    # Clean up generated audio only. The input recording is left in place so
    # the cell can be run again without re-uploading it.
    time.sleep(5)
    if os.path.exists("answer_kn.mp3"):
        os.remove("answer_kn.mp3")

if __name__ == "__main__":
    main()




Recognized Kannada Question: ಒಂದು ಕಡಿಮೆ ನೀರಿನ ಪರಿಸ್ಥಿತಿಗಳಲ್ಲಿ ಶ್ರೀಗಂಧದ ಮರ ಬೆಳೆಯಬಹುದೇ

=== Processing Question ===
Recognized Kannada: ಒಂದು ಕಡಿಮೆ ನೀರಿನ ಪರಿಸ್ಥಿತಿಗಳಲ್ಲಿ ಶ್ರೀಗಂಧದ ಮರ ಬೆಳೆಯಬಹುದೇ
Translated English: Can sandalwood grow under a low water conditions

=== Answer ===
English: Yes, sandalwood can grow in low-water conditions as it depends on nutrient-rich soil for sustenance.
Kannada: ಹೌದು, ಶ್ರೀಗಂಧವು ಕಡಿಮೆ-ನೀರಿನ ಪರಿಸ್ಥಿತಿಗಳಲ್ಲಿ ಬೆಳೆಯಬಹುದು ಏಕೆಂದರೆ ಇದು ಆಹಾರಕ್ಕಾಗಿ ಪೋಷಕಾಂಶ-ಸಮೃದ್ಧ ಮಣ್ಣನ್ನು ಅವಲಂಬಿಸಿರುತ್ತದೆ.
Match confidence: 0.99

Original audio found. Playing the source audio...



Playing answer audio...



English Answer: Yes, sandalwood can grow in low-water conditions as it depends on nutrient-rich soil for sustenance.
Kannada Answer: ಹೌದು, ಶ್ರೀಗಂಧವು ಕಡಿಮೆ-ನೀರಿನ ಪರಿಸ್ಥಿತಿಗಳಲ್ಲಿ ಬೆಳೆಯಬಹುದು ಏಕೆಂದರೆ ಇದು ಆಹಾರಕ್ಕಾಗಿ ಪೋಷಕಾಂಶ-ಸಮೃದ್ಧ ಮಣ್ಣನ್ನು ಅವಲಂಬಿಸಿರುತ್ತದೆ.


In [None]:
import json
from sentence_transformers import SentenceTransformer, util
from gtts import gTTS
import os
import speech_recognition as sr
from googletrans import Translator
from pydub import AudioSegment
from IPython.display import Audio, display
import time

class QASystem:
    """Answer Kannada questions by semantic matching against a QA dataset.

    The dataset is a JSON list of objects with ``question`` and ``answer``
    keys and an optional ``audio`` key holding a path to source audio.
    Questions are embedded once at construction time; incoming Kannada
    questions are translated to English, embedded, and compared against
    those embeddings by cosine similarity.
    """

    # Kannada message returned alongside every English processing-error message.
    _PROCESSING_ERROR_KN = "ನಿಮ್ಮ ಪ್ರಶ್ನೆಯನ್ನು ಪ್ರಕ್ರಿಯೆಗೊಳಿಸುವಾಗ ದೋಷ ಸಂಭವಿಸಿದೆ."

    def __init__(self, json_path):
        """Load the QA dataset from ``json_path`` and pre-compute question embeddings."""
        # Load JSON data; the encoding is explicit so the file is read the same on every platform.
        with open(json_path, 'r', encoding='utf-8') as file:
            self.data = json.load(file)

        # Initialize components
        self.model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
        self.translator = Translator()

        # Prepare QA pairs
        self.qa_pairs = [(item['question'], item['answer']) for item in self.data]
        self.questions = [q for q, _ in self.qa_pairs]
        self.encoded_questions = self.model.encode(self.questions, convert_to_tensor=True)

    def text_to_kannada_audio(self, english_text, output_path="kannada_question_audio.mp3"):
        """Translate English text to Kannada and save it as speech.

        Args:
            english_text: Text to translate and speak.
            output_path: Where the generated MP3 is written.

        Returns:
            ``output_path`` on success, ``None`` if any step raised.
        """
        try:
            # Translate to Kannada
            translated = self.translator.translate(english_text, src='en', dest='kn')
            kannada_text = translated.text
            print("\n=== Question Translation ===")
            print("English Question:", english_text)
            print("Kannada Question:", kannada_text)

            # Convert to speech
            tts = gTTS(text=kannada_text, lang='kn')
            tts.save(output_path)

            # Render the audio using Colab's audio player
            print("\nPlaying Kannada question audio...")
            display(Audio(output_path))

            # Brief pause after rendering the player
            time.sleep(2)
            return output_path

        except Exception as e:
            print(f"Error in text_to_kannada_audio: {str(e)}")
            return None

    def audio_to_text(self, audio_path):
        """Transcribe a Kannada audio file to text.

        Args:
            audio_path: Path of the audio file. The format is detected by
                pydub/ffmpeg, so MP3 and other containers are accepted.

        Returns:
            The recognized Kannada text, or ``None`` if loading, conversion
            or recognition failed.
        """
        wav_path = "temp.wav"
        try:
            recognizer = sr.Recognizer()

            # Convert to WAV, the format handed to sr.AudioFile below
            audio = AudioSegment.from_file(audio_path)
            audio.export(wav_path, format="wav")

            # Transcribe
            with sr.AudioFile(wav_path) as source:
                audio_data = recognizer.record(source)
            return recognizer.recognize_google(audio_data, language="kn")

        except Exception as e:
            print(f"Error in audio_to_text: {str(e)}")
            return None

        finally:
            # Cleanup runs on failure too, so the temporary WAV never lingers.
            if os.path.exists(wav_path):
                os.remove(wav_path)

    def _present_answer_audio(self, best_answer_kn, original_audio_path):
        """Show the source audio (if present) and a synthesized Kannada answer.

        Failures are reported but not raised: audio is a presentation extra
        and must not turn an answer that was found into an error result.
        """
        try:
            # Show source audio only when the referenced file really exists;
            # a stale path in the dataset is reported instead of raising.
            if original_audio_path and os.path.exists(original_audio_path):
                print("\nOriginal audio found. Playing the source audio...")
                display(Audio(original_audio_path))
                time.sleep(2)  # Brief pause after rendering the player
            else:
                print("\nNo valid source audio found or the file is missing.")

            # Create and show Kannada answer audio
            tts = gTTS(text=best_answer_kn, lang='kn')
            audio_answer_path = "answer_kn.mp3"
            tts.save(audio_answer_path)

            print("\nPlaying answer audio...")
            if os.path.exists(audio_answer_path):
                display(Audio(audio_answer_path))
                time.sleep(2)  # Brief pause after rendering the player
            else:
                print("Error: Answer audio file was not created.")
        except Exception as e:
            print(f"Error while preparing answer audio: {str(e)}")

    def get_answer(self, question_text_kn, similarity_threshold=0.5):
        """Find the best-matching dataset answer for a Kannada question.

        Args:
            question_text_kn: The question as Kannada text.
            similarity_threshold: Minimum cosine similarity for a dataset
                question to count as a match.

        Returns:
            A ``(english, kannada)`` tuple: the matched answer and its
            Kannada translation, or a pair of "no match" / "error" messages.
        """
        try:
            # Translate to English
            question_text_en = self.translator.translate(question_text_kn, src='kn', dest='en').text
            print("\n=== Processing Question ===")
            print("Recognized Kannada:", question_text_kn)
            print("Translated English:", question_text_en)

            # Find similar question
            question_embedding = self.model.encode(question_text_en, convert_to_tensor=True)
            similarities = util.pytorch_cos_sim(question_embedding, self.encoded_questions)[0]
            # Convert to a plain int before indexing the Python lists below.
            best_match_index = int(similarities.argmax())
            best_match_score = similarities[best_match_index].item()

            if best_match_score < similarity_threshold:
                print("\nNo close match found for the question.")
                return "No close match found for the question.", "ನಿಮ್ಮ ಪ್ರಶ್ನೆಗೆ ಸೂಕ್ತ ಉತ್ತರ ಲಭ್ಯವಿಲ್ಲ."

            # Get answer and optional audio source
            best_answer_en = self.qa_pairs[best_match_index][1]
            question_data = self.data[best_match_index]  # Fetch original JSON data
            original_audio_path = question_data.get('audio', None)

            # Translate answer to Kannada
            best_answer_kn = self.translator.translate(best_answer_en, src='en', dest='kn').text
            print("\n=== Answer ===")
            print("English:", best_answer_en)
            print("Kannada:", best_answer_kn)
            print(f"Match confidence: {best_match_score:.2f}")

            self._present_answer_audio(best_answer_kn, original_audio_path)

            return best_answer_en, best_answer_kn

        except Exception as e:
            print(f"Error in get_answer: {str(e)}")
            return "An error occurred while processing your question.", self._PROCESSING_ERROR_KN

    def process_question(self, english_question, similarity_threshold=0.7):
        """Process complete flow from English question to Kannada answer.

        The question is spoken in Kannada, transcribed back to text, and
        then answered via ``get_answer``.

        Returns:
            Always a ``(english, kannada)`` tuple, including on failure, so
            callers can unpack the result unconditionally.
        """
        print("\nProcessing question:", english_question)

        # Convert question to Kannada audio
        audio_path = self.text_to_kannada_audio(english_question)
        if not audio_path:
            return "Error creating audio question.", self._PROCESSING_ERROR_KN

        # Convert audio back to text
        question_text_kn = self.audio_to_text(audio_path)
        if not question_text_kn:
            return "Error recognizing audio question.", self._PROCESSING_ERROR_KN

        # Get and return answer
        return self.get_answer(question_text_kn, similarity_threshold)

# Example usage
def main(json_path='/content/combined_qa_with_audio.json',
         english_question="Can sandalwood grow in low-water conditions?"):
    """Run the English-question -> Kannada-answer flow once.

    Args:
        json_path: Path of the QA dataset handed to ``QASystem``.
        english_question: The question to ask. It is passed on verbatim, so
            it should not carry list numbering such as "1." - any prefix is
            translated and spoken along with the question.

    Returns:
        The ``(english, kannada)`` answer pair.
    """
    # Initialize system
    qa_system = QASystem(json_path)

    try:
        # Process a question
        result = qa_system.process_question(english_question)

        # Accept a bare message as well as an (english, kannada) pair so an
        # error result cannot break the unpacking.
        if isinstance(result, tuple):
            answer_en, answer_kn = result
        else:
            answer_en = answer_kn = result
        return answer_en, answer_kn
    finally:
        # Clean up generated audio files, also when processing failed
        time.sleep(5)
        if os.path.exists("kannada_question_audio.mp3"):
            os.remove("kannada_question_audio.mp3")
        if os.path.exists("answer_kn.mp3"):
            os.remove("answer_kn.mp3")

if __name__ == "__main__":
    main()
