In [3]:
import sounddevice as sd
import soundfile as sf
from IPython.display import Audio
import os

def record_audio(file_path, duration=5, sample_rate=44100, channels=2):
    """Record audio with sounddevice and save it to ``file_path`` with soundfile.

    Args:
        file_path: Destination path of the audio file. Its parent directory is
            created when missing; a bare file name (no directory part) is
            written relative to the current working directory.
        duration: Length of the recording in seconds.
        sample_rate: Sampling rate in Hz, used for both capture and the file.
        channels: Number of input channels to capture (defaults to 2).

    Errors raised while recording or writing are printed rather than
    propagated, so a failed recording does not abort the calling cell.
    """
    print("Recording audio... Speak now.")

    # os.path.dirname() returns "" for a bare file name and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when one is named.
    target_dir = os.path.dirname(file_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    try:
        recording = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=channels)
        sd.wait()  # Wait for recording to finish
        sf.write(file_path, recording, sample_rate)
        print(f"Audio recorded and saved as '{file_path}'")
    except Exception as e:
        print("An error occurred while recording audio:", str(e))

def play_audio(file_path):
    """Return an IPython ``Audio`` display object for ``file_path``.

    When the path does not exist, a hint is printed instead and the function
    returns ``None``.
    """
    if not os.path.exists(file_path):
        print(f"'{file_path}' not found. Please record audio first.")
        return None
    return Audio(file_path)

# Destination of the recording: a WAV file inside the local 'audio' folder
audio_folder = "audio"
audio_file_path = os.path.join(audio_folder, "recorded_audio.wav")

# Capture from the microphone using record_audio's default duration (5 seconds)
record_audio(file_path=audio_file_path)

# Kept as the last expression of the cell so the notebook displays the
# object returned by play_audio
play_audio(file_path=audio_file_path)


Recording audio... Speak now.
Audio recorded and saved as 'audio\recorded_audio.wav'


In [1]:
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog
import whisper
from langdetect import detect
import spacy
import calamancy
import os

# Initialize language models
# Both pipelines are loaded once at module level and reused by
# transcribe_and_analyze() to tokenize English ('en') and Tagalog ('tl')
# transcriptions respectively.
nlp_english = spacy.load("en_core_web_sm")
nlp_tagalog = calamancy.load("tl_calamancy_md-0.1.0")

# Whisper models already loaded in this session, keyed by model name, so that
# selecting several files in a row does not reload the weights every time.
_WHISPER_MODELS = {}


def _get_whisper_model(name="base"):
    """Return the Whisper model called ``name``, loading it on first use only."""
    if name not in _WHISPER_MODELS:
        _WHISPER_MODELS[name] = whisper.load_model(name)
    return _WHISPER_MODELS[name]


def transcribe_and_analyze(file_path):
    """Transcribe an audio file, detect its language and extract chosen words.

    The file is transcribed with the Whisper "base" model, the language of the
    transcription is detected with langdetect, and the user is asked (via a
    dialog attached to the module-level ``root`` window) for a whitespace
    separated list of words. Tokens of the transcription that exactly match
    one of those words are collected and shown, together with the
    transcription and detected language, in a message box.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        None. If transcription fails an error dialog is shown; if the user
        cancels the word prompt or submits nothing, no result is displayed.
    """
    # Transcribe the audio using Whisper. This runs inside a Tk callback, so
    # report failures in a dialog instead of leaving the user without feedback.
    try:
        result = _get_whisper_model("base").transcribe(file_path)
    except Exception as exc:
        messagebox.showerror("Transcription Failed", f"Could not transcribe '{file_path}':\n{exc}")
        return
    transcription = result["text"]

    # Detect language. Catch Exception rather than using a bare except so that
    # KeyboardInterrupt / SystemExit are not swallowed.
    try:
        lang = detect(transcription)
    except Exception:
        lang = "error"

    # Get user input for words to extract
    user_input = simpledialog.askstring("Input", "Enter words to extract:", parent=root)
    if not user_input:
        return

    wanted_words = set(user_input.split())

    # Only English and Tagalog have a pipeline; for any other detected
    # language nothing is extracted and the list stays empty.
    extracted_words = []
    if lang == 'en':
        extracted_words = [word.text for word in nlp_english(transcription) if word.text in wanted_words]
    elif lang == 'tl':
        extracted_words = [word.text for word in nlp_tagalog(transcription) if word.text in wanted_words]

    # Display results
    language_label = 'English' if lang == 'en' else 'Tagalog' if lang == 'tl' else 'Mixed/Unknown'
    result_message = (
        f"Transcription:\n{transcription}\n\n"
        f"Detected Language: {language_label}\n\n"
        f"Extracted Words: {extracted_words}"
    )
    messagebox.showinfo("Transcription and Extracted Words", result_message)

def open_audio_file():
    """Ask the user to pick an audio file and pass it to transcribe_and_analyze.

    The 'audio' folder is created when it does not exist and is used as the
    starting directory of the file dialog. Nothing happens if the dialog is
    dismissed without a selection.
    """
    start_dir = "audio"
    if not os.path.exists(start_dir):
        os.makedirs(start_dir)

    type_filters = (
        ("Audio files", "*.wav *.mp3 *.m4a"),
        ("All files", "*.*"),
    )
    file_path = filedialog.askopenfilename(
        initialdir=start_dir,
        title="Select Audio File",
        filetypes=type_filters,
    )
    if not file_path:
        return
    transcribe_and_analyze(file_path)

def quit_app() -> None:
    """Close the application by destroying the module-level ``root`` Tk window."""
    root.destroy()

# Main window with its title, initial size and background colour
root = tk.Tk()
root.title("Audio Transcription App")
root.geometry("500x400")
root.configure(bg="#ADD8E6")

# Button that opens the file picker and starts transcription
select_button = tk.Button(
    root,
    text="Select Audio File",
    command=open_audio_file,
    height=2,
    width=20,
    bg="#E0FFFF",
    font=("Helvetica", 12),
)
select_button.pack(pady=20)

# Button that closes the window
quit_button = tk.Button(
    root,
    text="Quit",
    command=quit_app,
    height=2,
    width=20,
    bg="#E0FFFF",
    font=("Helvetica", 12),
)
quit_button.pack(pady=20)

# Hand control to the Tk event loop
root.mainloop()





