In [1]:
# Install required packages (may take a few minutes). If PyAudio fails on Windows, use pipwin as suggested below.
!pip install -U pip setuptools wheel
!pip install -U numpy==1.26.4 h5py==3.11.0 spacy==3.7.2 thinc==8.2.2 speechrecognition --quiet
try:
    import pyaudio
    print("PyAudio already installed.")
except Exception:
    print("Attempting to install pyaudio (may fail on Windows)...")
    !pip install pyaudio --quiet || echo "PyAudio install failed; on Windows run: pip install pipwin && pipwin install pyaudio"
print('\n‚úÖ Package install step completed. Check above output for any failures.')

Collecting pip
  Using cached pip-25.3-py3-none-any.whl.metadata (4.7 kB)
Collecting wheel
  Using cached wheel-0.45.1-py3-none-any.whl.metadata (2.3 kB)
Using cached pip-25.3-py3-none-any.whl (1.8 MB)
Using cached wheel-0.45.1-py3-none-any.whl (72 kB)


ERROR: To modify pip, please run the following command:
C:\Users\siric\Downloads\S\jupyter\python.exe -m pip install -U pip setuptools wheel


PyAudio already installed.

‚úÖ Package install step completed. Check above output for any failures.


In [2]:
# Imports
import tkinter as tk
from tkinter import filedialog
import speech_recognition as sr
import spacy
import subprocess
import sys
print('Imports cell ready. If this cell errors, re-run the install cell and then this one.')

Imports cell ready. If this cell errors, re-run the install cell and then this one.


In [3]:
# Ensure spaCy model
from spacy.cli import download as spacy_download

def ensure_spacy_model(model_name='en_core_web_sm'):
    try:
        nlp = spacy.load(model_name)
        print(f"Loaded spaCy model: {model_name}")
        return nlp
    except Exception as e:
        print(f"Model {model_name} not found or failed to load. Attempting download...\nDetail: {e}")
        try:
            spacy_download(model_name)
            nlp = spacy.load(model_name)
            print("Download successful and model loaded.")
            return nlp
        except Exception:
            print("spacy.cli.download failed. Trying subprocess call with --direct...")
            try:
                subprocess.check_call([sys.executable, "-m", "spacy", "download", model_name, "--direct"])
                nlp = spacy.load(model_name)
                print("Downloaded with --direct and loaded model.")
                return nlp
            except Exception:
                print('Automatic downloads failed. See instructions in the notebook for manual install.')
                raise RuntimeError('spaCy model installation failed; please follow printed instructions.')

nlp = ensure_spacy_model('en_core_web_sm')

Loaded spaCy model: en_core_web_sm


In [4]:
# Helper functions

def pos_tagging(text):
    doc = nlp(text)
    return [(token.text, token.pos_) for token in doc]

def display_result(text):
    result_text.delete("1.0", tk.END)
    pos_text.delete("1.0", tk.END)
    result_text.insert(tk.END, text)
    try:
        tags = pos_tagging(text)
        for word, tag in tags:
            pos_text.insert(tk.END, f"{word} ‚Äî {tag}\\n")
    except Exception as e:
        pos_text.insert(tk.END, f"‚ùå POS tagging failed: {e}")
    status_label.config(text="‚úÖ Done")


def transcribe_from_microphone():
    recognizer = sr.Recognizer()
    try:
        with sr.Microphone() as source:
            status_label.config(text="üéô Listening...")
            window.update()
            recognizer.adjust_for_ambient_noise(source)
            audio = recognizer.listen(source, timeout=5)
        status_label.config(text="üîç Processing...")
        text = recognizer.recognize_google(audio)
        display_result(text)
    except Exception as e:
        display_result(f"‚ùå Error: {e}")


def transcribe_from_file():
    filepath = filedialog.askopenfilename(filetypes=[("Audio Files", "*.wav *.mp3 *.flac")])
    if not filepath:
        return
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(filepath) as source:
            audio = recognizer.record(source)
        status_label.config(text="üîç Processing file...")
        text = recognizer.recognize_google(audio)
        display_result(text)
    except Exception as e:
        display_result(f"‚ùå Error: {e}")

In [5]:
# GUI creation (widgets only)
window = tk.Tk()
window.title("üé§ Voice to Text + POS Tagging")
window.geometry("640x520")
window.resizable(False, False)
window.config(bg="#f4f4f4")

tk.Label(window, text="Voice to Text + POS Tagging", font=("Arial", 18, "bold"), bg="#f4f4f4").pack(pady=15)
tk.Button(window, text="üéô Speak Now", command=transcribe_from_microphone, width=25, bg="#4CAF50", fg="white", font=("Arial", 12)).pack(pady=5)
tk.Button(window, text="üìÅ Upload Audio File", command=transcribe_from_file, width=25, bg="#2196F3", fg="white", font=("Arial", 12)).pack(pady=5)

status_label = tk.Label(window, text="Ready", font=("Arial", 10), fg="gray", bg="#f4f4f4")
status_label.pack(pady=10)

tk.Label(window, text="üìù Transcribed Text:", font=("Arial", 12, "bold"), bg="#f4f4f4").pack()
result_text = tk.Text(window, height=6, width=80, font=("Arial", 11))
result_text.pack(padx=5)

tk.Label(window, text="üî† Part-of-Speech Tags:", font=("Arial", 12, "bold"), bg="#f4f4f4").pack(pady=(10, 0))
pos_text = tk.Text(window, height=10, width=80, font=("Arial", 11))
pos_text.pack(padx=5)

print('GUI created. Run the next cell to open the window when you are ready (manual launch).')

GUI created. Run the next cell to open the window when you are ready (manual launch).


In [6]:
# Run this cell manually (Shift+Enter) to open the Tkinter GUI window.
window.mainloop()