In [None]:
import tkinter as tk
from tkinter import scrolledtext
import time
import threading
import speech_recognition as sr
import whisper
from pydub import AudioSegment
import numpy as np
from io import BytesIO
import openai

# Initialize OpenAI client.
# The API key is intentionally NOT stored in this file. openai.OpenAI() reads
# it from the OPENAI_API_KEY environment variable and raises if it is missing.
# Any key that was previously written here must be treated as leaked: revoke
# it in the OpenAI dashboard and export a fresh one before running this file.
client = openai.OpenAI()
# Keep the module-level setting populated for code that still relies on it.
openai.api_key = client.api_key

# Global variables shared between the GUI thread and the worker threads
stop_transcription = False    # set to True to ask the worker loops to exit
transcription_thread = None   # thread running test_whisper_local, once started
reading_thread = None         # thread running read_chat, once started
last_position = 0             # number of lines of output.txt already displayed
is_running = False            # True between a Start and the following Stop

# Transcription Functions
def test_whisper_local(device_index=3, model_name="tiny", min_words=5):
    """Listen on a microphone, transcribe with Whisper and forward to OpenAI.

    The loop runs until the module-level ``stop_transcription`` flag becomes
    true. Each captured phrase is converted to a float32 sample array at
    16 kHz, transcribed locally, and passed to ``send_to_openai`` when it
    contains at least ``min_words`` words; shorter results are only printed.

    Args:
        device_index: Input device index handed to ``sr.Microphone``. Adjust
            to match the machine's device list.
        model_name: Model name handed to ``whisper.load_model``.
        min_words: Minimum number of whitespace-separated words a
            transcription needs before it is sent to OpenAI.
    """
    recognizer = sr.Recognizer()
    model = whisper.load_model(model_name)

    with sr.Microphone(device_index=device_index) as source:
        recognizer.adjust_for_ambient_noise(source, duration=1)  # Reduced from 3s to 1s
        recognizer.pause_threshold = 0.5  # Reduced for faster response
        print("🎙️ Listening... Say something!")

        while not stop_transcription:
            try:
                audio = recognizer.listen(source, timeout=2, phrase_time_limit=10)  # Reduced timeouts

                # listen() can block for several seconds; if Stop was pressed
                # meanwhile, drop the phrase instead of transcribing it and
                # sending it to OpenAI.
                if stop_transcription:
                    break

                audio_segment = AudioSegment.from_wav(BytesIO(audio.get_wav_data()))

                # Resample first so the sample array is built exactly once.
                if audio_segment.frame_rate != 16000:
                    audio_segment = audio_segment.set_frame_rate(16000)

                # Scale to [-1.0, 1.0); assumes 16-bit PCM samples.
                samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32) / 32768.0

                result = model.transcribe(samples, fp16=False)
                text = result["text"].strip()

                if text and len(text.split()) >= min_words:
                    send_to_openai(text)
                else:
                    print(f"ℹ️ Ignoring transcription (less than {min_words} words): '{text}'")

            except sr.WaitTimeoutError:
                # Nobody spoke within the timeout; go back and re-check the stop flag.
                continue
            except Exception as e:
                print(f"❌ Error: {e}")

def send_to_openai(text):
    """Ask the chat model to answer ``text`` in Neha's voice and log the result.

    The persona instructions are appended to the question and sent as a single
    user message. The reply is printed and the question/answer pair is
    appended to ``output.txt``. Any exception is caught and reported on
    stdout; nothing is returned.
    """
    persona = "(+ Answer as Neha in a concise, human-like manner (confident, not needy) in one paragraph: I’m Neha, a Data Analyst with an MBA and B.Tech in CS, specializing in data analytics, compensation & benefits, and data science. I’ve built dashboards using Tableau, PowerBI, SQL, and Python and develop AI-driven projects like a news analysis model (NLP, Whisper, OpenAI), an Instagram scraper (Selenium), and a voice-to-text AI chatbot. In late 2023, I faced transverse myelitis and stepped away to recover, using the time to earn my Data Analytics certification from Skillslash. By mid-2024, I was back—interning, building projects, and stronger than ever.)"
    try:
        prompt = f"{text} {persona}"
        completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="gpt-3.5-turbo",
        )
        bot_reply = completion.choices[0].message.content
        print(f"🤖 OpenAI: {bot_reply}")

        # One blank line separates consecutive entries in the log file.
        with open("output.txt", "a", encoding="utf-8") as log:
            log.write(f"Question: {text}\nAnswer: {bot_reply}\n\n")
            log.flush()

    except Exception as err:
        print(f"❌ OpenAI Error: {err}")

def stop_transcription_loop():
    """Signal the worker loops to stop and refresh the Start/Stop buttons.

    Waits at most one second for the transcription thread to finish, so the
    thread may still be alive when this function returns.
    """
    global stop_transcription, transcription_thread, is_running

    stop_transcription = True

    worker = transcription_thread
    if worker is not None and worker.is_alive():
        worker.join(timeout=1)

    is_running = False
    update_button_states()
    print("🛑 Transcription stopped")

def start_transcription():
    """Start the background transcription thread unless one is already active.

    Does nothing except print a message when transcription is already running
    or when a previously stopped worker has not finished yet. Otherwise it
    clears the stop flag, launches ``test_whisper_local`` in a daemon thread
    and updates the buttons.
    """
    global stop_transcription, transcription_thread, is_running

    if is_running:
        print("Transcription already running!")
        return

    if transcription_thread is not None and transcription_thread.is_alive():
        # Stop only waits one second for the worker, which may still be inside
        # a blocking listen/transcribe call. Clearing the stop flag now would
        # revive it and a second worker would open the same microphone.
        print("Previous transcription is still shutting down; try again in a moment.")
        return

    stop_transcription = False
    transcription_thread = threading.Thread(target=test_whisper_local, daemon=True)
    transcription_thread.start()
    is_running = True
    update_button_states()
    print("▶️ Transcription started")
# GUI Functions
def read_chat():
    """Poll ``output.txt`` and push newly appended lines into the chat box.

    Runs until the module-level ``stop_transcription`` flag becomes true.
    ``last_position`` holds the number of lines already displayed; anything
    after that is scheduled for display through ``chat_box.after``.
    """
    global last_position
    while not stop_transcription:
        try:
            with open("output.txt", "r", encoding="utf-8") as file:
                lines = file.readlines()

            # The file shrank (deleted/recreated or truncated): start over,
            # otherwise nothing would be shown until it outgrows the old count.
            if len(lines) < last_position:
                last_position = 0

            new_text = "".join(lines[last_position:])
            if new_text:
                last_position = len(lines)
                chat_box.after(0, type_text, chat_box, new_text)
        except FileNotFoundError:
            # No answer has been written yet, so the file does not exist.
            # This is expected and must not spam the console on every poll.
            pass
        except Exception as e:
            print(f"Error reading file: {e}")
        time.sleep(0.1)  # Reduced from 2s to 0.1s for faster updates

def type_text(widget, text):
    """Append ``text`` to a read-only text widget, tagging each line.

    Lines starting with ``Question:`` get the "question" tag, lines starting
    with ``Answer:`` get the "answer" tag, everything else gets "default".
    The widget is unlocked for the insertion, locked again afterwards and
    scrolled to the end.
    """
    prefix_to_tag = (("Question:", "question"), ("Answer:", "answer"))

    widget.config(state=tk.NORMAL)
    for row in text.split('\n'):
        # First matching prefix wins; unmatched rows fall back to "default".
        tag = next(
            (name for prefix, name in prefix_to_tag if row.startswith(prefix)),
            "default",
        )
        widget.insert(tk.END, row + "\n", tag)
    widget.config(state=tk.DISABLED)
    widget.see(tk.END)

def update_transparency(value):
    """Apply ``value`` (anything ``float()`` accepts) as the window's alpha."""
    alpha = float(value)
    window.attributes("-alpha", alpha)

def slider_click(event):
    """Jump the transparency slider to the horizontal position of a click.

    The click's x coordinate is mapped linearly from the slider's width onto
    the 0.5-1.0 range, then applied to both the slider and the window.
    """
    lowest, highest = 0.5, 1.0
    fraction = event.x / slider.winfo_width()
    target = lowest + (highest - lowest) * fraction
    slider.set(target)
    update_transparency(target)

def change_text_size(delta):
    """Change the chat font size by ``delta`` points, kept within 8 to 24.

    The new size is stored in ``current_font_size`` and applied to the chat
    box itself and to each of its text tags.
    """
    global current_font_size

    requested = current_font_size + delta
    current_font_size = max(8, min(24, requested))

    font_settings = ("Arial", current_font_size)
    chat_box.configure(font=font_settings)
    for tag_name in ("question", "answer", "default"):
        chat_box.tag_configure(tag_name, font=font_settings)

def resize_slider(event=None):
    """Re-apply the slider's pack options; ``event`` is accepted but unused."""
    layout = {"fill": "x", "expand": True, "padx": (5, 5)}
    slider.pack_configure(**layout)

def start_reading():
    """Start transcription and the thread that mirrors ``output.txt`` on screen.

    Transcription is started first because that is what clears the
    ``stop_transcription`` flag. ``read_chat`` loops on the same flag, so
    launching the reader before the flag is cleared (as happens on a restart
    after Stop) would make it exit immediately and the chat box would no
    longer update.
    """
    global reading_thread
    start_transcription()

    # Only start a reader when transcription is actually running and no
    # reader is alive.
    if is_running and (reading_thread is None or not reading_thread.is_alive()):
        reading_thread = threading.Thread(target=read_chat, daemon=True)
        reading_thread.start()

def update_button_states():
    """Restyle the Start and Stop buttons to reflect ``is_running``."""
    if is_running:
        start_look = {"text": "Running", "bg": "yellow", "fg": "black"}
        stop_look = {"text": "Stop", "bg": "#FF6347", "fg": "black"}
    else:
        start_look = {"text": "Start", "bg": "lightgreen", "fg": "black"}
        stop_look = {"text": "Stopped", "bg": "gray", "fg": "black"}

    start_button.config(**start_look)
    stop_button.config(**stop_look)

def create_floating_window():
    """Build the always-on-top chat window and run the Tk event loop.

    The widgets other functions need (``window``, ``chat_box``, ``slider``,
    ``start_button``, ``stop_button``) and ``current_font_size`` are published
    as module globals. The call blocks inside ``window.mainloop()`` until the
    window is closed; afterwards the worker loops are told to stop.
    """
    global window, chat_box, slider, current_font_size, start_button, stop_button
    global stop_transcription, is_running

    window = tk.Tk()
    window.title("Chat Output")
    window.geometry("400x350")
    window.configure(bg="black")
    window.attributes("-alpha", 0.85)
    window.attributes("-topmost", True)
    window.overrideredirect(False)

    current_font_size = 14

    # Chat area: fills all space not taken by the bottom control bar.
    frame = tk.Frame(window, bg="black")
    frame.pack(expand=True, fill="both")

    chat_box = scrolledtext.ScrolledText(frame, wrap=tk.WORD, bg="black", fg="white",
                                         font=("Arial", current_font_size), borderwidth=0, highlightthickness=0)
    chat_box.pack(expand=True, fill="both", padx=14, pady=14)
    # Read-only for the user; type_text unlocks it temporarily to insert.
    chat_box.config(state=tk.DISABLED)

    chat_box.tag_configure("question", foreground="#FFA500")
    chat_box.tag_configure("answer", foreground="white")
    chat_box.tag_configure("default", foreground="white")

    # Control bar. Buttons are packed side="right", so the first one packed
    # ends up right-most.
    bottom_frame = tk.Frame(window, bg="black")
    bottom_frame.pack(fill="x", side="bottom")

    plus_button = tk.Button(bottom_frame, text="+", font=("Arial", 14), bg="black", fg="black",
                            command=lambda: change_text_size(2), width=2, height=1)
    plus_button.pack(side="right", padx=(5, 5), pady=5)

    minus_button = tk.Button(bottom_frame, text="-", font=("Arial", 14), bg="black", fg="black",
                             command=lambda: change_text_size(-2), width=2, height=1)
    minus_button.pack(side="right", pady=5)

    start_button = tk.Button(bottom_frame, text="Start", font=("Arial", 12), bg="lightgreen", fg="black",
                             width=6, height=1, command=start_reading)
    start_button.pack(side="right", padx=(5, 5), pady=5)

    stop_button = tk.Button(bottom_frame, text="Stop", font=("Arial", 12), bg="#FF6347", fg="black",
                            width=6, height=1, command=stop_transcription_loop)
    stop_button.pack(side="right", padx=(5, 5), pady=5)

    # Transparency slider; its range matches the mapping used in slider_click.
    slider = tk.Scale(bottom_frame, from_=0.5, to=1.0, resolution=0.01, orient=tk.HORIZONTAL,
                      command=update_transparency, bg="black", fg="white", troughcolor="#4A4A4A",
                      highlightthickness=0, borderwidth=0, showvalue=0, width=18,
                      activebackground="#888888", sliderlength=20)
    slider.set(0.85)
    slider.pack(side="left", fill="x", expand=True, padx=(5, 5), pady=5)

    slider.bind("<Button-1>", slider_click)
    window.bind("<Configure>", resize_slider)

    try:
        window.mainloop()
    finally:
        # The window is gone. If the host process outlives it (for example a
        # notebook kernel), the worker loops would otherwise keep recording
        # and calling OpenAI. Only the flags are set here: the buttons no
        # longer exist, so stop_transcription_loop() must not be called.
        stop_transcription = True
        is_running = False

if __name__ == "__main__":
    create_floating_window()

2025-03-04 12:00:21.120 python[43617:4970824] +[IMKClient subclass]: chose IMKClient_Modern
2025-03-04 12:00:21.120 python[43617:4970824] +[IMKInputSession subclass]: chose IMKInputSession_Modern


▶️ Transcription started
🎙️ Listening... Say something!
ℹ️ Ignoring transcription (less than 5 words): ''
🤖 OpenAI: I have a strong educational background and experience in data analytics, compensation & benefits, and data science. I have advanced skills in data visualization tools like Tableau and PowerBI, and programming languages such as SQL and Python. Currently, I am working on AI-driven projects like a news analysis model, an Instagram scraper, and a voice-to-text AI chatbot. I am particularly skilled in handling large datasets and deriving impactful insights to drive decision-making.
ℹ️ Ignoring transcription (less than 5 words): 'biggest improvement areas.'
🤖 OpenAI: I have continuously improved my skills by working on AI-driven projects like a news analysis model, an Instagram scraper, and a voice-to-text AI chatbot. I have also built dashboards using Tableau, PowerBI, SQL, and Python to enhance my data visualization and analytics abilities. My expertise in handling large data

In [1]:
import speech_recognition as sr
# Print the names of the audio devices known to speech_recognition. A name's
# position in this list is the value to use for sr.Microphone(device_index=...).
available_microphones = sr.Microphone.list_microphone_names()
print(available_microphones)

['iPhone 12 Microphone', 'Background Music', 'Background Music (UI Sounds)', 'BlackHole 2ch', 'MacBook Air Microphone', 'MacBook Air Speakers', 'Microsoft Teams Audio', 'Multi-Output Device']
