In [None]:
import sounddevice as sd
from scipy.io.wavfile import write
import whisper
import warnings
import torch

# ปิดคำเตือนที่อาจไม่จำเป็น
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

def record_and_transcribe(duration=10, sample_rate=48000, output_file='output.wav', language="en"):
    try:
        # บันทึกเสียง
        print("เริ่มบันทึกเสียง...")
        audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, dtype='float32')
        sd.wait()  # รอให้การบันทึกเสร็จสิ้น
        print("บันทึกเสียงเสร็จสิ้น")
        
        # บันทึกเสียงเป็นไฟล์ .wav
        write(output_file, sample_rate, audio_data)
        print(f"ไฟล์เสียงถูกบันทึกเป็น '{output_file}'")
        
        # โหลดโมเดล Whisper
        model = whisper.load_model("medium")
        
        # ถอดเสียงจากไฟล์ที่บันทึก
        print("เริ่มถอดเสียง...")
        transcription = model.transcribe(output_file, language=language)
        
        # แสดงผลลัพธ์การถอดเสียง
        print("ข้อความที่ถอดเสียงได้:")
        print(transcription["text"])
    
    except Exception as e:
        print("เกิดข้อผิดพลาด:", e)

# เรียกใช้งานฟังก์ชัน
record_and_transcribe()


In [1]:
import sounddevice as sd
import whisper
import warnings
import pyttsx3
import numpy as np
import keyboard
import wave

# Suppress both FutureWarning and UserWarning
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# Text-to-Speech setup
engine = pyttsx3.init()
engine.setProperty('rate', 150)  # Adjust the speed of speech
engine.setProperty('volume', 0.9)  # Adjust the volume of speech

# Load the Whisper model once at the start
model = whisper.load_model("small")  # ใช้ model ขนาด large เพื่อความเร็วในการประมวลผล

# Function to respond with speech
def speak(text):
    engine.say(text)
    engine.runAndWait()

# Function to save audio to a .wav file
def save_audio(audio_data, sample_rate, filename="recorded_audio.wav"):
    # Convert float32 audio data to int16 for .wav compatibility
    int_data = (audio_data * 32767).astype(np.int16)
    with wave.open(filename, "wb") as wf:
        wf.setnchannels(1)  # Mono
        wf.setsampwidth(2)  # 2 bytes per sample for int16
        wf.setframerate(sample_rate)
        wf.writeframes(int_data.tobytes())
    print(f"Audio saved to {filename}")

# Main function to record, save, transcribe, and respond
def record_and_respond(sample_rate=48000, duration=2, filename="recorded_audio.wav", device_name="Computer"):
    print(f"Say '{device_name}' to activate, and press the space bar to start recording...")

    while True:
        # Wait until the space bar is pressed to start recording
        if keyboard.is_pressed("space"):
            print("Recording for 2 seconds...")

            # Record audio for the specified duration
            audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, dtype='float32')
            sd.wait()  # Wait until recording is finished

            # Save the recorded audio to a .wav file
            save_audio(audio_data.flatten(), sample_rate, filename)

            # Process the saved audio file with Whisper
            print("Processing the audio...")
            transcription = model.transcribe(filename, language="en")
            command_text = transcription["text"]
            print("Transcribed command:", command_text)

            # Check if the device name is mentioned to activate the system
            if device_name.lower() in command_text.lower():
                # Respond to the device name activation
                speak("Please say your command")

                # Wait for the next command after activation
                print("Waiting for your command...")

                # Wait until the space bar is pressed again for the command
                while not keyboard.is_pressed("space"):
                    pass  # Wait for space bar press

                # Record audio for the specified duration for the command
                audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, dtype='float32')
                sd.wait()  # Wait until recording is finished

                # Save and process the audio command
                save_audio(audio_data.flatten(), sample_rate, filename)
                transcription = model.transcribe(filename, language="en")
                command_text = transcription["text"]
                print("Transcribed command:", command_text)

                # Respond with speech based on the command
                if "Turn on" in command_text:  
                    speak("Turning on")
                elif "Turn off" in command_text:   
                    speak("Turning off")
                else:
                    speak("Sorry, I didn't understand that command")
            else:
                speak(f"Please say '{device_name}' to activate") 

            # Prompt to start again
            print(f"Say '{device_name}' to activate, and press the space bar to start recording...")

# Call the main function
record_and_respond()


Say 'Computer' to activate, and press the space bar to start recording...
Recording for 2 seconds...
Audio saved to recorded_audio.wav
Processing the audio...
Transcribed command:  computer
Waiting for your command...
Audio saved to recorded_audio.wav
Transcribed command:  Turn off.
Say 'Computer' to activate, and press the space bar to start recording...
Recording for 2 seconds...
Audio saved to recorded_audio.wav
Processing the audio...
Transcribed command: 
Say 'Computer' to activate, and press the space bar to start recording...
Recording for 2 seconds...
Audio saved to recorded_audio.wav
Processing the audio...
Transcribed command:  Computer.
Waiting for your command...
Audio saved to recorded_audio.wav
Transcribed command:  Turn on.
Say 'Computer' to activate, and press the space bar to start recording...
Recording for 2 seconds...
Audio saved to recorded_audio.wav
Processing the audio...
Transcribed command:  Computer.
Waiting for your command...
Audio saved to recorded_audio.wa

KeyboardInterrupt: 