In [125]:
import numpy as np
import librosa
import pyaudio
import tensorflow as tf
from tensorflow.keras.models import load_model
import time
import os
import wave

# 模型路徑
MODEL_PATH = 'pokemon_cnn_model.keras'
# 載入模型
model = load_model(MODEL_PATH)

# 設定錄音參數
SAMPLE_RATE = 22050  # 采樣率
MFCC_COUNT = 40  # 使用 40 個 MFCC 特徵
AUDIO_DURATION = 3  # 錄音總長度 (秒)
CHANNELS = 1  # 單聲道
FORMAT = pyaudio.paInt16  # 設定格式
RATE = SAMPLE_RATE
CHUNK = 1024  # 每個塊的大小

# 設定資料夾路徑（確保已經有分類資料夾結構）
DATA_PATH = 'augmented_pokemon_sounds'
labels = sorted(os.listdir(DATA_PATH))  # 資料夾中的寶可夢名稱作為標籤

# 初始化 pyaudio
p = pyaudio.PyAudio()

# 開啟麥克風
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)

def extract_features(y):
    """從音訊提取MFCC特徵"""
    mfcc = librosa.feature.mfcc(y=y, sr=SAMPLE_RATE, n_mfcc=MFCC_COUNT)
    mfcc = librosa.util.fix_length(mfcc, size=130, axis=1)
    return mfcc.T

def record_audio():
    """錄製 10 秒鐘的音訊"""
    print("開始錄音...")
    frames = []
    for _ in range(0, int(SAMPLE_RATE / CHUNK * AUDIO_DURATION)):
        data = stream.read(CHUNK)
        frames.append(data)
    print("錄音結束.")
    # 將音訊數據轉換為 NumPy 陣列並且將其轉為浮點數格式
    filename = "recorded_audio.wav"
    wf = wave.open(filename, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(SAMPLE_RATE)
    wf.writeframes(b''.join(frames))
    wf.close()
    audio_data = np.frombuffer(b''.join(frames), dtype=np.int16)
    audio_data = audio_data.astype(np.float32) / 32768.0  # 正規化為 [-1, 1] 範圍
    return audio_data

def predict_from_microphone():
    """從麥克風錄音並預測"""
    while True:
        audio_data = record_audio()
        
        play_audio(audio_data)  # ⬅️ 播放錄音
        time.sleep(2)
        # def normalize_audio(audio_data):
        #     # 將音訊數據正規化到 [-1, 1] 範圍
        #     return audio_data / np.max(np.abs(audio_data))
        # normalized_audio = normalize_audio(audio_data)

        # 轉換正規化的音訊數據為 int16 格式並保存為新的 WAV 文件
        normalized_audio_int16 = np.int16(audio_data * 32767)  # 將數據縮放回 [-32767, 32767] 範圍

        # 儲存正規化後的音訊為 WAV 檔案
        filename = "recorded_audio_normal.wav"
        wf = wave.open(filename, 'wb')
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(p.get_sample_size(FORMAT))
        wf.setframerate(SAMPLE_RATE)
        wf.writeframes(normalized_audio_int16.tobytes())
        wf.close()
        # audio_data = normalize_audio(audio_data)
        features = extract_features(audio_data)

        features = np.expand_dims(features, axis=-1)  # 增加一個 channel 維度 → (time_steps, 40, 1)
        features = np.expand_dims(features, axis=0)   # 增加 batch 維度 → (1, time_steps, 40, 1)
        # print("萃取的特徵向量：", features)

        prediction = model.predict(features)
        predicted_label = np.argmax(prediction)

        print(f"預測結果: {predicted_label} ({labels[predicted_label]})")
        for i in range(5, 0, -1):
            print(i)
            time.sleep(1)



Exception ignored in: <function Wave_write.__del__ at 0x00000112E6603910>
Traceback (most recent call last):
  File "c:\Users\KunYu\miniconda3\envs\poke-env\lib\wave.py", line 326, in __del__
    self.close()
  File "c:\Users\KunYu\miniconda3\envs\poke-env\lib\wave.py", line 443, in close
    if self._file:
AttributeError: 'Wave_write' object has no attribute '_file'
Exception ignored in: <function Wave_write.__del__ at 0x00000112E6603910>
Traceback (most recent call last):
  File "c:\Users\KunYu\miniconda3\envs\poke-env\lib\wave.py", line 326, in __del__
    self.close()
  File "c:\Users\KunYu\miniconda3\envs\poke-env\lib\wave.py", line 443, in close
    if self._file:
AttributeError: 'Wave_write' object has no attribute '_file'
Exception ignored in: <function Wave_write.__del__ at 0x00000112E6603910>
Traceback (most recent call last):
  File "c:\Users\KunYu\miniconda3\envs\poke-env\lib\wave.py", line 326, in __del__
    self.close()
  File "c:\Users\KunYu\miniconda3\envs\poke-env\lib

In [126]:
import sounddevice as sd

def play_audio(audio_data):
    """播放錄製的音訊"""
    print("🔊 播放錄音...")
    sd.play(audio_data, samplerate=SAMPLE_RATE)
    sd.wait()  # 等待播放完畢


In [127]:
# 啟動麥克風預測
predict_from_microphone()

# 關閉麥克風流
stream.stop_stream()
stream.close()
p.terminate()


開始錄音...
錄音結束.
🔊 播放錄音...
預測結果: 1 (Charizard)
5
4
3
2
1
開始錄音...
錄音結束.
🔊 播放錄音...
預測結果: 1 (Charizard)
5
4
3
2
1
開始錄音...
錄音結束.
🔊 播放錄音...
預測結果: 1 (Charizard)
5
4
3


KeyboardInterrupt: 