In [101]:
# 導入必要的庫
import os
import numpy as np
import librosa
import noisereduce as nr
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.utils import to_categorical
%matplotlib inline


In [102]:
DATA_PATH = 'C:/Users/bymin/OneDrive/桌面/swallow/data/'  # 請將此替換為您的實際路徑

FEATURE_SAVE_PATH = 'C:/Users/bymin/OneDrive/桌面/swallow/feature'  # 請將此替換為您的實際路徑

# 音頻取樣率
SAMPLE_RATE = 16000

# 梅爾頻譜圖參數
N_MELS = 64
N_FFT = 1024
HOP_LENGTH = 512

def load_audio(file_path, sample_rate=SAMPLE_RATE):
    """
    加載音頻文件
    """
    audio, sr = librosa.load(file_path, sr=sample_rate)
    return audio, sr

def normalize_audio(audio):
    """
    振幅正規化，將音頻信號調整到 [-1, 1] 範圍內
    """
    max_abs_amplitude = np.max(np.abs(audio))
    if max_abs_amplitude > 0:
        audio = audio / max_abs_amplitude
    return audio

def reduce_noise(audio, sr):
    """
    降噪處理，使用 noisereduce 庫
    """
    reduced_audio = nr.reduce_noise(y=audio, sr=sr)
    return reduced_audio

def extract_mel_spectrogram(audio, sr, n_mels=N_MELS, n_fft=N_FFT, hop_length=HOP_LENGTH):
    """
    提取梅爾頻譜圖特徵
    """
    mel_spectrogram = librosa.feature.melspectrogram(
        y=audio,
        sr=sr,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels
    )
    # 對數壓縮
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
    return log_mel_spectrogram

def standardize_features(feature):
    """
    特徵標準化，零均值單位方差
    """
    mean = np.mean(feature)
    std = np.std(feature)
    standardized_feature = (feature - mean) / std
    return standardized_feature


In [103]:
# =============================
# 準備音頻文件路徑和標籤
# =============================

# 初始化列表
file_paths = []
file_labels = []

# 遍歷資料夾，假設正類和負類分別存放在 'swallowing' 和 'non-swallowing' 資料夾中
for label in ['swallow', 'non']:
    folder_path = os.path.join(DATA_PATH, label)
    for filename in os.listdir(folder_path):
        if filename.endswith('.wav') or filename.endswith('.mp3'):
            file_paths.append(os.path.join(folder_path, filename))
            file_labels.append(label)

# 檢查總共的樣本數量
print(f"總共的音頻樣本數：{len(file_paths)}")
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(file_labels)

# 檢查編碼後的標籤
print("原始標籤：", label_encoder.classes_)
print("編碼後的標籤：", np.unique(encoded_labels))


總共的音頻樣本數：137
原始標籤： ['non' 'swallow']
編碼後的標籤： [0 1]


In [105]:
FIXED_DURATION = 1.0 # 固定時長（秒）
FIXED_LENGTH = int(SAMPLE_RATE * FIXED_DURATION)  # 固定樣本數

# ...（準備音頻文件路徑和標籤的代碼保持不變）...

# 初始化列表
features = []
labels = []

# 遍歷音頻文件，提取特徵
for idx, (file_path, label) in enumerate(zip(file_paths, encoded_labels)):
    try:
        # 加載音頻
        audio, sr = load_audio(file_path)
        
        # 確保音頻時長一致
        if len(audio) < FIXED_LENGTH:
            # 如果音頻短於固定長度，進行零填充
            pad_width = FIXED_LENGTH - len(audio)
            audio = np.pad(audio, (0, pad_width), mode='constant')
        else:
            # 如果音頻長於固定長度，進行截斷
            audio = audio[:FIXED_LENGTH]
        # 振幅正規化
        audio = normalize_audio(audio)
        # 降噪處理
        audio = reduce_noise(audio, sr)
        # 提取梅爾頻譜圖
        feature = extract_mel_spectrogram(audio, sr)
        # 標準化特徵
        feature = standardize_features(feature)
        # 檢查特徵形狀，確保一致
        expected_shape = (N_MELS, int(np.ceil((FIXED_LENGTH - N_FFT) / HOP_LENGTH)) + 1)
        if feature.shape != expected_shape:
            # 如果形狀不一致，進行修正
            feature = librosa.util.fix_length(feature, size=expected_shape[1], axis=1)
        # 展平成一維向量
        feature = feature.flatten()
        # 添加到列表
        features.append(feature)
        labels.append(label)
        
        # 進度輸出
        if (idx + 1) % 10 == 0:
            print(f"已處理 {idx + 1} / {len(file_paths)} 個文件")
                
    except Exception as e:
        print(f"處理文件 {file_path} 時發生錯誤：{e}")

feature_shapes = [feature.shape for feature in features]
unique_shapes = set(feature_shapes)

print("特徵形狀集合：", unique_shapes)

# 確保只有一種特徵形狀
if len(unique_shapes) == 1:
    # 轉換為 NumPy 陣列
    features = np.array(features)
    labels = np.array(labels)
    
    print("特徵矩陣形狀：", features.shape)
    print("標籤向量形狀：", labels.shape)
else:
    print("特徵形狀不一致，請檢查預處理步驟。")



已處理 10 / 137 個文件
已處理 20 / 137 個文件
已處理 30 / 137 個文件
已處理 40 / 137 個文件
已處理 50 / 137 個文件
已處理 60 / 137 個文件
已處理 70 / 137 個文件
已處理 80 / 137 個文件
已處理 90 / 137 個文件
已處理 100 / 137 個文件
已處理 110 / 137 個文件
已處理 120 / 137 個文件
已處理 130 / 137 個文件
特徵形狀集合： {(1984,)}
特徵矩陣形狀： (137, 1984)
標籤向量形狀： (137,)


In [106]:
# =============================
# 將列表轉換為 NumPy 陣列
# =============================

features = np.array(features)
labels = np.array(labels)

# 檢查形狀
print("特徵矩陣形狀：", features.shape)
print("標籤向量形狀：", labels.shape)


特徵矩陣形狀： (137, 1984)
標籤向量形狀： (137,)


In [107]:
# =============================
# 劃分資料集
# =============================

# 首先劃分訓練集和臨時集（驗證集 + 測試集）
X_train, X_temp, y_train, y_temp = train_test_split(
    features, labels, test_size=0.3, random_state=42, stratify=labels)

# 然後劃分驗證集和測試集
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

# 檢查各個資料集的形狀
print("訓練集特徵形狀：", X_train.shape)
print("訓練集標籤形狀：", y_train.shape)
print("驗證集特徵形狀：", X_val.shape)
print("驗證集標籤形狀：", y_val.shape)
print("測試集特徵形狀：", X_test.shape)
print("測試集標籤形狀：", y_test.shape)


訓練集特徵形狀： (95, 1984)
訓練集標籤形狀： (95,)
驗證集特徵形狀： (21, 1984)
驗證集標籤形狀： (21,)
測試集特徵形狀： (21, 1984)
測試集標籤形狀： (21,)


In [108]:
# 將特徵和標籤轉換為NumPy數組
X = np.array(features)
y = np.array(labels_processed)

print(f"特徵形狀: {X.shape}")
print(f"標籤形狀: {y.shape}")

# 將數值標準化到0-1之間
X = X / np.max(X)

print(f"標準化後的特徵形狀: {X.shape}")

# 分割訓練集和測試集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

print(f"訓練集大小: {X_train.shape}")
print(f"測試集大小: {X_test.shape}")


特徵形狀: (137, 1984)
標籤形狀: (184,)
標準化後的特徵形狀: (137, 1984)


ValueError: Found input variables with inconsistent numbers of samples: [137, 184]

In [98]:
import numpy as np
from collections import Counter

# 檢查原始標籤分佈
print("原始標籤分佈:", Counter(y))

# 分割前後標籤的分佈
X = np.array(features)
y = np.array(labels_processed)

# 確認標籤分佈
label_counts = Counter(y)
print("分割前標籤分佈:", label_counts)


原始標籤分佈: Counter({0: 135, 1: 49})
分割前標籤分佈: Counter({0: 135, 1: 49})


In [74]:
# 將特徵和標籤轉換為numpy數組
X = np.array(features)
y = np.array(labels_list)

print(f"特徵形狀: {X.shape}")
print(f"標籤形狀: {y.shape}")


特徵形狀: (2, 128, 87, 1)
標籤形狀: (2,)


In [75]:
# 分割訓練集和測試集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print(f"訓練集大小: {X_train.shape}")
print(f"測試集大小: {X_test.shape}")

訓練集大小: (1, 128, 87, 1)
測試集大小: (1, 128, 87, 1)


In [76]:
# 將標籤轉為類別
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)

print(f"訓練集標籤範例: {y_train[:5]}")
print(f"測試集標籤範例: {y_test[:5]}")


訓練集標籤範例: [[1. 0.]]
測試集標籤範例: [[1. 0.]]


In [77]:
from collections import Counter

# 獲取訓練集和驗證集的真實類別
y_train_classes = np.argmax(y_train, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# 計算類別分佈
train_counter = Counter(y_train_classes)
test_counter = Counter(y_test_classes)

print(f"訓練集類別分佈: {train_counter}")  # 例如: Counter({0: 800, 1: 600})
print(f"驗證集類別分佈: {test_counter}")  # 例如: Counter({0: 200, 1: 150})


訓練集類別分佈: Counter({0: 1})
驗證集類別分佈: Counter({0: 1})


In [56]:
def build_cnn(input_shape):
    """
    建立CNN模型架構。

    參數:
    - input_shape: 輸入數據的形狀，例如 (128, 130, 1)

    返回:
    - model: 建立好的Keras模型
    """
    model = Sequential()

    # 第一個卷積層
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D((2, 2)))
    
    # 第二個卷積層
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    
    # 第三個卷積層
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    
    # 展平層
    model.add(Flatten())
    
    # 全連接層
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    
    # 輸出層
    model.add(Dense(2, activation='softmax'))
    
    return model

# 獲取輸入形狀
input_shape = X_train.shape[1:]  # 例如 (128, 130, 1)
print(f"inputShape:{input_shape}")

# 建立模型
model = build_cnn(input_shape)

# 編譯模型
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# 顯示模型摘要
model.summary()


inputShape:(128, 87, 1)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 126, 85, 32)       320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 63, 42, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 61, 40, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 30, 20, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 28, 18, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (N

In [58]:
# 定義早停回調，以防止過度擬合
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# 訓練模型
history = model.fit(X_train, y_train, epochs=50, batch_size=32,
                    validation_data=(X_test, y_test))


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
