In [1]:
import os
import warnings

import librosa
import librosa.display
import numpy as np
from dotenv import load_dotenv
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [2]:
def normalize_audio(wav):
    """Peak-normalize an audio signal so its largest absolute sample is 1.

    Args:
        wav: Sequence or NumPy array of audio samples.

    Returns:
        ``wav`` divided by its maximum absolute value. The input is returned
        unchanged when it is empty or entirely zero, because there is no
        peak to scale by.
    """
    # An empty signal has no maximum; np.max would raise ValueError on it.
    if np.size(wav) == 0:
        return wav
    peak = np.max(np.abs(wav))
    # Silent input: skip the division instead of dividing by zero.
    if peak == 0:
        return wav
    return wav / peak

def hamming_window(wav):
    """Taper the whole signal with a Hamming window of the same length.

    The window scales the samples down toward both ends, which reduces
    edge effects.
    """
    taper = np.hamming(len(wav))
    return wav * taper

def convert_to_mel_spectrogram(audio, n_fft, hop_length, n_mels, sr=None):
    """Convert a waveform into a mel spectrogram expressed in decibels.

    The signal is peak-normalized and tapered with a Hamming window before
    the mel spectrogram is computed.

    Args:
        audio: Array of audio samples.
        n_fft: FFT size forwarded to ``librosa.feature.melspectrogram``.
        hop_length: Hop length forwarded to ``librosa.feature.melspectrogram``.
        n_mels: Number of mel bands forwarded to
            ``librosa.feature.melspectrogram``.
        sr: Sample rate of ``audio`` in Hz. When omitted, the module-level
            ``target_sample_rate`` is used, which is what this function
            always did before the parameter existed.

    Returns:
        The mel power spectrogram converted to dB, measured relative to the
        spectrogram's own maximum (``ref=np.max``).
    """
    if sr is None:
        # Backward-compatible default: fall back to the notebook-wide setting.
        sr = target_sample_rate
    audio = hamming_window(normalize_audio(audio))
    mel_power = librosa.feature.melspectrogram(
        y=audio, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels
    )
    return librosa.power_to_db(mel_power, ref=np.max)

In [3]:
# Sample rate (Hz) handed to the mel-spectrogram computation.
target_sample_rate = 44100


def _read_int_setting(name):
    """Return environment variable ``name`` parsed as an int.

    Raises:
        RuntimeError: if the variable is not set.
        ValueError: if its value is not a valid integer.
    """
    raw = os.getenv(name)
    if raw is None:
        # int(None) would raise a TypeError that does not name the variable.
        raise RuntimeError(
            f"Environment variable '{name}' is not set; define it in .env or the shell."
        )
    try:
        return int(raw)
    except ValueError as exc:
        raise ValueError(
            f"Environment variable '{name}' must be an integer, got {raw!r}."
        ) from exc


# STFT parameters, read from the environment after loading the .env file.
load_dotenv()
hop_length = _read_int_setting('hop_length')
n_mels = _read_int_setting('n_mels')
n_fft = _read_int_setting('n_fft')


In [4]:
def load_segmented_files(directory):
    """Load every ``.wav`` file directly inside ``directory``.

    Each file is decoded at its native sample rate (``sr=None``). Because the
    spectrogram step works with the notebook-wide ``target_sample_rate``, a
    single warning is emitted when any file was recorded at a different rate.

    Args:
        directory: Path of the folder to scan; sub-folders are not visited.

    Returns:
        List of ``(samples, filename)`` tuples ordered by filename.
    """
    wav_files = []
    mismatched = []
    # os.listdir returns entries in arbitrary order; sort for a stable result.
    for file in tqdm(sorted(os.listdir(directory))):
        if not file.endswith(".wav"):
            continue
        samples, native_sr = librosa.load(os.path.join(directory, file), sr=None)
        if native_sr != target_sample_rate:
            mismatched.append(file)
        # os.listdir yields bare names, so the entry already is the filename.
        wav_files.append((samples, file))
    if mismatched:
        # One summary warning instead of one per file keeps the output readable.
        warnings.warn(
            f"{len(mismatched)} file(s) in '{directory}' are not sampled at "
            f"{target_sample_rate} Hz (first: {mismatched[0]}); their mel "
            "spectrograms will use the wrong frequency axis."
        )
    return wav_files

In [5]:
# Load the segmented audio clips for both classes.
normal_segments = load_segmented_files('output')
anomaly_segments = load_segmented_files('output_anomaly')
# Shuffle both clip lists in place. A seeded generator makes the order, and
# therefore which anomaly clips survive the later truncation to the number of
# normal clips, repeatable for a given load order.
segment_rng = np.random.default_rng(42)
segment_rng.shuffle(normal_segments)
segment_rng.shuffle(anomaly_segments)

100%|██████████| 3330/3330 [00:12<00:00, 262.42it/s]
100%|██████████| 6660/6660 [00:24<00:00, 270.43it/s]


In [6]:
# Compute one mel spectrogram per clip. The anomaly list is cut to at most the
# number of normal clips before conversion.
mel_spectrograms_normal = [
    convert_to_mel_spectrogram(waveform, n_fft, hop_length, n_mels)
    for waveform, _filename in tqdm(normal_segments)
]
mel_spectrograms_anomaly = [
    convert_to_mel_spectrogram(waveform, n_fft, hop_length, n_mels)
    for waveform, _filename in tqdm(anomaly_segments[:len(normal_segments)])
]

  return f(*args, **kwargs)
100%|██████████| 3330/3330 [00:33<00:00, 99.71it/s] 
100%|██████████| 3330/3330 [00:33<00:00, 99.08it/s] 


# 分割資料集

In [7]:
# Stack both classes into one dataset: label 0 = normal, label 1 = anomaly.
X = [*mel_spectrograms_normal, *mel_spectrograms_anomaly]
y = [0 for _ in mel_spectrograms_normal] + [1 for _ in mel_spectrograms_anomaly]

# 打亂資料集

In [8]:
# Shuffle samples and labels together so every spectrogram keeps its label.
# A seeded generator makes the resulting order repeatable for a given input.
combined = list(zip(X, y))
np.random.default_rng(42).shuffle(combined)
# zip(*[]) yields nothing to unpack, so leave X and y alone when empty.
if combined:
    X[:], y[:] = zip(*combined)

In [12]:
# Display the first (spectrogram, label) pair of the shuffled dataset.
combined[0]

(array([[-80.        , -80.        , -80.        , ..., -80.        ,
         -80.        , -80.        ],
        [-61.73487155, -62.92664556, -66.85266557, ..., -72.03854366,
         -67.97746121, -66.29207128],
        [-55.72328981, -56.91506383, -60.84108384, ..., -66.02696193,
         -61.96587948, -60.28048955],
        ...,
        [-29.74827129, -28.96748814, -28.56893373, ..., -25.84680842,
         -26.38559299, -28.02384032],
        [-28.44970276, -28.06257494, -29.04562229, ..., -31.90300302,
         -30.57375686, -30.43092518],
        [-33.08297286, -32.15502581, -32.99685091, ..., -38.45594638,
         -36.95836708, -36.24105992]]),
 0)

# Split dataset into training, validation, and testing sets

In [14]:
# Convert the mel spectrograms and labels to NumPy arrays.
X = np.array(X)
y = np.array(y)

# Hold out 20 % of the data, then halve it into validation and test sets.
# stratify keeps the normal/anomaly ratio the same in every split, which
# matters for the small validation and test sets.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

# Report the shape of each split (data first, then labels).
print("訓練集數據形狀:", X_train.shape)
print("驗證集數據形狀:", X_val.shape)
print("測試集數據形狀:", X_test.shape)
print("訓練集標籤形狀:", y_train.shape)
print("驗證集標籤形狀:", y_val.shape)
print("測試集標籤形狀:", y_test.shape)

訓練集數據形狀: (5328, 64, 2757)
驗證集數據形狀: (666, 64, 2757)
測試集數據形狀: (666, 64, 2757)
訓練集標籤形狀: (5328,)
驗證集標籤形狀: (666,)
測試集標籤形狀: (666,)


# 儲存資料集（選擇性）

In [15]:
# Persist every split to its own .npy file in the working directory.
for split_path, split_array in (
    ('X_train.npy', X_train),
    ('X_val.npy', X_val),
    ('X_test.npy', X_test),
    ('y_train.npy', y_train),
    ('y_val.npy', y_val),
    ('y_test.npy', y_test),
):
    np.save(split_path, split_array)

In [10]:
# 調整數據的形狀
# train_data = train_data.reshape(train_data.shape[0], train_data.shape[1], train_data.shape[2], 1)
# val_data = val_data.reshape(val_data.shape[0], val_data.shape[1], val_data.shape[2], 1)
# test_data = test_data.reshape(test_data.shape[0], test_data.shape[1], test_data.shape[2], 1)

# 數據歸一化到 [0, 1]
# train_data = (train_data - train_data.min()) / (train_data.max() - train_data.min())
# val_data = (val_data - val_data.min()) / (val_data.max() - val_data.min())
# test_data = (test_data - test_data.min()) / (test_data.max() - test_data.min())
# 打印轉換後的數據形狀
# print("訓練集數據形狀:", train_data.shape)
# print("驗證集數據形狀:", val_data.shape)
# print("測試集數據形狀:", test_data.shape)

訓練集數據形狀: (2131, 128, 1379, 1)
驗證集數據形狀: (533, 128, 1379, 1)
測試集數據形狀: (666, 128, 1379, 1)


# 調整數據的形狀