In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import os
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.models import Sequential
from sklearn.metrics import confusion_matrix
import time
import soundfile as sf
from scipy.signal import lfilter

In [None]:
# White-noise augmentation (default noise rate 0.001)
def adding_white_noise1(data, noise_rate=0.001):
    """Return `data` with scaled standard-normal noise added.

    Args:
        data: Signal to perturb; `len(data)` noise samples are drawn.
        noise_rate: Multiplier applied to the noise before it is added.

    Returns:
        The sum `data + noise_rate * noise` as a new object.
    """
    noise = np.random.randn(len(data))
    return data + noise_rate * noise

# White-noise augmentation (default noise rate 0.002)
def adding_white_noise2(data, noise_rate=0.002):
    """Add standard-normal noise, scaled by `noise_rate`, to `data`.

    Args:
        data: Signal to perturb; one noise sample is drawn per `len(data)`.
        noise_rate: Scale factor for the noise (defaults to 0.002).

    Returns:
        The noisy signal `data + noise_rate * noise`.
    """
    sample_count = len(data)
    scaled_noise = noise_rate * np.random.randn(sample_count)
    return data + scaled_noise

# White-noise augmentation (default noise rate 0.003)
def adding_white_noise3(data, noise_rate=0.003):
    """Perturb `data` with standard-normal noise scaled by `noise_rate`.

    Args:
        data: Signal to perturb; `len(data)` noise samples are generated.
        noise_rate: Scale factor for the noise (defaults to 0.003).

    Returns:
        `data + noise_rate * noise`, leaving `data` itself untouched.
    """
    gaussian = np.random.randn(len(data))
    noisy = data + noise_rate * gaussian
    return noisy

# Circular-shift augmentation
def shifting_sound(data, sr=22050, roll_rate=0.1):
    """Circularly shift `data` by a fraction of its length.

    Args:
        data: Signal to shift.
        sr: Accepted for signature compatibility; not used by the body.
        roll_rate: Fraction of `len(data)` to roll by (truncated to an int).

    Returns:
        The rolled signal as produced by `np.roll`.
    """
    offset = int(len(data) * roll_rate)
    return np.roll(data, offset)

# Reverse augmentation
def reverse_sound(data, sr=22050):
    """Return a time-reversed copy of `data`.

    Args:
        data: Signal to reverse (array or sequence).
        sr: Accepted for signature compatibility; not used by the body.

    Returns:
        A new C-contiguous array holding the elements of `data` in reverse
        order along the first axis. The input is not modified.
    """
    # A reversed slice is a negative-stride view; copy it so callers get an
    # independent, contiguous array instead of building one element by element
    # in a Python-level loop.
    return np.asarray(data)[::-1].copy()

# Polarity-inversion ("minus") augmentation
def minus_sound(data, sr=22050):
    """Return `data` multiplied by -1.

    Args:
        data: Signal to invert.
        sr: Accepted for signature compatibility; not used by the body.

    Returns:
        The product `-1 * data`.
    """
    return -1 * data

# MFCC
def plot_and_save_mfcc(file_path, output_path):
    """Render the MFCCs of one audio file and save the plot as an image.

    Args:
        file_path: Path of the audio file to load.
        output_path: Path the figure is written to with `plt.savefig`.
    """
    # Load the audio at its native sample rate (sr=None disables resampling).
    y, sr = librosa.load(file_path, sr=None)

    # Compute 13 MFCCs.
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Plot the MFCC matrix. The sample rate is passed explicitly because
    # specshow otherwise assumes 22050 Hz, which mislabels the time axis for
    # files recorded at any other rate.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(mfccs, sr=sr, x_axis='time')
    plt.colorbar()
    plt.title('MFCC')

    # Write the image and release the figure.
    plt.savefig(output_path)
    plt.close()

def process_directory(directory_path, output_dir, label):
    """Save an MFCC image for every entry in `directory_path`.

    Args:
        directory_path: Folder whose entries are passed to
            `plot_and_save_mfcc`.
        output_dir: Folder the images are written to; created if missing.
        label: Prefix prepended (with an underscore) to each image name.
    """
    # savefig does not create missing folders, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)

    # Collect the entries of the input folder.
    file_list = os.listdir(directory_path)

    for file_name in file_list:
        file_path = os.path.join(directory_path, file_name)

        # Image path: "<label>_<file name with .wav swapped for .png>".
        output_path = os.path.join(output_dir, f"{label}_{file_name.replace('.wav', '.png')}")

        # Render and save the MFCC image.
        plot_and_save_mfcc(file_path, output_path)
        
def _save_mfcc_figure(signal, sr, title, image_path):
    """Plot the 13-coefficient MFCC of `signal` and save it to `image_path`."""
    mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)

    fig = plt.figure(figsize=(10, 4))
    try:
        # Pass sr so the time axis matches the file's native sample rate
        # (specshow assumes 22050 Hz when it is omitted).
        librosa.display.specshow(mfccs, sr=sr, x_axis='time')
        plt.colorbar()
        plt.title(title)
        plt.savefig(image_path)
    finally:
        # Close every figure, not just the last one, so that processing a
        # whole directory does not accumulate open figures.
        plt.close(fig)


def aug_plot_and_save_mfcc(file_path, output_path):
    """Save MFCC images for an audio file and three augmented variants.

    Four images are written: the original signal to `output_path`, and the
    reversed, negated and circularly shifted signals to `output_path` with
    '.png' replaced by '_reversed_mfcc.png', '_minused_mfcc.png' and
    '_shifted_mfcc.png' respectively.

    Args:
        file_path: Path of the audio file to load.
        output_path: Image path for the un-augmented signal.
    """
    # Load the audio at its native sample rate.
    y, sr = librosa.load(file_path, sr=None)

    variants = (
        ('MFCC', y, output_path),
        ('Reversed MFCC', reverse_sound(y),
         output_path.replace('.png', '_reversed_mfcc.png')),
        ('Minus MFCC', minus_sound(y),
         output_path.replace('.png', '_minused_mfcc.png')),
        ('Shifted MFCC', shifting_sound(y),
         output_path.replace('.png', '_shifted_mfcc.png')),
    )

    for title, signal, image_path in variants:
        _save_mfcc_figure(signal, sr, title, image_path)
    


def aug_process_directory(directory_path, output_dir, label):
    """Save original and augmented MFCC images for every entry in a folder.

    Args:
        directory_path: Folder whose entries are passed to
            `aug_plot_and_save_mfcc`.
        output_dir: Folder the images are written to; created if missing.
        label: Prefix prepended (with an underscore) to each image name.
    """
    # savefig does not create missing folders, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)

    # Collect the entries of the input folder.
    file_list = os.listdir(directory_path)

    for file_name in file_list:
        file_path = os.path.join(directory_path, file_name)

        # Image path: "<label>_<file name with .wav swapped for .png>".
        output_path = os.path.join(output_dir, f"{label}_{file_name.replace('.wav', '.png')}")

        # Render and save the MFCC images (original plus augmentations).
        aug_plot_and_save_mfcc(file_path, output_path)
        
# LPC (linear prediction coefficients)
def calculate_lpc_coefficients(data, order=16):
    """Compute linear prediction coefficients of `data` with `librosa.lpc`.

    Args:
        data: Audio signal passed to `librosa.lpc`.
        order: Order of the linear filter (defaults to 16).

    Returns:
        The coefficient array returned by `librosa.lpc`.
    """
    # `order` is keyword-only in current librosa releases; passing it
    # positionally raises TypeError there.
    lpc_coefficients = librosa.lpc(data, order=order)
    return lpc_coefficients

def plot_and_save_lpc(file_path, output_path):
    """Plot the LPC coefficients of an audio file, save the plot and show it.

    Args:
        file_path: Path of the audio file to load (native sample rate).
        output_path: Path the figure is written to before it is shown.
    """
    # Load the recording without resampling.
    signal, _sample_rate = librosa.load(file_path, sr=None)

    # Coefficients come from the shared helper (default order).
    coefficients = calculate_lpc_coefficients(signal)

    # Line plot with a marker on every coefficient.
    plt.figure(figsize=(10, 4))
    plt.plot(coefficients, marker='o')
    plt.title('LPC Coefficients')

    # Save first, then display.
    plt.savefig(output_path)
    plt.show()

In [None]:
# import os
# import librosa
# import numpy as np
# import scipy
# import matplotlib.pyplot as plt

# ok_directory = "D:\\Dataset\\#1_Dataset_장비이상 조기탐지 AI 데이터셋#1\\data\\FAN_sound_OK"
# er_directory = "C:\\Users\\user\\Desktop\\test\\Define\\Define_1_2\\audio file"

# ok_files = os.listdir(ok_directory)
# er_files = os.listdir(er_directory)

# for file_name in ok_files:
#     # 파일 경로 생성
#     file_path = os.path.join(ok_directory, file_name)
    
#     # 오디오 로드
#     y, sr = librosa.load(file_path)
    
#     # LPC 계수 계산
#     a = librosa.lpc(y, order=2)
#     b = np.hstack([[0], -1 * a[1:]])
#     y_hat = scipy.signal.lfilter(b, [1], y)
    
#     # 그래프 생성
#     fig, ax = plt.subplots()
#     ax.plot(y_hat, linestyle='--', label='y_hat')
#     ax.legend()
#     ax.set_title('lpc image')
    
#     # 이미지 저장
#     output_path = os.path.join("C:\\Users\\user\\Desktop\\test\\Define\\Define_1_2\\lpc_ok", f"{file_name.replace('.wav', '_lpc.png')}")
#     plt.savefig(output_path)
    
# plt.close()
# LPCC Error

import matplotlib.pyplot as plt
import scipy
import librosa

# NOTE: despite the "_ok" names, these point at the augmented error-class
# audio folder and the error-class LPCC image folder.
audio_ok = "C:\\Users\\user\\Desktop\\test\\Define\\Define_1_2\\audio file"
output_file_path = "C:\\Users\\user\\Desktop\\test\\Define\\Define_1_2\\lpc_er"

# savefig does not create missing folders.
os.makedirs(output_file_path, exist_ok=True)

audio_files_ok = os.listdir(audio_ok)

for file in audio_files_ok:
    audio_path = os.path.join(audio_ok, file)
    y, sr = librosa.load(audio_path)

    reversed_y = reverse_sound(y)
    minus_y = minus_sound(y)
    shift_y = shifting_sound(y)

    # Index 0 is the un-augmented signal; 1-3 are reverse, minus, shift.
    augmentations = [y, reversed_y, minus_y, shift_y]

    for augmentation_index, augmented_y in enumerate(augmentations):
        # Order-12 LPC, then the forward linear prediction of the signal.
        lpc_a = librosa.lpc(augmented_y, order=12)
        lpc_b = np.hstack([[0],-1*lpc_a[1:]])
        lpc_y_hat = scipy.signal.lfilter(lpc_b,[1],augmented_y)
        # Negative log-magnitude of the prediction's FFT, as a 1-row image.
        lpcc = -np.log1p(np.abs(np.fft.fft(lpc_y_hat)))
        lpcc_2d = np.expand_dims(lpcc, axis=0)

        # NOTE(review): the image is a single row of FFT bins, yet the axes
        # are labelled time/frequency with a fixed 0-30 extent - confirm the
        # labels are intended.
        fig = plt.figure(figsize=(10, 6))
        plt.imshow(lpcc_2d, aspect='auto', origin='lower', cmap='viridis', extent=(0, 30, 0, sr/2))
        plt.title('LPCC Coefficients')
        plt.xlabel('Time (s)')
        plt.ylabel('Frequency (Hz)')
        output_path = os.path.join(output_file_path, os.path.splitext(file)[0] + f"_augmentation_{augmentation_index}.png")
        plt.savefig(output_path)
        # Four figures are created per file; close each one once saved so the
        # loop does not keep every figure alive.
        plt.close(fig)

    

    # # Plot only y_hat
    # fig, ax = plt.subplots()
    # ax.plot(y_hat, linestyle='--', label='y_hat')
    # ax.legend(loc='upper right')
    # output_path = os.path.join(output_file_path, f"{os.path.splitext(file)[0]}_y_hat.png")
    # plt.savefig(output_path)

    # # Close the current figure to avoid overlapping when looping through files
    # plt.close()

In [None]:
# LPCC images for the OK (normal) recordings
import matplotlib.pyplot as plt
import scipy
import librosa

audio_ok = "D:\\Dataset\\#1_Dataset_장비이상 조기탐지 AI 데이터셋#1\\data\\FAN_sound_OK"
output_file_path = "C:\\Users\\user\\Desktop\\test\\Define\\Define_1_2\\lpc_ok"

# savefig does not create missing folders.
os.makedirs(output_file_path, exist_ok=True)

audio_files_ok = os.listdir(audio_ok)

for file in audio_files_ok:
    audio_path = os.path.join(audio_ok, file)
    y, sr = librosa.load(audio_path)
    # Order-12 LPC, then the forward linear prediction of the signal.
    a = librosa.lpc(y, order=12)
    b = np.hstack([[0], -1 * a[1:]])
    y_hat = scipy.signal.lfilter(b, [1], y)
    # Negative log-magnitude of the prediction's FFT, as a 1-row image.
    lpcc = -np.log1p(np.abs(np.fft.fft(y_hat)))
    lpcc_2d = np.expand_dims(lpcc, axis=0)

    fig = plt.figure(figsize=(10, 6))
    plt.imshow(lpcc_2d, aspect='auto', origin='lower', cmap='viridis', extent=(0, 30, 0, sr/2))
    plt.title('LPCC Coefficients')
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    output_path = os.path.join(output_file_path, f"{os.path.splitext(file)[0]}_y_hat.png")
    plt.savefig(output_path)
    # One figure is created per file; close it once saved so the loop does
    # not keep every figure alive.
    plt.close(fig)

In [None]:
import matplotlib.pyplot as plt
import scipy
# Load 20 ms of the bundled 'libri1' example recording.
y, sr = librosa.load(librosa.ex('libri1'), duration=0.020)

# Second-order LPC and the matching forward-prediction FIR filter.
a = librosa.lpc(y, order=2)
b = np.hstack([[0], -1 * a[1:]])
y_hat = scipy.signal.lfilter(b, [1], y)

# Overlay the signal and its linear prediction.
fig, ax = plt.subplots()
ax.plot(y, label='y')
ax.plot(y_hat, linestyle='--', label='y_hat')
ax.legend()
ax.set_title('LP Model Forward Prediction')

In [None]:
# Source recordings: normal (OK) and faulty (error) fan sounds.
audio_ok = "D:\\Dataset\\#1_Dataset_장비이상 조기탐지 AI 데이터셋#1\\data\\FAN_sound_OK"
audio_er = "D:\\Dataset\\#1_Dataset_장비이상 조기탐지 AI 데이터셋#1\\data\\FAN_sound_error"

# Destinations for the spectrogram images and the augmented audio.
output_ok = "C:\\Users\\user\\Desktop\\test\\Define\\Define_1_2\\mfcc_ok"
output_er = "C:\\Users\\user\\Desktop\\test\\Define\\Define_1_2\\mfcc_er"
output_audio = "C:\\Users\\user\\Desktop\\test\\Define\\Define_1_2\\audio file"

# Create every destination up front: output_audio is listed just below and
# written to by sf.write, and neither call creates a missing folder.
os.makedirs(output_ok, exist_ok=True)
os.makedirs(output_er, exist_ok=True)
os.makedirs(output_audio, exist_ok=True)

audio_files_ok = os.listdir(audio_ok)
audio_files_er = os.listdir(audio_er)
# NOTE: this is a snapshot taken BEFORE the loop below writes new .wav files
# into output_audio, so it does not include the files produced by this run.
aug_files_er = os.listdir(output_audio)

for audio_file in audio_files_ok:
    audio_path = os.path.join(audio_ok, audio_file)
    y, sr = librosa.load(audio_path)

    # Compute the mel spectrogram.
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)

    # Convert to dB and min-max scale to the 0-1 range for saving as an image.
    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
    mel_spectrogram_db_image = (mel_spectrogram_db - mel_spectrogram_db.min()) / (mel_spectrogram_db.max() - mel_spectrogram_db.min())

    # Save as an image file.
    output_file_path = os.path.join(output_ok, os.path.splitext(audio_file)[0] + ".png")
    plt.imsave(output_file_path, mel_spectrogram_db_image, cmap='viridis')

for audio_file in audio_files_er:
    audio_path = os.path.join(audio_er, audio_file)
    y, sr = librosa.load(audio_path)

    # White noise 1 (function default noise_rate=0.001)
    addwn_y1 = adding_white_noise1(y)

    # White noise 2 (function default noise_rate=0.002)
    addwn_y2 = adding_white_noise2(y)

    # White noise 3 (function default noise_rate=0.003)
    addwn_y3 = adding_white_noise3(y)

    # Index 0 is the un-augmented signal; 1-3 are the three noise levels.
    augmentations = [y, addwn_y1, addwn_y2, addwn_y3]

    for augmentation_index, augmented_y in enumerate(augmentations):
        mel_spectrogram = librosa.feature.melspectrogram(y=augmented_y, sr=sr)

        # Convert to dB and min-max scale to the 0-1 range.
        mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
        mel_spectrogram_db_image = (mel_spectrogram_db - mel_spectrogram_db.min()) / (mel_spectrogram_db.max() - mel_spectrogram_db.min())

        output_file_path = os.path.join(output_er, os.path.splitext(audio_file)[0] + f"_augmentation_{augmentation_index}.png")
        plt.imsave(output_file_path, mel_spectrogram_db_image, cmap='viridis')

        # Also keep the (augmented) audio itself for the later cells.
        audio_output_file_path = os.path.join(output_audio, os.path.splitext(audio_file)[0] + f"_augmentation_{augmentation_index}.wav")

        sf.write(audio_output_file_path, augmented_y, sr)
            
        
# for aug_file in aug_files_er:
#     aug_path = os.path.join(aug_files_er, aug_file)
#     y, sr = librosa.load(aug_path)
    
    
#     # Reverse Augmentation
#     reversed_y = reverse_sound(y)

#     # Minus Augmentation
#     minus_y = minus_sound(y)
    
#     #Shifting Augmentation
#     shift_y = shifting_sound(y)
    
#     augmentations = [y, reversed_y, minus_y, shift_y]

#     for augmentation_index, augmented_y in enumerate(augmentations):
#         mel_spectrogram = librosa.feature.melspectrogram(y=augmented_y, sr=sr)

#         mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
#         mel_spectrogram_db_image = (mel_spectrogram_db - mel_spectrogram_db.min()) / (mel_spectrogram_db.max() - mel_spectrogram_db.min())  # 0~1로 스케일 조정

#         output_file_path = os.path.join(output_er, os.path.splitext(audio_file)[0] + f"_augmentation_{augmentation_index}.png")
#         plt.imsave(output_file_path, mel_spectrogram_db_image, cmap='viridis')

In [None]:
# MFCC images for the OK and error recordings.
# NOTE: the output folders "mfcc_ok" / "mfcc_er" are relative paths, so they
# resolve against the current working directory.
ok_directory = "D:\\Dataset\\#1_Dataset_장비이상 조기탐지 AI 데이터셋#1\\data\\FAN_sound_OK"
er_directory = "C:\\Users\\user\\Desktop\\test\\Define\\Define_1_2\\audio file"

# Process the OK files (plain MFCC image per file).
process_directory(ok_directory, "mfcc_ok", "ok")

# Process the error files (MFCC image plus augmented variants per file).
aug_process_directory(er_directory, "mfcc_er", "er")

In [None]:
audio_directory = "C:\\Users\\user\\Desktop\\test\\Define\\Define_1_2\\audio file"
output_directory = "C:\\Users\\user\\Desktop\\test\\Define\\Define_1_2\\mfcc_er"

os.makedirs(output_directory, exist_ok=True)
audio_files = os.listdir(audio_directory)

for audio_file in audio_files:
    audio_path = os.path.join(audio_directory, audio_file)
    y, sr = librosa.load(audio_path)
    file_stem = os.path.splitext(audio_file)[0]

    # File-name suffix -> signal. Every variant goes through the same
    # mel-spectrogram pipeline below instead of four copy-pasted stanzas.
    variants = (
        ("_original", y),
        ("_reversed", reverse_sound(y)),
        ("_minus", minus_sound(y)),
        ("_shifted", shifting_sound(y)),
    )

    for suffix, signal in variants:
        mel_spectrogram = librosa.feature.melspectrogram(y=signal, sr=sr)
        mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
        # Min-max scale to the 0-1 range for saving as an image.
        mel_spectrogram_db_image = (mel_spectrogram_db - mel_spectrogram_db.min()) / (mel_spectrogram_db.max() - mel_spectrogram_db.min())
        output_file_path = os.path.join(output_directory, file_stem + suffix + ".png")
        plt.imsave(output_file_path, mel_spectrogram_db_image, cmap='viridis')

In [None]:
output_er_re = "C:\\Users\\user\\Desktop\\test\\Define\\Define_1_2\\erMel(2)"

# plt.imsave does not create missing folders.
os.makedirs(output_er_re, exist_ok=True)

for aug_file in aug_files_er:
    # aug_files_er is the list of file names taken from output_audio in the
    # earlier mel-spectrogram cell, so the folder to join with is
    # output_audio; joining with the list itself raises TypeError.
    aug_path = os.path.join(output_audio, aug_file)
    y, sr = librosa.load(aug_path)

    # Reverse augmentation
    reversed_y = reverse_sound(y)

    # Minus augmentation
    minus_y = minus_sound(y)

    # Shifting augmentation
    shift_y = shifting_sound(y)

    # Index 0 is the un-augmented signal; 1-3 are reverse, minus, shift.
    augmentations = [y, reversed_y, minus_y, shift_y]

    for augmentation_index, augmented_y in enumerate(augmentations):
        mel_spectrogram = librosa.feature.melspectrogram(y=augmented_y, sr=sr)

        # Convert to dB and min-max scale to the 0-1 range.
        mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
        mel_spectrogram_db_image = (mel_spectrogram_db - mel_spectrogram_db.min()) / (mel_spectrogram_db.max() - mel_spectrogram_db.min())

        output_file_path = os.path.join(output_er_re, os.path.splitext(aug_file)[0] + f"_augmentation_{augmentation_index}.png")
        plt.imsave(output_file_path, mel_spectrogram_db_image, cmap='viridis')

In [None]:
# audio augmentation (wn1, wn2, wn3)
import os
import librosa
import soundfile as sf
import numpy as np

# Input folder: the error-class fan recordings to augment.
data_path = "D:\\Dataset\\#1_Dataset_장비이상 조기탐지 AI 데이터셋#1\\data\\FAN_sound_error"

# Augmentation functions
def adding_white_noise(data, noise_rate):
    """Return `data` plus standard-normal noise scaled by `noise_rate`.

    Args:
        data: Signal to perturb; `len(data)` noise samples are drawn.
        noise_rate: Multiplier applied to the noise before it is added.

    Returns:
        The sum `data + noise_rate * noise`.
    """
    noise = np.random.randn(len(data))
    return data + noise_rate * noise

# Apply an augmentation to every .wav file in a folder and save the results
def augment_and_save(input_path, output_path, augmentation_function, noise_rate):
    """Augment each '.wav' file in `input_path` and write it to `output_path`.

    Args:
        input_path: Folder to scan; entries not ending in ".wav" are skipped.
        output_path: Folder the augmented files are written to.
        augmentation_function: Callable invoked as
            `augmentation_function(audio, noise_rate)`.
        noise_rate: Forwarded to the augmentation and embedded in the output
            file name as "er_<noise_rate>_<original name>".
    """
    for filename in os.listdir(input_path):
        if not filename.endswith(".wav"):
            continue

        # Load at the file's native sample rate.
        source_file = os.path.join(input_path, filename)
        samples, sample_rate = librosa.load(source_file, sr=None)

        # Augment, then write under the prefixed name at the same rate.
        augmented = augmentation_function(samples, noise_rate)
        target_file = os.path.join(output_path, f"er_{noise_rate}_{filename}")
        sf.write(target_file, augmented, sample_rate)

# Folder that receives the augmented audio
output_path = "C:\\Users\\user\\Desktop\\test\\Define\\Define_1_2\\audio file"

# Create the folder when it does not exist yet
if not os.path.exists(output_path):
    os.makedirs(output_path)

# One full pass over the error recordings per noise rate
for noise_rate in (0.001, 0.002, 0.003):
    augment_and_save(data_path, output_path, adding_white_noise, noise_rate)


In [None]:
import os
import shutil

data_path = "D:\\Dataset\\#1_Dataset_장비이상 조기탐지 AI 데이터셋#1\\data\\FAN_sound_error"
output_path = "C:\\Users\\user\\Desktop\\test\\Define\\Define_1_2\\audio file"
prefix = "er_original_"

# Make sure the destination folder exists
os.makedirs(output_path, exist_ok=True)

# Copy every entry of data_path into output_path under a prefixed name
for filename in os.listdir(data_path):
    file_path = os.path.join(data_path, filename)
    new_filename = prefix + filename
    new_file_path = os.path.join(output_path, new_filename)
    shutil.copy(file_path, new_file_path)

print("작업 완료!")



In [None]:
# Output folder "erMel(2)"; the same value is also assigned to output_er_re
# in an earlier cell of this notebook.
output_er_re = "C:\\Users\\user\\Desktop\\test\\Define\\Define_1_2\\erMel(2)"