<a href="https://colab.research.google.com/github/Eunchae-L/Audio_Denoising/blob/main/denoising2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/yxlu-0102/MP-SENet.git

Cloning into 'MP-SENet'...
remote: Enumerating objects: 797, done.[K
remote: Counting objects: 100% (141/141), done.[K
remote: Compressing objects: 100% (37/37), done.[K
remote: Total 797 (delta 128), reused 104 (delta 104), pack-reused 656[K
Receiving objects: 100% (797/797), 477.72 MiB | 15.87 MiB/s, done.
Resolving deltas: 100% (203/203), done.
Updating files: 100% (247/247), done.


In [None]:
# Google Drive에 있는 zip 파일의 경로를 설정
zip_file_path = '/content/drive/MyDrive/고려대학교/Audio_detection/denoising/Data_Sources/clean_trainset_wav.zip'  # zip 파일의 경로로 변경

# 압축 해제할 경로 설정
extract_path = '/content/MP-SENet/VoiceBank+DEMAND/wavs_clean'  # 압축을 해제할 경로로 변경

# 디렉토리 생성 (존재하지 않는 경우)
import os
os.makedirs(extract_path, exist_ok=True)

# 압축 해제
!unzip {zip_file_path} -d {extract_path}

In [None]:
# Google Drive에 있는 zip 파일의 경로를 설정
zip_file_path = '/content/drive/MyDrive/고려대학교/Audio_detection/denoising/Data_Sources/noisy_trainset_wav.zip'  # zip 파일의 경로로 변경

# 압축 해제할 경로 설정
extract_path = '/content/MP-SENet/VoiceBank+DEMAND/wavs_noisy'  # 압축을 해제할 경로로 변경

# 디렉토리 생성 (존재하지 않는 경우)
import os
os.makedirs(extract_path, exist_ok=True)

# 압축 해제
!unzip {zip_file_path} -d {extract_path}

In [None]:
!pip install librosa
!pip install soundfile
!pip install torch
!pip install torchaudio
!pip install torchvision
!pip install einops
!pip install pesq
!pip install tensorboard
!pip install scikit-learn

In [None]:
import os
import librosa
import soundfile as sf

def resample_wav(input_dir, output_dir, target_sr=16000):
    """Resample every WAV file found under ``input_dir`` and save it to ``output_dir``.

    ``input_dir`` is walked recursively, but the results are written flat into
    ``output_dir`` under their original file names. Two inputs that share a
    file name in different sub-directories therefore map to the same output
    path, and the one processed last wins.

    Args:
        input_dir: Directory searched recursively for ``.wav`` files. The
            extension is matched case-insensitively, so ``.WAV`` is included.
        output_dir: Directory that receives the resampled files. It is created,
            together with any missing parents, when it does not exist.
        target_sr: Sample rate, in Hz, of the written files.
    """
    os.makedirs(output_dir, exist_ok=True)
    output_root = os.path.abspath(output_dir)

    for root, dirs, files in os.walk(input_dir):
        # When output_dir lives inside input_dir, prune it from the walk so the
        # files written by this very call are not picked up and processed again.
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != output_root
        ]

        for file in files:
            if not file.lower().endswith('.wav'):
                continue

            input_path = os.path.join(root, file)
            output_path = os.path.join(output_dir, file)

            # sr=None keeps the file's native sample rate on load.
            audio, sr = librosa.load(input_path, sr=None)

            # Convert from the native rate to the requested one.
            resampled_audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)

            # Store the result under the same file name in the output directory.
            sf.write(output_path, resampled_audio, target_sr)
            print(f'Resampled {input_path} to {output_path}')

# Clean speech: source folder and the folder that receives the resampled copies.
input_dir_clean = '/content/MP-SENet/VoiceBank+DEMAND/wavs_clean'
output_dir_clean = '/content/MP-SENet/VoiceBank+DEMAND/wavs_clean_16k'
resample_wav(input_dir_clean, output_dir_clean)

# Noisy speech: source folder and the folder that receives the resampled copies.
input_dir_noisy = '/content/MP-SENet/VoiceBank+DEMAND/wavs_noisy'
output_dir_noisy = '/content/MP-SENet/VoiceBank+DEMAND/wavs_noisy_16k'
resample_wav(input_dir_noisy, output_dir_noisy)

In [None]:
import argparse
import glob
import json
import os

import librosa
import soundfile as sf
import torch

from models.generator import MPNet
from utils import load_checkpoint
from datasets.dataset import mag_pha_stft, mag_pha_istft
from env import AttrDict

def main(argv=None):
    """Parse the options, load the model configuration and run inference.

    The configuration is read from ``config.json`` in the same directory as
    the checkpoint file. As a side effect the module-level globals ``h``
    (configuration) and ``device`` (torch device) are set; ``inference`` reads
    both of them.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, so existing ``main()`` calls and
            command-line use keep working. Passing a list allows calling this
            function directly from a notebook cell.
    """
    print('Initializing Inference Process..')

    parser = argparse.ArgumentParser()
    parser.add_argument('--input_noisy_wavs_dir', default='/content/drive/MyDrive/고려대학교/Audio_detection/data/test')
    parser.add_argument('--output_dir', default='/content/drive/MyDrive/고려대학교/Audio_detection/data/cleaned')
    # A default replaces the former required=True: inside a notebook kernel no
    # command line is available to supply the option.
    parser.add_argument('--checkpoint_file', default='/content/MP-SENet/best_ckpt/g_best')

    # A notebook kernel is started with its own arguments (e.g. "-f <connection
    # file>"); parse_known_args tolerates them instead of exiting with an error.
    a, ignored_args = parser.parse_known_args(argv)
    if ignored_args:
        print(f'Ignoring unrecognized arguments: {ignored_args}')

    config_file = os.path.join(os.path.split(a.checkpoint_file)[0], 'config.json')
    with open(config_file, encoding='utf-8') as f:
        json_config = json.load(f)

    global h
    h = AttrDict(json_config)

    # Seed the CPU generator, and the CUDA generator when a GPU is present.
    torch.manual_seed(h.seed)
    global device
    if torch.cuda.is_available():
        torch.cuda.manual_seed(h.seed)
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    inference(a)

def inference(a):
    """Denoise every ``.ogg`` file in ``a.input_noisy_wavs_dir`` and save it as WAV.

    Relies on the module-level globals ``h`` (configuration) and ``device``
    that ``main`` sets before calling this function.

    Each file is loaded at ``h.sampling_rate``, scaled to unit RMS, passed
    through the model in the magnitude/phase STFT domain, scaled back, and
    written to ``a.output_dir`` as ``<name>.wav`` in 16-bit PCM. Files that are
    empty or completely silent are skipped, because their normalisation factor
    would require a division by zero.

    Args:
        a: Parsed arguments providing ``checkpoint_file``,
            ``input_noisy_wavs_dir`` and ``output_dir``.
    """
    model = MPNet(h).to(device)

    state_dict = load_checkpoint(a.checkpoint_file, device)
    model.load_state_dict(state_dict['generator'])

    # Sorted so the files are processed (and logged) in a reproducible order.
    ogg_files = sorted(glob.glob(os.path.join(a.input_noisy_wavs_dir, '*.ogg')))

    os.makedirs(a.output_dir, exist_ok=True)

    if not ogg_files:
        print(f'No .ogg files found in {a.input_noisy_wavs_dir}')

    model.eval()

    with torch.no_grad():
        for ogg_file in ogg_files:
            index = os.path.splitext(os.path.basename(ogg_file))[0]
            print(index)

            # Decode the OGG file at the sampling rate given in the configuration.
            y, _ = librosa.load(ogg_file, sr=h.sampling_rate)
            noisy_wav = torch.FloatTensor(y).to(device)

            energy = torch.sum(noisy_wav ** 2.0)
            if energy == 0:
                # Empty or all-zero input: the factor below would divide by
                # zero and the written output would be NaN.
                print(f'Skipping {ogg_file}: empty or silent audio')
                continue

            # sqrt(N / sum(x^2)) is 1 / RMS, so the scaled waveform has unit RMS.
            norm_factor = torch.sqrt(len(noisy_wav) / energy).to(device)
            noisy_wav = (noisy_wav * norm_factor).unsqueeze(0)

            noisy_amp, noisy_pha, noisy_com = mag_pha_stft(noisy_wav, h.n_fft, h.hop_size, h.win_size, h.compress_factor)
            amp_g, pha_g, com_g = model(noisy_amp, noisy_pha)
            audio_g = mag_pha_istft(amp_g, pha_g, h.n_fft, h.hop_size, h.win_size, h.compress_factor)

            # Undo the input normalisation so the output keeps the original level.
            audio_g = audio_g / norm_factor

            output_file = os.path.join(a.output_dir, index + '.wav')
            sf.write(output_file, audio_g.squeeze().cpu().numpy(), h.sampling_rate, 'PCM_16')

# Run the inference entry point when this code is executed as the main module.
if __name__ == '__main__':
    main()


Initializing Inference Process..


usage: colab_kernel_launcher.py [-h] [--input_noisy_wavs_dir INPUT_NOISY_WAVS_DIR]
                                [--output_dir OUTPUT_DIR] --checkpoint_file CHECKPOINT_FILE
colab_kernel_launcher.py: error: the following arguments are required: --checkpoint_file


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
!python inference.py --checkpoint_file /content/MP-SENet/best_ckpt/g_best

Initializing Inference Process..
Loading '/content/MP-SENet/best_ckpt/g_best'
Complete.
p232_001
Traceback (most recent call last):
  File "/content/MP-SENet/inference.py", line 93, in <module>
    main()
  File "/content/MP-SENet/inference.py", line 89, in main
    inference(a)
  File "/content/MP-SENet/inference.py", line 48, in inference
    noisy_wav, _ = librosa.load(os.path.join(a.input_noisy_wavs_dir, index+'.wav'), h.sampling_rate)
TypeError: load() takes 1 positional argument but 2 were given
