## Initialization

In [None]:
# Make Google Drive available inside the Colab runtime; the dataset archive
# used by the next cell is read from this mount point.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# create directory for data
!mkdir data

# unzip files from zip folder
!unzip "/content/drive/MyDrive/PG/PB/Projekt badawczy - terminy medyczne/001_004.zip" -d "data/"

mkdir: cannot create directory ‘data’: File exists
Archive:  /content/drive/MyDrive/PG/PB/Projekt badawczy - terminy medyczne/001_004.zip
replace data/001_004/001_004_online.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


## Package installation

In [None]:
!pip install resemble-enhance --upgrade




## Imports

In [None]:
import pathlib
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from pydub import AudioSegment, effects
import os
import IPython.display as ipd

# RESEMBLE ENHANCE
import gradio as gr
import torch
import torchaudio

from resemble_enhance.enhancer.inference import denoise, enhance

[2024-10-04 12:42:06,691] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [None]:
# Folder the archive was extracted into; every *.wav file in it is processed.
data_directory = "./data/001_004/"
data_path = pathlib.Path(data_directory)
# glob() yields entries in an unspecified, filesystem-dependent order.
# Sorting keeps the processing and playback order identical on every run.
files = sorted(data_path.glob('*.wav'))

## Functionality definitions

In [None]:
class Audio:
  """An audio clip kept in memory: the sample data plus its sampling rate."""

  def __init__(self, audio, freq):
    # `freq` is stored as `sr`; it is the rate handed to the player below.
    self.sr = freq
    self.audio = audio

  def display_play_button(self):
    """Show an inline audio player for this clip in the notebook output."""
    player = ipd.Audio(data=self.audio, rate=self.sr)
    display(player)

def create_audio_from_filepath(filepath):
  """Load the audio file at `filepath` and wrap it in an `Audio` object.

  `sr=None` is forwarded to `librosa.load`, so no target sampling rate is
  imposed here; the rate returned by librosa is stored on the result.
  """
  samples, sample_rate = librosa.load(filepath, sr=None)
  return Audio(samples, sample_rate)

def _fn(path, solver, nfe, tau, denoising):
    if path is None:
        return None, None

    solver = solver.lower()
    nfe = int(nfe)
    lambd = 0.9 if denoising else 0.1

    dwav, sr = torchaudio.load(path)
    dwav = dwav.mean(dim=0)

    wav1, new_sr = denoise(dwav, sr, device)
    wav2, new_sr = enhance(dwav, sr, device, nfe=nfe, solver=solver, lambd=lambd, tau=tau)

    wav1 = wav1.cpu().numpy()
    wav2 = wav2.cpu().numpy()

    return Audio(wav2, new_sr)


### Enhancer settings

In [None]:
# Arguments handed to _fn(...) for every processed file.
solver = "Midpoint"  # lower-cased by _fn before it is passed to enhance()
nfe = 64             # converted to int by _fn and passed to enhance() as nfe
tau = 0.5            # passed to enhance() unchanged
denoising = False    # False -> _fn uses lambd=0.1, True -> lambd=0.9

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

## Sample processing

In [None]:
# Parallel result lists: entry i of both lists belongs to the same input file.
original_audio, enhanced_audio = [], []

### Execute enhancer

In [None]:
# Start from empty lists so that re-running this (slow) cell replaces the
# previous results instead of appending a second copy of every sample.
original_audio = []
enhanced_audio = []

for file in files:
  # Keep both lists index-aligned: one enhanced and one original clip per file.
  enhanced_audio_sample = _fn(file, solver, nfe, tau, denoising)
  enhanced_audio.append(enhanced_audio_sample)
  original_audio.append(create_audio_from_filepath(file))

100%|██████████| 1/1 [00:29<00:00, 29.03s/it]
100%|██████████| 1/1 [07:37<00:00, 457.81s/it]


### Display audio buttons

In [12]:
# Pair every original clip with its enhanced counterpart: (original, enhanced).
audio_pairs = [
  (original_audio[idx], enhanced_audio[idx])
  for idx in range(len(original_audio))
]

# Show two players per sample: the original first, the enhanced version second.
for original_clip, enhanced_clip in audio_pairs:
  original_clip.display_play_button()
  enhanced_clip.display_play_button()