In [1]:
import torch
import torchaudio
import matplotlib.pyplot as plt
from IPython.display import Audio, display

In [2]:
def plot_waveform(waveform, sample_rate, title="Waveform", xlim=None, ylim=None):
  """Plot each channel of an audio tensor against time, one subplot per channel.

  Args:
    waveform: torch.Tensor of shape (num_channels, num_frames). A 1-D tensor
      of shape (num_frames,) is treated as a single channel.
    sample_rate: Sampling rate used to convert frame indices to the time axis
      (frame index / sample_rate).
    title: Figure-level title.
    xlim: Optional x-axis limits forwarded to ``Axes.set_xlim``.
    ylim: Optional y-axis limits forwarded to ``Axes.set_ylim``.
  """
  # detach()/cpu() let this accept tensors that track gradients or live on an
  # accelerator; plain CPU tensors are unaffected.
  waveform = waveform.detach().cpu().numpy()
  if waveform.ndim == 1:
    # Promote a bare (num_frames,) signal to (1, num_frames).
    waveform = waveform[None, :]

  num_channels, num_frames = waveform.shape
  time_axis = torch.arange(0, num_frames) / sample_rate

  # squeeze=False always returns a 2-D grid of axes, so the single-channel
  # case needs no special handling.
  figure, axes = plt.subplots(num_channels, 1, squeeze=False)
  for c, ax in enumerate(axes[:, 0]):
    ax.plot(time_axis, waveform[c], linewidth=1)
    ax.grid(True)
    if num_channels > 1:
      ax.set_ylabel(f'Channel {c+1}')
    # Compare with None rather than relying on truthiness: array-valued limits
    # have no unambiguous truth value, and a limit of 0 is falsy but valid.
    if xlim is not None:
      ax.set_xlim(xlim)
    if ylim is not None:
      ax.set_ylim(ylim)
  figure.suptitle(title)
  plt.show(block=False)

def plot_specgram(waveform, sample_rate, title="Spectrogram", xlim=None):
  """Draw a spectrogram for each channel of an audio tensor, one subplot per channel.

  Args:
    waveform: torch.Tensor of shape (num_channels, num_frames). A 1-D tensor
      of shape (num_frames,) is treated as a single channel.
    sample_rate: Sampling rate, forwarded to ``Axes.specgram`` as ``Fs``.
    title: Figure-level title.
    xlim: Optional x-axis limits forwarded to ``Axes.set_xlim``.
  """
  # detach()/cpu() let this accept tensors that track gradients or live on an
  # accelerator; plain CPU tensors are unaffected.
  waveform = waveform.detach().cpu().numpy()
  if waveform.ndim == 1:
    # Promote a bare (num_frames,) signal to (1, num_frames).
    waveform = waveform[None, :]

  # Unpacking two values keeps the "must be 2-D" check; the frame count itself
  # is not needed because specgram derives its own time axis from Fs.
  num_channels, _ = waveform.shape

  # squeeze=False always returns a 2-D grid of axes, so the single-channel
  # case needs no special handling.
  figure, axes = plt.subplots(num_channels, 1, squeeze=False)
  for c, ax in enumerate(axes[:, 0]):
    ax.specgram(waveform[c], Fs=sample_rate)
    if num_channels > 1:
      ax.set_ylabel(f'Channel {c+1}')
    # Compare with None rather than relying on truthiness: array-valued limits
    # have no unambiguous truth value, and a limit of 0 is falsy but valid.
    if xlim is not None:
      ax.set_xlim(xlim)
  figure.suptitle(title)
  plt.show(block=False)


def play_audio(waveform, sample_rate):
  """Display an inline audio player for a mono or stereo waveform.

  Args:
    waveform: torch.Tensor of shape (num_channels, num_frames) with one or two
      channels. A 1-D tensor of shape (num_frames,) is treated as mono.
    sample_rate: Playback sampling rate, forwarded to ``Audio`` as ``rate``.

  Raises:
    ValueError: If the channel count is neither 1 nor 2.
  """
  # detach()/cpu() let this accept tensors that track gradients or live on an
  # accelerator; plain CPU tensors are unaffected.
  waveform = waveform.detach().cpu().numpy()
  if waveform.ndim == 1:
    # Promote a bare (num_frames,) signal to (1, num_frames).
    waveform = waveform[None, :]

  # Unpacking two values keeps the "must be 2-D" check.
  num_channels, _ = waveform.shape
  if num_channels == 1:
    display(Audio(waveform[0], rate=sample_rate))
  elif num_channels == 2:
    # Stereo is handed to Audio as a (left, right) pair of 1-D arrays.
    display(Audio((waveform[0], waveform[1]), rate=sample_rate))
  else:
    raise ValueError("Waveform with more than 2 channels are not supported.")


In [3]:
# waveform, sample_rate = torchaudio.load('/Users/a.anikin/repos/cv_court_lines_detection/sound_processing/data/Breath-Data/01_male_23_BQuyen.wav')

In [4]:
# %pip install soundfile

In [5]:
import os
from pathlib import Path
import sys

In [6]:

# Put the project's src directory on sys.path so that later cells can import
# project-local modules from it.
# The project root can be overridden with the SOUND_PROCESSING_ROOT environment
# variable; the default is the original author's checkout location.
path2add = str(
    Path(os.environ.get(
        "SOUND_PROCESSING_ROOT",
        "/Users/a.anikin/repos/cv_court_lines_detection/sound_processing",
    )) / "src"
)
# The membership guard keeps this cell idempotent: re-running it does not add
# duplicate entries.
if path2add not in sys.path:
    sys.path.append(path2add)


In [7]:
from dataset import BreathingDataset
from model import RawAudioCNN
from train import train_model
from augmentations import train_augment


In [8]:
# Root of the sound_processing checkout. Override it with the
# SOUND_PROCESSING_ROOT environment variable to run the notebook on another
# machine; the default is the original author's checkout location.
project_root = Path(os.environ.get(
    "SOUND_PROCESSING_ROOT",
    "/Users/a.anikin/repos/cv_court_lines_detection/sound_processing",
))

data_dir = project_root / "data" / "Breath-Data"

noise_dir = project_root / "data" / "noise"
noise_val_dir = project_root / "data" / "noise_val"

# Tunable settings for the split and the datasets.
N_VAL_FILES = 3         # number of trailing recordings held out for validation
SAMPLES_AMOUNT = 10000  # passed to BreathingDataset as samples_amount
VAL_SEED = 42           # passed to the validation dataset as seed

# Order matters: the last N_VAL_FILES entries become the validation split.
sounds_list = [
        data_dir / "Voice_breath.mp3",
        data_dir / "Voice_breath4.mp3",
        data_dir / "Voice_breath5.mp3",
        data_dir / "Voice_breath6.mp3",
        data_dir / "Voice_breath7.mp3",
        data_dir / "Voice_breath8.mp3",
        data_dir / "Voice_breath9.mp3",
        data_dir / "Voice_breath2.mp3",
        data_dir / "Voice_breath10.mp3",
        data_dir / "Voice_breath11.mp3",
]

# Split into train / validation

train_list = sounds_list[:-N_VAL_FILES]
val_list   = sounds_list[-N_VAL_FILES:]

# Each split uses its own noise folder. Only the validation dataset receives
# an explicit seed - presumably so validation samples are reproducible
# (NOTE(review): confirm against BreathingDataset).
train_dataset = BreathingDataset(
    train_list,
    samples_amount=SAMPLES_AMOUNT,
    random_audios_folder=noise_dir,
)
val_dataset = BreathingDataset(
    val_list,
    samples_amount=SAMPLES_AMOUNT,
    random_audios_folder=noise_val_dir,
    seed=VAL_SEED,
)

Loaded Voice_breath - torch.Size([1, 997158])
Loaded Voice_breath4 - torch.Size([1, 1795553])
Loaded Voice_breath5 - torch.Size([1, 2596608])
Loaded Voice_breath6 - torch.Size([1, 949232])
Loaded Voice_breath7 - torch.Size([1, 3436416])
Loaded Voice_breath8 - torch.Size([1, 1046198])
Loaded Voice_breath9 - torch.Size([1, 3220992])
Loaded random audios: 12
Loaded Voice_breath2 - torch.Size([1, 987498])
Loaded Voice_breath10 - torch.Size([1, 1020192])
Loaded Voice_breath11 - torch.Size([1, 3104640])
Loaded random audios: 4


In [9]:
# Listen to the first 20 validation items and print the label of each one.
# The loop names (i, wave, label) are kept because they stay visible to later cells.
for i in range(20):
    wave, label = val_dataset[i]
    # play_audio expects a (channels, frames) tensor, so add a leading channel axis.
    # NOTE(review): 16000 is used as the playback rate - confirm it matches the dataset.
    mono_clip = wave.unsqueeze(0)
    play_audio(mono_clip, 16000)
    print(label)

0


1


1


1


0


1


0


1


1


1


0


0


0


1


0


1


0


1


1


1


In [10]:
# Display the file paths that make up the validation split.
val_list

[PosixPath('/Users/a.anikin/repos/cv_court_lines_detection/sound_processing/data/Breath-Data/Voice_breath2.mp3'),
 PosixPath('/Users/a.anikin/repos/cv_court_lines_detection/sound_processing/data/Breath-Data/Voice_breath10.mp3'),
 PosixPath('/Users/a.anikin/repos/cv_court_lines_detection/sound_processing/data/Breath-Data/Voice_breath11.mp3')]