In [1]:
import warnings

import matplotlib.pyplot as plt
import torch
import torchaudio
from IPython.display import Audio, display

In [2]:

def plot_waveform(waveform, sample_rate, title="Waveform", xlim=None, ylim=None):
  """Draw one amplitude panel per channel of ``waveform``.

  Args:
    waveform: Object exposing ``.numpy()`` whose array form is 2-D; the first
      axis is iterated as channels and the second as frames.
    sample_rate: Divisor that turns frame indices into x-axis values.
    title: Figure-level title.
    xlim: Optional x-limits forwarded to ``set_xlim``; skipped when falsy.
    ylim: Optional y-limits forwarded to ``set_ylim``; skipped when falsy.

  Raises:
    ValueError: If the array form of ``waveform`` is not 2-D (shape unpacking fails).
  """
  samples = waveform.numpy()
  num_channels, num_frames = samples.shape
  seconds = torch.arange(0, num_frames) / sample_rate

  figure, axes = plt.subplots(num_channels, 1)
  # With a single row the result is wrapped in a list so it can be iterated
  # the same way as the multi-channel case.
  panels = [axes] if num_channels == 1 else axes
  label_channels = num_channels > 1

  for c, (panel, channel_samples) in enumerate(zip(panels, samples)):
    panel.plot(seconds, channel_samples, linewidth=1)
    panel.grid(True)
    if label_channels:
      panel.set_ylabel(f'Channel {c+1}')
    if xlim:
      panel.set_xlim(xlim)
    if ylim:
      panel.set_ylim(ylim)

  figure.suptitle(title)
  plt.show(block=False)

def plot_specgram(waveform, sample_rate, title="Spectrogram", xlim=None):
  """Draw one spectrogram panel per channel of ``waveform``.

  Args:
    waveform: Object exposing ``.numpy()`` whose array form is 2-D; the first
      axis is iterated as channels.
    sample_rate: Passed to ``Axes.specgram`` as ``Fs``.
    title: Figure-level title.
    xlim: Optional x-limits forwarded to ``set_xlim``; skipped when falsy.

  Raises:
    ValueError: If the array form of ``waveform`` is not 2-D (shape unpacking fails).
  """
  waveform = waveform.numpy()

  # The frame count is not needed here; the two-target unpack is kept because
  # it is what rejects inputs that are not 2-D.
  num_channels, _ = waveform.shape

  figure, axes = plt.subplots(num_channels, 1)
  # With a single row the result is wrapped in a list so indexing below works
  # the same way as in the multi-channel case.
  if num_channels == 1:
    axes = [axes]
  for c in range(num_channels):
    axes[c].specgram(waveform[c], Fs=sample_rate)
    if num_channels > 1:
      axes[c].set_ylabel(f'Channel {c+1}')
    if xlim:
      axes[c].set_xlim(xlim)
  figure.suptitle(title)
  plt.show(block=False)


def play_audio(waveform, sample_rate):
  """Show an inline audio player for a one- or two-channel waveform.

  Args:
    waveform: Object exposing ``.numpy()`` whose array form is 2-D, with
      channels on the first axis.
    sample_rate: Forwarded to ``Audio`` as ``rate``.

  Raises:
    ValueError: If the channel count is anything other than 1 or 2, or if the
      array form of ``waveform`` is not 2-D (shape unpacking fails).
  """
  samples = waveform.numpy()
  num_channels, _ = samples.shape

  # Reject unsupported layouts up front so the playback path below is linear.
  if num_channels not in (1, 2):
    raise ValueError("Waveform with more than 2 channels are not supported.")

  # One channel is handed over as a single array, two channels as a pair of arrays.
  payload = samples[0] if num_channels == 1 else (samples[0], samples[1])
  display(Audio(payload, rate=sample_rate))


In [3]:
# waveform, sample_rate = torchaudio.load('/Users/a.anikin/repos/cv_court_lines_detection/sound_processing/data/Breath-Data/01_male_23_BQuyen.wav')

In [4]:
# %pip install soundfile

In [5]:
import os
from pathlib import Path
import sys

In [6]:

# Put the source directory on the import path so modules stored there can be
# imported from this notebook. Guarded so re-running the cell does not add
# duplicate entries.
# NOTE(review): absolute, machine-specific path — confirm before running elsewhere.
path2add = '/Users/a.anikin/repos/cv_court_lines_detection/sound_processing/src'
if path2add not in sys.path:
    sys.path.append(path2add)


In [7]:
from dataset import BreathingDataset
from model import RawAudioCNN
from train import train_model
from augmentations import train_augment


In [9]:
# Folder holding the breathing recordings.
# NOTE(review): absolute, machine-specific path — confirm before running elsewhere.
data_dir = Path("/Users/a.anikin/repos/cv_court_lines_detection/sound_processing/data/Breath-Data")

# Folders passed to the datasets as `random_audios_folder`. Unlike data_dir
# these are relative, so they resolve against the kernel's working directory.
noise_dir = Path("data/noise")
noise_val_dir = Path("data/noise_val")

# Flag an unusable noise folder here rather than when a sample is first drawn.
# NOTE(review): presumably BreathingDataset picks random files from this
# folder, and an empty pick list would explain
# "IndexError: Cannot choose from an empty sequence" — confirm in dataset.py.
for noise_folder in (noise_dir, noise_val_dir):
    if not noise_folder.is_dir() or not any(noise_folder.iterdir()):
        warnings.warn(
            f"Noise folder {noise_folder.resolve()} is missing or empty "
            f"(resolved from the relative path '{noise_folder}'); "
            "sampling from the dataset may fail."
        )

# Recordings in a fixed, hand-written order; the split below depends on it.
sounds_list = [
    data_dir / "08_male_21_TLong.wav",
    data_dir / "15_female_21_PPhuong.wav",
    data_dir / "06_male_21_QViet.wav",
    data_dir / "16_male_21_TTung.wav",
    data_dir / "22_male_21_VHung.wav",
    data_dir / "14_male_21_Khanh.wav",
    data_dir / "20_male_21_Viet.wav",
    data_dir / "18_male_21_Hoa.wav",
    data_dir / "03_male_21_BDuong.wav",
    data_dir / "29_male_19_Cong.wav",
    data_dir / "23_male_21_CNDuong.wav",
    data_dir / "17_male_21_Trung.wav",
    data_dir / "10_male_21_Nam.wav",
    data_dir / "24_female_21_MPham.wav",
    data_dir / "04_female_21_LAnh.wav",
    data_dir / "19_male_21_Minh.wav",
    data_dir / "05_male_21_NLinh.wav",
    data_dir / "11_female_21_Tam.wav",
    data_dir / "28_male_19_VHoa_asthma.wav",
    data_dir / "21_male_21_Hai.wav",
    data_dir / "07_male_21_MQuang.wav",
    data_dir / "27_female_19_TThanh.wav",
    data_dir / "01_male_23_BQuyen.wav",
    data_dir / "09_male_21_Ngon.wav",
    data_dir / "12_male_21_Tam.wav",
    data_dir / "13_female_20_TNhi.wav",
    data_dir / "02_male_22_PTuan.wav",
    data_dir / "26_female_19_Linh.wav"
]

# Split into train / validation: the last 7 recordings are held out.
train_list = sounds_list[:-7]
val_list   = sounds_list[-7:]

# Only the validation dataset is given a seed.
# NOTE(review): presumably this makes validation sampling repeatable — confirm in dataset.py.
train_dataset = BreathingDataset(train_list, random_audios_folder=noise_dir)
val_dataset = BreathingDataset(val_list, random_audios_folder=noise_val_dir, seed=42)

In [10]:
# Listen to the first ten validation items and print the label of each.
for i in range(10):
    wave, label = val_dataset[i]
    # play_audio unpacks a two-axis shape, so a leading axis is added first.
    # NOTE(review): assumes `wave` is a 1-D tensor sampled at 16 kHz — confirm in dataset.py.
    clip = torch.unsqueeze(wave, 0)
    play_audio(clip, 16000)
    print(label)

IndexError: Cannot choose from an empty sequence