<a href="https://colab.research.google.com/github/CAU2022-CAPSTONE-PACETIME/BreathDetector/blob/main/BreathDataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%cd /content/drive/MyDrive/ColabNotebooks

/content/drive/MyDrive/ColabNotebooks


In [None]:
!pip install audiomentations

In [None]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
import pandas as pd
from scipy import signal as scipy_signal
import sklearn
import numpy as np
import numpy.linalg as LA
import os
from imu import *

In [None]:
class BreathDataset(Dataset):
  """Dataset of breath recordings stored as CSV files.

  Each CSV file is one sample. Its ``sound`` column is trimmed or
  zero-padded to a fixed clip length and then split into equally sized,
  non-overlapping windows. The same CSV is handed to ``Imu`` (imported from
  the project's ``imu`` module) to produce the second element of the sample.
  """

  # Fixed clip length, in seconds, that every sound signal is cut/padded to.
  _CLIP_SECONDS = 29
  # Each second of audio is split into this many windows (quarter seconds).
  _WINDOWS_PER_SECOND = 4
  # Number of windows per sample: 29 s * 4 windows/s = 116.
  _NUM_WINDOWS = _CLIP_SECONDS * _WINDOWS_PER_SECOND

  def __init__(self, audio_dirlist):
    """
    Breath Dataset
    Arg :
      audio_dirlist : iterable of directory paths; every entry found in
                      each directory is treated as one sample CSV file
    """
    self.audio_list = self.__combine_files(audio_dirlist)
    self.audio_sr = 44100
    self.imu_sr = 200

  def __len__(self):
    """Return the number of sample files discovered at construction."""
    return len(self.audio_list)

  def __getitem__(self, index):
    """Load the sample at ``index``.

    Returns:
      A tuple ``(sound, imu)`` of ``torch.FloatTensor``:
        sound : the windowed audio, one row per window
        imu   : the first ``_NUM_WINDOWS`` entries of ``Imu(csv).get_item()``
                (NOTE(review): layout of that value is defined in the
                ``imu`` module, not visible here)
    """
    sample_path = self.audio_list[index]
    csv_data = pd.read_csv(sample_path)

    # Convert to a plain array so all later slicing is positional and is not
    # affected by the gaps dropna() leaves in the Series index.
    sound_data = csv_data['sound'].dropna().to_numpy()
    sound_data = self.__cut_sound(sound_data)
    sound_data = self.__right_pad(sound_data)
    sound_list = self.__sound_cutting(sound_data)

    imu = Imu(csv_data)
    imu_items = imu.get_item()[:self._NUM_WINDOWS]

    return (
        torch.FloatTensor(np.array(sound_list)),
        torch.FloatTensor(np.array(imu_items)),
    )

  def __combine_files(self, audio_dirlist):
    """Collect the paths of all entries inside the given directories.

    Entries of each directory are sorted because ``os.listdir`` returns
    them in arbitrary order; sorting keeps the index -> file mapping
    reproducible between runs.
    """
    audio_lst = []
    for audio_path in audio_dirlist:
      audio_lst.extend(
          os.path.join(audio_path, audio)
          for audio in sorted(os.listdir(audio_path))
      )
    return audio_lst

  def __clip_samples(self):
    """Return the fixed clip length expressed in audio samples."""
    return self.audio_sr * self._CLIP_SECONDS

  def __cut_sound(self, data):
    """Truncate ``data`` to the clip length; shorter input is unchanged."""
    target = self.__clip_samples()
    if len(data) > target:
      data = data[:target]
    return data

  def __right_pad(self, data):
    """Zero-pad ``data`` on the right up to the clip length.

    Input that already has at least the clip length is returned unchanged.
    """
    missing_samples = self.__clip_samples() - len(data)
    if missing_samples > 0:
      # np.pad's default mode appends constant zeros and keeps the dtype.
      data = np.pad(np.asarray(data), (0, missing_samples))
    return data

  def __sound_cutting(self, signal):
    """Split ``signal`` into ``_NUM_WINDOWS`` consecutive windows.

    Each window spans a quarter second of audio
    (``audio_sr // _WINDOWS_PER_SECOND`` samples).
    """
    qu_sr = self.audio_sr // self._WINDOWS_PER_SECOND
    return [
        signal[start * qu_sr:(start + 1) * qu_sr]
        for start in range(self._NUM_WINDOWS)
    ]