In [70]:
import numpy as np
import pandas as pd
import torch
import torchaudio
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import os
from scipy import signal
import librosa

dataset_folder = '../HF_Lung_V1'
train_data_path = os.path.join(dataset_folder, 'train')
test_data_path = os.path.join(dataset_folder, 'test')

SAMPLE_RATE = 4000
ALL_LABELS = ['I', 'D', 'E', 'Rhonchi', 'Wheeze', 'Stridor']

In [3]:
import torchaudio.functional as F
import torchaudio.transforms as T

Creating A Torch DataSet for training


In [25]:
class LungDataSet(Dataset):
    def __init__(self, file_list, transform, targets_transform):
        self.file_list = file_list
        self.transform = transform
        self.target_transform = targets_transform
        self.target_sample_rate = SAMPLE_RATE
    
    def __len__(self):
        len(self.file_list)
        
    def __getitem__(self, index):
        audio_sample_path = self._get_audio_sample_path(index)
        labels = self._get_audio_sample_labels(index)
        
        # Read Audio from path
        wav, sample_rate = librosa.load(audio_sample_path)
        
        # Trim the leading and trailing silences based on labels
        wav_trim = self._trim_audio(signal=wav, sample_rate=sample_rate, labels=labels)
        
        # Apply high pass filter cutoff = 80Hz, filter order = 10
        cutoff_freq = 80
        b, a = signal.butter(N=10, Wn=cutoff_freq, btype='high', fs=sample_rate)
        wav_filtered = signal.filtfilt(b, a, wav_trim)
        # Convert the np array to torch tensor
        torch_signal = torch.reshape(wav_filtered, (1,wav_filtered.shape[0]))
        
        # Resample the signal to 4kHz
        torch_signal = self._resample_if_necessary(torch_signal)
        
        # Apply transformations
        if self.transform:
            torch_signal = self.transform(torch_signal)
        if self.target_transform:
            labels = self.target_transform(labels)
            
        return torch_signal, labels
    
    def _get_audio_sample_path(self, index):
        path = os.path.join(self.file_list[index] + ".wav")
        return path
    
    def _get_audio_sample_labels(self, index):
        path = os.path.join(self.file_list[index] + "_label.txt")
        labels = pd.read_csv(path, sep=' ', header=None, names=["class", "start", "end"])
        labels['start'] = pd.to_timedelta(labels['start']).dt.total_seconds()
        labels['end'] = pd.to_timedelta(labels['end']).dt.total_seconds()
        return labels
    
    def _trim_audio(self, signal, sample_rate, labels):
        start = np.floor(min(labels['start']) * sample_rate).astype(int)
        end = np.floor(max(labels['end']) * sample_rate).astype(int)
        return signal[start:end]
    
    def _resample_if_necessary(self, signal, sr):
        if sr != self.target_sample_rate:
            resampler = torchaudio.transforms.Resample(sr, self.target_sample_rate)
            signal = resampler(signal)
            
        return signal
    

In [5]:
list_files = []
data_folder = train_data_path
for file in os.listdir(data_folder):
    if file.endswith(".wav"):
        list_files.append(os.path.join(data_folder, os.path.splitext(file)[0]))
list_files[0]

'../HF_Lung_V1\\train\\steth_20180814_09_37_11'

In [61]:
file_list = list_files
path = os.path.join(file_list[0] + "_label.txt")
labels = pd.read_csv(path, sep=' ', header=None, names=["class", "start", "end"])
labels['start'] = pd.to_timedelta(labels['start']).dt.total_seconds()
labels['end'] = pd.to_timedelta(labels['end']).dt.total_seconds()
max(labels['end'])

13.829

In [13]:
window_function = torch.hann_window
window_length = 256
hop_length = 64
spectrogram = torchaudio.transforms.Spectrogram(
    # sample_rate=SAMPLE_RATE,
    # n_fft=1024,
    win_length=window_length,
    hop_length=hop_length,
    window_fn=window_function
)

In [17]:
wav, sample_rate = librosa.load(os.path.join(file_list[0] + ".wav"), sr=None, mono=False)
cutoff_freq = 80

start = np.floor(min(labels['start']) * sample_rate).astype(int)
end = np.floor(max(labels['end']) * sample_rate).astype(int)
wav_trim = wav[start:end]

# librosa.get_duration(y=wav_trim)

b, a = signal.butter(N=10, Wn=cutoff_freq, btype='high', fs=sample_rate)
wav_filtered = signal.filtfilt(b, a, wav_trim)

signal1, sr = torchaudio.load(os.path.join(file_list[2] + ".wav"))
torch_signal = torch.tensor(wav_filtered.copy()).reshape(1,-1)

resampler = torchaudio.transforms.Resample(sr, SAMPLE_RATE)
torch_signal = resampler(torch_signal)

torch_signal = spectrogram(torch_signal)

torch_signal.shape[-1]
# wav.shape
# signal1.shape

771

In [24]:
wav, sample_rate = librosa.load(os.path.join(file_list[0] + ".wav"), sr=None, mono=False)
start = np.floor(min(labels['start']) * sample_rate).astype(int)
end = np.floor(max(labels['end']) * sample_rate).astype(int)
wav_trim = wav[start:end]
# librosa.get_duration(y=wav_trim, sr=sample_rate)
min(labels['start'])

1.5

In [28]:
# labels.info()
torch_signal.shape

torch.Size([1, 201, 771])

In [55]:
windows = torch_signal.shape[-1]
start = min(labels['start'])*1000
end = max(labels['end'])*1000
sample_size = end - start
window_size = np.floor(sample_size/windows)
# window_size
class_label = ['']*windows
max_frames = np.zeros(windows)
for i in range(windows):
    win_start = i*window_size + start
    win_end = win_start + window_size
    print("win: ", win_start, win_end)
    for index, row in labels.iterrows():
        # print(pd.to_timedelta(row['start']).total_seconds())
        row_start = row['start']*1000
        row_end = row['end']*1000
        print("row label: ", row['class'], row_start, row_end)
        if (row_start >= win_start) and (row_start <= win_end):
            num_frames_in_win = min(win_end, row_end) - \
                max(win_start, row_start)
            print("num frames ", num_frames_in_win)
            print("maxframes ", max_frames[i])
            if num_frames_in_win > max_frames[i]:
                class_label[i] = row['class']
                max_frames[i] = num_frames_in_win
            print("class label ", class_label[i])
class_label


win:  1500.0 1515.0
row label:  I 1500.0 2457.0
num frames  15.0
maxframes  0.0
class label  I
row label:  D 1500.0 2457.0
num frames  15.0
maxframes  15.0
class label  I
row label:  E 2608.0 3637.0
row label:  D 2608.0 3637.0
row label:  I 5227.0 6350.0
row label:  D 5227.0 6350.0
row label:  E 6431.0 7411.0
row label:  D 6431.0 7411.0
row label:  I 9038.0 10019.0
row label:  E 10205.0 11186.0
row label:  D 10205.0 11186.0
row label:  I 12754.0 13829.0
row label:  Rhonchi 12754.0 13485.0
win:  1515.0 1530.0
row label:  I 1500.0 2457.0
row label:  D 1500.0 2457.0
row label:  E 2608.0 3637.0
row label:  D 2608.0 3637.0
row label:  I 5227.0 6350.0
row label:  D 5227.0 6350.0
row label:  E 6431.0 7411.0
row label:  D 6431.0 7411.0
row label:  I 9038.0 10019.0
row label:  E 10205.0 11186.0
row label:  D 10205.0 11186.0
row label:  I 12754.0 13829.0
row label:  Rhonchi 12754.0 13485.0
win:  1530.0 1545.0
row label:  I 1500.0 2457.0
row label:  D 1500.0 2457.0
row label:  E 2608.0 3637.0
row

['I',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 'E',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 ''

In [69]:
all_class_labels = ['I', 'D', 'E', 'Rhonchi', 'Wheeze', 'Stridor']
windows = torch_signal.shape[-1]
labels_array = np.zeros([windows, len(all_class_labels)])
start = min(labels['start'])*1000
end = max(labels['end'])*1000
sample_size = end - start
window_size = np.floor(sample_size/windows)
# window_size
class_label = ['']*windows
max_frames = np.zeros(windows)
for i in range(windows):
    win_start = i*window_size + start
    win_end = win_start + window_size
    for index, row in labels.iterrows():
        row_start = row['start']*1000
        row_end = row['end']*1000
        if (row_start <= win_start) and (row_end >= win_end):
            labels_array[i][all_class_labels.index(row['class'])] = 1
labels_array


win:  1500.0 1515.0
row label:  I 1500.0 2457.0
row label:  D 1500.0 2457.0
row label:  E 2608.0 3637.0
row label:  D 2608.0 3637.0
row label:  I 5227.0 6350.0
row label:  D 5227.0 6350.0
row label:  E 6431.0 7411.0
row label:  D 6431.0 7411.0
row label:  I 9038.0 10019.0
row label:  E 10205.0 11186.0
row label:  D 10205.0 11186.0
row label:  I 12754.0 13829.0
row label:  Rhonchi 12754.0 13485.0
labels_encoding  [1. 1. 0. 0. 0. 0.]
win:  1515.0 1530.0
row label:  I 1500.0 2457.0
row label:  D 1500.0 2457.0
row label:  E 2608.0 3637.0
row label:  D 2608.0 3637.0
row label:  I 5227.0 6350.0
row label:  D 5227.0 6350.0
row label:  E 6431.0 7411.0
row label:  D 6431.0 7411.0
row label:  I 9038.0 10019.0
row label:  E 10205.0 11186.0
row label:  D 10205.0 11186.0
row label:  I 12754.0 13829.0
row label:  Rhonchi 12754.0 13485.0
labels_encoding  [1. 1. 0. 0. 0. 0.]
win:  1530.0 1545.0
row label:  I 1500.0 2457.0
row label:  D 1500.0 2457.0
row label:  E 2608.0 3637.0
row label:  D 2608.0 363

array([[1., 1., 0., 0., 0., 0.],
       [1., 1., 0., 0., 0., 0.],
       [1., 1., 0., 0., 0., 0.],
       ...,
       [1., 0., 0., 1., 0., 0.],
       [1., 0., 0., 1., 0., 0.],
       [1., 0., 0., 1., 0., 0.]])

In [71]:
def label_encoder(torch_signal, labels):
    windows = torch_signal.shape[-1]
    labels_array = np.zeros([windows, len(ALL_LABELS)])
    start = min(labels['start'])*1000
    end = max(labels['end'])*1000
    window_size = np.floor(end - start/windows)
    for i in range(windows):
        win_start = i*window_size + start
        win_end = win_start + window_size
        for index, row in labels.iterrows():
            row_start = row['start']*1000
            row_end = row['end']*1000
            if (row_start <= win_start) and (row_end >= win_end):
                labels_array[i][ALL_LABELS.index(row['class'])] = 1
    return labels_array

In [57]:
all_labels = []
for file in file_list:
    path = os.path.join(file + "_label.txt")
    labels = pd.read_csv(path, sep=' ', header=None, names=["class", "start", "end"])
    labels['start'] = pd.to_timedelta(labels['start']).dt.total_seconds()
    labels['end'] = pd.to_timedelta(labels['end']).dt.total_seconds()
    all_labels.append(labels)

all_labels_df = pd.concat(all_labels)
all_labels_df['class'].unique()

array(['I', 'D', 'E', 'Rhonchi', 'Wheeze', 'Stridor'], dtype=object)