In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from scipy import signal
from scipy.io import wavfile
import matplotlib.pyplot as plt
import soundfile as sf
import librosa
import keras
from keras.models import Sequential
from keras.datasets import mnist
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.initializers import Constant
from keras import backend as K
from keras.layers import PReLU

**Audio augmentation library**

In [None]:
# Shell command: install the nlpaug package into the notebook environment.
!pip install nlpaug

import nlpaug.augmenter.audio as naa
# Noise augmenter constructed with no arguments (library defaults); it is
# applied to every row of inputData in the noise-augmentation cell.
naug = naa.NoiseAug()

**Extract data from the dataset**

In [None]:
# Dataset dimensions: one row per annotated segment, each row holding a
# fixed-length waveform (50000 values = 5 s at the 10 kHz used by slice_data).
N_SEGMENTS = 6898
SEGMENT_LEN = 50000

# Zero-initialised (rather than np.empty) so a row that is never filled holds
# silence instead of arbitrary memory contents.
inputData = np.zeros((N_SEGMENTS, SEGMENT_LEN))
targetData = np.zeros(N_SEGMENTS)

root = '../input/respiratory-sound-database/Respiratory_Sound_Database/Respiratory_Sound_Database/audio_and_txt_files/'

# Base names (no extension) of every annotation file. endswith/splitext match
# only a real ".txt" extension and keep dots inside the name; sorting makes the
# sample order reproducible because os.listdir returns entries in arbitrary order.
filenames = sorted(
    os.path.splitext(s)[0] for s in os.listdir(path = root) if s.endswith('.txt')
)

# Containers filled by the annotation-loading cell.
i_list = []                # one single-row recording-info DataFrame per file
rec_annotations = []       # one annotation DataFrame per file
rec_annotations_dict = {}  # file base name -> annotation DataFrame

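**Audio is resampled to 10 kHz and every sample has a fixed length of 5 s (50,000 values). The functions below read the annotation files, cut each annotated segment out of a recording (truncating it to 5 s, or zero-padding it if it is shorter), and map the crackle/wheeze flags to a class label.**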

In [None]:
def Extract_Annotation_Data(file_name, root):
    """Load the metadata and annotation table belonging to one recording.

    Parameters
    ----------
    file_name : str
        Recording base name without extension. It is split on '_' and must
        yield exactly five tokens, one per recording-info column.
    root : str
        Directory that contains ``<file_name>.txt``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        A single-row frame with the tokens of the file name, and the
        tab-separated annotation file read with the columns
        'Start', 'End', 'Crackles', 'Wheezes'.
    """
    info_columns = [
        'Patient number',
        'Recording index',
        'Chest location',
        'Acquisition mode',
        'Recording equipment',
    ]
    annotation_columns = ['Start', 'End', 'Crackles', 'Wheezes']

    # The file name itself encodes the recording metadata.
    name_parts = file_name.split('_')
    info_frame = pd.DataFrame(data=[name_parts], columns=info_columns)

    # The annotation file has no header row, so column names are supplied here.
    annotation_path = os.path.join(root, file_name + '.txt')
    annotation_frame = pd.read_csv(annotation_path, names=annotation_columns, delimiter='\t')

    return (info_frame, annotation_frame)

def slice_data(start, end, raw_data, sample_rate, new_sample_rate=10000, max_len=50000):
    """Resample a recording and cut out one fixed-length segment.

    The whole recording is resampled to ``new_sample_rate``; the samples between
    ``start`` and ``end`` (seconds) are then extracted, truncated to ``max_len``
    samples if longer, and zero-padded at the end if shorter.

    Parameters
    ----------
    start, end : float
        Segment boundaries in seconds. Boundaries outside the recording are
        clamped to it.
    raw_data : array-like
        Recording samples. NOTE(review): assumed to be a 1-D (mono) signal; the
        zero-padding below is 1-D -- confirm for multi-channel input.
    sample_rate : int or float
        Sampling rate of ``raw_data`` in Hz.
    new_sample_rate : int, optional
        Rate the recording is resampled to (default 10000 Hz).
    max_len : int, optional
        Exact number of samples in the returned segment (default 50000).

    Returns
    -------
    numpy.ndarray
        Array of exactly ``max_len`` samples.
    """
    n_resampled = int(len(raw_data) * new_sample_rate / sample_rate)
    new_raw_data = signal.resample(raw_data, n_resampled)
    new_max_ind = len(new_raw_data)

    # Clamp both boundaries to the recording; the lower clamp stops a negative
    # start from being interpreted as an index counted from the end.
    start_ind = max(0, min(int(start * new_sample_rate), new_max_ind))
    end_ind = max(0, min(int(end * new_sample_rate), new_max_ind))

    # Never take more than max_len samples.
    stop_ind = min(end_ind, start_ind + max_len)
    segment = new_raw_data[start_ind:stop_ind]

    # Pad by what is actually missing. Deriving the pad from the slice length
    # (not from end_ind - start_ind) keeps the result at max_len even when an
    # annotation has end < start, where the slice is empty.
    missing = max_len - len(segment)
    if missing > 0:
        segment = np.concatenate((segment, np.zeros(missing)))
    return segment
    
def getClass(df, index):
    """Map the wheeze/crackle flags of one annotation row to a class label.

    Labels: 0 = neither, 1 = wheezes only, 2 = crackles only, 3 = both.
    Returns None when the flags are not one of these 0/1 combinations.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation table with 'Wheezes' and 'Crackles' columns.
    index : hashable
        Row label looked up with ``df.at``.
    """
    wheeze_flag = df.at[index, 'Wheezes']
    crackle_flag = df.at[index, 'Crackles']

    # Position in this tuple is the class label: (wheezes, crackles).
    flag_combinations = ((0, 0), (1, 0), (0, 1), (1, 1))
    for label, (wheeze, crackle) in enumerate(flag_combinations):
        if wheeze_flag == wheeze and crackle_flag == crackle:
            return label
    return None

**Load the annotation file of every recording and display the first rows of the recording-information table**

In [None]:
# (Re)create the containers in this cell so that running it again rebuilds them
# from scratch instead of appending a second copy of every recording.
i_list = []                # single-row recording-info frames, one per file
rec_annotations = []       # annotation frames, one per file
rec_annotations_dict = {}  # file base name -> annotation frame

for s in filenames:
    rec_info, rec_annot = Extract_Annotation_Data(s, root)
    i_list.append(rec_info)
    rec_annotations.append(rec_annot)
    rec_annotations_dict[s] = rec_annot

# Stack the per-file rows into one table and show its first rows.
recording_info = pd.concat(i_list, axis = 0)
recording_info.head()

**Extract each annotated audio segment, zero-pad it if required, and create the target data for training**

In [None]:
# Fill inputData / targetData with one row per annotation row of every recording.
n_filled = 0
for rec_name, annotations in rec_annotations_dict.items():
    # Read each recording once; it is shared by all of its annotated segments.
    data, sampleRate = sf.read(root+rec_name+'.wav')
    for row in range(annotations.shape[0]):
        inputData[n_filled] = slice_data(
            annotations.at[row, 'Start'], annotations.at[row, 'End'], data, sampleRate
        )
        targetData[n_filled] = getClass(annotations, row)
        n_filled += 1

# Every preallocated row must have been written; otherwise the trailing rows
# would be passed on to training without real data in them.
assert n_filled == inputData.shape[0], (
    'filled %d of %d preallocated rows' % (n_filled, inputData.shape[0])
)

In [None]:
# Sanity check: shape of the label vector, then of the waveform matrix.
print(targetData.shape, inputData.shape, sep='\n')

**Noise augmentation of the audio samples**

In [None]:
# Output buffer sized from inputData itself rather than a repeated hard-coded shape.
inputDataNoise = np.zeros(inputData.shape)

# NOTE(review): the augmentation is presumably random and no seed is set in
# this notebook -- confirm how nlpaug draws its noise if reproducibility matters.
n_samples = inputData.shape[0]
for i in range(n_samples):
    inputDataNoise[i, :] = naug.augment(inputData[i, :])
    # Progress report every 500 samples (and on the last one) instead of
    # printing one line per sample.
    if i % 500 == 0 or i == n_samples - 1:
        print(i)

In [None]:
# From here on inputData refers to the noise-augmented waveforms; the clean
# waveforms are no longer reachable. Rebinding the name avoids allocating a
# third full-size array, which a copy followed by del would do.
inputData = inputDataNoise
del inputDataNoise

In [None]:
# Sampling rate (Hz) of the rows of inputData: the 10 kHz rate that slice_data
# resamples every recording to. This replaces the value left in sampleRate by
# the last sf.read call of the extraction loop (the rate of the original file).
sampleRate = 10000

**Display sample audio time-domain waveform, spectrum and spectrogram**

In [None]:
import matplotlib.pyplot as plt

# Sample inspected by this and the following display cells.
sampleNo = 4345
sample = inputData[sampleNo,:]

# Time of sample k is k / sampleRate; a linspace that includes the end point
# would stretch the axis slightly.
t = np.arange(sample.size) / sampleRate

plt.figure(figsize = (20,5))
plt.plot(t, sample)
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.title('Waveform of sample {}'.format(sampleNo))
plt.show()

In [None]:
import scipy as sc

# Magnitude spectrum of the selected sample, restricted to the bins below 500 Hz.
spectrum = sc.fft.fft(sample)
n_bins = int(500*spectrum.size/sampleRate)
magnitude = np.abs(spectrum[0:n_bins])

# FFT bin k corresponds to k * sampleRate / N Hz.
f_positive = np.arange(n_bins) * sampleRate / sample.size

# Mirror the positive-frequency half around 0 Hz for a two-sided display.
z = np.concatenate((magnitude[::-1], magnitude))
f = np.concatenate((-f_positive[::-1], f_positive))

plt.figure(figsize = (20,5))
# Scaling by 2/N converts the FFT magnitude to a single-sided amplitude.
plt.plot(f, z*2/sample.size)
plt.xlabel('Frequency (Hz)')
plt.ylabel('Amplitude')
plt.title('Magnitude spectrum of sample {}'.format(sampleNo))
plt.show()

In [None]:
import librosa

# Log-magnitude spectrogram of the selected sample.
magnitude = np.abs(librosa.stft(sample, n_fft = 1024, hop_length = 512, win_length = 1024))
# np.log is the natural logarithm, so this scale is proportional to, not equal
# to, decibels. The small offset avoids log(0).
stft = 20*np.log(magnitude + 1e-10)

cutoff_freq = 3000

# Keep the rows below cutoff_freq; the row count assumes the rows of stft span
# 0 Hz to sampleRate/2.
stft_low_freq = stft[0:int(cutoff_freq*2*stft.shape[0]/sampleRate), :]
tf = np.linspace(0, sample.size/sampleRate, stft.shape[1])
ff = np.linspace(0, cutoff_freq, stft_low_freq.shape[0])

plt.figure(figsize = (20, 5))
# Colour limits come from the full spectrogram, not only the cropped part.
plt.pcolormesh(tf, ff, stft_low_freq, vmin = stft.min(), vmax = stft.max(), shading='gouraud')
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
plt.title('Log-magnitude spectrogram of sample {}'.format(sampleNo))
plt.show()

In [None]:
# Shape of the cropped spectrogram, then of the full one, for comparison.
print(stft_low_freq.shape, stft.shape, sep='\n')

**Convert the audio data to spectrograms and crop each spectrogram at 3 kHz**

In [None]:
# Build one cropped log-magnitude spectrogram per row of inputData.
stft_dataset = []
cutoff_freq = 3000

n_samples = inputData.shape[0]
for i in range(n_samples):
    sample_audio = inputData[i,:]
    magnitude = np.abs(librosa.stft(sample_audio, n_fft = 1024, hop_length = 512, win_length = 1024))
    # Natural-log magnitude; the small offset avoids log(0).
    log_spectrogram = 20*np.log(magnitude + 1e-10)
    # Keep the rows below cutoff_freq; the row count assumes the rows span
    # 0 Hz to sampleRate/2.
    n_rows_kept = int(cutoff_freq*2*log_spectrogram.shape[0]/sampleRate)
    stft_dataset.append(log_spectrogram[0:n_rows_kept, :])
    # Progress report every 500 samples (and on the last one) instead of
    # printing one line per sample.
    if i % 500 == 0 or i == n_samples - 1:
        print(i)

# Stack the per-sample spectrograms into a single 3-D array.
stft_dataset = np.array(stft_dataset)

In [None]:
# Shape of the stacked spectrogram array: axis 0 indexes the samples, axis 1
# the frequency rows kept after cropping, axis 2 the time frames.
stft_dataset.shape

**Resize the spectrograms to reduce memory consumption and make training feasible**

In [None]:
import cv2

# cv2.resize takes the target size as (width, height), so every resized
# spectrogram has 40 rows (frequency) and 125 columns (time).
dim = (125, 40)

# A comprehension keeps the per-item variable local, so the audio waveform held
# in the global `sample` is not overwritten by a resized spectrogram.
stft_resized = np.array([
    cv2.resize(spectrogram, dim, interpolation = cv2.INTER_AREA)
    for spectrogram in stft_dataset
])
stft_resized.shape

**Display a resized spectrogram**

In [None]:
# Resized spectrogram of the sample shown in the earlier display cells.
sample_resized = stft_resized[sampleNo, :]

# Duration of one waveform in seconds, derived from the data rather than
# hard-coded (50000 values at 10 kHz give 5 s).
duration = inputData.shape[1] / sampleRate
tf = np.linspace(0, duration, sample_resized.shape[1])
ff = np.linspace(0, cutoff_freq, sample_resized.shape[0])

plt.figure(figsize = (20, 5))
# Colour limits are taken over the whole resized dataset.
plt.pcolormesh(tf, ff, sample_resized, vmin = stft_resized.min(), vmax = stft_resized.max(), shading='gouraud')
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
plt.title('Resized spectrogram of sample {}'.format(sampleNo))
plt.show()

**Save the spectrogram dataset and target data to the Kaggle output directory**

In [None]:
# Persist the resized spectrograms and their class labels as .npy files.
outputs = (
    ('/kaggle/working/stft_dataset.npy', stft_resized),
    ('/kaggle/working/target.npy', targetData),
)
for out_path, array in outputs:
    np.save(out_path, array)