In [None]:
# List what is inside output2/ (shell command run through IPython's `!` escape).
!ls 'output2/'

train  val


To create the spectrogram images, we use the librosa Python library, which provides convenient functions for computing and displaying spectrograms. You can learn more about it here: https://librosa.github.io/librosa

In [None]:
import librosa as lb
from librosa.display import specshow

import matplotlib.pyplot as plt

import os
from os import listdir
from os.path import isfile, join

import pandas as pd

In [None]:
# Create the output tree for the spectrogram images.
# exist_ok=True keeps this cell re-runnable: without it, running the cell a
# second time (or after a kernel restart) raises FileExistsError.
for out_dir in ('output3', 'output3/train', 'output3/val'):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# Root folder of the input audio; it holds the train/ and val/ sub-folders
# and is concatenated directly with them, hence the trailing slash.
files_loc = "output2/"

In [None]:
# Patient-level labels: one row per patient id with its diagnosis.
# Passing names= supplies the column labels, so the first line of the file
# is read as data rather than as a header.
diagnosis_csv = 'Downloads/input/Respiratory_Sound_Database/Respiratory_Sound_Database/patient_diagnosis.csv'
diagnosis_columns = ['pId', 'diagnosis']
diagnosis = pd.read_csv(diagnosis_csv, names=diagnosis_columns)
diagnosis.head()

Unnamed: 0,pId,diagnosis
0,101,URTI
1,102,Healthy
2,103,Asthma
3,104,COPD
4,105,URTI


In [None]:
# Distinct diagnosis labels, in order of first appearance; each one becomes
# a category folder further down.
diagnosis_labels = diagnosis['diagnosis']
categories = diagnosis_labels.unique()
categories

array(['URTI', 'Healthy', 'Asthma', 'COPD', 'LRTI', 'Bronchiectasis',
       'Pneumonia', 'Bronchiolitis'], dtype=object)

In [None]:
# One sub-folder per diagnosis category in both splits.
# exist_ok=True makes the cell safe to re-run; without it a second run
# raises FileExistsError on the first folder that already exists.
for cat in categories:
    os.makedirs('output3/train/' + cat, exist_ok=True)
    os.makedirs('output3/val/' + cat, exist_ok=True)

In [None]:
def is_wav(filename):
    '''
        Checks whether a file name carries the .wav extension
        Utility tool in converting wav to png files

        Returns True only when the name ends with '.wav' (case-sensitive).
        A name without any extension, such as a file literally called 'wav',
        is rejected.
    '''
    return filename.endswith('.wav')

The next function is the core of this kernel. It loads one audio file from its location in the input tree ([split]/[category]/[file]), converts it to a spectrogram image, and saves that image to the output directory, which has the same structure: [split]/[category]/[file]. The loop further down calls it for every file in every category of every split.

In [None]:
# Create spectrogram images from audio files using librosa
def convert_to_spec_image(file_loc, filename, category, is_train=False, verbose=False,
                          save_directory='output3/'):
    '''
        Converts one audio file to a spectrogram image and saves it as a png

        The audio is read from
            file_loc + <split>/ + category + '/' + filename
        and the image is written to
            save_directory + <split>/ + category + '/' + <name without extension> + '.png'
        where <split> is 'train' when is_train is true and 'val' otherwise.
        This function does not create the destination folders.

        Parameters:
            file_loc       -- root folder of the audio files, trailing '/' included
            filename       -- name of the audio file inside the category folder
            category       -- name of the category sub-folder
            is_train       -- use the 'train' split when true, the 'val' split otherwise
            verbose        -- print a message before and after the conversion
            save_directory -- root folder of the images, trailing '/' included

        Returns nothing; the result is the png file written to disk.
    '''
    # The same split sub-folder is used for reading and for writing, so the
    # output tree mirrors the input tree.
    split_dir = 'train/' if is_train else 'val/'
    loc = file_loc + split_dir + category + '/' + filename

    if verbose:
        print('reading and converting ' + filename + '...')

    y, sr = lb.load(loc)

    # Magnitude of the short-time Fourier transform, converted to decibels
    src_ft = lb.stft(y)
    src_db = lb.amplitude_to_db(abs(src_ft))

    # Strip only the final extension; splitting on '.wav' would cut the name
    # at the first occurrence of that text anywhere in it.
    filename_img = os.path.splitext(filename)[0]
    save_loc = save_directory + split_dir + category + '/' + filename_img + '.png'

    fig = plt.figure(figsize=(10, 3))
    try:
        specshow(src_db, sr=sr, x_axis='time', y_axis='hz')
        # Only show the 0-5000 range of the frequency axis
        plt.ylim(0, 5000)
        fig.savefig(save_loc)
    finally:
        # Always release the figure, even when plotting or saving fails,
        # so figures do not pile up across the many calls.
        plt.close(fig)

    if verbose:
        print(filename + ' converted!')

In [None]:
import os

# Set KMP_DUPLICATE_LIB_OK for this process.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort some numeric libraries trigger -- confirm it is still needed.
os.environ.update({'KMP_DUPLICATE_LIB_OK': 'True'})

In [None]:
# Walk every split and every category, and turn each .wav file found there
# into a spectrogram image.
split = ['train', 'val']

for s in split:
    for cat in categories:
        banner = '-' * 100
        print(banner)
        print('working on ' + cat + '...')
        print(banner)

        # Folder holding the audio of this split/category pair
        src_dir = files_loc + s + '/' + cat + '/'

        # Keep regular files that carry the .wav extension
        files = []
        for entry in listdir(src_dir):
            if isfile(join(src_dir, entry)) and is_wav(entry):
                files.append(entry)

        for f in files:
            convert_to_spec_image(
                file_loc=files_loc,
                category=cat,
                filename=f,
                is_train=(s == 'train'),
                verbose=True,
            )

----------------------------------------------------------------------------------------------------
working on URTI...
----------------------------------------------------------------------------------------------------
reading and converting 188_1b1_Ar_sc_Meditron_3.wav...
188_1b1_Ar_sc_Meditron_3.wav converted!
reading and converting 105_1b1_Tc_sc_Meditron_5.wav...
105_1b1_Tc_sc_Meditron_5.wav converted!
reading and converting 188_1b1_Al_sc_Meditron_2.wav...
188_1b1_Al_sc_Meditron_2.wav converted!
reading and converting 101_1b1_Al_sc_Meditron_11.wav...
101_1b1_Al_sc_Meditron_11.wav converted!
reading and converting 129_1b1_Ar_sc_Meditron_5.wav...
129_1b1_Ar_sc_Meditron_5.wav converted!
reading and converting 137_1b1_Ll_sc_Meditron_0.wav...
137_1b1_Ll_sc_Meditron_0.wav converted!
reading and converting 137_1b1_Ar_sc_Meditron_1.wav...
137_1b1_Ar_sc_Meditron_1.wav converted!
reading and converting 164_1b1_Ll_sc_Meditron_8.wav...
164_1b1_Ll_sc_Meditron_8.wav converted!
reading and conve