In [None]:


from fastai.vision.all import *
import torchaudio
import pathlib
import librosa
from IPython.display import Audio
import librosa.display
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns

# Silence every Python warning for the rest of the session.
# NOTE(review): `warnings` is not imported explicitly in this notebook;
# presumably it arrives through the `fastai.vision.all` star import - confirm.
warnings.filterwarnings("ignore")
# IPython setting: switch off Jedi for tab completion.
%config Completer.use_jedi = False



In [None]:
# Folder that holds the audio recordings.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
mypath = "/home/john/Downloads/kaggle_respiratory_data/Respiratory_Sound_Database/Respiratory_Sound_Database/audio_and_txt_files/"
# List the .wav files found under `mypath`. This is only for inspection:
# get_items() scans the folder again on its own.
filenames = get_files(mypath, extensions='.wav')
filenames

In [None]:
# Patient diagnosis table. The CSV is read without a header row (header=None),
# so columns are addressed by position: get_y() matches column 0 against the
# patient id and reads the diagnosis from column 1.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
p_diag = pd.read_csv("/home/john/Downloads/kaggle_respiratory_data/respiratory_sound_database/Respiratory_Sound_Database/patient_diagnosis.csv",header=None) # patient diagnosis file
p_diag.head()

In [None]:
# Audio / spectrogram settings shared by the cells below.
# FFT size passed to the MelSpectrogram transform.
n_fft=1024
# Hop length (in samples) passed to the MelSpectrogram transform.
hop_length=256
# Sample rate in Hz: used to select files, as resampling target and as the
# MelSpectrogram sample rate.
target_rate=44100
# Number of samples in one second of audio at `target_rate`;
# get_x() uses twice this value as its default excerpt length.
num_samples=int(target_rate)

In [None]:
## Labelling function: maps a recording to "Healthy" or "Unhealthy"
def get_y(path):
    """Return the binary label of the recording stored at ``path``.

    The first three characters of the file stem are parsed as the integer
    patient id. That id is matched against column 0 of the module-level
    ``p_diag`` table and the diagnosis is taken from column 1 of the first
    matching row.

    Args:
        path: path object exposing ``.stem`` (e.g. ``pathlib.Path``).

    Returns:
        ``"Healthy"`` if the diagnosis is exactly ``"Healthy"``, otherwise
        ``"Unhealthy"``.

    Raises:
        ValueError: if the first three characters of the stem are not an integer.
        IndexError: if the patient id has no row in ``p_diag``.
    """
    patient_id = int(path.stem[:3])
    patient_rows = p_diag[p_diag[0] == patient_id]
    diagnosis = patient_rows[1].values[0]
    return "Healthy" if diagnosis == "Healthy" else "Unhealthy"

In [None]:


## Collect the audio files to train on. Only files whose sample rate already
## equals `target_rate` are kept, because resampling the others takes too long.
def get_items(path):
    """Return the ``.wav`` files under ``path`` recorded at ``target_rate``.

    Args:
        path: folder to scan for ``.wav`` files.

    Returns:
        list of the file paths whose sample rate equals the module-level
        ``target_rate``.
    """
    # torchaudio.load_wav was deprecated and later removed from torchaudio;
    # torchaudio.load returns the same (waveform, sample_rate) pair, and only
    # the sample rate is needed here.
    return [
        fn
        for fn in get_files(path, extensions='.wav')
        if torchaudio.load(fn)[1] == target_rate
    ]



In [None]:


## Helper transforms that turn an audio tensor into a mel spectrogram in decibels
# Waveform -> mel spectrogram with 256 mel bands, using the n_fft, hop_length
# and target_rate settings defined earlier.
au2spec = torchaudio.transforms.MelSpectrogram(sample_rate=target_rate,n_fft=n_fft, hop_length=hop_length, n_mels=256)
# Spectrogram values -> decibel scale (transform defaults).
ampli2db = torchaudio.transforms.AmplitudeToDB()



In [None]:
def get_x(path, target_rate=target_rate, num_samples=num_samples*2):
    """Load one recording and turn a random excerpt into a spectrogram image.

    Steps: load the audio, resample it to ``target_rate`` when needed, keep
    the first channel, cut ``num_samples`` samples at a random offset
    (zero-padding when fewer are available), convert the excerpt with the
    module-level ``au2spec`` / ``ampli2db`` transforms and min-max scale the
    result to the range 0..255.

    The offset is drawn anew on every call, so the same file yields a
    different excerpt each time it is loaded.

    Args:
        path: audio file to load.
        target_rate: sample rate (Hz) the waveform is brought to before cropping.
        num_samples: length of the excerpt, in samples.

    Returns:
        ``numpy`` array holding the dB mel spectrogram scaled to [0, 255]
        (all zeros when the spectrogram is constant).
    """
    # torchaudio.load yields float samples scaled to [-1, 1], which replaces
    # the removed torchaudio.load_wav followed by a manual "/ 32768".
    waveform, rate = torchaudio.load(path)
    if rate != target_rate:
        # Rely on the default resampling method (windowed sinc): the explicit
        # 'sinc_interpolation' name was renamed in later torchaudio releases.
        waveform = torchaudio.transforms.Resample(orig_freq=rate, new_freq=target_rate)(waveform)
    x = waveform[0].numpy()  # first channel only

    sample_total = x.shape[0]
    # Preferred window: skip the first second and leave one second after the
    # excerpt. With the default 2 s excerpt this is the original
    # [target_rate, sample_total - 3*target_rate] range.
    first = target_rate
    last = sample_total - num_samples - target_rate
    if last >= first:
        randstart = random.randint(first, last)
    else:
        # Recording too short for the margins (random.randint would raise on
        # an empty range): crop anywhere the excerpt fits, or start at 0 and
        # let fix_length zero-pad the remainder.
        randstart = random.randint(0, max(0, sample_total - num_samples))
    x = x[randstart:num_samples+randstart]
    # `size` is keyword-only in librosa >= 0.10; the keyword form also works
    # on older releases.
    x = librosa.util.fix_length(x, size=num_samples)

    torch_x = torch.tensor(x)
    spec = au2spec(torch_x)
    spec_db = ampli2db(spec)
    spec_db = spec_db.detach().squeeze(0).numpy()

    # Min-max scale to 0..255; skip the division for a constant spectrogram
    # (e.g. digital silence), which would otherwise produce NaNs.
    spec_db = spec_db - spec_db.min()
    peak = spec_db.max()
    if peak > 0:
        spec_db = spec_db/peak*255
    return spec_db

In [None]:
## Select the usable recordings, derive one label per file and show the class counts
items = get_items(mypath)
labels = list(map(get_y, items))
Counter(labels)

In [None]:
# Fraction of the items held out from training.
test_size=0.3
# Seeded split (random_state=42), stratified on `labels`; `labels` was built
# from `items`, so it lines up with the items given to the DataBlock.
splitter = TrainTestSplitter(test_size=test_size, random_state=42, stratify=labels)


In [None]:


# DataBlock tying the pipeline together:
#   - get_x turns a file path into a spectrogram array, handled as an image (ImageBlock)
#   - get_y turns the same path into the Healthy/Unhealthy category (CategoryBlock)
#   - splitter is the stratified split defined in the previous cell
#   - Resize(256) is applied to each item
db = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_x=get_x,
    get_y=get_y,
    splitter=splitter,
    item_tfms=[Resize(256)])



In [None]:


# Build the datasets from the selected files (used below to compute the sampling weights).
dsets = db.datasets(items)



In [None]:


# Display the datasets object for a quick look.
dsets



In [None]:
# Sampling weights for the training set: every training item gets the inverse
# of its class count, so the rarer class is drawn more often.
count = Counter(labels)
# Label the training files straight from their paths. Iterating `dsets.train`
# would run get_x (audio load + spectrogram) on every item just to read its
# label; the resulting weights are the same.
wgts = [1/count[get_y(fn)] for fn in dsets.train.items]
wgts[:10]

In [None]:
# Build the DataLoaders with fastai's WeightedDL, handing it the per-item
# training weights computed in the previous cell; 2 worker processes load the data.
dls = db.dataloaders(items, num_workers=2, dl_type=WeightedDL, wgts=wgts)

In [None]:
# Pull a single batch (inputs x, targets y) to sanity-check the loaders.
x, y = dls.one_batch()

In [None]:
# Mean of the encoded targets in the batch. With two classes encoded as 0/1
# this is the share of class 1 - presumably "Unhealthy"; check dls.vocab.
sum(y)/len(y)

In [None]:
# Visual check: display a few spectrograms from a batch together with their labels.
dls.show_batch()

In [None]:


## Build the learner: xresnet18 architecture, reporting the error rate as metric
learn = cnn_learner(dls, xresnet18, metrics=error_rate)



In [None]:
# Train with fastai's fine_tune schedule, epochs=1.
learn.fine_tune(1)

In [None]:


# Display a few samples with the model's predictions for a visual check.
learn.show_results()



In [None]:
# Collect the learner's predictions for interpretation and plot the
# confusion matrix (Healthy vs Unhealthy).
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix(figsize=(12,12), dpi=60)