In [1]:
import pandas as pd
from pathlib import Path

Catholic_file = 'data/data2.csv'
df = pd.read_csv(Catholic_file)
df

Unnamed: 0,filename,category,class
0,0001-2.wav,healthy,0
1,0002-1.wav,healthy,0
2,0002-2.wav,healthy,0
3,0002-3.wav,healthy,0
4,0002-4.wav,healthy,0
...,...,...,...
258,0614-2.wav,wheezing,1
259,0614-3.wav,wheezing,1
260,0614-4.wav,wheezing,1
261,0615-1.wav,wheezing,1


In [2]:
df['relative_path'] = '/' + df['filename'].astype(str)
df = df[['relative_path', 'class']]
df.head()

Unnamed: 0,relative_path,class
0,/0001-2.wav,0
1,/0002-1.wav,0
2,/0002-2.wav,0
3,/0002-3.wav,0
4,/0002-4.wav,0


In [3]:
import math, random
import torch
import torchaudio
from torchaudio import transforms
from IPython.display import Audio

class Breath_sound_Util():
  
  def open(audio_file):
    sig, sr = torchaudio.load(audio_file)
    return (sig, sr)

  def resample(aud, newsr):
    sig, sr = aud

    if (sr == newsr):
     return aud

    num_channels = sig.shape[0]
    resig = torchaudio.transforms.Resample(sr, newsr)(sig[:1,:])
    if (num_channels > 1):
      retwo = torchaudio.transforms.Resample(sr, newsr)(sig[1:,:])
      resig = torch.cat([resig, retwo])

    return ((resig, newsr))

  def pad(aud, max_ms):
    sig, sr = aud
    num_rows, sig_len = sig.shape
    max_len = sr//1000 * max_ms
   
    if (sig_len > max_len):
      sig = sig[:,:max_len]
 
    elif (sig_len < max_len):

      repeated = []
      repeated.append(sig)
      required_len = max_len - sig_len

      while required_len > sig_len : 
        repeated.append(sig)
        require_len -= sig_len
      repeated.append(sig[:, :required_len])
 
      sig = torch.cat(repeated, 1)

    return (sig, sr)


  def time_shift(aud, shift_limit):
    sig,sr = aud
    _, sig_len = sig.shape
    shift_amt = int(random.random() * shift_limit * sig_len)
    return (sig.roll(shift_amt), sr)

  def spectro_gram(aud, n_mels=64, n_fft=1024, hop_len=None):
    sig,sr = aud
    aud = torchaudio.transforms.TimeStretch(hop_length=2048, fixed_rate=5000000)
    top_db = 80
    
    spec = transforms.MelSpectrogram(sr, n_fft=n_fft, hop_length=hop_len, n_mels=n_mels)(sig)
    spec = transforms.AmplitudeToDB(top_db=top_db)(spec)
    return (spec)

  def spectro_augment(spec, max_mask_pct=0.1, n_freq_masks=1, n_time_masks=1):
    _, n_mels, n_steps = spec.shape
    mask_value = spec.mean()
    aug_spec = spec

    freq_mask_param = max_mask_pct * n_mels
    for _ in range(n_freq_masks):
      aug_spec = transforms.FrequencyMasking(freq_mask_param)(aug_spec, mask_value)

    time_mask_param = max_mask_pct * n_steps
    for _ in range(n_time_masks):
      aug_spec = transforms.TimeMasking(time_mask_param)(aug_spec, mask_value)

    return aug_spec
    print(aug_spec)

#### (좌)최대길이에 맞춰 반복시켜 패딩 (우)최대길이에 맞춰 제로패딩

![Screenshot from 2021-07-20 20-47-09](https://user-images.githubusercontent.com/74411831/126318715-e4d0ae11-38f9-42e2-9a2f-21282c7f674d.png)

In [4]:
data_path = 'data/all_data'

In [5]:
from torch.utils.data import DataLoader, Dataset, random_split
import torchaudio
import torchvision

class breathDS(Dataset):
    
  def __init__(self, df, data_path):
    self.df = df
    self.data_path = str(data_path)
    self.duration = 4000
    self.sr = 48000
    self.shift_pct = 0.4
            
  def __len__(self):
    return len(self.df)    
    
  def __getitem__(self, idx):
    audio_file = self.data_path + self.df.loc[idx, 'relative_path']
    class_id = self.df.loc[idx, 'class']
    aud = Breath_sound_Util.open(audio_file)
    reaud = Breath_sound_Util.resample(aud, self.sr)
    dur_aud = Breath_sound_Util.pad(reaud, self.duration)
    shift_aud = Breath_sound_Util.time_shift(dur_aud, self.shift_pct)
    sgram = Breath_sound_Util.spectro_gram(shift_aud, n_mels=64, n_fft=1024, hop_len=None)
    aug_sgram = Breath_sound_Util.spectro_augment(sgram, max_mask_pct=0.1, n_freq_masks=2, n_time_masks=2)

    
    return aug_sgram, class_id

In [6]:
from torch.utils.data import random_split

brds = breathDS(df, data_path)

In [7]:
num_items = len(brds)
num_train = round(num_items * 0.8)
num_val = num_items - num_train
train_ds, val_ds = random_split(brds, [num_train, num_val])
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=16, shuffle=True)
val_dl = torch.utils.data.DataLoader(val_ds, batch_size=16, shuffle=True)

In [8]:
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init

# --------------------------------------------------------------
# 호흡음의 Healthy, Wheezing을 판단하는 Binary Classification Model
# --------------------------------------------------------------

class WhoWheezing(nn.Module):

    def __init__(self):
        super().__init__()
        conv_layers = []
        self.conv1 = nn.Conv2d(1, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
        self.relu1 = nn.ReLU()
        self.bn1 = nn.BatchNorm2d(8)
        self.conv1.bias.data.zero_()
        conv_layers += [self.conv1, self.relu1, self.bn1]

        self.conv2 = nn.Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu2 = nn.ReLU()
        self.bn2 = nn.BatchNorm2d(16)
        self.conv2.bias.data.zero_()
        conv_layers += [self.conv2, self.relu2, self.bn2]

        self.conv3 = nn.Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu3 = nn.ReLU()
        self.bn3 = nn.BatchNorm2d(32)
        self.conv3.bias.data.zero_()
        conv_layers += [self.conv3, self.relu3, self.bn3]

        self.conv4 = nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu4 = nn.ReLU()
        self.bn4 = nn.BatchNorm2d(64)
        self.conv4.bias.data.zero_()
        conv_layers += [self.conv4, self.relu4, self.bn4]

        self.ap = nn.AdaptiveAvgPool2d(output_size=1)
        self.lin = nn.Linear(in_features=64, out_features=2)
        self.sigmoid = nn.Sigmoid()

        self.conv = nn.Sequential(*conv_layers)
 
    def forward(self, x):
        x = self.conv(x)
        x = self.ap(x)
        x = x.view(x.shape[0], -1)
        x = self.lin(x)
        x = self.sigmoid(x)
        return x

Model1 = WhoWheezing()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Model1 = Model1.to(device)
next(Model1.parameters()).device

device(type='cuda', index=0)

In [9]:
from sklearn.metrics import confusion_matrix
import numpy as np
import sklearn.metrics as metrics
def training(model, train_dl, num_epochs):
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
  scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001,
                                                steps_per_epoch=int(len(train_dl)),
                                                epochs=num_epochs,
                                                anneal_strategy='linear')

  for epoch in range(num_epochs):
    running_loss = 0.0
    correct_prediction = 0
    total_prediction = 0

    for i, data in enumerate(train_dl):
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs_m, inputs_s = inputs.mean(), inputs.std()
        inputs = (inputs - inputs_m) / inputs_s
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        scheduler.step()
        running_loss += loss.item()
        _, prediction = torch.max(outputs,1)
        correct_prediction += (prediction == labels).sum().item()
        total_prediction += prediction.shape[0]

    num_batches = len(train_dl)
    avg_loss = running_loss / num_batches
    acc = correct_prediction/total_prediction
    print(f'Epoch: {epoch}, Loss: {avg_loss:.4f}, Accuracy: {acc:.4f}')

  print('Finished Training')
  
num_epochs=100
training(Model1, train_dl, num_epochs)

Epoch: 0, Loss: 0.6711, Accuracy: 0.7905
Epoch: 1, Loss: 0.6222, Accuracy: 0.8143
Epoch: 2, Loss: 0.5798, Accuracy: 0.8381
Epoch: 3, Loss: 0.5534, Accuracy: 0.8333
Epoch: 4, Loss: 0.5524, Accuracy: 0.7952
Epoch: 5, Loss: 0.5210, Accuracy: 0.8429
Epoch: 6, Loss: 0.5193, Accuracy: 0.8429
Epoch: 7, Loss: 0.4949, Accuracy: 0.8286
Epoch: 8, Loss: 0.4727, Accuracy: 0.8762
Epoch: 9, Loss: 0.4933, Accuracy: 0.8810
Epoch: 10, Loss: 0.4809, Accuracy: 0.8905
Epoch: 11, Loss: 0.4645, Accuracy: 0.8571
Epoch: 12, Loss: 0.4765, Accuracy: 0.8905
Epoch: 13, Loss: 0.4916, Accuracy: 0.8762
Epoch: 14, Loss: 0.4547, Accuracy: 0.8952
Epoch: 15, Loss: 0.4409, Accuracy: 0.8810
Epoch: 16, Loss: 0.4294, Accuracy: 0.9143
Epoch: 17, Loss: 0.4389, Accuracy: 0.9095
Epoch: 18, Loss: 0.4336, Accuracy: 0.9143
Epoch: 19, Loss: 0.4232, Accuracy: 0.8952
Epoch: 20, Loss: 0.4533, Accuracy: 0.9000
Epoch: 21, Loss: 0.4436, Accuracy: 0.8952
Epoch: 22, Loss: 0.4446, Accuracy: 0.9095
Epoch: 23, Loss: 0.4335, Accuracy: 0.9000
Ep

In [10]:
import numpy as np
for epoch in range(2):
    for i, data in enumerate(train_dl, 0):
        inputs, labels = data
        inputs = np.array(inputs)
        print(inputs.shape)
        print(f'Epoch: {i} | Inputs {inputs} | Labels {labels}')

(16, 1, 64, 376)
Epoch: 0 | Inputs [[[[ -7.3975425    5.007869     9.03249    ...  -2.9935834
     -1.3400569  -15.080538  ]
   [  0.6873282    1.7515504   10.396252   ...  -4.5290813
      6.5511236   -3.7352905 ]
   [ -0.5288943   -0.37542346   7.5962644  ...   0.65855396
      7.384449    -7.735466  ]
   ...
   [-53.70318    -53.70318    -53.70318    ... -53.70318
    -53.70318    -53.70318   ]
   [-53.70318    -53.70318    -53.70318    ... -53.70318
    -53.70318    -53.70318   ]
   [-53.70318    -53.70318    -53.70318    ... -53.70318
    -53.70318    -53.70318   ]]]


 [[[-12.698537   -16.158916   -21.610918   ... -39.639812
    -33.077995   -21.931028  ]
   [-12.99748    -22.309607   -27.015907   ... -35.536354
    -36.839096   -27.541487  ]
   [-15.517608   -34.834457   -36.397022   ... -33.983864
    -32.449036   -26.76367   ]
   ...
   [-70.217224   -73.03935    -73.03935    ... -73.03935
    -73.03935    -70.3933    ]
   [-70.25016    -73.03935    -73.03935    ... -73.03935


(16, 1, 64, 376)
Epoch: 3 | Inputs [[[[-14.144321   -35.493713   -21.159975   ... -17.696777
    -20.136679    -9.423846  ]
   [-12.86132    -36.59207    -27.278      ... -23.602736
    -25.625366   -11.077969  ]
   [-10.469681   -37.591473   -36.374516   ... -35.363533
    -33.274708   -11.577783  ]
   ...
   [-70.042755   -70.042755   -70.042755   ... -70.042755
    -70.042755   -70.042755  ]
   [-70.042755   -70.042755   -70.042755   ... -70.042755
    -70.042755   -70.042755  ]
   [-70.042755   -70.042755   -70.042755   ... -70.042755
    -70.042755   -70.042755  ]]]


 [[[ -9.950836   -11.390672   -21.806953   ...  -2.2714052
     -2.5458221   -4.10437   ]
   [-10.370586   -12.071228   -27.27462    ...  -5.8183537
     -4.500327    -9.1861725 ]
   [-14.199672   -16.11336    -27.031391   ... -12.506179
    -10.028925   -16.99772   ]
   ...
   [-57.730156   -57.730156   -57.730156   ... -57.730156
    -57.730156   -57.730156  ]
   [-57.730156   -57.730156   -57.730156   ... -57.7301

(16, 1, 64, 376)
Epoch: 6 | Inputs [[[[-16.670235   -15.631141   -31.796772   ... -18.430656
    -15.709898   -23.07064   ]
   [-20.771238   -19.006235   -30.929188   ... -22.536129
    -18.549042   -27.42025   ]
   [-24.388437   -25.60008    -34.068375   ... -28.4525
    -24.069956   -35.01221   ]
   ...
   [-67.260056   -67.260056   -67.260056   ... -67.260056
    -67.260056   -67.260056  ]
   [-67.260056   -67.260056   -67.260056   ... -67.260056
    -67.260056   -67.260056  ]
   [-67.260056   -67.260056   -67.260056   ... -67.260056
    -67.260056   -67.260056  ]]]


 [[[-13.612261   -19.054752   -14.602523   ... -16.310251
    -18.008183   -29.053978  ]
   [-17.093657   -24.466162   -19.389954   ... -22.653017
    -22.795202   -19.34468   ]
   [-23.882675   -17.422663   -23.33836    ... -30.355713
    -30.740227   -18.378847  ]
   ...
   [-81.01962    -82.36636    -82.36636    ... -82.36636
    -82.36636    -76.29968   ]
   [-81.08423    -82.36636    -82.36636    ... -82.36636
   

(16, 1, 64, 376)
Epoch: 9 | Inputs [[[[  3.0577989    5.771313     5.844733   ...  -2.538676
     -3.9296389    3.6864328 ]
   [  7.9492683    6.323645     5.5840626  ...  -4.4073124
     -7.0850906    6.8918633 ]
   [  6.1424947    1.6394247    1.2324908  ...  -5.413552
     -2.2465081    5.0580716 ]
   ...
   [-46.30783    -47.75528    -48.68742    ... -45.87787
    -47.95542    -48.091167  ]
   [-47.123802   -46.099174   -46.52798    ... -47.009335
    -48.39134    -48.027493  ]
   [-47.03325    -47.713593   -47.738235   ... -47.1224
    -47.567707   -48.88962   ]]]


 [[[  4.340914    -7.234434   -12.623024   ... -12.736322
     -7.6534824   -5.931635  ]
   [  3.875611    -4.54606     -9.316802   ... -13.3299265
     -2.2720115  -11.227003  ]
   [ -0.74055666  -4.2201815   -6.5604167  ... -13.381313
     -2.8479252  -12.393072  ]
   ...
   [-47.237694   -46.25126    -45.359413   ... -47.059837
    -45.150967   -46.510452  ]
   [-44.379845   -44.478203   -46.196712   ... -46.55453
 

Epoch: 13 | Inputs [[[[-34.30631  -20.573061 -21.049976 ... -19.40988  -27.037968
    -44.902428]
   [-32.16592  -22.852657 -25.248505 ... -26.015635 -32.552296
    -34.497185]
   [-31.349766 -27.123592 -31.608265 ... -40.482178 -33.739838
    -38.534405]
   ...
   [-66.27602  -66.27602  -66.27602  ... -66.27602  -66.27602
    -66.27602 ]
   [-66.27602  -66.27602  -66.27602  ... -66.27602  -66.27602
    -66.27602 ]
   [-66.27602  -66.27602  -66.27602  ... -66.27602  -66.27602
    -66.27602 ]]]


 [[[-29.134066 -20.572622 -18.241837 ... -15.657551 -34.16446
    -28.595808]
   [-28.759789 -25.226738 -23.509056 ... -20.056492 -27.275475
    -29.671211]
   [-31.142653 -28.49413  -31.715841 ... -27.812794 -30.729563
    -29.277805]
   ...
   [-68.41237  -68.41237  -68.41237  ... -68.41237  -68.41237
    -68.41237 ]
   [-68.41237  -68.41237  -68.41237  ... -68.41237  -68.41237
    -68.41237 ]
   [-68.41237  -68.41237  -68.41237  ... -68.41237  -68.41237
    -68.41237 ]]]] | Labels tensor([0,

(16, 1, 64, 376)
Epoch: 5 | Inputs [[[[-27.54424    -24.605072   -34.582756   ... -16.005043
    -21.994663   -21.63599   ]
   [-33.539368   -29.403435   -32.145615   ... -20.37822
    -26.80826    -28.093489  ]
   [-37.79052    -36.989895   -36.16951    ... -28.094406
    -33.71594    -36.38772   ]
   ...
   [-79.42512    -79.42512    -79.42512    ... -79.42512
    -79.42512    -79.42512   ]
   [-79.42512    -79.42512    -79.42512    ... -79.42512
    -79.42512    -79.42512   ]
   [-79.42512    -79.42512    -79.42512    ... -79.42512
    -79.42512    -79.42512   ]]]


 [[[  9.809509     6.975687     9.380623   ...   2.431201
      3.9753625   10.436455  ]
   [  9.827856     4.3298326   18.050278   ...   6.4578824
      6.462203    12.427824  ]
   [  8.774419     3.5109277   20.42508    ...   9.274587
     12.636309    10.342349  ]
   ...
   [-41.94439    -46.203995   -47.04798    ... -47.811493
    -48.209587   -42.744072  ]
   [-42.80606    -47.282898   -48.66771    ... -45.22876
   

(16, 1, 64, 376)
Epoch: 9 | Inputs [[[[-38.180717   -36.352425   -14.120342   ... -42.260628
    -48.822113   -36.40067   ]
   [-39.756012   -38.005806   -13.641741   ... -44.18917
    -50.999413   -39.026417  ]
   [-43.314377   -40.809944   -15.1034565  ... -44.76559
    -53.665203   -43.789482  ]
   ...
   [-64.50119    -64.50119    -64.50119    ... -64.50119
    -64.50119    -64.50119   ]
   [-64.50119    -64.50119    -64.50119    ... -64.50119
    -64.50119    -64.50119   ]
   [-64.50119    -64.50119    -64.50119    ... -64.50119
    -64.50119    -64.50119   ]]]


 [[[  1.8208705    9.779903    11.024435   ...  -1.1421776
      4.036704     7.5649805 ]
   [  1.5925136    7.9945974    8.574881   ...   3.289139
      4.9710426    3.307352  ]
   [  7.3042607    5.5298204    4.755139   ...   3.3470283
      6.5671854   -2.122073  ]
   ...
   [-46.085907   -47.094757   -45.189247   ... -47.841755
    -45.17544    -45.131756  ]
   [-44.331646   -44.666435   -46.31736    ... -44.72445
   

(16, 1, 64, 376)
Epoch: 12 | Inputs [[[[-44.391136  -28.35315   -27.923471  ... -47.778275  -65.05765
    -41.27161  ]
   [-39.074863  -30.5932    -32.445072  ... -49.34575   -65.05765
    -41.66435  ]
   [-37.96091   -35.059933  -40.43709   ... -50.87969   -63.645203
    -43.286987 ]
   ...
   [-65.05765   -65.05765   -65.05765   ... -65.05765   -65.05765
    -65.05765  ]
   [-65.05765   -65.05765   -65.05765   ... -65.05765   -65.05765
    -65.05765  ]
   [-65.05765   -65.05765   -65.05765   ... -65.05765   -65.05765
    -65.05765  ]]]


 [[[-14.240812  -14.82009    -3.060028  ...  -9.834965  -19.06961
    -21.733292 ]
   [ -5.8013673  -9.536169   -3.8414865 ... -10.361556  -15.5327015
     -9.319998 ]
   [ -6.0389524  -8.619074   -8.995945  ... -12.627011  -18.437597
     -9.428795 ]
   ...
   [-50.0438    -47.848034  -47.25229   ... -46.935368  -46.186024
    -45.775856 ]
   [-47.37566   -45.12159   -44.050552  ... -48.39378   -46.12255
    -45.719585 ]
   [-48.60083   -45.80414   

In [11]:
import torchsummary
torchsummary.summary(Model1, (1,64,376))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 8, 32, 188]             208
            Conv2d-2           [-1, 8, 32, 188]             208
              ReLU-3           [-1, 8, 32, 188]               0
              ReLU-4           [-1, 8, 32, 188]               0
       BatchNorm2d-5           [-1, 8, 32, 188]              16
       BatchNorm2d-6           [-1, 8, 32, 188]              16
            Conv2d-7           [-1, 16, 16, 94]           1,168
            Conv2d-8           [-1, 16, 16, 94]           1,168
              ReLU-9           [-1, 16, 16, 94]               0
             ReLU-10           [-1, 16, 16, 94]               0
      BatchNorm2d-11           [-1, 16, 16, 94]              32
      BatchNorm2d-12           [-1, 16, 16, 94]              32
           Conv2d-13            [-1, 32, 8, 47]           4,640
           Conv2d-14            [-1, 32

In [16]:
def inference (model, val_dl):
  correct_prediction = 0
  total_prediction = 0

  with torch.no_grad():
    for data in val_dl:
      
      inputs, labels = data[0].to(device), data[1].to(device)
      inputs_m, inputs_s = inputs.mean(), inputs.std()
      inputs = (inputs - inputs_m) / inputs_s
      outputs = model(inputs)
 
      _, prediction = torch.max(outputs,1)
      
      correct_prediction += (prediction == labels).sum().item()
      total_prediction += prediction.shape[0]
    
  acc = correct_prediction/total_prediction
  print(f'Accuracy: {acc:.2f}, Total items: {total_prediction}')
  
  labels = labels.cpu().numpy()
  prediction = prediction.cpu().numpy()
  accuracy = np.mean(np.equal(labels,prediction))
  right = np.sum(labels * prediction == 1)
  precision = right / np.sum(prediction)
  recall = right / np.sum(labels)
  f1 = 2 * precision*recall/(precision+recall)
  print('Accuracy', accuracy)
  print('precision', precision)
  print('recall', recall)
  print('f1', f1)
  print('accuracy', metrics.accuracy_score(labels,prediction) )
  print('precision', metrics.precision_score(labels,prediction) )
  print('recall', metrics.recall_score(labels,prediction) )
  print('f1', metrics.f1_score(labels,prediction) )
  print(metrics.classification_report(labels,prediction))

inference(Model1, val_dl)

Accuracy: 0.87, Total items: 53
Accuracy 0.8
precision 1.0
recall 0.6666666666666666
f1 0.8
accuracy 0.8
precision 1.0
recall 0.6666666666666666
f1 0.8
              precision    recall  f1-score   support

           0       0.67      1.00      0.80         2
           1       1.00      0.67      0.80         3

    accuracy                           0.80         5
   macro avg       0.83      0.83      0.80         5
weighted avg       0.87      0.80      0.80         5

