## 주요 변동사항

### breathDS에서 tabular를 추가했음.
### 모델이 들어간 Whowheezing과 forward에
### 정형데이터가 들어갈 linier이랑 input을 추가했음.
### 모델 학습하는 training에도 새로운 input(정형데이터)을 사용하기 위해 추가했음.

In [1]:
import pandas as pd
from pathlib import Path

In [2]:
Catholic_file = 'train_v2.1_aug.csv'
df = pd.read_csv(Catholic_file)
df

Unnamed: 0,filename,category,class,sex,old
0,0001-1.wav,wheezing,1,0,0
1,0001-3.wav,wheezing,1,0,0
2,0001-4.wav,wheezing,1,0,0
3,0020-1.wav,wheezing,1,0,4
4,0020-3.wav,wheezing,1,0,4
...,...,...,...,...,...
1605,wn_0041-2.wav,non-wheezing,0,0,2
1606,wn_0041-3.wav,non-wheezing,0,0,2
1607,wn_0041-4.wav,non-wheezing,0,0,2
1608,wn_0042-1.wav,non-wheezing,0,1,3


### 나이변수만 정규화시킴

In [3]:
mean = df.old.mean()
std = df.old.std()
df.old = (df.old - mean) / std

In [4]:
df

Unnamed: 0,filename,category,class,sex,old
0,0001-1.wav,wheezing,1,0,-1.187189
1,0001-3.wav,wheezing,1,0,-1.187189
2,0001-4.wav,wheezing,1,0,-1.187189
3,0020-1.wav,wheezing,1,0,0.249935
4,0020-3.wav,wheezing,1,0,0.249935
...,...,...,...,...,...
1605,wn_0041-2.wav,non-wheezing,0,0,-0.468627
1606,wn_0041-3.wav,non-wheezing,0,0,-0.468627
1607,wn_0041-4.wav,non-wheezing,0,0,-0.468627
1608,wn_0042-1.wav,non-wheezing,0,1,-0.109346


In [5]:
df['relative_path'] = '/' + df['filename'].astype(str)
df = df[['relative_path', 'class', 'sex', 'old']]
df.head()

Unnamed: 0,relative_path,class,sex,old
0,/0001-1.wav,1,0,-1.187189
1,/0001-3.wav,1,0,-1.187189
2,/0001-4.wav,1,0,-1.187189
3,/0020-1.wav,1,0,0.249935
4,/0020-3.wav,1,0,0.249935


In [6]:
data_path = 'aug_train'

In [7]:
import math, random
import torch
import torchaudio
from torchaudio import transforms
from IPython.display import Audio

class Breath_sound_Util():
  
  def open(audio_file):
    sig, sr = torchaudio.load(audio_file)
    
    return (sig, sr)

  def resample(aud, newsr):
    sig, sr = aud
    
    if (sr == newsr):
     return aud

    num_channels = sig.shape[0]
    resig = torchaudio.transforms.Resample(sr, newsr)(sig[:1,:])
    if (num_channels > 1):
      retwo = torchaudio.transforms.Resample(sr, newsr)(sig[1:,:])
      resig = torch.cat([resig, retwo])

    return ((resig, newsr))
  

  def pad(aud, max_ms):
    sig, sr = aud
    num_rows, sig_len = sig.shape
    max_len = sr//1000 * max_ms
   
    if (sig_len > max_len):
      sig = sig[:,:max_len]
 
    elif (sig_len < max_len):

      repeated = []
      repeated.append(sig)
      required_len = max_len - sig_len

      while required_len > sig_len : 
        repeated.append(sig)
        require_len -= sig_len
      repeated.append(sig[:, :required_len])
 
      sig = torch.cat(repeated, 1)

    return (sig, sr)


  def spectro_gram(aud, n_mels=64, n_fft=1024, hop_len=None):
    sig,sr = aud
    top_db = 80
    
    spec = transforms.MelSpectrogram(sr, n_fft=n_fft, hop_length=hop_len, n_mels=n_mels)(sig)
    spec = transforms.AmplitudeToDB(top_db=top_db)(spec)
    return (spec)

  def spectro_augment(spec, max_mask_pct=0.1, n_freq_masks=1, n_time_masks=1):
    _, n_mels, n_steps = spec.shape
    mask_value = spec.mean()
    aug_spec = spec

    freq_mask_param = max_mask_pct * n_mels
    for _ in range(n_freq_masks):
      aug_spec = transforms.FrequencyMasking(freq_mask_param)(aug_spec, mask_value)

    time_mask_param = max_mask_pct * n_steps
    for _ in range(n_time_masks):
      aug_spec = transforms.TimeMasking(time_mask_param)(aug_spec, mask_value)

    return aug_spec
    print(aug_spec)

In [8]:
from torch.utils.data import DataLoader, Dataset, random_split
import torchaudio
import torchvision

class breathDS(Dataset):
    
  def __init__(self, df, data_path):
    self.df = df
    self.data_path = str(data_path)
    self.duration = 4000
    self.sr = 48000
            
  def __len__(self):
    return len(self.df)    
    
  def __getitem__(self, idx):
    audio_file = self.data_path + self.df.loc[idx, 'relative_path']
    class_id = self.df.loc[idx, 'class']
    
    aud = Breath_sound_Util.open(audio_file)
    reaud = Breath_sound_Util.resample(aud, self.sr)
    dur_aud = Breath_sound_Util.pad(reaud, self.duration)
    sgram = Breath_sound_Util.spectro_gram(dur_aud, n_mels=64, n_fft=1024, hop_len=None)
    aug_sgram = Breath_sound_Util.spectro_augment(sgram, max_mask_pct=0.1, n_freq_masks=2, n_time_masks=2)
    #여기부터 나이, 성별변수를 집어넣는 과정임
    x = self.df.loc[idx, 'old']
    y = self.df.loc[idx, 'sex']
    x = torch.from_numpy(np.asarray(x).reshape((1,)))
    y = torch.from_numpy(np.asarray(y).reshape((1,)))
    tabular = torch.cat((x, y), 0)
    tabular = tabular.float()
    #print(tabular)
    #x1 = ["sex", "old"]
    #x2 = x2.
    #x2 = x2.iloc[idx].values
    
    
    return aug_sgram, tabular, class_id

In [9]:
brds = breathDS(df, data_path)

In [10]:
train_dl = DataLoader(brds, batch_size=32, shuffle=True)

In [11]:
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init

# --------------------------------------------------------------
# 호흡음의 Healthy, Wheezing을 판단하는 Binary Classification Model
# --------------------------------------------------------------


class WhoWheezing(nn.Module):
    

    def __init__(self):
        super().__init__()
        conv_layers = []
        self.conv1 = nn.Conv2d(1, 8, kernel_size=(3, 3), stride=(2, 2), padding=(2, 2))
        self.relu1 = nn.ReLU()
        self.bn1 = nn.BatchNorm2d(8)
        init.kaiming_normal_(self.conv1.weight, a=0.2)
        self.conv1.bias.data.zero_()
        conv_layers += [self.conv1, self.relu1, self.bn1]

        self.conv2 = nn.Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu2 = nn.ReLU()
        self.bn2 = nn.BatchNorm2d(16)
        init.kaiming_normal_(self.conv1.weight, a=0.2)
        self.conv2.bias.data.zero_()
        conv_layers += [self.conv2, self.relu2, self.bn2]

        self.conv3 = nn.Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu3 = nn.ReLU()
        self.bn3 = nn.BatchNorm2d(32)
        init.kaiming_normal_(self.conv1.weight, a=0.2)
        self.conv3.bias.data.zero_()
        conv_layers += [self.conv3, self.relu3, self.bn3]

        self.conv4 = nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu4 = nn.ReLU()
        self.bn4 = nn.BatchNorm2d(64)
        init.kaiming_normal_(self.conv1.weight, a=0.2)
        self.conv4.bias.data.zero_()
        conv_layers += [self.conv4, self.relu4, self.bn4]

        self.ap = nn.AdaptiveAvgPool2d(output_size=1)
        self.dropout = nn.Dropout(0.4)
        self.lin = nn.Linear(in_features=72, out_features=2)
        self.dropout = nn.Dropout(0.4)
        self.sigmoid = nn.Sigmoid()

        self.conv = nn.Sequential(*conv_layers)
 
        self.fc1 = nn.Linear(2, 10)
        self.fc2 = nn.Linear(10, 8)

    def forward(self, inputs, inputs2):
        inputs = self.conv(inputs)
        inputs = self.ap(inputs)
        inputs = inputs.view(inputs.shape[0], -1)
        inputs2 = self.fc1(inputs2)
        inputs2 = self.fc2(inputs2)
        #inputs가 원래있던 멜스펙트로그램이고
        #inputs2가 정형데이터임
        x = torch.cat((inputs, inputs2), 1)
        print(x)
        print(x.size())
        x = self.lin(x)
        x = self.dropout(x)
        inputs = self.sigmoid(x)
        return x       

    '''def forward(self, x):
        x = self.conv(x)
        x = self.ap(x)
        x = x.view(x.shape[0], -1)
        #print(x.size())
        #x2 = category
        #x2 = x2(self.fc1(x2))
        #x2 = x2(self.fc2(x2))
        #x = x1 + x2
        x = self.lin(x)
        x = self.dropout(x)
        x = self.sigmoid(x)
        return x'''
    
Model1 = WhoWheezing()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Model1 = Model1.to(device)
next(Model1.parameters()).device

device(type='cuda', index=0)

In [12]:
from sklearn.metrics import confusion_matrix
import numpy as np
import sklearn.metrics as metrics


def training(model, train_dl, num_epochs):
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
  scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001,
                                                steps_per_epoch=int(len(train_dl)),
                                                epochs=num_epochs,
                                                anneal_strategy='linear')

  for epoch in range(num_epochs):
    running_loss = 0.0
    correct_prediction = 0
    total_prediction = 0

    for i, data in enumerate(train_dl):
        inputs, inputs2, labels = data[0].to(device), data[1].to(device), data[2].to(device)
        #여기에도 inputs2를 추가시켜줫음. 이제 data[1]이 가리키는건 breathDS의 tablular임
        inputs_m, inputs_s = inputs.mean(), inputs.std()
        inputs = (inputs - inputs_m) / inputs_s
        optimizer.zero_grad()
        outputs = model(inputs, inputs2)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        scheduler.step()
        running_loss += loss.item()
        _, prediction = torch.max(outputs,1)
        correct_prediction += (prediction == labels).sum().item()
        total_prediction += prediction.shape[0]

    num_batches = len(train_dl)
    avg_loss = running_loss / num_batches
    acc = correct_prediction/total_prediction
    print(f'Epoch: {epoch}, Loss: {avg_loss:.4f}, Accuracy: {acc:.4f}')

  print('Finished Training')
    
num_epochs=1
training(Model1, train_dl, num_epochs)

tensor([[-0.2075, -0.1968,  0.1102,  ...,  0.6647,  0.4398, -0.4794],
        [-0.3739, -0.2171, -0.3770,  ...,  0.2663,  0.8826, -0.5000],
        [ 0.0775, -0.0888, -0.0484,  ...,  0.5046,  0.5384, -1.0701],
        ...,
        [-0.1899,  0.4504,  0.4807,  ...,  0.3458,  0.7679, -0.6900],
        [ 0.0487, -0.0495,  0.0512,  ...,  0.7441,  0.3250, -0.6694],
        [-0.0918, -0.3460, -0.4520,  ...,  0.2663,  0.8826, -0.5000]],
       device='cuda:0', grad_fn=<CatBackward>)
torch.Size([32, 72])
tensor([[ 0.0504,  0.1400,  0.3113,  ...,  0.5050,  0.5389, -1.0698],
        [ 0.0111, -0.1143,  0.0894,  ...,  0.1882,  1.1283,  0.6609],
        [-0.4441, -0.4672, -0.5739,  ...,  0.4266,  0.7842,  0.0909],
        ...,
        [ 0.9821,  0.4121, -0.3966,  ..., -0.4485,  1.9151,  1.2102],
        [ 0.0724,  0.0710,  0.1090,  ...,  0.2677,  1.0136,  0.4709],
        [-0.2389, -0.2620, -0.6088,  ...,  0.2666,  0.8829, -0.4998]],
       device='cuda:0', grad_fn=<CatBackward>)
torch.Size([32, 7

tensor([[-0.4500, -0.4715, -0.4046,  ...,  0.5803,  0.5990, -0.2520],
        [-0.2357,  0.0104, -0.1701,  ...,  0.7433,  0.3867, -0.6214],
        [ 0.2779, -0.0862,  0.3618,  ...,  0.4987,  0.7052, -0.0674],
        ...,
        [-0.3412, -0.4566,  0.0913,  ...,  0.5803,  0.5990, -0.2520],
        [ 0.5710,  0.3463, -0.1787,  ...,  0.3357,  0.9175,  0.3020],
        [-0.3567, -0.5402,  0.7179,  ...,  0.3357,  0.9175,  0.3020]],
       device='cuda:0', grad_fn=<CatBackward>)
torch.Size([32, 72])
tensor([[ 0.5967, -0.0255, -0.0951,  ...,  0.3403,  0.8241, -0.6444],
        [ 0.3385,  0.1271, -0.2519,  ...,  0.3403,  0.8241, -0.6444],
        [ 0.0834, -0.0236,  0.2276,  ...,  0.5038,  0.6134, -1.0128],
        ...,
        [ 0.0774,  0.0932, -0.0442,  ...,  0.3403,  0.8241, -0.6444],
        [-0.2900, -0.2976,  0.4287,  ...,  0.5809,  0.6037, -0.2491],
        [-0.2300, -0.2366,  0.1099,  ...,  0.2585,  0.9294, -0.4602]],
       device='cuda:0', grad_fn=<CatBackward>)
torch.Size([32, 7

tensor([[ 0.0945, -0.1375, -0.4722,  ..., -0.0537,  1.3728,  0.2801],
        [-0.0153, -0.1929, -0.5308,  ...,  0.5240,  0.7602, -0.0475],
        [ 0.1593, -0.1673, -0.4806,  ...,  0.3696,  0.8917, -0.6203],
        ...,
        [-0.4348, -0.2202,  0.9853,  ...,  0.0161,  1.3375,  1.0330],
        [-0.4213,  0.0678, -0.5540,  ...,  0.2850,  0.9879, -0.4402],
        [-0.2830, -0.3330,  0.5137,  ..., -0.3076,  1.6615,  0.8203]],
       device='cuda:0', grad_fn=<CatBackward>)
torch.Size([32, 72])
tensor([[ 0.0368, -0.0353, -0.5170,  ..., -0.0526,  1.3729,  0.2793],
        [ 0.0677, -0.2683, -0.5354,  ...,  0.2864,  0.9898, -0.4401],
        [ 0.1355, -0.2169, -0.4817,  ...,  0.7795,  0.4746, -0.5868],
        ...,
        [-0.2350,  0.0754,  0.6422,  ...,  0.1863,  1.1452,  0.6722],
        [ 0.4849,  0.3035, -0.2845,  ...,  0.6948,  0.5704, -0.4069],
        [ 0.4766,  0.0116, -0.2919,  ..., -0.0526,  1.3729,  0.2793]],
       device='cuda:0', grad_fn=<CatBackward>)
torch.Size([32, 7

tensor([[ 0.1275,  0.0820, -0.2816,  ...,  0.1884,  1.1438,  0.6700],
        [ 0.0993,  0.1610, -0.5086,  ...,  0.6187,  0.6807, -0.2238],
        [ 0.0805,  0.0833, -0.5189,  ...,  0.3815,  0.9101, -0.6175],
        ...,
        [ 0.1258,  0.1510, -0.4870,  ...,  0.2954,  1.0027, -0.4387],
        [ 0.0884, -0.0380, -0.5061,  ...,  0.5327,  0.7733, -0.0450],
        [-0.0852, -0.2675,  0.6543,  ...,  0.7048,  0.5881, -0.4026]],
       device='cuda:0', grad_fn=<CatBackward>)
torch.Size([32, 72])
tensor([[ 4.4078e-01,  5.0428e-01, -3.7684e-01, -2.2814e-01, -2.3462e-01,
         -5.8515e-01, -4.0880e-01,  1.2449e+00, -3.6592e-01,  1.0852e-01,
          7.3815e-01, -4.3717e-01,  4.3884e-01,  3.4439e-01, -4.9750e-01,
         -6.7699e-01,  1.8137e+00,  5.6029e-02,  8.6584e-02,  2.3123e-01,
         -7.4707e-01, -3.2035e-01, -5.7360e-01, -6.9945e-01, -4.2416e-01,
         -9.7995e-02,  1.6667e+00, -2.5817e-01, -5.8071e-01, -4.2179e-01,
          1.2796e+00,  5.2144e-01,  2.3823e-01, -6.337

Epoch: 0, Loss: 0.4190, Accuracy: 0.8106
Finished Training


#### 검증방법 : print 해보면서 생각한 것과 쉐이프가 동일한지 확인함.
#### 파이썬 기본실력이 부족해서 코드적인 자신감이 부족함.