<a href="https://colab.research.google.com/github/CAU2022-CAPSTONE-PACETIME/BreathDetector/blob/main/BreathClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%cd /content/drive/MyDrive/ColabNotebooks

/content/drive/MyDrive/ColabNotebooks


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from imu25 import *
from BreathDataset25 import *
from torch.utils.data import DataLoader, random_split
from Sound import *
import torchaudio
import torchaudio.functional as F

# Number of dataset items per mini-batch for the training data loaders.
BATCH_SIZE = 16
EPOCHS = 6 # number of training epochs run per call to train()

In [None]:
# Record the library versions this notebook was executed with.
for library in (torch, torchaudio):
  print(library.__version__)

1.13.0+cu116
0.13.0+cu116


In [None]:
class BreathClassifier(nn.Module):
  """Small CNN that scores an audio window with a single sigmoid output.

  Processing order in ``forward``:
    1. high-pass biquad filter on the raw waveform,
    2. the MFCC transformation passed to the constructor,
    3. two 3x3 convolution blocks (4 channels, BatchNorm, ReLU), each followed
       by dropout,
    4. four fully connected layers; the last one ends in a sigmoid.

  The network is sized for a 40 x 44 MFCC map per window (``fc1`` expects
  4*40*44 features and ``forward`` reshapes to ``(-1, 1, 40, 44)``). The
  original notes give the expected waveform as 11025 samples per window.

  Attribute names and layer order are part of the checkpoint format
  (``state_dict`` keys such as ``conv1.0.weight``); do not rename them.

  Args:
    device: stored as ``self.device``; not otherwise used by this class.
    transformation: callable/module applied to the filtered waveform; it must
      yield 40 x 44 values per window.
    sample_rate: sampling rate in Hz handed to the high-pass filter.
    cutoff_freq: cut-off frequency in Hz of the high-pass filter.
  """

  def __init__(self, device, transformation, sample_rate = 44100, cutoff_freq = 800.0):
    super().__init__()
    self.device = device
    self.transformation_mfcc = transformation
    # Stored as plain int/float so the module stays TorchScript-friendly.
    self.sample_rate = int(sample_rate)
    self.cutoff_freq = float(cutoff_freq)

    self.conv1 = nn.Sequential(
        nn.Conv2d(
            in_channels = 1,
            out_channels = 4,
            kernel_size = (3, 3),
            stride = 1,
            padding = 1
        ),
        nn.BatchNorm2d(4),
        nn.ReLU()
    )

    self.conv2 = nn.Sequential(
        nn.Conv2d(
            in_channels = 4,
            out_channels = 4,
            kernel_size = (3, 3),
            stride = 1,
            padding = 1
        ),
        nn.BatchNorm2d(4),
        nn.ReLU()
    )

    # padding=1 with 3x3 kernels keeps the 40 x 44 map size, hence 4*40*44 inputs.
    self.fc1 = nn.Sequential(
        nn.Linear(4*40*44, 500),
        nn.ReLU()
    )

    self.fc2 = nn.Sequential(
        nn.Linear(500, 200),
        nn.ReLU()
    )

    self.fc3 = nn.Sequential(
        nn.Linear(200, 50),
        nn.ReLU()
    )

    self.fc4 = nn.Sequential(
        nn.Linear(50, 1),
        nn.Sigmoid()
    )

    self.dropout = nn.Dropout(0.4)

  def forward(self, input):
    """Return a ``(N, 1)`` tensor of sigmoid scores for the given waveform(s).

    The waveform is filtered here, so callers should pass unfiltered audio.
    """
    # Spelled out in full: the notebook binds the alias ``F`` to both
    # torch.nn.functional and torchaudio.functional, and only the latter
    # provides highpass_biquad.
    sound = torchaudio.functional.highpass_biquad(input, self.sample_rate, self.cutoff_freq)
    mfcc = self.transformation_mfcc(sound)
    mfcc = mfcc.view(-1, 1, 40, 44)

    x = self.conv1(mfcc)
    x = self.dropout(x)
    x = self.conv2(x)
    x = self.dropout(x)
    x = x.view(x.size(0), -1)

    x = self.fc1(x)
    x = self.fc2(x)
    x = self.fc3(x)
    x = self.fc4(x)

    return x

In [None]:
def create_data_loader(train_data, batch_size):
  """Build a shuffling DataLoader over ``train_data``.

  Batches contain ``batch_size`` items; a trailing incomplete batch is dropped.
  """
  return DataLoader(
      train_data,
      batch_size = batch_size,
      shuffle = True,
      drop_last = True,
  )

def train_single_epoch(model, data_loader, loss_fn, optimizer, device):
  """Run one pass over ``data_loader``, taking an optimizer step per window group.

  Each batch is regrouped with ``view`` into 116 groups of 11025-sample
  windows (targets into 116 matching groups of labels). ``view`` only
  reinterprets memory, it does not transpose: sounds and targets are regrouped
  identically, so every window stays paired with its own label.
  Assumes the loader yields sounds shaped (batch, 116, 11025) and targets
  shaped (batch, 116) -- taken from the original inline note; TODO confirm
  against the dataset class.

  The waveform is passed to the model unfiltered: ``BreathClassifier.forward``
  applies the high-pass filter itself, so filtering here as well would train
  the network on twice-filtered audio while evaluation and the exported model
  see once-filtered audio.

  Args:
    model: network to optimise; switched to training mode by this function.
    data_loader: iterable of (sound, target) batches.
    loss_fn: callable ``loss_fn(prediction, target)``.
    optimizer: optimizer holding the model's parameters.
    device: device the window tensors are moved to.
  """
  groups_per_batch = 116
  window_len = 11025

  # Dropout / BatchNorm must be in training mode even if eval() was called earlier.
  model.train()

  for sounds, targets in data_loader:
    sounds = sounds.view(groups_per_batch, -1, window_len)
    targets = targets.view(groups_per_batch, -1)

    for sound_group, target_group in zip(sounds, targets):
      sound = sound_group.view(-1, window_len).to(device) # (batch, 11025)
      imu = target_group.view(-1, 1).to(device)           # (batch, 1)
      prediction = model(sound)
      loss = loss_fn(prediction, imu)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

def train(model, data_loader, loss_fn, optimizer, device, epochs):
  """Call ``train_single_epoch`` ``epochs`` times, printing a banner per epoch.

  All arguments except ``epochs`` are forwarded unchanged to
  ``train_single_epoch``.
  """
  for epoch_number in range(1, epochs + 1):
    print(f"Epoch {epoch_number}")
    train_single_epoch(model, data_loader, loss_fn, optimizer, device)
    print("------------------------------")

  print("Finished training ")


def test(model, data_loader):
  with torch.no_grad():
    for input, target in data_loader:
      input = input.view(116, -1, 11025) # sound : batch*60x40x44->60*batch*40*44 imu : batch*60 -> 60*batch
      target = target.view(116, -1) 
      #print("input shape : {} target shape {}".format(input.shape, target.shape))
      accuracy = 0
      total = 0
      for i in range(len(input)):
        sound = input[i].view(-1, 1, 11025).to(device)
        imu = target[i].view(-1, 1).to(device)
        #sound = F.highpass_biquad(sound, 44100, 800.0)

        prediction = 1 if model(sound) >= 0.5 else 0
        total += 1
        accuracy += (prediction == imu).sum().item()
    print("accuracy : {}".format(100*accuracy/total))

In [None]:
# Training entry point: build the datasets and loaders, train the classifier on
# the recorded data and then on the augmented data, and save the weights.
# The names defined here (device, mfcc_transform, cnn, test_data_loader, ...)
# are reused by later cells of the notebook.
if __name__ == "__main__":
  # NOTE(review): absolute Google Drive paths; they only resolve in the original Colab setup.
  audio_list = ["/content/drive/MyDrive/ColabNotebooks/Data/note9-budslive-sync", "/content/drive/MyDrive/ColabNotebooks/Data/A21s-airpodspro-sync"]
  aug_audio_list = ["/content/drive/MyDrive/ColabNotebooks/Data/Augment_data"]

  device = 'cpu'
  dataset = BreathDataset(audio_list)
  aug_dataset = BreathDataset(aug_audio_list)
  # Reports the combined size of the recorded and augmented datasets.
  print("Data length : {} Device : {}".format(len(dataset) + len(aug_dataset), device))

  # 90 % / 10 % train/test split of the recorded dataset only;
  # the augmented dataset is used for training in full.
  train_ratio = 0.9
  train_data_length = int(train_ratio * len(dataset))
  test_data_length = len(dataset) - train_data_length
  
  # NOTE(review): no generator/seed is passed, so the split differs between runs
  # unless the global RNG is seeded elsewhere.
  train_dataset, test_dataset = random_split(dataset, [train_data_length, test_data_length])
  train_data_loader = create_data_loader(train_dataset, BATCH_SIZE)
  train_aug_data_loader = create_data_loader(aug_dataset, BATCH_SIZE)
  # Batch size 1 for evaluation.
  test_data_loader = create_data_loader(test_dataset, 1)
  
  # MFCC front end handed to the classifier: 40 coefficients from 40 mel bands,
  # FFT size 500, hop of 256 samples, at a 44.1 kHz sample rate.
  mfcc_transform = torchaudio.transforms.MFCC(
      sample_rate=44100,
      n_mfcc=40,
      melkwargs={
          "n_fft": 500,
          "hop_length": 256,
          "n_mels" : 40
      },
  )


  cnn = BreathClassifier(device, mfcc_transform).to(device)

  # BCELoss matches the sigmoid output of the classifier's last layer.
  loss_fn = nn.BCELoss()
  optimizer = optim.Adam(cnn.parameters(), lr = 0.001)

  # Two training phases with the same optimizer: recorded data first, then augmented data.
  train(cnn, train_data_loader, loss_fn, optimizer, device, EPOCHS)
  train(cnn, train_aug_data_loader, loss_fn, optimizer, device, EPOCHS)
  # Switch to eval mode before saving; later cells evaluate `cnn` directly.
  cnn.eval()
  # Only the weights are saved (state_dict), to the current working directory.
  torch.save(cnn.state_dict(), "BreathClassifierVer2.2.pth")

Data length : 220 Device : cpu
Epoch 1
------------------------------
Epoch 2
------------------------------
Epoch 3
------------------------------
Epoch 4
------------------------------
Epoch 5
------------------------------
Epoch 6
------------------------------
Finished training 
Epoch 1
------------------------------
Epoch 2
------------------------------
Epoch 3
------------------------------
Epoch 4
------------------------------
Epoch 5
------------------------------
Epoch 6
------------------------------
Finished training 


In [None]:
# Run the evaluation on the test loader ten times in a row.
for run_index in range(10):
  test(cnn, test_data_loader)

In [None]:
device = 'cpu'

# Rebuild the network and restore the weights stored as version 2.1.
model = BreathClassifier(device, mfcc_transform)
model = model.to(device)
checkpoint = torch.load("BreathClassifierVer2.1.pth", map_location = device)
model.load_state_dict(checkpoint)
# Inference mode; as the last expression this also displays the module summary.
model.eval()
# for i in range(10):
#   test(model, test_data_loader)

In [None]:
import pandas as pd
import numpy as np
import time

def sound_process(path, idx, device = 'cpu'):
  """Cut one 11025-sample window out of a recording CSV and high-pass filter it.

  Args:
    path: CSV file with a "sound" column; empty cells in that column are dropped.
    idx: window start in seconds (converted to samples at 44100 Hz).
    device: device the window tensor is moved to before filtering.

  Returns:
    The window after an 800 Hz high-pass biquad filter. The window is shorter
    than 11025 samples when the recording ends early.
  """
  samples = np.array(pd.read_csv(path)["sound"].dropna())
  print("{}s ~ {}s".format(idx, idx + 0.25))
  first = int(idx*44100)
  window = torch.FloatTensor(samples[first:first + 11025]).to(device)
  return F.highpass_biquad(window, 44100, 800.0)

# Score a single 0.25 s window of one recording with the loaded model.
# Other recordings previously tried with this cell:
#   /content/drive/MyDrive/ColabNotebooks/Data/A21s-airpodspro/Data_2022-11-01_18_53_03.csv
#   /content/drive/MyDrive/ColabNotebooks/Data/noise/Data_2022-12-06_22_05_45.csv
#   /content/drive/MyDrive/ColabNotebooks/Data/zflip-budspro/Data_2022-11-15_19_49_18.csv
path = "/content/drive/MyDrive/ColabNotebooks/Data/noise/Data_2022-12-06_22_51_02.csv"
sample = sound_process(path, 3.8)

# NOTE(review): sound_process already high-pass filters the window and the
# model's forward pass filters again, so this window is filtered twice.
# Inference only, so no autograd graph is needed.
with torch.no_grad():
  probability = model(sample)
print(probability)

3.8s ~ 4.05s
tensor([[0.0161]], grad_fn=<SigmoidBackward0>)


In [None]:
# Model test

path = "/content/drive/MyDrive/ColabNotebooks/Data/A21s-airpodspro/Data_2022-11-04_00_55_32.csv"


def model_check(model, path, device = 'cpu'):
  """Print the model's score and label for consecutive windows of a recording.

  The "sound" column of the CSV at ``path`` is cut into back-to-back windows
  of 11025 samples (0.25 s at 44100 Hz). At most 116 windows are scored, and
  only complete windows are used, so a short recording no longer feeds a
  truncated window to the model. A score below 0.5 is printed as "Exhale",
  otherwise "Inhale".

  The raw window is passed to the model: the classifier's forward pass applies
  the high-pass filter itself, so filtering here too would filter twice.

  Args:
    model: callable returning a single-element tensor for one window.
    path: CSV file with a "sound" column; empty cells are dropped.
    device: device each window tensor is moved to.
  """
  window_len = 11025
  max_windows = 116

  data = pd.read_csv(path)
  sound = np.array(data['sound'].dropna())
  n_windows = min(max_windows, len(sound) // window_len)

  print("Model Test")
  # Inference only: skip autograd bookkeeping.
  with torch.no_grad():
    for s in range(n_windows):
      st = s * window_len
      sound_data = torch.FloatTensor(sound[st:st + window_len]).to(device)
      # .item() works on any device, unlike .numpy().
      val = model(sound_data).item()
      res = "Exhale" if val < 0.5 else "Inhale"
      print("{:.2f}s value : {:.5f} --> {}".format(s*0.25, val, res))

model_check(model, path)

Model Test
0.00s value : 0.00004 --> Exhale
0.25s value : 0.00000 --> Exhale
0.50s value : 0.89548 --> Inhale
0.75s value : 0.41531 --> Exhale
1.00s value : 0.10232 --> Exhale
1.25s value : 0.88948 --> Inhale
1.50s value : 0.17355 --> Exhale
1.75s value : 0.00000 --> Exhale
2.00s value : 0.12013 --> Exhale
2.25s value : 0.93206 --> Inhale
2.50s value : 0.15119 --> Exhale
2.75s value : 0.68130 --> Inhale
3.00s value : 0.06587 --> Exhale
3.25s value : 0.00000 --> Exhale
3.50s value : 0.99087 --> Inhale
3.75s value : 0.99955 --> Inhale
4.00s value : 0.80234 --> Inhale
4.25s value : 0.87373 --> Inhale
4.50s value : 0.11659 --> Exhale
4.75s value : 0.99011 --> Inhale
5.00s value : 0.99978 --> Inhale
5.25s value : 0.01791 --> Exhale
5.50s value : 0.63835 --> Inhale
5.75s value : 0.06282 --> Exhale
6.00s value : 0.01376 --> Exhale
6.25s value : 0.97994 --> Inhale
6.50s value : 0.99932 --> Inhale
6.75s value : 0.37044 --> Exhale
7.00s value : 0.04052 --> Exhale
7.25s value : 0.07251 --> Exhale

# PyTorch model to PyTorch Mobile

In [None]:
device = 'cpu'

# Rebuild the network and restore the version 1.9 weights that will be exported.
model = BreathClassifier(device, mfcc_transform)
model = model.to(device)
checkpoint = torch.load("BreathClassifierVer1.9.pth", map_location = device)
model.load_state_dict(checkpoint)
# Inference mode; as the last expression this also displays the module summary.
model.eval()

In [None]:
# Output locations for the exported model: all three files share one
# directory and base name and differ only in their extension.
name = "BreathClassifierVer1.9"
model_dir = "/content/drive/MyDrive/ColabNotebooks/Data/Model/"
pt_path = f"{model_dir}{name}.pt"
ptl_path = f"{model_dir}{name}.ptl"
yaml_path = f"{model_dir}{name}.yaml"

In [None]:
# Export the loaded classifier for PyTorch Mobile.
from torch.utils.mobile_optimizer import optimize_for_mobile
# Compile the eager module (including its forward pass) to TorchScript.
scripted_model = torch.jit.script(model)
# Run the mobile optimisation pass on the scripted module.
opt_model = optimize_for_mobile(scripted_model)
# Write the optimised module twice: a regular TorchScript archive (.pt) ...
torch.jit.save(opt_model, pt_path)
# ... and a lite-interpreter archive (.ptl).
opt_model._save_for_lite_interpreter(ptl_path)

In [None]:
# Dump the operator names used by the exported TorchScript model to YAML.
import torch, yaml

# NOTE(review): this rebinds the global `model` from the eager BreathClassifier
# to the TorchScript module loaded from pt_path.
model = torch.jit.load(pt_path)
# Operator names reported by TorchScript for the loaded module.
ops = torch.jit.export_opnames(model)
with open(yaml_path, 'w') as output:
    yaml.dump(ops, output)