기본 import와 함수

In [156]:
from tqdm import tqdm
from natsort import natsorted
import os
from IPython.display import Audio
import librosa
import copy
import random
import numpy as np
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torch.optim as optim
from sklearn.metrics import accuracy_score
from torch.optim.lr_scheduler import StepLR
import wave
import shutil

# Select the compute device once: the first CUDA GPU when one is usable,
# otherwise the CPU. The printed message reports which one was chosen.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
if device == 'cpu':
    print('현재 가상환경 cpu 사용')
else:
    print('현재 가상환경 cuda 설정 가능')

# Project root; every folder below is built relative to it.
path = './'
# Original recordings (Speech Commands v0.02 folder), read one sub-folder per class.
original_dataset_path = f'{path}/dataset/1_original/speech_commands_v0.02'
# Output folder for the background-mixed (augmented) wav files.
aug_path = f'{path}/dataset/2_aug'
# Output folder for the MFCC arrays saved as .npy files.
mfcc_path = f'{path}/dataset/3_mfcc'
# Class names; a class's index in this list is used as its integer label.
class_list = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'noise']
# Number of MFCC coefficients passed to librosa.feature.mfcc.
n_mfcc = 40

# Directory listing in natural sort order
def listdir(path):
    """Return the entries of ``path`` ordered by ``natsorted``."""
    entries = os.listdir(path)
    return natsorted(entries)

# Create a folder, including any missing parent folders
def makedirs(path):
    """Create ``path`` recursively; an already existing folder is not an error."""
    os.makedirs(name=path, exist_ok=True)

# Load an audio file and force its length to exactly one second
def process_audio(audio_path):
    """Load ``audio_path`` and trim or zero-pad it to one second of samples.

    The file is loaded at its own sampling rate (``sr=None``), so "one second"
    means ``sr`` samples whatever that rate is.

    Args:
        audio_path: path of the audio file to load.

    Returns:
        ``(data, sr)`` where ``data`` is a 1-D array of exactly ``sr`` samples
        and ``sr`` is the file's sampling rate.
    """
    data, sr = librosa.load(audio_path, sr=None)  # keep the native sampling rate
    target_length = sr  # number of samples in one second
    current_length = len(data)
    if current_length > target_length:
        # Longer than one second: drop the tail.
        data = data[:target_length]
    elif current_length < target_length:
        # Shorter than one second: append silence. The padding is created with
        # the same dtype as the signal; np.zeros defaults to float64, which
        # would upcast padded clips only and make the output dtype depend on
        # the clip length.
        padding = np.zeros(target_length - current_length, dtype=data.dtype)
        data = np.concatenate((data, padding))
    return data, sr

def shuffle(input_list):
    """Shuffle ``input_list`` in place with a fixed seed and return it.

    A private ``random.Random(1234)`` generator is used, so the permutation is
    the same on every call (and identical to seeding the global generator with
    1234 first), but the process-wide ``random`` state is no longer reset as a
    side effect. Other code that draws from ``random`` is therefore unaffected
    by calls to this helper.

    Args:
        input_list: mutable sequence to shuffle; it is modified in place.

    Returns:
        The same list object, now shuffled.
    """
    random.Random(1234).shuffle(input_list)
    return input_list

현재 가상환경 cuda 설정 가능


class list 정의하기

In [157]:
print(class_list)
print(f'len_class: {len(class_list)}')

['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'noise']
len_class: 11


### 데이터 전처리

In [158]:
# Listen to one sample clip.
# shuffle() uses a fixed seed, so the same class and file are picked on every run.
random_class = shuffle(copy.deepcopy(class_list))[0]  # deepcopy keeps class_list itself in its original order
audio_list = listdir(f'{original_dataset_path}/{random_class}')
random_audio = shuffle(audio_list)[0]
print(f'오디오 이름: {random_class}/{random_audio}')
audio_path = f'{original_dataset_path}/{random_class}/{random_audio}'
data, sr = process_audio(audio_path)  # trimmed / padded to one second
audio_len = len(data) / sr  # duration in seconds
print(f'오디오 길이: {audio_len}초')
print(f'data 길이: {len(data)}')
Audio(data, rate = sr)  # last expression of the cell: renders the audio player

오디오 이름: two/6bf5baf3_nohash_0.wav
오디오 길이: 1.0초
data 길이: 16000


데이터 전처리(배경 음성 합성)

In [159]:
# 모든 데이터에 배경 음성 입히기

# Mix two wav files and save the result
def combine_wav(background_path, voice_path, output_path, scale=1):
    """Overlay a background recording on a voice recording and write the mix.

    Both inputs are interpreted as 16-bit PCM samples. The longer signal is
    truncated to the length of the shorter one, the background is multiplied
    by ``scale`` and added to the voice, and the sum is clipped to the int16
    range before being written.

    Args:
        background_path: wav file used as background.
        voice_path: wav file containing the voice.
        output_path: destination wav file (overwritten if present).
        scale: gain applied to the background before mixing.

    Returns:
        None. The mixed signal is written to ``output_path`` using the
        background file's wav parameters.
    """
    # Read the background file
    with wave.open(background_path, 'rb') as bg_wav:
        bg_params = bg_wav.getparams()
        bg_frames = bg_wav.readframes(bg_params.nframes)
        bg_signal = np.frombuffer(bg_frames, dtype=np.int16)

    # Read the voice file
    with wave.open(voice_path, 'rb') as voice_wav:
        voice_params = voice_wav.getparams()
        voice_frames = voice_wav.readframes(voice_params.nframes)
        voice_signal = np.frombuffer(voice_frames, dtype=np.int16)

    # Align lengths to the shorter signal
    min_length = min(len(bg_signal), len(voice_signal))

    # Widen to float64 BEFORE mixing. Adding two int16 arrays wraps around on
    # overflow (e.g. 30000 + 30000 -> -5536), so clipping afterwards would
    # never see the out-of-range value.
    bg_signal = bg_signal[:min_length].astype(np.float64)
    voice_signal = voice_signal[:min_length].astype(np.float64)

    # Mix background and voice
    combined_signal = bg_signal * scale + voice_signal

    # Saturate to the int16 range instead of wrapping
    combined_signal = np.clip(combined_signal, -32768, 32767)

    # Write the mix as a new wav file
    with wave.open(output_path, 'wb') as combined_wav:
        combined_wav.setparams(bg_params)
        combined_wav.writeframes(combined_signal.astype(np.int16).tobytes())

# Pick one pseudo-random element from a list
def get_rand_from_list(input_list):
    """Return an element of ``input_list`` selected with ``random.random()``."""
    pick = int(random.random() * len(input_list))
    last = len(input_list) - 1
    # Clamp to the final valid index.
    if pick > last:
        pick = last
    return input_list[pick]

bg_list = listdir(f'{original_dataset_path}/noise') # candidate background clips: the files of the 'noise' class
for class_name in class_list:
    makedirs(f'{aug_path}/{class_name}') # create the output folder for this class
    audio_list = listdir(f'{original_dataset_path}/{class_name}')
    for audio_name in tqdm(audio_list):
        if class_name == 'noise': # noise clips are copied unchanged
            from_path = f'{original_dataset_path}/{class_name}/{audio_name}'
            to_path = f'{aug_path}/{class_name}/{audio_name}'
            shutil.copy(from_path, to_path)  
        else: # every other class: mix the voice clip with a randomly picked noise clip
            voice_path = f'{original_dataset_path}/{class_name}/{audio_name}'
            background_path = f'{original_dataset_path}/noise/{get_rand_from_list(bg_list)}'
            output_path = f'{aug_path}/{class_name}/{audio_name}'
            combine_wav(background_path, voice_path, output_path, scale=1)

100%|██████████| 4052/4052 [00:00<00:00, 6396.22it/s]
100%|██████████| 3890/3890 [00:00<00:00, 6515.37it/s]
100%|██████████| 3880/3880 [00:00<00:00, 6415.04it/s]
100%|██████████| 3727/3727 [00:00<00:00, 6423.49it/s]
100%|██████████| 3728/3728 [00:00<00:00, 6456.67it/s]
100%|██████████| 4052/4052 [00:00<00:00, 6461.67it/s]
100%|██████████| 3860/3860 [00:00<00:00, 6498.50it/s]
100%|██████████| 3998/3998 [00:00<00:00, 6465.49it/s]
100%|██████████| 3787/3787 [00:00<00:00, 6458.15it/s]
100%|██████████| 3934/3934 [00:00<00:00, 6499.13it/s]
100%|██████████| 4420/4420 [00:00<00:00, 15271.07it/s]


목표 수량만큼만 음성 개수 맞추기

In [137]:
# Check the number of audio files per class in the augmented folder
for class_name in class_list:
    print(f'{class_name}: {len(listdir(f"{aug_path}/{class_name}"))}')

zero: 4052
one: 3890
two: 3880
three: 3727
four: 3728
five: 4052
six: 3860
seven: 3998
eight: 3787
nine: 3934
noise: 4420


In [138]:
# Trim every class down to `goal` files, then re-check the counts
goal = 3500
for class_name in class_list:
    audio_list = shuffle(listdir(f'{aug_path}/{class_name}'))
    # Clamp the surplus at 0. Without the clamp, a class that holds fewer than
    # `goal` files gives a negative slice bound, and audio_list[:negative]
    # would select (and delete) almost every file of that class.
    remove_list = audio_list[:max(0, len(audio_list) - goal)]
    for audio_name in tqdm(remove_list, desc = f'{class_name} 삭제 중...'):
        os.remove(f'{aug_path}/{class_name}/{audio_name}')

for class_name in class_list:
    print(f'{class_name}: {len(listdir(f"{aug_path}/{class_name}"))}')

zero 삭제 중...: 100%|██████████| 552/552 [00:00<00:00, 46181.35it/s]
one 삭제 중...: 100%|██████████| 390/390 [00:00<00:00, 54754.09it/s]
two 삭제 중...: 100%|██████████| 380/380 [00:00<00:00, 62584.35it/s]
three 삭제 중...: 100%|██████████| 227/227 [00:00<00:00, 59221.68it/s]
four 삭제 중...: 100%|██████████| 228/228 [00:00<00:00, 59679.31it/s]
five 삭제 중...: 100%|██████████| 552/552 [00:00<00:00, 66532.25it/s]
six 삭제 중...: 100%|██████████| 360/360 [00:00<00:00, 62224.90it/s]
seven 삭제 중...: 100%|██████████| 498/498 [00:00<00:00, 64573.64it/s]
eight 삭제 중...: 100%|██████████| 287/287 [00:00<00:00, 61388.41it/s]
nine 삭제 중...: 100%|██████████| 434/434 [00:00<00:00, 67347.22it/s]
noise 삭제 중...: 100%|██████████| 920/920 [00:00<00:00, 64022.43it/s]


zero: 3500
one: 3500
two: 3500
three: 3500
four: 3500
five: 3500
six: 3500
seven: 3500
eight: 3500
nine: 3500
noise: 3500


class_list 안의 음성을 mfcc로 변환하여 np파일로 저장

In [160]:
# Convert every augmented wav file to an MFCC array saved as .npy.
# The whole conversion is skipped when the mfcc folder already exists.
if os.path.isdir(mfcc_path):
    print('mfcc폴더가 발견되었습니다. 이미 변환처리 한 것으로 보입니다.')
else:
    print('mfcc로 변환한 내역이 없습니다. 변환을 시작합니다.')
    for idx, class_name in enumerate(class_list): # iterate over classes
        makedirs(f'{mfcc_path}/{class_name}') # create the class folder
        audio_list = listdir(f'{aug_path}/{class_name}') # files of this class
        for audio_name in tqdm(audio_list, desc = f'{idx}: mfcc 변환 처리'):
            # Only handle names that END in .wav; a substring test would also
            # accept names such as "x.wav.bak".
            if not audio_name.endswith('.wav'):
                continue
            data, sr = process_audio(f'{aug_path}/{class_name}/{audio_name}') # one-second clip at the file's sampling rate
            mfcc = librosa.feature.mfcc(y=data, sr=sr, n_mfcc=n_mfcc) # extract MFCCs
            mfcc_name = os.path.splitext(audio_name)[0] # file name without its extension
            np.save(f'{mfcc_path}/{class_name}/{mfcc_name}.npy', mfcc) # store as .npy

mfcc로 변환한 내역이 없습니다. 변환을 시작합니다.


0: mfcc 변환 처리: 100%|██████████| 4052/4052 [00:19<00:00, 208.17it/s]
1: mfcc 변환 처리: 100%|██████████| 3890/3890 [00:21<00:00, 183.33it/s]
2: mfcc 변환 처리: 100%|██████████| 3880/3880 [00:20<00:00, 186.54it/s]
3: mfcc 변환 처리: 100%|██████████| 3727/3727 [00:19<00:00, 186.82it/s]
4: mfcc 변환 처리: 100%|██████████| 3728/3728 [00:19<00:00, 186.52it/s]
5: mfcc 변환 처리: 100%|██████████| 4052/4052 [00:22<00:00, 183.81it/s]
6: mfcc 변환 처리: 100%|██████████| 3860/3860 [00:20<00:00, 186.00it/s]
7: mfcc 변환 처리: 100%|██████████| 3998/3998 [00:21<00:00, 181.94it/s]
8: mfcc 변환 처리: 100%|██████████| 3787/3787 [00:20<00:00, 183.38it/s]
9: mfcc 변환 처리: 100%|██████████| 3934/3934 [00:21<00:00, 186.89it/s]
10: mfcc 변환 처리: 100%|██████████| 4420/4420 [00:23<00:00, 191.66it/s]


train, val 셋 정의 (test 셋은 이 셀에서 만들지 않음)

In [161]:
ratio = [0.9, 0.1] # train, val fractions; only ratio[0] is read below

# Collect every augmented file as a relative "<class>/<file>" path.
all_list = []
for class_name in tqdm(class_list, desc = '전체 오디오 파일 스캔 중'):
    audio_list = listdir(f'{aug_path}/{class_name}')
    for audio_name in audio_list:
        all_list.append(f'{class_name}/{audio_name}')

all_list = shuffle(all_list) # fixed-seed shuffle, so the split is reproducible
train_list, val_list = [], []
for i, audio_path in enumerate(all_list):
    # The first ratio[0] share of the shuffled list goes to train, the rest to val.
    if i < ratio[0] * len(all_list):
        train_list.append(audio_path)
    else:
        val_list.append(audio_path)

print(f'train_list: {len(train_list)}')
print(f'val_list: {len(val_list)}')

전체 오디오 파일 스캔 중: 100%|██████████| 11/11 [00:00<00:00, 27.82it/s]

train_list: 38996
val_list: 4332





In [166]:
# Custom Dataset 생성

class AudioDataset(Dataset):
    """Dataset of pre-computed MFCC ``.npy`` files with integer class labels.

    Args:
        dataset_path: root folder that contains ``<class>/<name>.npy`` files.
        file_list: relative ``<class>/<name>.wav`` paths; the extension is
            replaced by ``.npy`` to locate the stored features.
        class_list_want: class names; a class's position in this list is its
            integer label.

    Raises:
        KeyError: if a path in ``file_list`` starts with a class name that is
            not in ``class_list_want``.
    """

    def __init__(self, dataset_path, file_list, class_list_want):
        self.dataset_path = dataset_path
        self.file_list = file_list
        self.labels = []
        self.data = []

        # Labels follow the order of the class list given by the caller.
        self.classes = class_list_want
        class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}

        # Build the feature-file path and label for every entry.
        for file_name in self.file_list:
            file_name = f'{os.path.splitext(file_name)[0]}.npy'  # swap the audio extension for .npy
            class_name = file_name.split('/')[0]  # leading path component is the class
            label = class_to_idx[class_name]
            self.data.append(os.path.join(dataset_path, file_name))
            self.labels.append(label)

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load, normalize and return ``(mfcc_tensor, label_tensor)`` for ``idx``."""
        mfcc_path = self.data[idx]
        label = self.labels[idx]
        mfccs = np.load(mfcc_path)
        mfccs = self.normalize_data(mfccs)  # scale to [0, 1]
        mfccs = torch.tensor(mfccs, dtype=torch.float32)
        label = torch.tensor(label, dtype=torch.long)
        return mfccs, label

    def normalize_data(self, data):
        """Min-max normalize ``data`` to the range [0, 1].

        A constant array has no range to scale; it is mapped to all zeros
        instead of dividing by zero, which would fill the features with NaN.
        """
        lowest = data.min()
        span = data.max() - lowest
        if span == 0:
            return np.zeros_like(data, dtype=np.result_type(data.dtype, np.float32))
        return (data - lowest) / span
    
batch_size = 256
# The shuffle flag is passed inline on purpose: binding it to a module-level
# name `shuffle` would overwrite the shuffle() helper defined earlier in this
# notebook, and re-running any earlier cell that calls shuffle(...) would then
# fail with "'bool' object is not callable".
# train
train_dataset = AudioDataset(mfcc_path, train_list, class_list)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# val
val_dataset = AudioDataset(mfcc_path, val_list, class_list)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
# test (disabled: no test_list is built in this notebook)
# test_dataset = AudioDataset(mfcc_path, test_list, class_list_want)
# test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Print the dataset size plus the shapes of one sample and one batch
def print_dataset(dataset, loader, txt):
    """Print a short summary of ``dataset`` and of the first batch of ``loader``.

    Args:
        dataset: indexable collection whose items are ``(data, label)`` pairs.
        loader: iterable that yields ``(batch_data, batch_labels)`` pairs.
        txt: name of the split, used as prefix of the first printed line.
    """
    print(f"{txt} dataset 길이: {len(dataset)}")

    features, target = dataset[0]
    print(f"Sample data shape: {features.shape}", f"Sample label: {target}", sep='\n')

    batch_features, batch_targets = next(iter(loader))
    print(
        f"Sample batch data shape: {batch_features.shape}",
        f"Sample batch labels shape: {batch_targets.shape}",
        sep='\n',
    )
    print('')

# Summarize the train and val splits; the test call stays disabled because
# no test dataset/loader is created above.
print_dataset(train_dataset, train_loader, 'train')
print_dataset(val_dataset, val_loader, 'val')
# print_dataset(test_dataset, test_loader, 'test')

train dataset 길이: 38996
Sample data shape: torch.Size([40, 32])
Sample label: 5
Sample batch data shape: torch.Size([256, 40, 32])
Sample batch labels shape: torch.Size([256])

val dataset 길이: 4332
Sample data shape: torch.Size([40, 32])
Sample label: 6
Sample batch data shape: torch.Size([256, 40, 32])
Sample batch labels shape: torch.Size([256])



LSTM 모델 제작

In [177]:
# 모델 선언
class ModifiedLSTM(nn.Module):
    """Four stacked single-layer LSTMs with batch norm, ending in a linear classifier.

    Each LSTM widens the hidden size (32 -> 128 -> 256 -> 512 -> 1024) and is
    followed by a ``BatchNorm1d`` over the hidden dimension; dropout follows
    the first and third stages. Only the output at the final sequence position
    is classified.

    Input is consumed as ``(batch, seq, 32)`` because every LSTM uses
    ``batch_first=True`` and the first one has ``input_size=32``.
    NOTE(review): the notebook feeds ``(batch, 40, 32)`` MFCC arrays, so the 40
    MFCC coefficients act as the sequence axis and the 32 frames as features —
    confirm that this orientation is intended.

    Args:
        class_list: sequence of class names; its length sets the output size.
    """

    def __init__(self, class_list):
        super().__init__()
        # Stage 1
        self.lstm1 = nn.LSTM(input_size=32, hidden_size=128, num_layers=1, batch_first=True)
        self.bn1 = nn.BatchNorm1d(128)
        self.dropout1 = nn.Dropout(0.5)

        # Stage 2
        self.lstm2 = nn.LSTM(input_size=128, hidden_size=256, num_layers=1, batch_first=True)
        self.bn2 = nn.BatchNorm1d(256)

        # Stage 3
        self.lstm3 = nn.LSTM(input_size=256, hidden_size=512, num_layers=1, batch_first=True)
        self.bn3 = nn.BatchNorm1d(512)
        self.dropout2 = nn.Dropout(0.5)

        # Stage 4
        self.lstm4 = nn.LSTM(input_size=512, hidden_size=1024, num_layers=1, batch_first=True)
        self.bn4 = nn.BatchNorm1d(1024)

        # Linear head producing one logit per class
        self.fc = nn.Linear(1024, len(class_list))

    @staticmethod
    def _norm_hidden(batch_norm, seq):
        """Apply ``batch_norm`` over the hidden dimension of a (batch, seq, hidden) tensor."""
        # BatchNorm1d expects channels on dim 1, so swap seq/hidden around the call.
        return batch_norm(seq.transpose(1, 2)).transpose(1, 2)

    def forward(self, x):
        """Return class logits of shape ``(batch, len(class_list))``."""
        x = self._norm_hidden(self.bn1, self.lstm1(x)[0])
        x = self.dropout1(x)

        x = self._norm_hidden(self.bn2, self.lstm2(x)[0])

        x = self._norm_hidden(self.bn3, self.lstm3(x)[0])
        x = self.dropout2(x)

        x = self._norm_hidden(self.bn4, self.lstm4(x)[0])

        # Classify from the output at the last sequence position only
        last_step = x[:, -1, :]
        return self.fc(last_step)


모델 학습

In [178]:
# Checkpoint folder: set the path and create it
save_path = f'{path}/lstm_model_save'
makedirs(save_path)

model = ModifiedLSTM(class_list).to(device) # build the model on the selected device

# Loss function, optimizer and learning-rate scheduler
epochs = 100  # maximum number of epochs
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=1, gamma=0.9)  # multiply the lr by 0.9 at every scheduler step
rnd_no = 3 # number of decimals the reported metrics are rounded to
early_stop = 3  # early-stopping threshold handed to train_and_validate

# Training and validation loop
def train_and_validate(model, train_loader, val_loader, epochs, early_stop):
    """Train ``model`` with per-epoch validation, checkpointing and early stopping.

    Uses the module-level ``optimizer``, ``criterion``, ``scheduler``,
    ``device``, ``save_path`` and ``rnd_no``.

    After every epoch the weights are saved to
    ``model_lstm-epoch<N>.pt``. When the (rounded) validation accuracy is
    strictly higher AND the (rounded) validation loss is strictly lower than
    the best values so far, ``model_lstm-best.pt`` is overwritten and the
    early-stop counter is reset; otherwise the counter is incremented.
    Training ends once the counter exceeds ``early_stop``, i.e. after
    ``early_stop + 1`` consecutive epochs without such an improvement.

    Args:
        model: network to train; already placed on ``device``.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        val_loader: iterable of ``(inputs, labels)`` validation batches.
        epochs: maximum number of epochs.
        early_stop: threshold for the early-stop counter (see above).
    """
    best_acc, best_loss, early_cnt = 0.0, 99999.9, 0
    for epoch in range(epochs):
        model.train()  # training mode (dropout / batch-norm updates active)
        train_labels, train_preds, train_loss = [], [], []
        for inputs, labels in tqdm(train_loader, desc = 'Training'):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_labels.extend(labels.cpu().numpy())
            train_preds.extend(predicted.cpu().numpy())
            train_loss.append(loss.item())
        train_accuracy = round(accuracy_score(train_labels, train_preds), rnd_no)
        train_loss = round(sum(train_loss) / len(train_loss), rnd_no)

        # Validation
        model.eval()  # evaluation mode
        val_labels, val_preds, val_loss = [], [], []
        # No gradients are needed here; disabling autograd avoids building
        # graphs that are never back-propagated (less memory, faster).
        with torch.no_grad():
            for inputs, labels in tqdm(val_loader, desc = 'Validating'):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                val_labels.extend(labels.cpu().numpy())
                val_preds.extend(predicted.cpu().numpy())
                val_loss.append(loss.item())
        val_accuracy = round(accuracy_score(val_labels, val_preds), rnd_no)
        val_loss = round(sum(val_loss) / len(val_loss), rnd_no)

        # Checkpointing
        torch.save(model.state_dict(), f'{save_path}/model_lstm-epoch{epoch+1}.pt') # saved every epoch
        if val_accuracy > best_acc and val_loss < best_loss:
            early_cnt = 0
            best_acc, best_loss = val_accuracy, val_loss
            print('Best 모델 생성!')
            torch.save(model.state_dict(), f'{save_path}/model_lstm-best.pt') # best checkpoint
        else:
            early_cnt += 1
            print(f'모델 발전 안됨. Early Stopper cnt: {early_cnt}')

        print(f'에폭: {epoch+1}/{epochs}, Train Acc.: {train_accuracy}, Train Loss: {train_loss}, Val Acc.: {val_accuracy}, Val Loss: {val_loss}\n') # per-epoch report

        scheduler.step() # advance the learning-rate schedule once per epoch

        # Early stopping
        if early_cnt > early_stop:
            print(f'{early_cnt}번 연속 모델 학습 발전이 없어서 학습을 종료합니다')
            break

# Run training and validation
train_and_validate(model, train_loader, val_loader, epochs, early_stop)
# https://www.youtube.com/watch?v=NITIefkRae0

Training: 100%|██████████| 153/153 [00:09<00:00, 16.11it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.52it/s]


Best 모델 생성!
에폭: 1/100, Train Acc.: 0.153, Train Loss: 2.332, Val Acc.: 0.165, Val Loss: 2.295



Training: 100%|██████████| 153/153 [00:09<00:00, 15.62it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.58it/s]


모델 발전 안됨. Early Stopper cnt: 1
에폭: 2/100, Train Acc.: 0.196, Train Loss: 2.201, Val Acc.: 0.197, Val Loss: 2.322



Training: 100%|██████████| 153/153 [00:09<00:00, 15.84it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.65it/s]


Best 모델 생성!
에폭: 3/100, Train Acc.: 0.282, Train Loss: 1.976, Val Acc.: 0.181, Val Loss: 2.236



Training: 100%|██████████| 153/153 [00:09<00:00, 15.96it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.55it/s]


모델 발전 안됨. Early Stopper cnt: 1
에폭: 4/100, Train Acc.: 0.462, Train Loss: 1.51, Val Acc.: 0.128, Val Loss: 3.025



Training: 100%|██████████| 153/153 [00:09<00:00, 15.66it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.59it/s]


Best 모델 생성!
에폭: 5/100, Train Acc.: 0.662, Train Loss: 0.999, Val Acc.: 0.341, Val Loss: 2.181



Training: 100%|██████████| 153/153 [00:09<00:00, 15.73it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.57it/s]


Best 모델 생성!
에폭: 6/100, Train Acc.: 0.745, Train Loss: 0.77, Val Acc.: 0.518, Val Loss: 1.371



Training: 100%|██████████| 153/153 [00:09<00:00, 15.58it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.33it/s]


Best 모델 생성!
에폭: 7/100, Train Acc.: 0.791, Train Loss: 0.627, Val Acc.: 0.576, Val Loss: 1.25



Training: 100%|██████████| 153/153 [00:09<00:00, 15.72it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.53it/s]


Best 모델 생성!
에폭: 8/100, Train Acc.: 0.824, Train Loss: 0.54, Val Acc.: 0.648, Val Loss: 0.988



Training: 100%|██████████| 153/153 [00:09<00:00, 15.77it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.62it/s]


모델 발전 안됨. Early Stopper cnt: 1
에폭: 9/100, Train Acc.: 0.843, Train Loss: 0.482, Val Acc.: 0.578, Val Loss: 1.349



Training: 100%|██████████| 153/153 [00:09<00:00, 15.77it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.42it/s]


Best 모델 생성!
에폭: 10/100, Train Acc.: 0.859, Train Loss: 0.431, Val Acc.: 0.799, Val Loss: 0.636



Training: 100%|██████████| 153/153 [00:09<00:00, 15.81it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.37it/s]


모델 발전 안됨. Early Stopper cnt: 1
에폭: 11/100, Train Acc.: 0.869, Train Loss: 0.397, Val Acc.: 0.735, Val Loss: 0.751



Training: 100%|██████████| 153/153 [00:09<00:00, 15.56it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.59it/s]


모델 발전 안됨. Early Stopper cnt: 2
에폭: 12/100, Train Acc.: 0.88, Train Loss: 0.367, Val Acc.: 0.609, Val Loss: 1.171



Training: 100%|██████████| 153/153 [00:09<00:00, 15.92it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.71it/s]


Best 모델 생성!
에폭: 13/100, Train Acc.: 0.888, Train Loss: 0.336, Val Acc.: 0.841, Val Loss: 0.501



Training: 100%|██████████| 153/153 [00:09<00:00, 15.83it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.33it/s]


Best 모델 생성!
에폭: 14/100, Train Acc.: 0.897, Train Loss: 0.314, Val Acc.: 0.882, Val Loss: 0.382



Training: 100%|██████████| 153/153 [00:09<00:00, 15.83it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.72it/s]


모델 발전 안됨. Early Stopper cnt: 1
에폭: 15/100, Train Acc.: 0.904, Train Loss: 0.288, Val Acc.: 0.851, Val Loss: 0.511



Training: 100%|██████████| 153/153 [00:09<00:00, 15.81it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.67it/s]


모델 발전 안됨. Early Stopper cnt: 2
에폭: 16/100, Train Acc.: 0.911, Train Loss: 0.269, Val Acc.: 0.878, Val Loss: 0.38



Training: 100%|██████████| 153/153 [00:09<00:00, 15.59it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.73it/s]


모델 발전 안됨. Early Stopper cnt: 3
에폭: 17/100, Train Acc.: 0.917, Train Loss: 0.254, Val Acc.: 0.793, Val Loss: 0.629



Training: 100%|██████████| 153/153 [00:09<00:00, 15.74it/s]
Validating: 100%|██████████| 17/17 [00:00<00:00, 18.28it/s]


모델 발전 안됨. Early Stopper cnt: 4
에폭: 18/100, Train Acc.: 0.921, Train Loss: 0.235, Val Acc.: 0.856, Val Loss: 0.469

4번 연속 모델 학습 발전이 없어서 학습을 종료합니다


모델 성능 테스트(Test Dataloader)

In [24]:
# Load the best checkpoint
model = ModifiedLSTM(class_list).to(device)  # build the architecture first
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU can still be loaded on a CPU-only machine.
model.load_state_dict(torch.load(f'{save_path}/model_lstm-best.pt', map_location=device))  # restore the saved parameters
model.eval()  # switch to evaluation mode

# Accuracy of a model on a test loader
def test_accuracy(model, test_loader):
    """Return the accuracy of ``model`` on ``test_loader``, rounded to 3 decimals.

    Uses the module-level ``device``.

    Args:
        model: network to evaluate; it is switched to evaluation mode.
        test_loader: iterable of ``(inputs, labels)`` batches.

    Returns:
        Accuracy as computed by ``accuracy_score``, rounded to 3 decimals.
    """
    rnd_no = 3 # number of decimals to round to
    model.eval()  # evaluation mode
    test_labels = []
    test_preds = []
    # Inference only: disable autograd so no graph is built per batch.
    with torch.no_grad():
        for inputs, labels in tqdm(test_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            test_labels.extend(labels.cpu().numpy())
            test_preds.extend(predicted.cpu().numpy())
    # Named `accuracy` rather than `test_accuracy` so the local does not
    # shadow the function's own name.
    accuracy = round(accuracy_score(test_labels, test_preds), rnd_no)
    return accuracy

# Print the test accuracy
# NOTE(review): in the part of the notebook visible here, `test_loader` only
# appears in commented-out code — confirm it is defined before running this cell.
test_acc = test_accuracy(model, test_loader)
print(f'Test Accuracy: {test_acc}')

100%|██████████| 129/129 [00:01<00:00, 84.20it/s]

Test Accuracy: 0.872





마이크 wakeup 로직 만들기