### 2, 3번 class만 LSTM 학습

In [2]:
import cv2
import os
from tqdm import tqdm
import torch

# Label every video frame with its bbox (x1, y1, x2, y2) and store them as a list.
# The detector is a custom-trained YOLOv5 checkpoint loaded through torch.hub.
model = torch.hub.load(
    'ultralytics/yolov5',
    'custom',
    path='./yolov5-master/exp3/weights/best.pt',
)
def _stretch_bboxes(bbox_list, frame_ea):
    """Resample ``bbox_list`` to exactly ``frame_ea`` entries by repeating boxes.

    Each box is emitted once, plus extra copies spread evenly over the
    sequence. When there are already more than ``frame_ea`` boxes, no copies
    are added and the surplus tail is dropped.

    Returns an empty list when there is nothing to resample.
    """
    if not bbox_list or frame_ea <= 0:
        return []

    # Average number of extra copies owed per box (negative when oversupplied).
    aug_tempo = (frame_ea - len(bbox_list)) / len(bbox_list)
    stretched = []
    credit = 0
    for bbox in bbox_list:
        stretched.append(bbox)
        credit += aug_tempo
        # Pay out one duplicate for every whole unit of accumulated credit.
        while credit >= 1:
            stretched.append(bbox)
            credit -= 1

    # Drop any overshoot ...
    del stretched[frame_ea:]
    # ... and pad with the last box if float rounding left the result short,
    # instead of discarding the whole sequence.
    stretched.extend([stretched[-1]] * (frame_ea - len(stretched)))
    return stretched


def yolo_to_lstm(video_path, frame_ea):
    """Turn a video into a fixed-length sequence of normalised bounding boxes.

    Every frame is passed through the module-level YOLOv5 ``model``. For each
    frame with at least one detection, the highest-confidence box is kept as
    ``[x1, y1, x2, y2]`` with x divided by frame width and y by frame height.
    Frames without detections are skipped. The collected boxes are then
    resampled to ``frame_ea`` entries.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        frame_ea: Number of boxes the returned sequence must contain.

    Returns:
        A list of ``frame_ea`` boxes, or an empty list when the video cannot
        be opened or no frame produced a detection.
    """
    cap = cv2.VideoCapture(video_path)
    bbox_list = []
    try:
        if cap.isOpened():
            while True:
                ret, img = cap.read()
                if not ret:
                    break

                # Run YOLO on the RGB frame and read the detections as a DataFrame.
                h, w = img.shape[:2]
                detections = model(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).pandas().xyxy[0]
                if len(detections) == 0:
                    continue

                # Highest confidence (rounded to 3 decimals); first row wins ties.
                best = max(
                    detections.itertuples(index=False),
                    key=lambda det: round(float(det.confidence), 3),
                )
                bbox_list.append([
                    int(best.xmin) / w,
                    int(best.ymin) / h,
                    int(best.xmax) / w,
                    int(best.ymax) / h,
                ])
    finally:
        # Always free the capture handle, even if detection raises.
        cap.release()

    # Resample once, after all frames are read, rather than on every frame.
    return _stretch_bboxes(bbox_list, frame_ea)

# Build the dataset that will be fed to the LSTM.
dataset = []
path_2 = './train_video/2'
path_3 = './train_video/3'

# Process class 2 first, then class 3; each usable video becomes one sample
# of the form {'key': <class id>, 'value': <bbox sequence>}.
for class_key, class_dir in ((2, path_2), (3, path_3)):
    video_list = os.listdir(class_dir)
    for video_name in tqdm(video_list):
        bbox_list = yolo_to_lstm(f'{class_dir}/{video_name}', 29)
        # Videos that yielded no boxes are left out of the dataset.
        if len(bbox_list) > 0:
            dataset.append({'key': class_key, 'value': bbox_list})

Using cache found in C:\Users\nyan/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2022-9-15 Python-3.8.15 torch-1.10.1 CUDA:0 (NVIDIA GeForce GTX 1080 Ti, 11264MiB)

Fusing layers... 
Model summary: 322 layers, 86173414 parameters, 0 gradients
Adding AutoShape... 
100%|████████████████████████████████████████████████████████████████████████████████| 121/121 [02:54<00:00,  1.44s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 133/133 [03:11<00:00,  1.44s/it]


In [3]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

class LSTMDataset(Dataset):
    """Dataset wrapping a list of ``{'key': class_id, 'value': sequence}`` dicts.

    ``key == 2`` is mapped to label 0; every other key is mapped to label 1.
    """

    def __init__(self, seq_list):
        """Split ``seq_list`` into parallel feature (``X``) and label (``y``) lists."""
        self.X = []
        self.y = []
        for entry in seq_list:
            self.y.append(0 if entry['key'] == 2 else 1)
            self.X.append(entry['value'])

    def __getitem__(self, index):
        """Return ``(features, label)`` for sample ``index`` as tensors."""
        features = torch.Tensor(np.array(self.X[index]))
        target = torch.tensor(np.array(int(self.y[index])))
        return features, target

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

In [4]:
# Train / validation fractions; the test split receives whatever is left over
# after the two truncated counts are subtracted.
split_ratio = [0.9, 0.1, 0.0]
total_len = len(dataset)
train_len, val_len = (int(total_len * ratio) for ratio in split_ratio[:2])
test_len = total_len - train_len - val_len
print(f'{train_len}, {val_len}, {test_len}')

213, 23, 1


In [5]:
from torch.utils.data import random_split
# Wrap the raw samples, split them randomly into three subsets and create one
# loader (batch size 8) per subset.
train_dataset = LSTMDataset(dataset)
train_data, valid_data, test_data = random_split(
    train_dataset,
    [train_len, val_len, test_len],
)

train_loader, val_loader, test_loader = (
    DataLoader(subset, batch_size=8)
    for subset in (train_data, valid_data, test_data)
)

In [6]:
class skeleton_LSTM(nn.Module):
    """Seven stacked single-layer LSTMs followed by a 2-way linear head.

    Hidden sizes run 4 -> 128 -> 256 -> 512 -> 256 -> 128 -> 64 -> 32, with
    dropout (p=0.3) after the third and sixth LSTM. All LSTMs use
    ``batch_first=True``.
    """

    def __init__(self):
        super().__init__()

        def lstm(in_features, out_features):
            # Single-layer, batch-first LSTM mapping in_features -> out_features.
            return nn.LSTM(input_size=in_features, hidden_size=out_features,
                           num_layers=1, batch_first=True)

        # Attribute names and creation order are kept so that saved
        # state_dict keys stay the same.
        self.lstm1 = lstm(4, 128)
        self.lstm2 = lstm(128, 256)
        self.lstm3 = lstm(256, 512)
        self.dropout1 = nn.Dropout(0.3)
        self.lstm4 = lstm(512, 256)
        self.lstm5 = lstm(256, 128)
        self.lstm6 = lstm(128, 64)
        self.dropout2 = nn.Dropout(0.3)
        self.lstm7 = lstm(64, 32)
        self.fc = nn.Linear(32, 2)

    def forward(self, x):
        """Map a ``(batch, seq, 4)`` input to ``(batch, 2)`` class scores.

        Only the output of the last time step is passed to the linear head.
        """
        for layer in (self.lstm1, self.lstm2, self.lstm3):
            x, _ = layer(x)
        x = self.dropout1(x)
        for layer in (self.lstm4, self.lstm5, self.lstm6):
            x, _ = layer(x)
        x = self.dropout2(x)
        x, _ = self.lstm7(x)
        last_step = x[:, -1, :]
        return self.fc(last_step)

In [9]:
def seed(seed = 1234):
    """Seed PyTorch's random generators and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to the CPU generator and to every CUDA
            device.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark mode auto-tunes kernels per run and can pick different
    # algorithms each time, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False
# Use the first CUDA device when one is available, otherwise fall back to CPU,
# and report which one was chosen.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print('현재 가상환경 GPU 사용 가능상태' if device != 'cpu' else 'GPU 사용 불가능 상태')

# 모델 초기화
import matplotlib.pyplot as plt
from torch.optim import Adam
def init_model():
    """Create a fresh network, loss function and optimizer as module globals.

    Sets ``net_lstm`` (a new ``skeleton_LSTM`` moved to ``device``),
    ``loss_fn`` (cross-entropy) and ``optim`` (Adam, lr=1e-4). Note that
    assigning the global ``optim`` rebinds that module-level name.
    """
    global net_lstm, loss_fn, optim
    plt.rc('font', size=10)
    network = skeleton_LSTM()
    net_lstm = network.to(device)
    loss_fn = nn.CrossEntropyLoss()
    optim = Adam(net_lstm.parameters(), lr=0.0001)
    
# Reset the epoch counter
def init_epoch():
    """Set the module-level ``epoch_cnt`` back to zero."""
    global epoch_cnt
    epoch_cnt = 0
    
def init_log():
    """Reset every training-history list to a new empty list.

    Rebinds the module globals ``iter_log``, ``tloss_log``, ``tacc_log``,
    ``vloss_log``, ``vacc_log``, ``time_log`` and ``log_stack``.
    """
    global log_stack, iter_log, tloss_log, tacc_log, vloss_log, vacc_log, time_log
    plt.rc('font', size=10)
    # Each log gets its own list object.
    iter_log = []
    tloss_log = []
    tacc_log = []
    vloss_log = []
    vacc_log = []
    time_log = []
    log_stack = []
    
import gc
from torch.cuda import memory_allocated, empty_cache
def clear_memory():
    """Run the garbage collector and, on GPU, release cached CUDA memory.

    The collector runs first so that blocks owned by objects it frees are
    back in the caching allocator by the time ``empty_cache()`` is called;
    in the opposite order those blocks would stay cached.
    """
    gc.collect()
    if device != 'cpu':
        empty_cache()

# 학습 알고리즘
import numpy as np
def epoch(data_loader, mode = 'train'):
    """Run one full pass over ``data_loader`` and return its mean loss and accuracy.

    Uses the module-level ``net_lstm``, ``loss_fn``, ``optim`` and ``device``.
    In ``'train'`` mode the network is updated after every batch and the
    global ``epoch_cnt`` is incremented once at the end, provided at least
    one update happened. In any other mode the network is put in eval mode
    and gradient tracking is disabled.

    Args:
        data_loader: Iterable yielding ``(data, label)`` batches.
        mode: ``'train'`` to optimise; anything else evaluates only.

    Returns:
        ``(mean_loss, mean_accuracy)`` averaged per sample, so a smaller
        final batch is not over-weighted. Both values are NaN when the
        loader yields no batches.
    """
    global epoch_cnt

    is_train = mode == 'train'

    # Switch the network mode once per pass instead of once per batch.
    # Eval mode turns off train-only behaviour such as Dropout.
    if is_train:
        net_lstm.train()
    else:
        net_lstm.eval()

    loss_sum, correct, seen = 0.0, 0, 0
    last_grad_performed = False

    # Track gradients only when they are needed, whatever the caller's context.
    with torch.set_grad_enabled(is_train):
        for _data, _label in data_loader:
            data = _data.to(device)
            label = _label.type(torch.LongTensor).to(device)

            # 1. Feed-forward: per-class scores, then the arg-max class.
            result = net_lstm(data)
            _, out = torch.max(result, 1)

            # 2. Loss between the scores and the ground-truth labels.
            loss = loss_fn(result, label)

            # 3. Back-propagation and one optimiser step.
            if is_train:
                optim.zero_grad()  # clear gradients left from the previous step
                loss.backward()
                optim.step()
                last_grad_performed = True

            # 4. Accumulate per-sample totals.
            # NOTE(review): assumes loss_fn returns the batch mean - confirm
            # if the loss reduction is ever changed.
            batch_size = len(label)
            loss_sum += loss.item() * batch_size
            correct += (out == label).sum().item()
            seen += batch_size

    # Count the epoch only if at least one optimiser step was taken.
    if last_grad_performed:
        epoch_cnt += 1

    clear_memory()

    if seen == 0:
        return float('nan'), float('nan')
    # accuracy = correct predictions / all predictions
    return loss_sum / seen, correct / seen

def epoch_not_finished():
    """Return True while ``epoch_cnt`` is still below ``maximum_epoch``."""
    return maximum_epoch > epoch_cnt

def record_train_log(_tloss, _tacc, _time):
    """Append one epoch's training time, loss, accuracy and epoch number to the logs."""
    entries = (
        (time_log, _time),
        (tloss_log, _tloss),
        (tacc_log, _tacc),
        (iter_log, epoch_cnt),
    )
    for log, value in entries:
        log.append(value)
    
def record_valid_log(_vloss, _vacc):
    """Append one epoch's validation loss and accuracy to their logs."""
    for log, value in ((vloss_log, _vloss), (vacc_log, _vacc)):
        log.append(value)

def last(log_list):
    """Return the final element of ``log_list``, or -1 when it is empty.

    Used by ``print_log`` to fetch the most recent value of each log.
    """
    if len(log_list) == 0:
        return -1
    return log_list[-1]

from IPython.display import clear_output
def print_log():
    """Plot the learning curves and print the accumulated per-epoch log lines.

    Formats the latest entry of every log into one text line, appends it to
    ``log_stack``, draws train/validation loss and accuracy against the epoch
    number, clears the previous cell output, then shows the figure followed
    by all log lines, newest first.
    """

    def latest(log):
        # Most recent value of a log, rounded to three decimals.
        return round(float(last(log)), 3)

    train_loss = latest(tloss_log)
    train_acc = latest(tacc_log)
    val_loss = latest(vloss_log)
    val_acc = latest(vacc_log)
    time_spent = latest(time_log)

    log_template = ('Epoch: {:3} | T_Loss {:5} | T_acc {:5} | V_Loss {:5} | V_acc. {:5} | '
                    '🕒 {:5}')
    log_stack.append(log_template.format(
        last(iter_log), train_loss, train_acc, val_loss, val_acc, time_spent))

    # Figure for the learning history, with a white background.
    fig, loss_axis = plt.subplots(figsize=(10, 3), dpi=99)
    fig.patch.set_facecolor('white')

    # Loss curves on the left axis.
    loss_t_line = plt.plot(iter_log, tloss_log, label='Train Loss', color='red', marker='o')
    loss_v_line = plt.plot(iter_log, vloss_log, label='Valid Loss', color='blue', marker='s')
    loss_axis.set_xlabel('epoch')
    loss_axis.set_ylabel('loss')

    # Accuracy curves on a twin right axis.
    acc_axis = loss_axis.twinx()
    acc_t_line = acc_axis.plot(iter_log, tacc_log, label='Train Acc.', color='red', marker='+')
    acc_v_line = acc_axis.plot(iter_log, vacc_log, label='Valid Acc.', color='blue', marker='x')
    acc_axis.set_ylabel('accuracy')

    # One shared legend covering all four lines, plus grid and title.
    all_lines = loss_t_line + loss_v_line + acc_t_line + acc_v_line
    loss_axis.legend(all_lines, [line.get_label() for line in all_lines])
    loss_axis.grid()
    plt.title('Learning history until epoch {}'.format(last(iter_log)))
    plt.draw()

    # Replace the previous output, show the plot, then print logs newest first.
    clear_output(wait=True)
    plt.show()
    for line in reversed(log_stack):
        print(line)

현재 가상환경 GPU 사용 가능상태


In [10]:
# Reset the model, the epoch counter and all logs before training.
init_model()
init_epoch()
init_log()
maximum_epoch = 30

# Each iteration runs one timed training pass, one validation pass without
# gradient tracking, then refreshes the plot and the text log.
import time
while epoch_not_finished():
    start_time = time.time()
    tloss, tacc = epoch(train_loader, mode='train')
    end_time = time.time()
    time_taken = end_time - start_time
    record_train_log(tloss, tacc, time_taken)

    with torch.no_grad():
        vloss, vacc = epoch(val_loader, mode='val')
    record_valid_log(vloss, vacc)

    print_log()

print('\n Training completed!')
# Save the weights as they are after the final epoch.
torch.save(net_lstm.state_dict(), './best_model_lstm.pth')

Epoch:  30 | T_Loss 0.007 | T_acc   1.0 | V_Loss 0.006 | V_acc.   1.0 | 🕒 0.508
Epoch:  29 | T_Loss 0.007 | T_acc   1.0 | V_Loss 0.007 | V_acc.   1.0 | 🕒 0.501
Epoch:  28 | T_Loss 0.008 | T_acc   1.0 | V_Loss 0.007 | V_acc.   1.0 | 🕒 0.511
Epoch:  27 | T_Loss 0.008 | T_acc   1.0 | V_Loss 0.008 | V_acc.   1.0 | 🕒 0.498
Epoch:  26 | T_Loss 0.009 | T_acc   1.0 | V_Loss 0.008 | V_acc.   1.0 | 🕒 0.549
Epoch:  25 | T_Loss  0.01 | T_acc   1.0 | V_Loss 0.009 | V_acc.   1.0 | 🕒 0.503
Epoch:  24 | T_Loss 0.011 | T_acc   1.0 | V_Loss 0.009 | V_acc.   1.0 | 🕒 0.579
Epoch:  23 | T_Loss 0.012 | T_acc   1.0 | V_Loss  0.01 | V_acc.   1.0 | 🕒 0.501
Epoch:  22 | T_Loss 0.013 | T_acc   1.0 | V_Loss 0.011 | V_acc.   1.0 | 🕒 0.508
Epoch:  21 | T_Loss 0.014 | T_acc   1.0 | V_Loss 0.012 | V_acc.   1.0 | 🕒 0.558
Epoch:  20 | T_Loss 0.015 | T_acc   1.0 | V_Loss 0.014 | V_acc.   1.0 | 🕒 0.487
Epoch:  19 | T_Loss 0.017 | T_acc   1.0 | V_Loss 0.015 | V_acc.   1.0 | 🕒 0.517
Epoch:  18 | T_Loss  0.02 | T_acc   1.0 