# train dual resnet gru
pretraining

In [1]:
import os
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import json
from ysp_func import *

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split

from einops import rearrange
from torchsummary import summary
from torchvision.models import resnet18, resnet50
from torchvision import transforms

import lightning as L
from lightning.pytorch.callbacks import EarlyStopping

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [3]:
path0 = "C:/Users/" + os.getenv('USERNAME') +"/Desktop/DCASE2024-Task10-Dataset/simulation"
gen_name = 'gen_sound_v1'

### Dataset

In [3]:
class CustomDataset(Dataset):
    def __init__(self, root_path, dir_name):
        super(CustomDataset, self).__init__()

        self.root_path = root_path
        self.dir_name = dir_name
        self.df_datainfo = pd.read_csv(f'{self.root_path}/{self.dir_name}.csv')        
        
    def __getitem__(self, index):
        filename = self.df_datainfo.loc[index]['path']
        feat1_name = filename.replace(self.dir_name, f'{self.dir_name}_feat1_npy').replace('.flac', '.npy')
        feat1_path = f'{self.root_path}/{feat1_name}'
        feat2_name = filename.replace(self.dir_name, f'{self.dir_name}_feat2_npy').replace('.flac', '.npy')
        feat2_path = f'{self.root_path}/{feat2_name}'
        
        feat1 = np.load(feat1_path)
        feat2 = np.load(feat2_path)
        
        self.x1_data = torch.FloatTensor(feat1)
        self.x2_data = torch.FloatTensor(feat2)
        
        label = self.df_datainfo.loc[index][['car_left', 'car_right', 'cv_left', 'cv_right']]        
        y = torch.FloatTensor(label)
        self.y_data = y

        return self.x1_data, self.x2_data, self.y_data

    def __len__(self):
        return len(self.df_datainfo)
    
    def get_filename(self, index):        
        return self.df_datainfo.loc[index]['path']

In [5]:
dataset = CustomDataset(path0, gen_name)
dataset_size = dataset.__len__()
print(dataset_size)

train_size = int(dataset_size * 0.8)
valid_size = int(dataset_size * 0.1)
test_size = dataset_size - train_size - valid_size
train_dataset, valid_dataset, test_dataset = random_split(dataset, [train_size, valid_size, test_size])
temp = train_dataset.__getitem__(0)
print(f'size : {temp[0].size()} / {temp[1].size()} / {temp[2].size()}')

6000
size : torch.Size([128, 1874, 4]) / torch.Size([128, 1874, 6]) / torch.Size([4])


### Model

In [4]:
batch_size = 16
num_workers = 0
loss_func = nn.MSELoss()

''' dual_ResNet_GRU Model class (Lightning) '''
class dual_ResNet_GRU(L.LightningModule):
    def __init__(self, hidden_dimension, output_dimension, dropout):
        super().__init__()
        self.validation_step_outputs = []
        self.test_step_outputs = []                

        resnet_temp = resnet18()        # pretrained=True
        resnet_temp.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False) # in : 4ch, out : 64ch
        resnet_temp.avgpool = nn.AdaptiveAvgPool2d((59, 1))
        resnet_temp = nn.Sequential(*list(resnet_temp.children())[:-1])
        self.resnet_spectrogram = resnet_temp

        resnet_temp = resnet18()        # pretrained=True
        resnet_temp.conv1 = nn.Conv2d(6, 64, kernel_size=7, stride=2, padding=3, bias=False) # in : 6ch, out : 64ch
        resnet_temp.avgpool = nn.AdaptiveAvgPool2d((59, 1))
        resnet_temp = nn.Sequential(*list(resnet_temp.children())[:-1])
        self.resnet_sthd = resnet_temp
        
        input_dimension = 1024
        self.gru_layer = nn.GRU(input_dimension, 
                           hidden_dimension, 
                           num_layers=2, 
                           bidirectional=False,     # bidirectional=True, # Not Bi-GRU
                           batch_first=True,
                           dropout=dropout)
        
        self.fc_layer = nn.Linear(hidden_dimension, output_dimension)
 
    def forward(self, x1, x2):
        '''input: (batch_size, feat_bins, time_steps, channels)'''        
        x1 = rearrange(x1, "batch feat time ch -> batch ch time feat")
        x1 = self.resnet_spectrogram(x1)        
        x2 = rearrange(x2, "batch feat time ch -> batch ch time feat")
        x2 = self.resnet_sthd(x2)

        x = torch.cat((x1, x2), dim=1)
        x = rearrange(x, "batch ch time feat -> batch time (ch feat)")

        x, hidden = self.gru_layer(x)        
        
        x = self.fc_layer(hidden[-1])  # hidden state # x[:, -1, :] # Take the output of the last time step
        
        return x

    def training_step(self, batch, batch_num):
        train_x1, train_x2, train_y = batch
        y_pred = self(train_x1, train_x2)        
        training_loss = loss_func(y_pred, train_y)
        
        self.log('train_loss', training_loss, on_epoch=True, prog_bar=True)
        return training_loss

    def validation_step(self, batch, batch_num):
        val_x1, val_x2, val_y = batch
        y_pred = self(val_x1, val_x2)        
        val_loss = loss_func(y_pred, val_y)
        self.validation_step_outputs.append(val_loss)
        
        self.log('val_loss', val_loss, on_step=True, on_epoch=True, prog_bar=True)
        return val_loss

    def on_validation_epoch_end(self):
        avg_loss = torch.stack(self.validation_step_outputs).mean()
        self.validation_step_outputs.clear()        
        return avg_loss

    def test_step(self, batch, batch_num):        
        test_x1, test_x2, test_y = batch
        y_pred = self(test_x1, test_x2)        
        test_loss = loss_func(y_pred, test_y)
        self.test_step_outputs.append(test_loss)
        
        self.log('test_loss', test_loss, on_step=True, on_epoch=True, prog_bar=True)
        return test_loss

    def on_test_epoch_end(self):
        avg_loss = torch.stack(self.test_step_outputs).mean()
        self.test_step_outputs.clear()
        return avg_loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-4)     # 1e-3

    def train_dataloader(self):
        return DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    def val_dataloader(self):
        return DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    def test_dataloader(self):
        return DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

In [7]:
# 출력 4개 : 'car_left', 'car_right', 'cv_left', 'cv_right' # 각각 회귀
model = dual_ResNet_GRU(hidden_dimension=256, output_dimension=4, dropout=0.05)

In [None]:
# from torchsummary import summary
# summary(model.to('cuda'), [(128, 1874, 4), (128, 1874, 6)])

In [1]:
# EarlyStopping 콜백 설정
early_stopping = EarlyStopping(
    monitor='val_loss',     # 모니터링할 메트릭
    patience=10,            # 성능 향상이 없을 때 기다리는 에포크 수
    verbose=True,           # 로그 출력 여부
    mode='min'              # 'min' (최소화) 또는 'max' (최대화)
)

''' GRU model train '''
trainer = L.Trainer(max_epochs=100, accelerator="gpu", default_root_dir = 'logs_gen1', callbacks=[early_stopping])  # EarlyStopping 콜백 추가
trainer.fit(model)

In [2]:
''' GRU model test '''
trainer.test()  # trainer.test('ckpt_path='last')

In [None]:
%tensorboard

In [14]:
test_data = test_dataset.__getitem__(7)
test_x1, test_x2, test_y = test_data
print(f'size : {test_x1.size()} / {test_x2.size()} / {test_y.size()}')
print(test_y)

y_pred = model(test_x1.unsqueeze(0), test_x2.unsqueeze(0))
print(y_pred.squeeze())
print(y_pred.squeeze().round())

size : torch.Size([128, 1874, 4]) / torch.Size([128, 1874, 6]) / torch.Size([4])
tensor([4., 6., 1., 2.])
tensor([3.4177, 5.5061, 1.6080, 1.8384], grad_fn=<SqueezeBackward0>)
tensor([3., 6., 2., 2.], grad_fn=<RoundBackward0>)


### 모델 평가
다른 데이터

In [5]:
path0 = "C:/Users/" + os.getenv('USERNAME') +"/Desktop/DCASE2024-Task10-Dataset/simulation"
gen_name = 'gen_sound_v0'       # 600개 # 평가할 데이터 (학습안된 데이터)
test_dataset = CustomDataset(path0, gen_name)

In [6]:
# 모델 불러오기
default_root_dir = 'logs_gen1'
ver_num = "0"
ckpt_path = f"{default_root_dir}/lightning_logs/version_{ver_num}/checkpoints"
ckpt_name = [file for file in os.listdir(ckpt_path) if file.endswith('.ckpt')][0]  # ckpt_name = "epoch=99-step=12000.ckpt"

model = dual_ResNet_GRU.load_from_checkpoint(f"{ckpt_path}/{ckpt_name}", hidden_dimension=256, output_dimension=4, dropout=0.05)
model.to(device)
model.eval()

dual_ResNet_GRU(
  (resnet_spectrogram): Sequential(
    (0): Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_

In [14]:
# 테스트 데이터 확인
test_csv_filename = f'{gen_name}_test.csv'

columns = ['car_left', 'car_right', 'cv_left', 'cv_right', 'path']
df = pd.DataFrame(columns=columns)

# mse = []
for idx in tqdm(range(test_dataset.__len__())):
    test_data = test_dataset.__getitem__(idx)
    test_x1, test_x2, test_y = test_data
    
    y_pred = model(test_x1.unsqueeze(0).to(device), test_x2.unsqueeze(0).to(device))
    y_pred = y_pred.squeeze().round().to('cpu')
    
    data_filename = test_dataset.get_filename(idx)
    
    df.loc[idx] = y_pred.tolist() + [data_filename]
        
    # mse 계산?
    # mse.append(((test_y - y_pred) ** 2).mean().tolist)

df[['car_left', 'car_right', 'cv_left', 'cv_right']] = df[['car_left', 'car_right', 'cv_left', 'cv_right']].astype(int)
df.to_csv(test_csv_filename, index=False)

  0%|          | 0/600 [00:00<?, ?it/s]