# train dual resnet gru
- train
- pretrain

In [1]:
import os
import numpy as np
import pandas as pd
import librosa
from tqdm.auto import tqdm
from preprocessing_sig2feat import *
from ysp_func import *

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split

from einops import rearrange
from torchvision.models import resnet18
from torchvision import transforms

import lightning as L
from lightning.pytorch.callbacks import EarlyStopping

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [3]:
path0 = "C:/Users/" + os.getenv('USERNAME') +"/Desktop/DCASE2024-Task10-Dataset/simulation"
gen_name = 'gen_sound_v3'

### Dataset

In [4]:
class CustomDataset(Dataset):
    def __init__(self, root_path, dir_name):
        super(CustomDataset, self).__init__()

        # fs = 16000
        self.fframe = 2**10      # 1024
        self.delay = int(self.fframe/4)    # 256

        self.root_path = root_path
        self.dir_name = dir_name
        self.df_datainfo = pd.read_csv(f'{self.root_path}/{self.dir_name}.csv')        
        
    def __getitem__(self, index):
        filename = self.df_datainfo.loc[index]['path']
        file_path = f'{self.root_path}/{filename}'        
                
        sig, fs = librosa.load(file_path, sr=None, mono=False)
        sig = sig / np.max(np.abs(sig))
        
        feat1 = feature_spectrogram_tensor(sig, fs, self.fframe, device)
        self.x1_data = standardization_tensor(feat1)

        feat2 = feature_sthd_tensor(sig, fs, self.fframe, self.delay, device)
        self.x2_data = standardization_tensor(feat2)
        
        label = self.df_datainfo.loc[index][['car_left', 'car_right', 'cv_left', 'cv_right']]
        y = torch.FloatTensor(label)
        self.y_data = y

        return self.x1_data, self.x2_data, self.y_data

    def __len__(self):
        return len(self.df_datainfo)
    
    def get_filename(self, index):        
        return self.df_datainfo.loc[index]['path']

In [5]:
dataset = CustomDataset(path0, gen_name)
dataset_size = dataset.__len__()
print(dataset_size)

train_size = int(dataset_size * 0.85)
valid_size = int(dataset_size * 0.1)
test_size = dataset_size - train_size - valid_size
train_dataset, valid_dataset, test_dataset = random_split(dataset, [train_size, valid_size, test_size])
temp = train_dataset.__getitem__(0)
print(f'size : {temp[0].size()} / {temp[1].size()} / {temp[2].size()}')

6000
size : torch.Size([128, 1876, 4]) / torch.Size([128, 1876, 6]) / torch.Size([4])


### Model

In [6]:
batch_size = 16
num_workers = 0
loss_func = nn.MSELoss()

''' dual_ResNet_GRU Model class (Lightning) '''
class dual_ResNet_GRU(L.LightningModule):
    def __init__(self, hidden_dimension, output_dimension, dropout):
        super().__init__()
        self.validation_step_outputs = []
        self.test_step_outputs = []                

        resnet_temp = resnet18()        # pretrained=True
        resnet_temp.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False) # in : 4ch, out : 64ch
        resnet_temp.avgpool = nn.AdaptiveAvgPool2d((59, 1))
        resnet_temp = nn.Sequential(*list(resnet_temp.children())[:-1])
        self.resnet_spectrogram = resnet_temp

        resnet_temp = resnet18()        # pretrained=True
        resnet_temp.conv1 = nn.Conv2d(6, 64, kernel_size=7, stride=2, padding=3, bias=False) # in : 6ch, out : 64ch
        resnet_temp.avgpool = nn.AdaptiveAvgPool2d((59, 1))
        resnet_temp = nn.Sequential(*list(resnet_temp.children())[:-1])
        self.resnet_sthd = resnet_temp
        
        input_dimension = 1024
        self.gru_layer = nn.GRU(input_dimension, 
                           hidden_dimension, 
                           num_layers=2, 
                           bidirectional=False,     # bidirectional=True, # Not Bi-GRU
                           batch_first=True,
                           dropout=dropout)
        
        self.fc_layer = nn.Linear(hidden_dimension, output_dimension)
 
    def forward(self, x1, x2):
        '''input: (batch_size, feat_bins, time_steps, channels)'''        
        x1 = rearrange(x1, "batch feat time ch -> batch ch time feat")
        x1 = self.resnet_spectrogram(x1)        
        x2 = rearrange(x2, "batch feat time ch -> batch ch time feat")
        x2 = self.resnet_sthd(x2)

        x = torch.cat((x1, x2), dim=1)
        x = rearrange(x, "batch ch time feat -> batch time (ch feat)")

        x, hidden = self.gru_layer(x)        
        
        x = self.fc_layer(hidden[-1])  # hidden state # x[:, -1, :] # Take the output of the last time step
        
        return x

    def training_step(self, batch, batch_num):
        train_x1, train_x2, train_y = batch
        y_pred = self(train_x1, train_x2)        
        training_loss = loss_func(y_pred, train_y)
        
        self.log('train_loss', training_loss, on_epoch=True, prog_bar=True)
        return training_loss

    def validation_step(self, batch, batch_num):
        val_x1, val_x2, val_y = batch
        y_pred = self(val_x1, val_x2)        
        val_loss = loss_func(y_pred, val_y)
        self.validation_step_outputs.append(val_loss)
        
        self.log('val_loss', val_loss, on_step=True, on_epoch=True, prog_bar=True)
        return val_loss

    def on_validation_epoch_end(self):
        avg_loss = torch.stack(self.validation_step_outputs).mean()
        self.validation_step_outputs.clear()        
        return avg_loss

    def test_step(self, batch, batch_num):        
        test_x1, test_x2, test_y = batch
        y_pred = self(test_x1, test_x2)        
        test_loss = loss_func(y_pred, test_y)
        self.test_step_outputs.append(test_loss)
        
        self.log('test_loss', test_loss, on_step=True, on_epoch=True, prog_bar=True)
        return test_loss

    def on_test_epoch_end(self):
        avg_loss = torch.stack(self.test_step_outputs).mean()
        self.test_step_outputs.clear()
        return avg_loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-4)     # 1e-3

    def train_dataloader(self):
        return DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    def val_dataloader(self):
        return DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    def test_dataloader(self):
        return DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

#### Model load - Not-rained

In [8]:
# 출력 4개 : 'car_left', 'car_right', 'cv_left', 'cv_right' # 각각 회귀
model = dual_ResNet_GRU(hidden_dimension=256, output_dimension=4, dropout=0.05)

#### Model load - Pre-trained

In [7]:
# # 모델 불러오기
# default_root_dir = 'logs_gen3'
# ver_num = "0"
# ckpt_path = f"{default_root_dir}/lightning_logs/version_{ver_num}/checkpoints"
# ckpt_name = [file for file in os.listdir(ckpt_path) if file.endswith('.ckpt')][0]  # ckpt_name = "epoch=99-step=12000.ckpt"

# model = dual_ResNet_GRU.load_from_checkpoint(f"{ckpt_path}/{ckpt_name}", hidden_dimension=256, output_dimension=4, dropout=0.05)

In [None]:
# from torchsummary import summary
# summary(model.to('cuda'), [(128, 1874, 4), (128, 1874, 6)])

In [9]:
# EarlyStopping 콜백 설정
early_stopping = EarlyStopping(
    monitor='val_loss',     # 모니터링할 메트릭
    patience=10,            # 성능 향상이 없을 때 기다리는 에포크 수
    verbose=True,           # 로그 출력 여부
    mode='min'              # 'min' (최소화) 또는 'max' (최대화)
)

''' GRU model train '''
trainer = L.Trainer(max_epochs=100, accelerator="gpu", default_root_dir = 'logs_gen3', callbacks=[early_stopping])  # EarlyStopping 콜백 추가
trainer.fit(model)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: logs_gen3\lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name               | Type       | Params
--------------------------------------------------
0 | resnet_spectrogram | Sequential | 11.2 M
1 | resnet_sthd        | Sequential | 11.2 M
2 | gru_layer          | GRU        | 1.4 M 
3 | fc_layer           | Linear     | 1.0 K 
--------------------------------------------------
23.7 M    Trainable params
0         Non-trainable params
23.7 M    Total params
94.984    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved. New best score: 0.327


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.109 >= min_delta = 0.0. New best score: 0.218


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.028 >= min_delta = 0.0. New best score: 0.189


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 0.182


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 0.168


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.167


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 0.153


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.152


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.148


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.147


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 0.141


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 0.132


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Monitored metric val_loss did not improve in the last 10 records. Best score: 0.132. Signaling Trainer to stop.


In [10]:
''' GRU model test '''
trainer.test()  # trainer.test('ckpt_path='last')

Restoring states from the checkpoint path at logs_gen3\lightning_logs\version_0\checkpoints\epoch=38-step=12441.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at logs_gen3\lightning_logs\version_0\checkpoints\epoch=38-step=12441.ckpt


Testing: 0it [00:00, ?it/s]

[{'test_loss_epoch': 0.15113890171051025}]

In [None]:
%tensorboard