# Transfer learning: dual ResNet-GRU
Fine-tuning

In [1]:
import os
import numpy as np
import pandas as pd
import librosa
from tqdm.auto import tqdm
from preprocessing_sig2feat import *
from ysp_func import *

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split, Subset

from einops import rearrange
from torchvision.models import resnet18
from torchvision import transforms

import lightning as L
from lightning.pytorch.callbacks import EarlyStopping

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [3]:
# Sub-folder name of each recording location, keyed by location number.
loc_dict = {1: 'loc1', 2: 'loc2', 3: 'loc3', 4: 'loc4', 5: 'loc5', 6: 'loc6'}
loc_nn = 1  # location number used for this run

# Resolve the dataset folder under the current user's Desktop.
# expanduser("~") is used instead of "C:/Users/" + %USERNAME% so the cell
# does not raise TypeError when USERNAME is unset and still works when the
# profile folder is not named after the user. Separators are normalised to
# "/" so the resulting string keeps the same form as before on Windows.
_home_dir = os.path.expanduser("~").replace(os.sep, "/")
path0 = f"{_home_dir}/Desktop/DCASE2024-Task10-Dataset/{loc_dict[loc_nn]}"

### Dataset

In [4]:
class CustomDataset(Dataset):
    """Audio dataset indexed by the CSV file ``<root_path>/<dir_name>.csv``.

    Every item is a triple ``(x1, x2, y)``:

    * ``x1`` - standardized output of ``feature_spectrogram_tensor``
    * ``x2`` - standardized output of ``feature_sthd_tensor``
    * ``y``  - float32 tensor holding the four label columns listed in
      ``LABEL_COLUMNS``

    Args:
        root_path: Directory containing the CSV index; the ``path`` column of
            the CSV is resolved relative to this directory.
        dir_name: Name of the split; the index is read from ``<dir_name>.csv``.
    """

    # Columns of the CSV index that make up the regression target.
    LABEL_COLUMNS = ['car_left', 'car_right', 'cv_left', 'cv_right']

    def __init__(self, root_path, dir_name):
        super().__init__()

        # fs = 16000
        self.fframe = 2**10                 # frame length given to the feature extractors (1024)
        self.delay = self.fframe // 4       # delay given to feature_sthd_tensor (256)

        self.root_path = root_path
        self.dir_name = dir_name
        self.df_datainfo = pd.read_csv(f'{self.root_path}/{self.dir_name}.csv')

    def __getitem__(self, index):
        """Load one recording and return ``(x1, x2, y)`` for row ``index``."""
        # Fetch the row once instead of re-indexing the frame for every field.
        row = self.df_datainfo.loc[index]
        file_path = f"{self.root_path}/{row['path']}"

        # sr=None / mono=False: keep the file's own sample rate and channels.
        sig, fs = librosa.load(file_path, sr=None, mono=False)

        # Peak-normalise. An all-zero recording has no peak to scale by, and
        # dividing by zero would fill the features (and the loss) with NaN,
        # so such a signal is passed through unchanged.
        peak = np.max(np.abs(sig))
        if peak > 0:
            sig = sig / peak

        feat1 = feature_spectrogram_tensor(sig, fs, self.fframe, device)
        feat2 = feature_sthd_tensor(sig, fs, self.fframe, self.delay, device)

        # The row is an object-dtype Series when the CSV mixes strings and
        # numbers, so convert the labels to float32 explicitly.
        labels = row[self.LABEL_COLUMNS].to_numpy(dtype=np.float32)

        # NOTE: the last item is still mirrored on the instance, as before,
        # in case other code reads these attributes.
        self.x1_data = standardization_tensor(feat1)
        self.x2_data = standardization_tensor(feat2)
        self.y_data = torch.tensor(labels)

        return self.x1_data, self.x2_data, self.y_data

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.df_datainfo)

    def get_filename(self, index):
        """Return the ``path`` entry of row ``index``."""
        return self.df_datainfo.loc[index, 'path']

In [5]:
# Build the training split from 'train.csv' and report how many items it has.
train_dataset = CustomDataset(path0, 'train')
train_size = len(train_dataset)
print(train_size)

# Same for the validation split, read from 'val.csv'.
valid_dataset = CustomDataset(path0, 'val')
valid_size = len(valid_dataset)
print(valid_size)

1256
1282


In [6]:
# Fine-tune on a small random sample: 256 training items ...
random_indices = np.random.permutation(train_size)[:256]
train_dataset = Subset(train_dataset, random_indices)
train_size = len(train_dataset)
print(train_size)

# ... and 64 validation items.
random_indices = np.random.permutation(valid_size)[:64]
valid_dataset = Subset(valid_dataset, random_indices)
valid_size = len(valid_dataset)
print(valid_size)

256
64


### Model

In [7]:
# Mini-batch size and worker count used by the model's DataLoaders.
batch_size, num_workers = 16, 0
# Mean-squared-error criterion shared by the training/validation/test steps.
loss_func = nn.MSELoss()

class dual_ResNet_GRU(L.LightningModule):
    """dual_ResNet_GRU model class (Lightning).

    Two ResNet-18 branches (one taking 4 input channels, one taking 6) each
    process one input feature. Their outputs are concatenated along the
    channel axis, read as a sequence by a 2-layer GRU, and the final hidden
    state of the last GRU layer is mapped to the output by a linear layer.

    Args:
        hidden_dimension: Hidden size of the GRU.
        output_dimension: Number of values predicted per sample.
        dropout: Dropout value passed to the GRU.
    """

    def __init__(self, hidden_dimension, output_dimension, dropout):
        super().__init__()
        # Per-batch losses collected over one validation / test epoch.
        self.validation_step_outputs = []
        self.test_step_outputs = []

        # The two branches differ only in the channel count of their first conv.
        self.resnet_spectrogram = self._make_resnet_branch(in_channels=4)
        self.resnet_sthd = self._make_resnet_branch(in_channels=6)

        # Size of the flattened "(ch feat)" axis that forward() feeds to the GRU.
        input_dimension = 1024
        self.gru_layer = nn.GRU(
            input_size=input_dimension,
            hidden_size=hidden_dimension,
            num_layers=2,
            bidirectional=False,    # not a Bi-GRU
            batch_first=True,
            dropout=dropout,
        )

        self.fc_layer = nn.Linear(in_features=hidden_dimension, out_features=output_dimension)

    @staticmethod
    def _make_resnet_branch(in_channels):
        """Build a ResNet-18 feature extractor that accepts ``in_channels`` channels."""
        backbone = resnet18()        # pretrained=True
        backbone.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        backbone.avgpool = nn.AdaptiveAvgPool2d((59, 1))
        # Keep every child module except the last one.
        layers = list(backbone.children())
        return nn.Sequential(*layers[:-1])

    def forward(self, x1, x2):
        """Predict from two inputs laid out as (batch, feat_bins, time_steps, channels)."""
        to_channels_first = "batch feat time ch -> batch ch time feat"
        spec_maps = self.resnet_spectrogram(rearrange(x1, to_channels_first))
        sthd_maps = self.resnet_sthd(rearrange(x2, to_channels_first))

        # Stack both branches on the channel axis, then fold channels and
        # features together so each time step becomes one GRU input vector.
        merged = torch.cat((spec_maps, sthd_maps), dim=1)
        sequence = rearrange(merged, "batch ch time feat -> batch time (ch feat)")

        _, hidden = self.gru_layer(sequence)

        # hidden[-1] is the final hidden state of the last GRU layer.
        return self.fc_layer(hidden[-1])

    def _batch_loss(self, batch):
        """Run the model on one ``(x1, x2, y)`` batch and return its loss."""
        x1, x2, target = batch
        return loss_func(self(x1, x2), target)

    @staticmethod
    def _mean_and_reset(step_losses):
        """Return the mean of the collected losses and empty the list."""
        mean_loss = torch.stack(step_losses).mean()
        step_losses.clear()
        return mean_loss

    def training_step(self, batch, batch_num):
        training_loss = self._batch_loss(batch)

        self.log('train_loss', training_loss, on_epoch=True, prog_bar=True)
        return training_loss

    def validation_step(self, batch, batch_num):
        val_loss = self._batch_loss(batch)
        self.validation_step_outputs.append(val_loss)

        self.log('val_loss', val_loss, on_step=True, on_epoch=True, prog_bar=True)
        return val_loss

    def on_validation_epoch_end(self):
        return self._mean_and_reset(self.validation_step_outputs)

    def test_step(self, batch, batch_num):
        test_loss = self._batch_loss(batch)
        self.test_step_outputs.append(test_loss)

        self.log('test_loss', test_loss, on_step=True, on_epoch=True, prog_bar=True)
        return test_loss

    def on_test_epoch_end(self):
        return self._mean_and_reset(self.test_step_outputs)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-4)     # 1e-3

    def train_dataloader(self):
        # Uses the module-level train_dataset / batch_size / num_workers.
        return DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    def val_dataloader(self):
        # Uses the module-level valid_dataset / batch_size / num_workers.
        return DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # def test_dataloader(self):
    #     return DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

### Load pre-trained model

In [8]:
# Locate the checkpoint folder of the pre-trained run for the selected location.
logs_name = f'logs_gen3_{loc_dict[loc_nn]}'     # logs_name = 'logs_gen3'
ver_num = "0"
ckpt_path = f"{logs_name}/lightning_logs/version_{ver_num}/checkpoints"

# os.listdir() returns entries in arbitrary order, so sort the candidates to
# make the choice deterministic when the folder holds several checkpoints,
# and fail with a clear message (instead of IndexError) when it holds none.
ckpt_files = sorted(file for file in os.listdir(ckpt_path) if file.endswith('.ckpt'))
if not ckpt_files:
    raise FileNotFoundError(f"No .ckpt file found in '{ckpt_path}'")
ckpt_name = ckpt_files[0]

# The constructor arguments are supplied here alongside the checkpoint path.
model = dual_ResNet_GRU.load_from_checkpoint(f"{ckpt_path}/{ckpt_name}", hidden_dimension=256, output_dimension=4, dropout=0.05)

In [None]:
# # Model Summary
# from torchsummary import summary
# summary(model.to('cuda'), [(128, 1874, 4), (128, 1874, 6)])

## Train Model (Transfer Learning)

In [9]:
# EarlyStopping 콜백 설정
# EarlyStopping callback configuration
early_stopping = EarlyStopping(
    monitor='val_loss',     # metric to monitor
    patience=10,            # how many checks without improvement to wait before stopping
    verbose=True,           # whether to print log messages
    mode='min'              # 'min' (minimize) or 'max' (maximize)
)

new_logs_name = logs_name  # new_logs_name = f'{logs_name}_{loc_dict[loc_nn]}'

# GRU model train
# Choose the accelerator with the same CUDA check that selects `device` for
# feature extraction, so the cell also runs on a machine without a GPU
# instead of failing on a hard-coded accelerator="gpu".
accelerator = "gpu" if torch.cuda.is_available() else "cpu"
trainer = L.Trainer(
    max_epochs=100,
    accelerator=accelerator,
    default_root_dir=new_logs_name,
    callbacks=[early_stopping],  # add the EarlyStopping callback
)
trainer.fit(model)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4060 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name               | Type       | Params
--------------------------------------------------
0 | resnet_spectrogram | Sequential | 11.2 M
1 | resnet_sthd        | Sequential | 11.2 M
2 | gru_layer          | GRU        | 1.4 M 
3 | fc_layer           | Linear     | 1.0 K 
--------------------------------------------------
23.7 M    Trainable params
0         Non-trainable params
23.7 M    Total params
94.984    Total estimated mode

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved. New best score: 4.567


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.544 >= min_delta = 0.0. New best score: 4.023


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.025 >= min_delta = 0.0. New best score: 3.998


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.112 >= min_delta = 0.0. New best score: 3.886


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.037 >= min_delta = 0.0. New best score: 3.849


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.054 >= min_delta = 0.0. New best score: 3.795


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 3.783


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.011 >= min_delta = 0.0. New best score: 3.773


Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.017 >= min_delta = 0.0. New best score: 3.756


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.020 >= min_delta = 0.0. New best score: 3.736


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 3.735


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.055 >= min_delta = 0.0. New best score: 3.680


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 3.673


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 3.659


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Monitored metric val_loss did not improve in the last 10 records. Best score: 3.659. Signaling Trainer to stop.


In [None]:
%tensorboard