<a href="https://colab.research.google.com/github/Slbini/videoclassification-/blob/main/CNN_LSTM_Raw_Data_Label_Preprocessing_v2_edited.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# <span style="color:orange;">Raw Data-Label Preprocessing for Training Data v2</span>

In [1]:
from google.colab import drive

# Google Drive 마운트
drive.mount('/content/drive')

# 마운트된 경로 확인
print("Google Drive is mounted at '/content/drive'")

Mounted at /content/drive
Google Drive is mounted at '/content/drive'


## VideoDataset_Unified Class (extends Dataset)
 - constructor는 raw data의 경로와 대응되는 label의 경로를 파라미터로 받습니다. 그러면 이에 대응되는 dataset이 만들어집니다.
 - 데이터를 getitem 요청이 들어올 때마다 동적으로 구성하여 메모리 효율을 개선했습니다. 메모리 소모량은 영상 데이터의 크기와 거의 같습니다.
 - 여기서 각 data는 (video, label) 꼴이며, label은 {'S1': 0, 'S2': 1, 'S3': 2, 'S4': 3, 'S5': 4, 'S10': 5, 'S20': 6}에 따라 mapping된 정수이고, video는 shape가 (10, 40, 60)인 float tensor(pytorch)입니다.
 - 전처리 과정은 최소화하는 게 목적이었으므로 이미지 resizing조차 하지 않았습니다. 이 부분은 model forward에서 interpolation 등으로 처리해야 할 것 같습니다.
 - VideoDataset_Unified의 생성자에 transform이 추가되었습니다. Usage에서는 데이터를 64x64로 만든 뒤 -1~1로 정규화를 수행하고 있습니다.


### 2025-02-25 수정사항
 - 전체 데이터를 랜덤 분할하여 train, validation, test 데이터 셋을 각각 독립적으로 구성하도록 수정했습니다 데이터가 겹치지 않으므로 validation 및 test의 평가 정확도가 과대 평가되지 않습니다.
 -train_loss 와 val_loss가 동일한 방식으로 평균을 계산하도록 수정되었습니다. 각 배치의 손실을 합산한 후, 전체 배치 수로 나누어 최종 손실 값을 계산합니다.

In [2]:
import numpy as np
from collections import deque
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, ConcatDataset
import torchvision.transforms as transforms
import os
import json
import re
import glob
from sklearn.metrics import accuracy_score
import random

In [3]:
class VideoDataset_Unified(Dataset):
    def __load_label(self, **kwargs):
        label_map = kwargs['label_map'] if 'label_map' in kwargs else {'S1': 0, 'S2': 1, 'S3': 2, 'S4': 3, 'S5': 4, 'S10': 5, 'S20': 6}

        raw_label_dat = json.load(open(self.label_path, encoding='utf-8'))
        for key in raw_label_dat.keys():
            # 'frame_0000xx' -> int('0000xx') = xx
            frame_index = int(key.split('_')[1])

            # 'Sn(description)' -> 'Sn'
            #frame_labels = tuple(map(lambda s: label_map[s.split('(')[0]], raw_label_dat[key]['labels']))
            if (len(raw_label_dat[key]['labels']) == 0): continue
            frame_label = label_map[raw_label_dat[key]['labels'][0].split('(')[0]]
            self.label_dict[frame_index] = frame_label

    def __load_data(self, **kwargs):
        FPS = 20
        image_hw = kwargs['image_hw'] if 'image_hw' in kwargs else (64, 64)
        frames_before_sec = kwargs['frames_before_sec'] if 'frames_before_sec' in kwargs else (0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8)
        use_init_frames = kwargs['use_init_frames'] if 'use_init_frames' in kwargs else False

        self.frames = []
        try:
            with open(self.raw_path, 'r') as file:
                frames_raw_text = file.read()
                frames_raw = re.split(r't:', frames_raw_text)
        except FileNotFoundError:
            print(f"Error: Unable to open file {self.raw_path}")
            return

        # 각 줄을 읽어 이미지 데이터로 변환
        for frame_index, frame in enumerate(frames_raw):
            thermal_data_values = [int(value) for value in re.findall(r'\d+', frame)]

            # to avoid metadata reading
            if(thermal_data_values[0] < 20):
                thermal_data_values = thermal_data_values[3:]

            if len(thermal_data_values) >= 2400:
                # 1d data to 40x60 tensor (flipped)
                if(self.transform is not None):
                    frame_4060 = self.transform(torch.tensor(thermal_data_values[:2400], dtype=torch.float32).reshape(1, 40, 60).flip((-1,)))
                else:
                    frame_4060 = torch.tensor(thermal_data_values[:2400], dtype=torch.float32).reshape(1, 40, 60).flip((-1,))
                # 40x60 -> 64x64 (not implemented)
                #frame_resized = nn.functional.interpolate(frame_4060.reshape(1, 1, 40, 60), image_hw, mode='bilinear', align_corners=False).squeeze()
                self.frames.append(frame_4060)

                if(self.test_mode or frame_index in self.label_dict.keys()):
                    if(not use_init_frames and frame_index < round(frames_before_sec[-1] * FPS)):
                        continue
                    self.data_info.append({
                        'img_idx': tuple(max(frame_index - round(t * FPS), 0) for t in reversed(frames_before_sec)),
                        'label': self.label_dict[frame_index] if not self.test_mode else -1
                    })

    def __init__(self, raw_path, label_path, transform, test_mode=False, **kwargs):
        self.test_mode = test_mode
        self.raw_path = raw_path
        self.label_path = label_path

        self.label_dict = {}

        self.frames = []
        self.data_info = []

        self.transform = transform

        if(not self.test_mode):
            self.__load_label(**kwargs)
        self.__load_data(**kwargs)

        print(f"Data at raw: {raw_path}, label: {label_path} loaded.")

    def __len__(self):
        return len(self.data_info)

    def __getitem__(self, idx):
        video = torch.stack(tuple(self.frames[i] for i in self.data_info[idx]['img_idx']))
        label = self.data_info[idx]['label']

        return video, label

## Visualization

In [None]:

import matplotlib.pyplot as plt
from IPython.display import clear_output

def visualize_all_frames(dat, idx=0):
    clear_output(wait=True)
    video=dat[0]
    label=dat[1]

    num_frames = video.shape[0]
    plt.figure(figsize=(15, 5))

    for i in range(num_frames):
        # not working (beacuse of normalization)
        min_temp, max_temp = 2900, 3050
        frame = np.clip(video[i].view(64, 64).numpy(), min_temp, max_temp)
        frame = (frame - min_temp) / (max_temp - min_temp) * 255

        plt.subplot(1, num_frames, i + 1)
        plt.imshow(frame, cmap="gray")
        plt.title(f"Frame {i+1}")
        plt.axis("off")

    plt.suptitle(f"All Frames of Video Index {idx}, Label: {label}")
    plt.tight_layout()
    plt.show()


for i in range(len(dataset) // 10):
    visualize_all_frames(dataset[i*10], idx=i*10)

## Training Test

In [4]:
class CNNLSTMModel(nn.Module):
    def __init__(self, num_classes):
        super(CNNLSTMModel, self).__init__()

        # CNN Layers
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 64 -> 16

            nn.Conv2d(8, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 16 -> 4

            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 4 -> 1
        )

        # LSTM Layer
        self.lstm = nn.LSTM(input_size=32, hidden_size=128, num_layers=2, batch_first=True)

        # Fully Connected Layer
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        batch_size, seq_len, c, h, w = x.shape # 4, 20, 1, 64, 64
        x = x.view(batch_size * seq_len, c, h, w)
        x = self.cnn(x)
        x = x.view(batch_size, seq_len, -1)  # (batch_size, seq_len, feature_dim) # 4 x 20 x 32
        _, (hidden, _) = self.lstm(x)
        x = self.fc(hidden[-1])  # 마지막 LSTM layer의 hidden state 사용
        return x

In [7]:
# 학습 함수
##################################################
def train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=100):
##################################################
    best_model = None
    best_val_acc = 0.0

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        correct_preds, total_preds = 0, 0
        num_batches = len(train_loader)

        for videos, labels in train_loader:
            videos, labels = videos.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(videos)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

            # 정확도 계산
            _, preds = torch.max(outputs, dim=1)
            correct_preds += (preds == labels).sum().item()
            total_preds += labels.size(0)

        train_loss /= num_batches
        train_acc = correct_preds / total_preds

        # Validation
        model.eval()
        val_loss = 0.0
        correct_preds, total_preds = 0, 0
        with torch.no_grad():
            for videos, labels in val_loader:
                videos, labels = videos.to(device), labels.to(device)
                outputs = model(videos)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                # 정확도 계산
                _, preds = torch.max(outputs, dim=1)
                correct_preds += (preds == labels).sum().item()
                total_preds += labels.size(0)

        val_loss /= num_batches
        val_acc = correct_preds / total_preds

        print(f"Epoch {epoch + 1}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}")

        # Best Model 저장
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model = model.state_dict()
            ####################################################
            torch.save(best_model, "/content/drive/MyDrive/telecons/CNN_LSTM/best_model/best_model_CNNLSTM_20250225.pt")
            ####################################################
            print(f"Best model saved with validation accuracy: {best_val_acc:.4f}")

    # 최종적으로 Best Model 로드
    if best_model:
        model.load_state_dict(best_model)
        print(f"Best model loaded with validation accuracy: {best_val_acc:.4f}")

    return model

# 테스트 함수
def test_model(model, test_loader, device):
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for videos, labels in test_loader:
            videos = videos.to(device)
            outputs = model(videos)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.numpy())

    acc = accuracy_score(all_labels, all_preds)
    print(f"Test Accuracy: {acc:.4f}")
    return acc

In [8]:
from torch.utils.data import random_split

# 지정한 디렉토리 내에서 이름이 같은 JSON/TXT 파일 쌍 찾기
data_dir = '/content/drive/MyDrive/telecons/data/20250221/ALL/'

# glob을 이용해 .json과 .TXT 파일 리스트 얻기
json_files = glob.glob(os.path.join(data_dir, "*.json"))
txt_files = glob.glob(os.path.join(data_dir, "*.TXT"))

# 파일명(확장자 제외)를 key로 매핑
json_map = {os.path.splitext(os.path.basename(path))[0]: path for path in json_files}
txt_map  = {os.path.splitext(os.path.basename(path))[0]: path for path in txt_files}

# 두 매핑에서 공통된 파일명에 해당하는 쌍 생성 (raw_path: TXT, label_path: JSON)
paths = []
for key in json_map.keys():
    if key in txt_map:
        paths.append((txt_map[key], json_map[key]))

print(f"총 {len(paths)} 쌍의 파일을 찾았습니다.")

# transform 설정 (resize 및 normalize)
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.Normalize((2868,), (34,))
])

# 데이터셋을 랜덤으로 train, val, test로 분할
random.shuffle(paths)
n_train = int(len(paths) * 0.8)
n_val = int(len(paths) * 0.1)
n_test = len(paths) - n_train - n_val

train_paths = paths[:n_train]
val_paths = paths[n_train:n_train + n_val]
test_paths = paths[n_train + n_val:]

# 분할된 데이터셋 정보 출력
print("Train Set:")
for raw, label in train_paths:
    print(f"Raw: {raw}, Label: {label}")
print("\nValidation Set:")
for raw, label in val_paths:
    print(f"Raw: {raw}, Label: {label}")
print("\nTest Set:")
for raw, label in test_paths:
    print(f"Raw: {raw}, Label: {label}")

train_dataset_list, val_dataset_list, test_dataset_list = [], [], []
for raw, label in train_paths:
    dataset = VideoDataset_Unified(raw, label, transform)
    train_dataset_list.append(dataset)
for raw, label in val_paths:
    dataset = VideoDataset_Unified(raw, label, transform)
    val_dataset_list.append(dataset)
for raw, label in test_paths:
    dataset = VideoDataset_Unified(raw, label, transform)
    test_dataset_list.append(dataset)

train_dataset = ConcatDataset(train_dataset_list)
val_dataset = ConcatDataset(val_dataset_list)
test_dataset = ConcatDataset(test_dataset_list)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, drop_last=True)

총 26 쌍의 파일을 찾았습니다.
Train Set:
Raw: /content/drive/MyDrive/telecons/data/20250221/ALL/20250217_cf4.TXT, Label: /content/drive/MyDrive/telecons/data/20250221/ALL/20250217_cf4.json
Raw: /content/drive/MyDrive/telecons/data/20250221/ALL/WF0_0.TXT, Label: /content/drive/MyDrive/telecons/data/20250221/ALL/WF0_0.json
Raw: /content/drive/MyDrive/telecons/data/20250221/ALL/20250217_cf0.TXT, Label: /content/drive/MyDrive/telecons/data/20250221/ALL/20250217_cf0.json
Raw: /content/drive/MyDrive/telecons/data/20250221/ALL/WF1_0.TXT, Label: /content/drive/MyDrive/telecons/data/20250221/ALL/WF1_0.json
Raw: /content/drive/MyDrive/telecons/data/20250221/ALL/WF2_0.TXT, Label: /content/drive/MyDrive/telecons/data/20250221/ALL/WF2_0.json
Raw: /content/drive/MyDrive/telecons/data/20250221/ALL/20250217_cf2.TXT, Label: /content/drive/MyDrive/telecons/data/20250221/ALL/20250217_cf2.json
Raw: /content/drive/MyDrive/telecons/data/20250221/ALL/CS20_3.TXT, Label: /content/drive/MyDrive/telecons/data/20250221/ALL/

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"

model = CNNLSTMModel(num_classes=7).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

model = train_model(model, train_loader, val_loader, criterion, optimizer, device)

Epoch 1, Train Loss: 0.4699, Train Accuracy: 0.8274, Val Loss: 0.6557, Val Accuracy: 0.5802
Best model saved with validation accuracy: 0.5802
Epoch 2, Train Loss: 0.1785, Train Accuracy: 0.9337, Val Loss: 0.8966, Val Accuracy: 0.5596
Epoch 3, Train Loss: 0.1100, Train Accuracy: 0.9588, Val Loss: 1.0400, Val Accuracy: 0.5528
Epoch 4, Train Loss: 0.0788, Train Accuracy: 0.9701, Val Loss: 1.2027, Val Accuracy: 0.6027
Best model saved with validation accuracy: 0.6027
Epoch 5, Train Loss: 0.0647, Train Accuracy: 0.9755, Val Loss: 0.9689, Val Accuracy: 0.5703
Epoch 6, Train Loss: 0.0536, Train Accuracy: 0.9794, Val Loss: 1.1591, Val Accuracy: 0.5718
Epoch 7, Train Loss: 0.0462, Train Accuracy: 0.9821, Val Loss: 1.2149, Val Accuracy: 0.5703
Epoch 8, Train Loss: 0.0427, Train Accuracy: 0.9833, Val Loss: 1.1881, Val Accuracy: 0.5865
Epoch 9, Train Loss: 0.0415, Train Accuracy: 0.9836, Val Loss: 1.3198, Val Accuracy: 0.5866
Epoch 10, Train Loss: 0.0376, Train Accuracy: 0.9850, Val Loss: 1.2752, 

In [None]:
test_model(model, test_loader, device)