In [2]:
import pandas as pd
import re

# Load the three training CSV files.
csv_paths = ('train_data_4_1.csv', 'train_data_4_2.csv', 'train_data_4_3.csv')
data1, data2, data3 = [pd.read_csv(path, encoding='utf-8') for path in csv_paths]

# Drop rows that are missing any of the columns used downstream (optional step).
required_columns = ['HTML', 'Important', 'Type']
for frame in (data1, data2, data3):
    frame.dropna(subset=required_columns, inplace=True)

# Cast the label columns: Important becomes int, Type becomes str.
for frame in (data1, data2, data3):
    frame['Important'] = frame['Important'].astype(int)
    frame['Type'] = frame['Type'].astype(str)

# 특수문자 제거 등 전처리 함수 정의
# Compiled once at import time so the patterns are not rebuilt for every row.
# Anything that is not an ASCII letter/digit, a Hangul consonant jamo (ㄱ-ㅎ),
# a Hangul syllable (가-힣) or a space is treated as a separator.
_NON_TEXT_CHARS_RE = re.compile(r'[^0-9a-zA-Zㄱ-ㅎ가-힣 ]')
_WHITESPACE_RUN_RE = re.compile(r'\s+')


def preprocess_html(html_text):
    """Return *html_text* reduced to space-separated alphanumeric/Hangul tokens.

    Markup is not parsed: tag names and attribute names/values are kept as
    plain words, and only the punctuation around them is removed.

    Args:
        html_text: The raw HTML snippet. Non-string values are converted with
            ``str()`` first, matching the inline preprocessing used elsewhere
            in this notebook.

    Returns:
        The cleaned text with every disallowed character replaced by a space,
        runs of whitespace collapsed to a single space, and leading/trailing
        whitespace stripped.
    """
    text = _NON_TEXT_CHARS_RE.sub(' ', str(html_text))
    return _WHITESPACE_RUN_RE.sub(' ', text).strip()

# Add a cleaned-text column to each of the three frames.
for frame in (data1, data2, data3):
    frame['processed_html'] = frame['HTML'].apply(preprocess_html)


# 레이블 인코딩 및 토크나이저 로드

In [3]:
from transformers import AutoTokenizer

# Load the tokenizer for the 'xlm-roberta-base' checkpoint.
tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')

# Type과 Important의 고유값 추출 및 매핑 생성 함수
def create_label_maps(data):
    """Build label/index lookup tables for the Type and Important columns.

    Args:
        data: DataFrame with a 'Type' column.

    Returns:
        A tuple ``(type_label_map, important_label_map, type_id_to_label)``:
        Type value -> index (indices follow the order in which values first
        appear in the column), the fixed map ``{0: 0, 1: 1}`` for Important,
        and the inverse of the Type map (index -> Type value).
    """
    # Each distinct Type value gets the next free index.
    type_label_map = {}
    for label in data['Type'].unique().tolist():
        type_label_map[label] = len(type_label_map)

    # Important only ever takes the values 0 and 1, which map to themselves.
    important_label_map = {0: 0, 1: 1}

    # Reverse lookup, for turning predicted indices back into Type values.
    type_id_to_label = dict(zip(type_label_map.values(), type_label_map.keys()))

    return type_label_map, important_label_map, type_id_to_label

# Build the label maps for each frame, then unpack them into per-dataset names.
label_maps = [create_label_maps(frame) for frame in (data1, data2, data3)]
type_label_map1, important_label_map1, type_id_to_label1 = label_maps[0]
type_label_map2, important_label_map2, type_id_to_label2 = label_maps[1]
type_label_map3, important_label_map3, type_id_to_label3 = label_maps[2]



# 데이터셋 정의

In [10]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer
from sklearn.model_selection import train_test_split

# Load the tokenizer for the 'xlm-roberta-base' checkpoint.
tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')

# MultiTaskDataset 정의
class MultiTaskDataset(Dataset):
    """Dataset yielding tokenized text plus a Type label and an Important label.

    Each item is built from one row of ``data``: the 'processed_html' text is
    tokenized on access, and the row's 'Type' and 'Important' values are
    looked up in the supplied label maps.
    """

    def __init__(self, data, tokenizer, type_label_map, important_label_map, max_length=256):
        """Store the tokenizer, label maps and a positionally re-indexed copy of ``data``.

        Args:
            data: DataFrame with 'processed_html', 'Type' and 'Important' columns.
            tokenizer: Callable invoked as ``tokenizer(text, padding=...,
                truncation=..., max_length=..., return_tensors='pt')``.
            type_label_map: Mapping from 'Type' value to class index.
            important_label_map: Mapping from 'Important' value to class index.
            max_length: Length every sequence is padded/truncated to.
        """
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.type_label_map = type_label_map
        self.important_label_map = important_label_map
        # Reset the index so that positions 0..len-1 address the rows.
        self.data = data.reset_index(drop=True)

    def __len__(self):
        """Return the number of rows in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the tensors for the row at position ``idx``.

        Returns:
            Dict with 'input_ids' and 'attention_mask' (the tokenizer outputs
            with their leading dimension squeezed away) and scalar long
            tensors 'type_label' and 'important_label'.
        """
        sample = self.data.iloc[idx]

        # Tokenize with fixed-length padding so samples share one length.
        encoded = self.tokenizer(
            sample['processed_html'],
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt',
        )

        # Translate the raw label values into class indices.
        type_id = self.type_label_map[sample['Type']]
        important_id = self.important_label_map[sample['Important']]

        # return_tensors='pt' adds a leading dimension of size 1; drop it.
        item = {key: encoded[key].squeeze(0) for key in ('input_ids', 'attention_mask')}
        item['type_label'] = torch.tensor(type_id, dtype=torch.long)
        item['important_label'] = torch.tensor(important_id, dtype=torch.long)
        return item

# 함수: 데이터를 분할하고 데이터셋 및 데이터로더 생성
def create_dataloaders(data, tokenizer, type_label_map, important_label_map, batch_size=8,
                       test_size=0.2, random_state=42, max_length=256):
    """Split ``data`` into train/validation sets and wrap each in a DataLoader.

    Args:
        data: DataFrame with 'processed_html', 'Type' and 'Important' columns.
        tokenizer: Tokenizer handed to each ``MultiTaskDataset``.
        type_label_map: Mapping from 'Type' value to class index.
        important_label_map: Mapping from 'Important' value to class index.
        batch_size: Batch size used by both loaders.
        test_size: Portion of ``data`` held out for validation, passed to
            ``train_test_split``.
        random_state: Seed for the split, so it is reproducible.
        max_length: Token length each sample is padded/truncated to.

    Returns:
        ``(train_loader, val_loader)``. Only the training loader shuffles.
    """
    # Stratify on Important so both splits keep the same 0/1 ratio.
    train_data, val_data = train_test_split(
        data, test_size=test_size, random_state=random_state, stratify=data['Important']
    )

    train_dataset = MultiTaskDataset(
        train_data, tokenizer, type_label_map, important_label_map, max_length=max_length
    )
    val_dataset = MultiTaskDataset(
        val_data, tokenizer, type_label_map, important_label_map, max_length=max_length
    )

    # Validation order is kept fixed so repeated evaluations are comparable.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader

# Build the data loaders for all three datasets, starting from freshly loaded CSVs.
csv_paths = ('train_data_4_1.csv', 'train_data_4_2.csv', 'train_data_4_3.csv')
data1, data2, data3 = [pd.read_csv(path, encoding='utf-8') for path in csv_paths]

# 전처리 및 레이블 매핑 생성
def preprocess_and_create_maps(data):
    """Clean ``data`` in place and build its label maps.

    Rows missing 'HTML', 'Important' or 'Type' are dropped, 'Important' is
    cast to int and 'Type' to str, and a 'processed_html' column is added.
    The frame passed in is modified and is also the frame returned.

    Args:
        data: DataFrame with 'HTML', 'Important' and 'Type' columns.

    Returns:
        ``(data, type_label_map, important_label_map)`` where the Type map
        assigns indices in order of first appearance and the Important map is
        ``{0: 0, 1: 1}``.
    """
    required_columns = ['HTML', 'Important', 'Type']
    data.dropna(subset=required_columns, inplace=True)

    data['Important'] = data['Important'].astype(int)
    data['Type'] = data['Type'].astype(str)

    def _clean(raw):
        # Replace every character outside the allowed set with a space, then
        # trim the ends; interior runs of spaces are left as they are.
        return re.sub('[^0-9a-zA-Zㄱ-ㅎ가-힣 ]', ' ', str(raw)).strip()

    data['processed_html'] = data['HTML'].apply(_clean)

    # Index the distinct Type values in the order they first occur.
    type_labels = data['Type'].unique().tolist()
    type_label_map = dict(zip(type_labels, range(len(type_labels))))
    important_label_map = {0: 0, 1: 1}

    return data, type_label_map, important_label_map

# Preprocess every frame and build its label maps.
prepared = [preprocess_and_create_maps(frame) for frame in (data1, data2, data3)]
data1, type_label_map1, important_label_map1 = prepared[0]
data2, type_label_map2, important_label_map2 = prepared[1]
data3, type_label_map3, important_label_map3 = prepared[2]

# Create a train/validation loader pair for each dataset.
loaders = [
    create_dataloaders(frame, tokenizer, type_map, important_map)
    for frame, type_map, important_map in prepared
]
(train_loader1, val_loader1), (train_loader2, val_loader2), (train_loader3, val_loader3) = loaders

print("각 데이터셋에 대한 데이터로더가 생성되었습니다.")




각 데이터셋에 대한 데이터로더가 생성되었습니다.


# 모델정의

In [6]:
import torch.nn as nn


class MultiTaskModel(nn.Module):
    """Shared encoder with two linear classification heads (Type and Important).

    Both heads read the same dropout-regularized hidden state taken from the
    first sequence position of the encoder output.
    """

    def __init__(self, model_name, num_type_labels, num_important_labels, dropout_ratio=0.1):
        """Load the pretrained encoder and create the two heads.

        Args:
            model_name: Checkpoint name or path passed to
                ``AutoModel.from_pretrained``.
            num_type_labels: Number of output classes of the Type head.
            num_important_labels: Number of output classes of the Important head.
            dropout_ratio: Dropout probability applied before both heads.
        """
        # Local import so this definition does not depend on another cell
        # having imported AutoModel beforehand.
        from transformers import AutoModel

        super().__init__()
        self.roberta = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(dropout_ratio)

        hidden_size = self.roberta.config.hidden_size
        # Output layer for Type classification.
        self.type_classifier = nn.Linear(hidden_size, num_type_labels)
        # Output layer for Important classification.
        self.important_classifier = nn.Linear(hidden_size, num_important_labels)

    def forward(self, input_ids, attention_mask):
        """Return ``(type_logits, important_logits)`` for a batch.

        Args:
            input_ids: Token id tensor forwarded to the encoder.
            attention_mask: Attention mask tensor forwarded to the encoder.
        """
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)

        # Use the hidden state at sequence position 0 as the sequence summary.
        cls_output = self.dropout(outputs.last_hidden_state[:, 0, :])

        type_logits = self.type_classifier(cls_output)
        important_logits = self.important_classifier(cls_output)

        return type_logits, important_logits


In [12]:
import torch.nn as nn

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Head sizes for each dataset are the sizes of its label maps.
num_type_labels1, num_important_labels1 = len(type_label_map1), len(important_label_map1)
num_type_labels2, num_important_labels2 = len(type_label_map2), len(important_label_map2)
num_type_labels3, num_important_labels3 = len(type_label_map3), len(important_label_map3)

# One model per dataset, each moved to the selected device
# (Module.to returns the module itself).
model1 = MultiTaskModel('xlm-roberta-base', num_type_labels1, num_important_labels1).to(device)
model2 = MultiTaskModel('xlm-roberta-base', num_type_labels2, num_important_labels2).to(device)
model3 = MultiTaskModel('xlm-roberta-base', num_type_labels3, num_important_labels3).to(device)

# Loss function and one AdamW optimizer per model.
criterion = nn.CrossEntropyLoss()

optimizer1, optimizer2, optimizer3 = [
    torch.optim.AdamW(net.parameters(), lr=2e-5) for net in (model1, model2, model3)
]




# 학습 함수 정의

In [13]:
import os
import torch
from tqdm import tqdm
from torch.nn.utils import clip_grad_norm_
from transformers import get_linear_schedule_with_warmup

def _multitask_batch_step(model, batch, criterion, device):
    """Run one batch through ``model`` and return the loss, logits and labels.

    The loss is the unweighted sum of ``criterion`` applied to the Type head
    and to the Important head.
    """
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    type_labels = batch['type_label'].to(device)
    important_labels = batch['important_label'].to(device)

    type_logits, important_logits = model(input_ids, attention_mask)
    loss = criterion(type_logits, type_labels) + criterion(important_logits, important_labels)
    return loss, type_logits, important_logits, type_labels, important_labels


def train_and_evaluate(model, train_loader, val_loader, criterion, optimizer, device, num_epochs, save_path, patience=5):
    """Train ``model`` with per-epoch validation, checkpointing and early stopping.

    After every epoch the average training loss, average validation loss and
    the validation accuracy of both heads are printed. Whenever the validation
    loss improves on the best value so far, ``model.state_dict()`` is written
    to ``save_path``. Training stops early after ``patience`` consecutive
    epochs without improvement. On return the model holds the weights of the
    last epoch run, not necessarily the saved best ones.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` and
            returning ``(type_logits, important_logits)``.
        train_loader: Iterable of batches with 'input_ids', 'attention_mask',
            'type_label' and 'important_label' entries.
        val_loader: Iterable of batches with the same entries.
        criterion: Loss applied to each head; the two results are summed.
        optimizer: Optimizer updating ``model``'s parameters.
        device: Device the batches are moved to.
        num_epochs: Maximum number of epochs.
        save_path: File the best ``state_dict`` is saved to. May be a bare
            file name with no directory part.
        patience: Number of non-improving epochs tolerated before stopping.

    Returns:
        None.
    """
    best_val_loss = float('inf')  # lowest validation loss seen so far
    patience_counter = 0  # epochs since the last improvement

    # The schedule spans all requested epochs, even if early stopping ends
    # training sooner; the first 10% of the steps are warm-up.
    total_steps = len(train_loader) * num_epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(0.1 * total_steps),
        num_training_steps=total_steps
    )

    for epoch in range(1, num_epochs + 1):
        print(f"Epoch {epoch}/{num_epochs}")

        # ---- Training phase ----
        model.train()
        total_train_loss = 0
        for batch in tqdm(train_loader, desc="Training"):
            optimizer.zero_grad()

            loss, *_ = _multitask_batch_step(model, batch, criterion, device)
            loss.backward()

            # Clip the global gradient norm to keep updates bounded.
            clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()
            scheduler.step()  # the schedule advances once per batch

            total_train_loss += loss.item()
        avg_train_loss = total_train_loss / len(train_loader)

        # ---- Validation phase ----
        model.eval()
        total_val_loss = 0
        correct_type = 0
        correct_important = 0
        total_samples = 0
        with torch.no_grad():
            for batch in tqdm(val_loader, desc="Validation"):
                loss, type_logits, important_logits, type_labels, important_labels = (
                    _multitask_batch_step(model, batch, criterion, device)
                )
                total_val_loss += loss.item()

                # The predicted class is the index of the largest logit.
                type_preds = type_logits.argmax(dim=1)
                important_preds = important_logits.argmax(dim=1)
                correct_type += (type_preds == type_labels).sum().item()
                correct_important += (important_preds == important_labels).sum().item()
                total_samples += type_labels.size(0)
        avg_val_loss = total_val_loss / len(val_loader)
        type_accuracy = correct_type / total_samples
        important_accuracy = correct_important / total_samples

        print(f"Training Loss: {avg_train_loss:.4f}")
        print(f"Validation Loss: {avg_val_loss:.4f}")
        print(f"Type Accuracy: {type_accuracy * 100:.2f}%")
        print(f"Important Accuracy: {important_accuracy * 100:.2f}%\n")

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
            # os.makedirs('') raises FileNotFoundError, so only create the
            # directory when save_path actually contains one.
            save_dir = os.path.dirname(save_path)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            torch.save(model.state_dict(), save_path)
            print(f"Best model saved with validation loss: {avg_val_loss:.4f}")
        else:
            patience_counter += 1
            print(f"No improvement in validation loss for {patience_counter} epochs.")

        if patience_counter >= patience:
            print("Early stopping triggered.")
            break

# 사용 예시:
#num_epochs = 40
#save_path = './output/best_model.pth'
#train_and_evaluate(model, train_loader, val_loader, criterion, optimizer, device, num_epochs, save_path)

In [14]:
import os

# Training parameters.
num_epochs = 20
save_paths = ['./output/best_model_data1.pth', './output/best_model_data2.pth', './output/best_model_data3.pth']

# One entry per dataset: banner, model, optimizer, loaders and checkpoint path.
training_runs = [
    ("Training on Dataset 1", model1, optimizer1, train_loader1, val_loader1, save_paths[0]),
    ("Training on Dataset 2", model2, optimizer2, train_loader2, val_loader2, save_paths[1]),
    ("Training on Dataset 3", model3, optimizer3, train_loader3, val_loader3, save_paths[2]),
]

# Train the models one after another, each on its own dataset.
for banner, model, optimizer, train_loader, val_loader, save_path in training_runs:
    print(banner)
    train_and_evaluate(model, train_loader, val_loader, criterion, optimizer, device, num_epochs, save_path)


Training on Dataset 1
Epoch 1/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 156/156 [00:25<00:00,  6.06it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 39/39 [00:01<00:00, 29.58it/s]


Training Loss: 1.6255
Validation Loss: 0.7125
Type Accuracy: 82.69%
Important Accuracy: 93.91%

Best model saved with validation loss: 0.7125
Epoch 2/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 156/156 [00:24<00:00,  6.24it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 39/39 [00:01<00:00, 30.40it/s]


Training Loss: 0.4872
Validation Loss: 0.4041
Type Accuracy: 92.63%
Important Accuracy: 98.40%

Best model saved with validation loss: 0.4041
Epoch 3/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 156/156 [00:24<00:00,  6.27it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 39/39 [00:01<00:00, 30.11it/s]


Training Loss: 0.2482
Validation Loss: 0.5186
Type Accuracy: 93.27%
Important Accuracy: 98.08%

No improvement in validation loss for 1 epochs.
Epoch 4/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 156/156 [00:24<00:00,  6.30it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 39/39 [00:01<00:00, 30.29it/s]


Training Loss: 0.2038
Validation Loss: 0.4799
Type Accuracy: 93.59%
Important Accuracy: 98.40%

No improvement in validation loss for 2 epochs.
Epoch 5/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 156/156 [00:24<00:00,  6.30it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 39/39 [00:01<00:00, 30.24it/s]


Training Loss: 0.1716
Validation Loss: 0.6101
Type Accuracy: 93.59%
Important Accuracy: 97.12%

No improvement in validation loss for 3 epochs.
Epoch 6/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 156/156 [00:24<00:00,  6.31it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 39/39 [00:01<00:00, 30.43it/s]


Training Loss: 0.1532
Validation Loss: 0.4390
Type Accuracy: 94.87%
Important Accuracy: 98.40%

No improvement in validation loss for 4 epochs.
Epoch 7/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 156/156 [00:24<00:00,  6.28it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 39/39 [00:01<00:00, 30.02it/s]


Training Loss: 0.1386
Validation Loss: 0.4703
Type Accuracy: 94.23%
Important Accuracy: 98.72%

No improvement in validation loss for 5 epochs.
Early stopping triggered.
Training on Dataset 2
Epoch 1/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 234/234 [03:27<00:00,  1.13it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 59/59 [00:01<00:00, 30.77it/s]


Training Loss: 1.3620
Validation Loss: 0.3953
Type Accuracy: 87.61%
Important Accuracy: 97.01%

Best model saved with validation loss: 0.3953
Epoch 2/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 234/234 [03:41<00:00,  1.05it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 59/59 [00:01<00:00, 30.48it/s]


Training Loss: 0.2968
Validation Loss: 0.3520
Type Accuracy: 94.44%
Important Accuracy: 99.15%

Best model saved with validation loss: 0.3520
Epoch 3/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 234/234 [03:43<00:00,  1.05it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 59/59 [00:02<00:00, 27.98it/s]


Training Loss: 0.2276
Validation Loss: 0.2731
Type Accuracy: 96.79%
Important Accuracy: 99.36%

Best model saved with validation loss: 0.2731
Epoch 4/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 234/234 [03:47<00:00,  1.03it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 59/59 [00:02<00:00, 28.39it/s]


Training Loss: 0.1629
Validation Loss: 0.2560
Type Accuracy: 97.65%
Important Accuracy: 98.72%

Best model saved with validation loss: 0.2560
Epoch 5/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 234/234 [03:51<00:00,  1.01it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 59/59 [00:02<00:00, 28.15it/s]


Training Loss: 0.1654
Validation Loss: 0.2250
Type Accuracy: 97.44%
Important Accuracy: 98.93%

Best model saved with validation loss: 0.2250
Epoch 6/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 234/234 [03:33<00:00,  1.10it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 59/59 [00:02<00:00, 29.26it/s]


Training Loss: 0.1306
Validation Loss: 0.2231
Type Accuracy: 97.44%
Important Accuracy: 99.36%

Best model saved with validation loss: 0.2231
Epoch 7/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 234/234 [03:29<00:00,  1.11it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 59/59 [00:02<00:00, 29.24it/s]


Training Loss: 0.1323
Validation Loss: 0.2140
Type Accuracy: 97.22%
Important Accuracy: 99.15%

Best model saved with validation loss: 0.2140
Epoch 8/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 234/234 [03:29<00:00,  1.12it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 59/59 [00:02<00:00, 29.31it/s]


Training Loss: 0.1136
Validation Loss: 0.2168
Type Accuracy: 97.65%
Important Accuracy: 99.36%

No improvement in validation loss for 1 epochs.
Epoch 9/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 234/234 [03:29<00:00,  1.12it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 59/59 [00:02<00:00, 29.00it/s]


Training Loss: 0.0941
Validation Loss: 0.2536
Type Accuracy: 97.01%
Important Accuracy: 99.15%

No improvement in validation loss for 2 epochs.
Epoch 10/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 234/234 [03:36<00:00,  1.08it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 59/59 [00:02<00:00, 28.33it/s]


Training Loss: 0.0903
Validation Loss: 0.2231
Type Accuracy: 97.22%
Important Accuracy: 99.15%

No improvement in validation loss for 3 epochs.
Epoch 11/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 234/234 [03:34<00:00,  1.09it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 59/59 [00:02<00:00, 28.31it/s]


Training Loss: 0.0881
Validation Loss: 0.2470
Type Accuracy: 97.22%
Important Accuracy: 99.15%

No improvement in validation loss for 4 epochs.
Epoch 12/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 234/234 [03:34<00:00,  1.09it/s]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 59/59 [00:02<00:00, 28.39it/s]


Training Loss: 0.0723
Validation Loss: 0.2482
Type Accuracy: 97.22%
Important Accuracy: 99.15%

No improvement in validation loss for 5 epochs.
Early stopping triggered.
Training on Dataset 3
Epoch 1/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 312/312 [13:41<00:00,  2.63s/it]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 78/78 [00:02<00:00, 29.31it/s]


Training Loss: 1.1642
Validation Loss: 0.1651
Type Accuracy: 94.87%
Important Accuracy: 99.52%

Best model saved with validation loss: 0.1651
Epoch 2/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 312/312 [13:42<00:00,  2.64s/it]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 78/78 [00:02<00:00, 27.87it/s]


Training Loss: 0.2060
Validation Loss: 0.1412
Type Accuracy: 97.92%
Important Accuracy: 99.84%

Best model saved with validation loss: 0.1412
Epoch 3/20


Training: 100%|██████████████████████████████████████████████████████████████████████| 312/312 [14:18<00:00,  2.75s/it]
Validation: 100%|██████████████████████████████████████████████████████████████████████| 78/78 [00:02<00:00, 29.12it/s]


Training Loss: 0.1584
Validation Loss: 0.1313
Type Accuracy: 98.24%
Important Accuracy: 99.84%

Best model saved with validation loss: 0.1313
Epoch 4/20


Training:  76%|█████████████████████████████████████████████████████▏                | 237/312 [10:22<03:16,  2.62s/it]


KeyboardInterrupt: 

In [15]:
import torch
from transformers import AutoTokenizer
from tqdm import tqdm

# 모델 초기화 (모델 클래스 정의가 필요)
# 예: model = MultiTaskModel(...)
# model.to(device)

# Initialize the tokenizer for the 'xlm-roberta-base' checkpoint; the
# validation function below reads this module-level name to decode inputs.
tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')

# 검증 함수
def test_model_on_validation(model, val_loader, criterion, device, save_path):
    """Load a saved checkpoint into ``model`` and report validation metrics.

    Prints the average validation loss and the accuracy of both heads, then,
    for the first sample of every batch, the decoded input text with its true
    and predicted label indices. Decoding uses the module-level ``tokenizer``.

    Args:
        model: Module whose architecture matches the checkpoint; called as
            ``model(input_ids, attention_mask)``.
        val_loader: Iterable of batches with 'input_ids', 'attention_mask',
            'type_label' and 'important_label' entries.
        criterion: Loss applied to each head; the two results are summed.
        device: Device used for the checkpoint tensors, the model and the batches.
        save_path: Path of a file containing the model's ``state_dict``.

    Returns:
        None.
    """
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only
    # machine (and vice versa). The file holds the bare state_dict, not a
    # wrapper dict with a 'model_state_dict' key.
    checkpoint = torch.load(save_path, map_location=device)
    model.load_state_dict(checkpoint)

    # Make sure the model sits on the same device the batches are moved to.
    model.to(device)
    model.eval()

    total_val_loss = 0
    correct_type = 0
    correct_important = 0
    total = 0

    # One comparison record per batch (first sample only).
    results = []

    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Testing on Validation Set"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            type_labels = batch['type_label'].to(device)
            important_labels = batch['important_label'].to(device)

            type_logits, important_logits = model(input_ids, attention_mask)

            loss_type = criterion(type_logits, type_labels)
            loss_important = criterion(important_logits, important_labels)
            total_val_loss += (loss_type + loss_important).item()

            # The predicted class is the index of the largest logit.
            type_preds = type_logits.argmax(dim=1)
            important_preds = important_logits.argmax(dim=1)

            correct_type += (type_preds == type_labels).sum().item()
            correct_important += (important_preds == important_labels).sum().item()
            total += type_labels.size(0)

            # Keep the first sample of the batch for the printed comparison.
            decoded_input = tokenizer.decode(input_ids[0], skip_special_tokens=True)
            results.append({
                'input_text': decoded_input,
                'true_type_label': type_labels[0].item(),
                'predicted_type': type_preds[0].item(),
                'true_important_label': important_labels[0].item(),
                'predicted_important': important_preds[0].item()
            })

    avg_val_loss = total_val_loss / len(val_loader)
    type_accuracy = correct_type / total
    important_accuracy = correct_important / total

    print(f"Validation Loss: {avg_val_loss:.4f}")
    print(f"Type Accuracy: {type_accuracy * 100:.2f}%")
    print(f"Important Accuracy: {important_accuracy * 100:.2f}%\n")

    for i, result in enumerate(results):
        print(f"Batch {i+1} - First Input:")
        print(f"Input Text: {result['input_text']}")
        print(f"True Type Label: {result['true_type_label']}, Predicted Type: {result['predicted_type']}")
        print(f"True Important Label: {result['true_important_label']}, Predicted Important: {result['predicted_important']}")
        print("-" * 50)

# 사용 예시:
# train_data_loader와 val_loader는 DataLoader로 설정되어 있어야 합니다.
# model = YourModelClass(...)  # 모델 클래스 정의 후 초기화
# criterion = nn.CrossEntropyLoss()
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# # 테스트 함수 호출
# test_model_on_validation(model, val_loader, criterion, device, './output/best_model.pth')


In [16]:
import torch
from torch import nn

# Device and loss function used for evaluation.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

# Models, validation loaders and checkpoint paths, index-aligned per dataset.
models = [model1, model2, model3]
val_loaders = [val_loader1, val_loader2, val_loader3]
save_paths = ['./output/best_model_data1.pth', './output/best_model_data2.pth', './output/best_model_data3.pth']

# Evaluate each model in turn with its own loader and checkpoint.
i = 0
for model, val_loader, save_path in zip(models, val_loaders, save_paths):
    i += 1
    print(f"Evaluating Model {i}")
    test_model_on_validation(model, val_loader, criterion, device, save_path)


Evaluating Model 1


Testing on Validation Set: 100%|███████████████████████████████████████████████████████| 39/39 [00:01<00:00, 27.26it/s]


Validation Loss: 0.4041
Type Accuracy: 92.63%
Important Accuracy: 98.40%

Batch 1 - First Input:
Input Text: form id lodging search form name lodging search form method GET action Hotel Search class uitk form has required indicator
True Type Label: 2, Predicted Type: 0
True Important Label: 1, Predicted Important: 1
--------------------------------------------------
Batch 2 - First Input:
Input Text: div class ac4a7896c7
True Type Label: 3, Predicted Type: 3
True Important Label: 0, Predicted Important: 0
--------------------------------------------------
Batch 3 - First Input:
Input Text: div class login
True Type Label: 3, Predicted Type: 3
True Important Label: 0, Predicted Important: 0
--------------------------------------------------
Batch 4 - First Input:
Input Text: a href https en dict naver com mini main class RightWidget module link search FctCQ aria label  
True Type Label: 1, Predicted Type: 1
True Important Label: 1, Predicted Important: 1
--------------------------------

Testing on Validation Set: 100%|███████████████████████████████████████████████████████| 59/59 [00:02<00:00, 26.85it/s]


Validation Loss: 0.2140
Type Accuracy: 97.22%
Important Accuracy: 99.15%

Batch 1 - First Input:
Input Text: a target blank data testid web core property card href https www booking com hotel jp candeo hotels osaka the tower ko html label gen173nr 1BCAEoggI46AdIM1gEaH2IAQGYARe4ARfIAQzYAQHoAQGIAgGoAgO4AoKCl7kGwAIB0gIkMjdkMGMzYzktMGJjMi00MThiLWFlNGQtOWIzNmMwYTU2MTUw2AIF4AIB amp sid 1574076321d6820a777c8110a9d39d5e amp aid 304142 amp ucfs 1 amp arphpl 1 amp checkin 2024 11 08 amp checkout 2024 11 10 amp group adults 2 amp req adults 2 amp no rooms 1 amp group children 0 amp req children 0 amp srpvid 2b512ac18adf00e1 amp srepoch 1730527494 class a83ed08757 fe04f404b8
True Type Label: 1, Predicted Type: 1
True Important Label: 1, Predicted Important: 1
--------------------------------------------------
Batch 2 - First Input:
Input Text: div class flash js transform notice hidden
True Type Label: 3, Predicted Type: 3
True Important Label: 0, Predicted Important: 0
---------------------------

Testing on Validation Set: 100%|███████████████████████████████████████████████████████| 78/78 [00:03<00:00, 24.98it/s]

Validation Loss: 0.1313
Type Accuracy: 98.24%
Important Accuracy: 99.84%

Batch 1 - First Input:
Input Text: div class
True Type Label: 3, Predicted Type: 3
True Important Label: 0, Predicted Important: 0
--------------------------------------------------
Batch 2 - First Input:
Input Text: div class search box id searchBox
True Type Label: 3, Predicted Type: 3
True Important Label: 0, Predicted Important: 0
--------------------------------------------------
Batch 3 - First Input:
Input Text: div class swiper slide
True Type Label: 3, Predicted Type: 3
True Important Label: 0, Predicted Important: 0
--------------------------------------------------
Batch 4 - First Input:
Input Text: div class c624d7469d f034cf5568 c69ad9b0c2 c62ffa0b45 a3214e5942 style bui stack spaced gap s 1
True Type Label: 3, Predicted Type: 3
True Important Label: 0, Predicted Important: 0
--------------------------------------------------
Batch 5 - First Input:
Input Text: div class c624d7469d a0e60936ad e8f9ae2b


