# 사전 설치

In [None]:
!pip install wordcloud
!pip install konlpy
!pip install torch
!sudo apt-get update
!sudo apt-get install -y fonts-nanum fonts-noto-cjk
!fc-list | grep -i "nanum\|noto"
!sudo apt-get update
!sudo apt-get install openjdk-11-jdk -y

# Source Code

In [36]:
import torch
import torch.nn as nn
import torch.optim as optim

import math
import numpy as np
import pandas as pd

import re
import konlpy
from konlpy.tag import Okt
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from pathlib import Path

from typing import Optional, Dict

from sklearn.metrics import f1_score, accuracy_score
from tqdm.auto import tqdm

import warnings
warnings.filterwarnings("ignore")

In [None]:
url = '../data/'
train = pd.read_csv(url+'train.csv')

# 지연님 생성 데이터
a = pd.read_csv(url+'general_dialog1.csv').rename(columns={'dialogue': 'conversation'})
a['class'] = '일반 대화'
a['idx'] = range(0,len(a))
a = a[['idx','class','conversation']]

# 유찬님 생성 데이터
b = pd.read_csv(url+'general_dialog2.csv')
b = b[b['class'] == "일반 대화"] # class에 일반대화가 아닌 conversation이 적혀있어 제거

train = pd.concat([train, a, b], axis=0,ignore_index=True).drop(columns='idx')
train.to_csv(url+"train_w_general_conv.csv", index=False)

In [37]:
raw_data = pd.read_csv('./data/train_w_general_conv.csv')
raw_data.head()

Unnamed: 0.1,Unnamed: 0,class,conversation
0,0,협박 대화,지금 너 스스로를 죽여달라고 애원하는 것인가?\n 아닙니다. 죄송합니다.\n 죽을 ...
1,1,협박 대화,길동경찰서입니다.\n9시 40분 마트에 폭발물을 설치할거다.\n네?\n똑바로 들어 ...
2,2,기타 괴롭힘 대화,너 되게 귀여운거 알지? 나보다 작은 남자는 첨봤어.\n그만해. 니들 놀리는거 재미...
3,3,갈취 대화,어이 거기\n예??\n너 말이야 너. 이리 오라고\n무슨 일.\n너 옷 좋아보인다?...
4,4,갈취 대화,저기요 혹시 날이 너무 뜨겁잖아요? 저희 회사에서 이 선크림 파는데 한 번 손등에 ...


In [38]:
raw_data = raw_data.drop('Unnamed: 0', axis=1)

In [39]:
raw_data.head()

Unnamed: 0,class,conversation
0,협박 대화,지금 너 스스로를 죽여달라고 애원하는 것인가?\n 아닙니다. 죄송합니다.\n 죽을 ...
1,협박 대화,길동경찰서입니다.\n9시 40분 마트에 폭발물을 설치할거다.\n네?\n똑바로 들어 ...
2,기타 괴롭힘 대화,너 되게 귀여운거 알지? 나보다 작은 남자는 첨봤어.\n그만해. 니들 놀리는거 재미...
3,갈취 대화,어이 거기\n예??\n너 말이야 너. 이리 오라고\n무슨 일.\n너 옷 좋아보인다?...
4,갈취 대화,저기요 혹시 날이 너무 뜨겁잖아요? 저희 회사에서 이 선크림 파는데 한 번 손등에 ...


In [40]:
raw_data.groupby('class').count()

Unnamed: 0_level_0,conversation
class,Unnamed: 1_level_1
갈취 대화,981
기타 괴롭힘 대화,1094
일반 대화,1000
직장 내 괴롭힘 대화,979
협박 대화,896


In [41]:
okt = Okt()
stop_words = {"하다"}

In [42]:
def preprocess_sentence(sentence, stop_words):
    # 1. 양쪽 공백 제거
    sentence = sentence.strip()

    # 2. 특수문자 및 이모지 제거 (한글, 영어, 숫자, 기본 구두점만 허용)
    sentence = re.sub(r"[^가-힣0-9a-zA-Z.,!?~\s]", " ", sentence)

    # 3. 연속된 공백 하나로 축소 및 줄 바꿈 무시
    sentence = re.sub(r"\s+", " ", sentence)
    sentence = re.sub(r"\n", " ", sentence)

    # 4. 문장 부호 앞뒤로 공백 추가 (토큰 구분을 위함)
    sentence = re.sub(r"([?.!,~])", r" \1 ", sentence)
    sentence = re.sub(r'\s{2,}', ' ', sentence)
    
    # 형태소 분석 (단어, 품사)
    include_tags = {"Noun", "Verb", "Adjective", "Exclamation", "Adverb"}
    pos_tags = okt.pos(sentence, stem=True, norm=True)
    # 원하는 품사만 추출
    tokens = [
        word for word, tag in pos_tags
        if tag in include_tags and len(word) > 1 and word not in stop_words
    ]
        
    return tokens

In [43]:
sample_text = raw_data['conversation'][0]
tokens = preprocess_sentence(sample_text, stop_words)
print(tokens)

['지금', '스스로', '죽이다', '달라', '애원', '아니다', '죄송하다', '혼자', '죽지', '우리', '사건', '말리', '진짜', '죽이다', '버리다', '싶다', '정말', '선택', '죽다', '가족', '죽여주다', '죄송하다', '정말', '선택', '없다', '선택', '가족', '모조리', '죽이다', '버리다', '선택', '한번', '도와주다', '그냥', '죽이다', '버리다', '이의', '없다', '제발', '도와주다']


In [44]:
raw_data['tokens'] = raw_data['conversation'].apply(lambda x: preprocess_sentence(str(x), stop_words))
raw_data.head()

Unnamed: 0,class,conversation,tokens
0,협박 대화,지금 너 스스로를 죽여달라고 애원하는 것인가?\n 아닙니다. 죄송합니다.\n 죽을 ...,"[지금, 스스로, 죽이다, 달라, 애원, 아니다, 죄송하다, 혼자, 죽지, 우리, ..."
1,협박 대화,길동경찰서입니다.\n9시 40분 마트에 폭발물을 설치할거다.\n네?\n똑바로 들어 ...,"[길동, 경찰서, 이다, 마트, 폭발물, 설치, 똑바로, 들다, 한번, 얘기, 장난..."
2,기타 괴롭힘 대화,너 되게 귀여운거 알지? 나보다 작은 남자는 첨봤어.\n그만해. 니들 놀리는거 재미...,"[되게, 귀엽다, 작다, 남자, 보다, 그만하다, 놀리다, 재미없다, 지영, 이지,..."
3,갈취 대화,어이 거기\n예??\n너 말이야 너. 이리 오라고\n무슨 일.\n너 옷 좋아보인다?...,"[어이, 거기, 이리, 오라, 무슨, 좋다, 보이다, 있다, 보다, 아니다, 없다,..."
4,갈취 대화,저기요 혹시 날이 너무 뜨겁잖아요? 저희 회사에서 이 선크림 파는데 한 번 손등에 ...,"[저기, 혹시, 너무, 뜨겁다, 저희, 회사, 선크림, 팔다, 손등, 발라, 보다,..."


In [45]:
# ===== 1) Vocab 빌드 =====
from collections import Counter
from typing import List, Tuple, Dict, Iterable
import json

SPECIALS = ["<pad>", "<unk>", "<cls>", "<sep>"]

def build_vocab(
    token_lists: Iterable[List[str]],
    min_freq: int = 2,
    max_size: int = 30000,
    specials: List[str] = SPECIALS,
) -> Tuple[Dict[str, int], List[str], Counter]:
    """
    token_lists: 각 샘플의 토큰 리스트(iterable of list[str])
    min_freq: 최소 등장 빈도 미만 토큰은 제외
    max_size: special 포함 전체 vocab 상한 (None이면 제한 없음)
    returns: (stoi, itos, counter)
    """
    counter = Counter()
    for toks in token_lists:
        counter.update(toks)

    # 빈도 필터 + 상위 max_size-특수토큰 만큼
    most = [tok for tok, cnt in counter.most_common() if cnt >= min_freq]
    if max_size is not None:
        cap = max_size - len(specials)
        most = most[:max(0, cap)]

    itos = list(specials) + most
    stoi = {tok: i for i, tok in enumerate(itos)}
    return stoi, itos, counter

def save_vocab(path: str, itos: List[str]) -> None:
    with open(path, "w", encoding="utf-8") as f:
        json.dump(itos, f, ensure_ascii=False)

def load_vocab(path: str) -> Tuple[Dict[str, int], List[str]]:
    with open(path, "r", encoding="utf-8") as f:
        itos = json.load(f)
    stoi = {tok: i for i, tok in enumerate(itos)}
    return stoi, itos

In [46]:
# ===== 2) 토큰 → ID 인코딩 =====
def encode_tokens(
    tokens: List[str],
    stoi: Dict[str, int],
    max_len: int = 256,
    add_cls: bool = True,
    add_sep: bool = True,
) -> Tuple[List[int], List[int]]:
    """
    tokens -> input_ids, attention_mask
    - OOV는 <unk>
    - <cls>, <sep>를 옵션으로 앞/뒤에 부착
    - max_len을 초과하면 적절히 자름
    """
    pad_id = stoi["<pad>"]
    unk_id = stoi["<unk>"]
    cls_id = stoi.get("<cls>")
    sep_id = stoi.get("<sep>")

    ids = [stoi.get(t, unk_id) for t in tokens]

    # 길이 계산 (cls/sep 포함해서 자르기)
    extra = (1 if add_cls else 0) + (1 if add_sep else 0)
    keep = max_len - extra
    keep = max(0, keep)
    ids = ids[:keep]

    if add_cls:
        ids = [cls_id] + ids
    if add_sep:
        ids = ids + [sep_id]

    attn = [1] * len(ids)
    return ids, attn

In [47]:
# ===== 3) 배치 패딩(collate) =====
import torch

def collate_batch(
    batch,
    pad_id: int,
):
    """
    batch: [{"input_ids": List[int], "attention_mask": List[int], "label": int}, ...]
    """
    bs = len(batch)
    maxlen = max(len(x["input_ids"]) for x in batch)
    input_ids = torch.full((bs, maxlen), pad_id, dtype=torch.long)
    attention_mask = torch.zeros((bs, maxlen), dtype=torch.long)
    labels = torch.tensor([x["label"] for x in batch], dtype=torch.long)

    for i, x in enumerate(batch):
        L = len(x["input_ids"])
        input_ids[i, :L] = torch.tensor(x["input_ids"], dtype=torch.long)
        attention_mask[i, :L] = torch.tensor(x["attention_mask"], dtype=torch.long)

    return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}


In [48]:
# ===== 4) 예시 파이프라인 (라벨 매핑 포함) =====
# 4-1) tokens 컬럼이 없다면 먼저 생성
# raw_data['tokens'] = raw_data['conversation'].apply(lambda s: preprocess_sentence(str(s), stop_words))

# 4-2) 라벨 매핑
labels = sorted(raw_data["class"].unique().tolist())
label2id = {
    "협박 대화": 0,
    "갈취 대화": 1,
    "직장 내 괴롭힘 대화": 2,
    "기타 괴롭힘 대화": 3,
    "일반 대화": 4,
}
id2label = {v: k for k, v in label2id.items()}

# 4-3) vocab 빌드
stoi, itos, counter = build_vocab(raw_data["tokens"], min_freq=1, max_size=20000)
pad_id = stoi["<pad>"]

# 4-4) 인코딩 (train/valid 분할은 이미 되어있다고 가정하거나 아래처럼 간단 분할)
from sklearn.model_selection import train_test_split
train_df, valid_df = train_test_split(raw_data, test_size=0.2, random_state=42, stratify=raw_data["class"])

def encode_row(row, max_len=256):
    ids, attn = encode_tokens(row["tokens"], stoi, max_len=max_len, add_cls=True, add_sep=True)
    return {
        "input_ids": ids,
        "attention_mask": attn,
        "label": label2id[row["class"]],
    }

train_records = [encode_row(r) for _, r in train_df.iterrows()]
valid_records = [encode_row(r) for _, r in valid_df.iterrows()]

# 4-5) PyTorch Dataset/Dataloader
from torch.utils.data import Dataset, DataLoader

class SimpleListDataset(Dataset):
    def __init__(self, records):
        self.records = records
    def __len__(self):
        return len(self.records)
    def __getitem__(self, idx):
        return self.records[idx]

train_ds = SimpleListDataset(train_records)
valid_ds = SimpleListDataset(valid_records)

train_loader = DataLoader(train_ds, batch_size=16, shuffle=True,
                          collate_fn=lambda b: collate_batch(b, pad_id))
valid_loader = DataLoader(valid_ds, batch_size=32, shuffle=False,
                          collate_fn=lambda b: collate_batch(b, pad_id))

print(f"Vocab size: {len(itos)} | Labels: {label2id}")
print(next(iter(train_loader))["input_ids"].shape)  # (B, S)

Vocab size: 9439 | Labels: {'협박 대화': 0, '갈취 대화': 1, '직장 내 괴롭힘 대화': 2, '기타 괴롭힘 대화': 3, '일반 대화': 4}
torch.Size([16, 67])


# 모델 코드

In [49]:
# ----------------------------
# Positional Encoding (sin/cos)
# ----------------------------
class PositionalEncoding(nn.Module):
    def __init__(self, d_model: int, max_len: int = 512, dropout: float = 0.1):
        super().__init__()
        self.dropout = nn.Dropout(dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)  # (max_len,1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2, dtype=torch.float) * (-math.log(10000.0) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)  # (1, max_len, d_model)
        self.register_buffer("pe", pe)

        # 미세한 안정화용
        nn.init.zeros_(self.pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (B, S, E)
        x = x + self.pe[:, : x.size(1)]
        return self.dropout(x)


In [50]:
# ----------------------------
# Transformer Encoder Classifier
# ----------------------------
class TransformerClassifier(nn.Module):
    """
    순수 Transformer-Encoder 기반 문서/대화 분류기.
    - input_ids: (B, S) 토큰 인덱스
    - attention_mask: (B, S) 1=유효, 0=패딩
    """
    def __init__(
        self,
        vocab_size: int,
        num_classes: int,
        emb_dim: int = 256,
        nhead: int = 8,
        num_layers: int = 4,
        dim_feedforward: int = 512,
        max_len: int = 512,
        dropout: float = 0.1,
        pad_id: int = 0,
        use_cls_pool: bool = True,  # True면 첫 토큰(<cls>)을 문장 표현으로 사용, False면 마스크 평균
    ):
        super().__init__()
        assert emb_dim % nhead == 0, "emb_dim must be divisible by nhead"

        self.emb = nn.Embedding(vocab_size, emb_dim, padding_idx=pad_id)
        self.pos = PositionalEncoding(emb_dim, max_len=max_len, dropout=dropout)

        enc_layer = nn.TransformerEncoderLayer(
            d_model=emb_dim,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation="gelu",
            batch_first=False,  # 입력은 (S,B,E)
        )
        self.encoder = nn.TransformerEncoder(enc_layer, num_layers=num_layers)

        self.norm = nn.LayerNorm(emb_dim)
        self.classifier = nn.Linear(emb_dim, num_classes)

        self.emb_scale = math.sqrt(emb_dim)
        self.use_cls_pool = use_cls_pool

        # Xavier init (선택)
        nn.init.xavier_uniform_(self.classifier.weight)
        nn.init.zeros_(self.classifier.bias)

    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        return_repr: bool = False,
    ):
        # (B,S) -> (B,S,E)
        x = self.emb(input_ids) * self.emb_scale
        x = self.pos(x)                         # (B,S,E)
        x = x.transpose(0, 1)                   # (S,B,E)

        key_padding_mask = None
        if attention_mask is not None:
            key_padding_mask = (attention_mask == 0)  # True=mask

        x = self.encoder(x, src_key_padding_mask=key_padding_mask)  # (S,B,E)
        x = x.transpose(0, 1)                                       # (B,S,E)

        if self.use_cls_pool:
            sent_repr = x[:, 0, :]  # <cls> 위치
        else:
            if attention_mask is None:
                sent_repr = x.mean(dim=1)
            else:
                mask = attention_mask.unsqueeze(-1).float()         # (B,S,1)
                sent_repr = (x * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-6)

        sent_repr = self.norm(sent_repr)
        logits = self.classifier(sent_repr)

        if return_repr:
            return logits, sent_repr
        return logits



In [51]:
# ----------------------------
# 학습/평가 루프
# ----------------------------
def train_one_epoch(
    model: nn.Module,
    dataloader,
    optimizer: torch.optim.Optimizer,
    device: torch.device,
    class_weights: Optional[torch.Tensor] = None,
    grad_clip: float = 1.0,
    scheduler = None,
    use_amp: bool = True,
) -> Dict[str, float]:
    model.train()
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
    ce = nn.CrossEntropyLoss(weight=class_weights.to(device) if class_weights is not None else None)

    losses, all_preds, all_labels = [], [], []
    for batch in tqdm(dataloader, desc="train", leave=False):
        input_ids = batch["input_ids"].to(device)
        attn = batch.get("attention_mask")
        attn = attn.to(device) if attn is not None else None
        labels = batch["labels"].to(device)

        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=use_amp):
            logits = model(input_ids, attention_mask=attn)
            loss = ce(logits, labels)

        scaler.scale(loss).backward()
        if grad_clip is not None:
            scaler.unscale_(optimizer)
            nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        scaler.step(optimizer)
        scaler.update()
        if scheduler is not None:
            scheduler.step()

        losses.append(loss.item())
        all_preds += logits.argmax(dim=-1).detach().cpu().tolist()
        all_labels += labels.detach().cpu().tolist()

    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average="macro")
    return {"loss": sum(losses)/len(losses), "acc": acc, "f1_macro": f1}

@torch.no_grad()
def evaluate(
    model: nn.Module,
    dataloader,
    device: torch.device,
) -> Dict[str, float]:
    model.eval()
    ce = nn.CrossEntropyLoss()

    losses, all_preds, all_labels = [], [], []
    for batch in tqdm(dataloader, desc="eval", leave=False):
        input_ids = batch["input_ids"].to(device)
        attn = batch.get("attention_mask")
        attn = attn.to(device) if attn is not None else None
        labels = batch["labels"].to(device)

        logits = model(input_ids, attention_mask=attn)
        loss = ce(logits, labels)
        losses.append(loss.item())

        all_preds += logits.argmax(dim=-1).detach().cpu().tolist()
        all_labels += labels.detach().cpu().tolist()

    acc = accuracy_score(all_labels, all_preds) if all_labels else 0.0
    f1 = f1_score(all_labels, all_preds, average="macro") if all_labels else 0.0
    return {"loss": sum(losses)/len(losses), "acc": acc, "f1_macro": f1}

In [52]:
# ----------------------------
# 모델 팩토리 (간단 생성기)
# ----------------------------
def create_model(
    vocab_size: int,
    num_classes: int = 5,          # 협박0, 갈취1, 직장2, 기타3, 일반4
    pad_id: int = 0,
    emb_dim: int = 256,
    nhead: int = 8,
    num_layers: int = 4,
    dim_ff: int = 512,
    max_len: int = 512,
    dropout: float = 0.1,
    use_cls_pool: bool = True,
) -> nn.Module:
    return TransformerClassifier(
        vocab_size=vocab_size,
        num_classes=num_classes,
        emb_dim=emb_dim,
        nhead=nhead,
        num_layers=num_layers,
        dim_feedforward=dim_ff,
        max_len=max_len,
        dropout=dropout,
        pad_id=pad_id,
        use_cls_pool=use_cls_pool,
    )

In [53]:
# 가정: stoi, itos, train_loader, valid_loader, label2id 존재
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = create_model(
    vocab_size=len(itos),
    num_classes=5,                # 고정 매핑(협박0, 갈취1, 직장2, 기타3, 일반4)
    pad_id=stoi["<pad>"],
    emb_dim=256,
    nhead=8,
    num_layers=3,                 # 처음엔 3~4로 시작 추천
    dim_ff=512,
    max_len=256,                  # 인코딩에서 쓴 max_len과 동일하게
    dropout=0.1,
    use_cls_pool=True,
).to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4, weight_decay=1e-2)
scheduler = None  # 필요하면 CosineAnnealingLR 등 연결

In [54]:
# (선택) 클래스 가중치: train_df의 정수 라벨 리스트로 계산
from collections import Counter
train_labels = [rec["label"] for rec in train_records]  # 이전 단계 encode_records 기준
cnt = Counter(train_labels)
weights = torch.tensor([1.0 / max(cnt.get(i, 1), 1) for i in range(5)], dtype=torch.float)
weights = weights / weights.mean()  # 평균 1로 정규화
class_weights = weights

best_f1 = 0.0
epochs = 20
for ep in range(1, epochs+1):
    tr = train_one_epoch(model, train_loader, optimizer, device,
                         class_weights=class_weights, grad_clip=1.0, scheduler=scheduler, use_amp=True)
    va = evaluate(model, valid_loader, device)
    print(f"[{ep:02d}] train: {tr} | valid: {va}")

    if va["f1_macro"] > best_f1:
        best_f1 = va["f1_macro"]
        torch.save(model.state_dict(), "./"+str(best_f1)+"best_transformer_cls.pt")
        print("  ✔ saved best model (F1 ↑)")

train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[01] train: {'loss': 0.9301957825138685, 'acc': 0.6330808080808081, 'f1_macro': 0.6336061230290428} | valid: {'loss': 0.6416148658721678, 'acc': 0.7686868686868686, 'f1_macro': 0.764760563816296}
  ✔ saved best model (F1 ↑)


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[02] train: {'loss': 0.5191494069632984, 'acc': 0.8088383838383838, 'f1_macro': 0.8087058848352159} | valid: {'loss': 0.5456315480893658, 'acc': 0.8090909090909091, 'f1_macro': 0.8063818715696763}
  ✔ saved best model (F1 ↑)


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[03] train: {'loss': 0.4044243809691961, 'acc': 0.8547979797979798, 'f1_macro': 0.8543334950991003} | valid: {'loss': 0.4534387064556922, 'acc': 0.8474747474747475, 'f1_macro': 0.8454618880771354}
  ✔ saved best model (F1 ↑)


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[04] train: {'loss': 0.32468314080559196, 'acc': 0.8810606060606061, 'f1_macro': 0.8805851803268885} | valid: {'loss': 0.5495267904573872, 'acc': 0.8404040404040404, 'f1_macro': 0.8413948453163812}


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[05] train: {'loss': 0.2976029585190718, 'acc': 0.9027777777777778, 'f1_macro': 0.90256472296202} | valid: {'loss': 0.5579364064239687, 'acc': 0.8393939393939394, 'f1_macro': 0.8391638575799909}


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[06] train: {'loss': 0.24060421482652367, 'acc': 0.9212121212121213, 'f1_macro': 0.9209052975927889} | valid: {'loss': 0.5640041424382117, 'acc': 0.8515151515151516, 'f1_macro': 0.8505108721974295}
  ✔ saved best model (F1 ↑)


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[07] train: {'loss': 0.2294417011700364, 'acc': 0.9222222222222223, 'f1_macro': 0.921596288718266} | valid: {'loss': 0.6073578524012719, 'acc': 0.8535353535353535, 'f1_macro': 0.8528260642566723}
  ✔ saved best model (F1 ↑)


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[08] train: {'loss': 0.2036944322964935, 'acc': 0.9305555555555556, 'f1_macro': 0.9305423206299} | valid: {'loss': 0.7311634459803181, 'acc': 0.8313131313131313, 'f1_macro': 0.830800050553765}


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[09] train: {'loss': 0.19143817817060946, 'acc': 0.9361111111111111, 'f1_macro': 0.9360156091091394} | valid: {'loss': 0.7166833156539548, 'acc': 0.8494949494949495, 'f1_macro': 0.8478019869466866}


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[10] train: {'loss': 0.16468576415589328, 'acc': 0.9477272727272728, 'f1_macro': 0.9475904419700807} | valid: {'loss': 0.8618458595968062, 'acc': 0.8454545454545455, 'f1_macro': 0.8440827764467121}


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[11] train: {'loss': 0.14835209358464072, 'acc': 0.951010101010101, 'f1_macro': 0.9507619215992464} | valid: {'loss': 0.8422013939388336, 'acc': 0.8535353535353535, 'f1_macro': 0.8527977669177009}


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[12] train: {'loss': 0.15495107117796228, 'acc': 0.954040404040404, 'f1_macro': 0.9539884328461256} | valid: {'loss': 0.8799200328367371, 'acc': 0.8515151515151516, 'f1_macro': 0.8522981265462395}


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[13] train: {'loss': 0.18471884747894166, 'acc': 0.9507575757575758, 'f1_macro': 0.9508337342049273} | valid: {'loss': 0.9671993837241204, 'acc': 0.8494949494949495, 'f1_macro': 0.8503689563324593}


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[14] train: {'loss': 0.12105335155519564, 'acc': 0.9643939393939394, 'f1_macro': 0.9643087539406491} | valid: {'loss': 1.0611533474537633, 'acc': 0.8505050505050505, 'f1_macro': 0.85066642559954}


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[15] train: {'loss': 0.12825964305372303, 'acc': 0.9618686868686869, 'f1_macro': 0.9616011339239767} | valid: {'loss': 1.0734306716870876, 'acc': 0.8525252525252526, 'f1_macro': 0.8527000455114695}


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[16] train: {'loss': 0.11960528742517981, 'acc': 0.9613636363636363, 'f1_macro': 0.9613951576273294} | valid: {'loss': 1.1942860896308576, 'acc': 0.8414141414141414, 'f1_macro': 0.8405783937744016}


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[17] train: {'loss': 0.128610664224016, 'acc': 0.9623737373737373, 'f1_macro': 0.9622661984296907} | valid: {'loss': 1.149308985760135, 'acc': 0.8565656565656565, 'f1_macro': 0.8562053276346113}
  ✔ saved best model (F1 ↑)


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[18] train: {'loss': 0.10979072569548655, 'acc': 0.9719696969696969, 'f1_macro': 0.9719674423248877} | valid: {'loss': 1.264604136588112, 'acc': 0.8515151515151516, 'f1_macro': 0.8499581968445924}


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[19] train: {'loss': 0.13589570325825295, 'acc': 0.9659090909090909, 'f1_macro': 0.9660623753072594} | valid: {'loss': 1.161094265118722, 'acc': 0.8505050505050505, 'f1_macro': 0.8492052240007546}


train:   0%|          | 0/248 [00:00<?, ?it/s]

eval:   0%|          | 0/31 [00:00<?, ?it/s]

[20] train: {'loss': 0.10922668906873549, 'acc': 0.9671717171717171, 'f1_macro': 0.9671082075257157} | valid: {'loss': 1.2462107303642458, 'acc': 0.8494949494949495, 'f1_macro': 0.8489132378360772}


todo: test.csv 불러와서 예측값만 submission.csv에 저장하는 함수 생성