In [1]:
%load_ext lab_black
%cd ..

/mnt/h/hev/log-analytics


In [2]:
import argparse
import math
import multiprocessing
import sys
from datetime import datetime
from pathlib import Path
from pprint import pformat

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_optimizer
import yaml
from easydict import EasyDict
from pytorch_transformers import DistilBertForSequenceClassification, DistilBertTokenizer

from datasets.dataset_ver7 import DatasetGeneratorVer7
from utils import SAM, AverageMeter, CustomLogger, FocalLoss, seed_everything

from main_ver7 import MyTrainer
from collections import defaultdict
import matplotlib.pyplot as plt
import random
from sklearn.metrics import f1_score, classification_report
from tqdm import tqdm

In [3]:
postfix = "distilbert-base-uncased-focal-AdamW-lr1e-05-ver7-os10_1"
outdir = Path("results/distilbert-base-uncased-ver7")
fold = 1

In [4]:
with open("config/distilbert-base-uncased-ver7.yaml", "r") as f:
    C = EasyDict(yaml.load(f, yaml.FullLoader))
    C.result_dir = Path(C.result_dir)
    C.dataset.dir = Path(C.dataset.dir)
    seed_everything(C.seed, deterministic=False)

In [5]:
C

{'model': {'name': 'distilbert-base-uncased'},
 'comment': None,
 'result_dir': PosixPath('results/distilbert-base-uncased-ver7'),
 'debug': False,
 'seed': 20210425,
 'ver': 7,
 'train': {'SAM': False,
  'folds': [2],
  'checkpoints': ['results/distilbert-base-uncased-ver7/distilbert-base-uncased-focal-AdamW-lr1e-05-ver7-os10_2.pth'],
  'loss': {'name': 'focal',
   'params': {'gamma': 2.0, 's': 45.0, 'm': 0.1, 'crit': 'focal'}},
  'optimizer': {'name': 'AdamW'},
  'finetune': {'do': True, 'step1_epochs': 2, 'step2_epochs': 4},
  'max_epochs': 12,
  'lr': 1e-05,
  'scheduler': {'name': 'ReduceLROnPlateau',
   'params': {'factor': 0.5, 'patience': 3, 'verbose': True}}},
 'dataset': {'dir': PosixPath('data/ver6'),
  'batch_size': 35,
  'num_workers': 8,
  'oversampling': True,
  'oversampling_scale': 40}}

In [None]:
trainer = MyTrainer(C, fold, outdir / f"{postfix}.pth")

In [7]:
model = trainer.model
model.eval()
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7f16065a9550>

In [8]:
activation = []


def hook(model, input, output):
    activation.append(output.detach().cpu())

In [9]:
model.pre_classifier.register_forward_hook(hook)

<torch.utils.hooks.RemovableHandle at 0x7f160f348a10>

In [10]:
train_dl = trainer.dsgen.train_only()

# 덱만들기 (Train)

In [11]:
activation = []
deck = {"feat": [], "otext": [], "tlevel": [], "fclevel": []}
with tqdm(total=len(train_dl.dataset), ncols=100, file=sys.stdout) as t:
    for text, tlevel, otext in train_dl:
        fclevel = model(text.cuda(non_blocking=True))[0].argmax(dim=1).cpu()
        deck["fclevel"].append(fclevel)
        deck["tlevel"].append(tlevel)
        deck["otext"].extend(otext)
        t.update(text.size(0))

100%|██████████████████████████████████████████████████████| 421079/421079 [21:19<00:00, 329.09it/s]


In [12]:
deck["feat"] = torch.cat(activation)
deck["tlevel"] = torch.cat(deck["tlevel"])
deck["fclevel"] = torch.cat(deck["fclevel"])

In [13]:
deck["tlevel"].shape, deck["fclevel"].shape

(torch.Size([421079]), torch.Size([421079]))

In [14]:
deck["feat"].shape

torch.Size([421079, 768])

In [15]:
torch.save(deck, outdir / f"{postfix}-deck1.pth")

# Valid Lv7에 대한 결과 분석

In [None]:
deck = torch.load("results/distilbert-base-uncased-ver7/distilbert-base-uncased-focal-AdamW-lr1e-05-ver7-os10_1.pth")

In [16]:
ds_lv7 = trainer.dsgen.valid_lv7().dataset

In [17]:
text, otext = ds_lv7[0]
activation = []
print(model(text[None].cuda())[0].cpu())

tensor([[ -5.6991,   7.0030, -10.0221,  -7.6704, -13.5418,  -5.9765, -12.6897]])


In [18]:
dists, indices = torch.norm(deck["feat"] - activation[0][None], p=None, dim=1).topk(4, largest=False)
dists, indices, deck["tlevel"][indices]

(tensor([[36.5123, 37.0881, 40.3818, 42.6459]]),
 tensor([[548, 684, 626, 246]]),
 tensor([[0, 0, 0, 1]]))

In [19]:
text, otext = ds_lv7[1]
activation = []
print(model(text[None].cuda())[0].cpu())

tensor([[ -3.6343,  -3.3369, -11.9597,   2.2067, -11.4330,  -4.2486, -10.3542]])


In [20]:
dists, indices = torch.norm(deck["feat"] - activation[0][None], p=None, dim=1).topk(4, largest=False)
dists, indices, deck["tlevel"][indices]

(tensor([[38.1184, 46.6838, 51.4287, 53.0099]]),
 tensor([[246, 594,   4, 154]]),
 tensor([[1, 0, 1, 1]]))

In [21]:
text, otext = ds_lv7[2]
activation = []
print(model(text[None].cuda())[0].cpu())

tensor([[  5.2112,  -3.9721, -10.3676,  -5.2402, -12.9593,  -6.0200,  -9.9562]])


In [22]:
dists, indices = torch.norm(deck["feat"] - activation[0][None], p=None, dim=1).topk(4, largest=False)
dists, indices, deck["tlevel"][indices]

(tensor([[32.1901, 49.2405, 50.5266, 52.7478]]),
 tensor([[548, 606, 330, 154]]),
 tensor([[0, 1, 1, 1]]))

In [23]:
dists, indices = torch.norm(deck["feat"] - activation[0][None], p=None, dim=1).topk(4, largest=False)
dists, indices, deck["tlevel"][indices]

# 덱만들기 (Submission)

In [11]:
ds_test = trainer.dsgen.test()

In [12]:
activation = []
deck = {"feat": [], "otext": [], "fclevel": [], "ids": []}
with tqdm(total=len(ds_test), ncols=100, file=sys.stdout) as t:
    for i in range(len(ds_test)):
        text, otext, ids = ds_test[i]
        fclevel = model(text[None].cuda(non_blocking=True))[0].argmax(dim=1).cpu()
        deck["fclevel"].append(fclevel)
        deck["otext"].append(otext)
        deck["ids"].append(ids)
        t.update()

100%|██████████████████████████████████████████████████| 1095951/1095951 [2:12:23<00:00, 137.97it/s]


In [13]:
deck["feat"] = torch.stack(activation)
deck["fclevel"] = torch.stack(deck["fclevel"])

In [14]:
deck["feat"].shape, deck["fclevel"].shape

(torch.Size([1095951, 1, 768]), torch.Size([1095951, 1]))

In [15]:
deck["feat"] = deck["feat"][:, 0, :]
# deck["fclevel"] = deck["fclevel"][:, 0]

In [16]:
deck["feat"].shape, deck["fclevel"].shape

(torch.Size([1095951, 768]), torch.Size([1095951, 1]))

In [17]:
torch.save(deck, outdir / f"{postfix}-deck2.pth")