In [1]:
%load_ext lab_black
%cd ..

/home/shim/cev/dl/log-analytics


In [97]:
import argparse
import math
import multiprocessing
import sys
from datetime import datetime
from pathlib import Path
from pprint import pformat

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_optimizer
import yaml
from easydict import EasyDict
from pytorch_transformers import (
    BertForSequenceClassification,
    BertTokenizer,
    DistilBertForSequenceClassification,
    DistilBertTokenizer,
    RobertaForSequenceClassification,
    RobertaTokenizer,
)
from sklearn.model_selection import StratifiedKFold
from torch.optim import Adam, AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from transformers import (
    AlbertForSequenceClassification,
    AlbertTokenizer,
    DebertaForSequenceClassification,
    DebertaTokenizer,
    SqueezeBertTokenizer,
    SqueezeBertForSequenceClassification,
    XLNetTokenizer,
    XLNetForSequenceClassification,
)

from datasets import load_test_data, load_train_data, MyDataset
from utils import SAM, AverageMeter, CustomLogger, FocalLoss, seed_everything

from main import MyTrainer
from collections import defaultdict
import matplotlib.pyplot as plt
import random

In [21]:
def get_dist(deck, feat, topk):
    """Return the `topk` rows of `deck` closest to `feat` (Euclidean distance).

    Args:
        deck: 2-D tensor of reference features, one row per sample.
        feat: 1-D query feature with the same width as a row of `deck`.
        topk: Number of neighbours requested. Clamped to the number of rows in
            `deck`, so a deck smaller than `topk` returns every row instead of
            raising.

    Returns:
        (values, indices): distances in ascending order and the matching row
        indices into `deck`.
    """
    # Spell out the 2-norm: the previous `p=None` only worked because
    # torch.norm falls back to p=2 when given None.
    dist = torch.norm(deck - feat[None], p=2, dim=1)
    k = min(topk, dist.size(0))
    values, indices = dist.topk(k, largest=False)
    return values, indices

In [3]:
# Read the experiment config; only the YAML parse needs the file handle open.
with open("config/distilbert-base-uncased.yaml", "r") as f:
    C = EasyDict(yaml.load(f, yaml.FullLoader))

# Turn the directory settings into Path objects, then seed with the configured value.
C.dataset.dir = Path(C.dataset.dir)
C.result_dir = Path(C.result_dir)
seed_everything(C.seed, deterministic=False)

In [4]:
C

{'model': {'name': 'distilbert-base-uncased'},
 'comment': None,
 'result_dir': PosixPath('results/distilbert-base-uncased'),
 'debug': False,
 'seed': 20210425,
 'train': {'SAM': False,
  'folds': [1],
  'checkpoints': [None],
  'loss': {'name': 'focal', 'gamma': 2},
  'optimizer': {'name': 'AdamW'},
  'finetune': {'do': True, 'step1_epochs': 2, 'step2_epochs': 4},
  'max_epochs': 10,
  'lr': 1e-05,
  'scheduler': {'name': 'ReduceLROnPlateau',
   'params': {'factor': 0.5, 'patience': 3, 'verbose': True}}},
 'dataset': {'dir': PosixPath('data/ori'), 'batch_size': 30, 'num_workers': 8}}

In [5]:
# Build the trainer from config C together with a fine-tuned checkpoint path
# (the cell output logs "Load pretrained ..." for this path).
# NOTE(review): the second argument (1) is presumably the fold number, matching
# the `_1` suffix of the checkpoint file name — confirm against main.MyTrainer.
trainer = MyTrainer(C, 1, "results/distilbert-base-uncased/distilbert-base-uncased-focal-AdamW-lr1e-05_1.pth")

Load pretrained results/distilbert-base-uncased/distilbert-base-uncased-focal-AdamW-lr1e-05_1.pth


In [9]:
# Inference only: put the network in eval mode and switch autograd off for the
# remainder of the session.
model = trainer.model
model.eval()
# Last expression of the cell, so the notebook echoes its return value.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7ff752df0690>

In [10]:
# Buffer filled by `hook`; query cells rebind it to a fresh list before a run.
activation = []


def hook(model, input, output):
    """Forward hook that stashes a detached CPU copy of the layer output.

    Appends to whatever list the module-level name `activation` is bound to at
    call time, so rebinding `activation = []` elsewhere starts a new capture.
    The `model` and `input` arguments are not used.
    """
    captured = output.detach()
    activation.append(captured.cpu())

In [13]:
# Keep the handle and drop any hook left over from an earlier run of this cell.
# Without this, every re-run stacks another hook on pre_classifier and each
# forward pass appends duplicate entries to `activation`.
if "hook_handle" in globals():
    hook_handle.remove()
hook_handle = model.pre_classifier.register_forward_hook(hook)
hook_handle

<torch.utils.hooks.RemovableHandle at 0x7ff64bc2d790>

In [14]:
tds = trainer.tdl.dataset
vds = trainer.vdl.dataset

In [15]:
# Small trial deck: run the first 100 items of `tds` through the model one at a
# time. The forward hook fills `activation`; the labels go to `tlevels`.
activation, tlevels = [], []

for i in range(100):
    id, text, tlevel, otext = tds[i]
    model(text[None].cuda())  # [None] adds the batch dimension
    tlevels.append(tlevel)

In [19]:
deck = {"feat": torch.cat(activation), "level": torch.stack(tlevels)}

In [20]:
deck["feat"].shape

torch.Size([100, 768])

## 아무 train 데이터에선 어떻게 나오는지?

In [45]:
# Capture the feature of one more item of `tds` (index 1000) for a deck lookup.
activation = []  # fresh buffer so activation[0] belongs to this sample
id, text, tlevel, otext = tds[1000]
model(text[None].cuda())  # forward pass; the hook stores the feature
pass  # keeps the model output from being echoed as the cell result

In [46]:
values, indices = get_dist(deck["feat"], activation[0][0], 10)

In [47]:
values

tensor([0.0751, 0.2143, 0.2428, 0.2469, 0.3457, 0.4287, 0.4842, 1.2297, 1.2361,
        1.2682])

In [48]:
indices

tensor([12, 68, 53, 65, 67, 74, 44, 52, 81, 95])

In [49]:
deck["level"][indices]

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

TODO: train 데이터 중에서도 level 0과 1을 헷갈려하는 게 몇 개 있었는데, 그거 확인해 봐야 함

## Level7이랑 어떻게 나오는지

In [56]:
# Five nearest deck entries for sample 0 of trainer.dl_test2.
activation = []  # reset so activation[0] is this sample's capture
id, text, otext = trainer.dl_test2.dataset[0]
model(text[None].cuda())  # forward pass; the hook stores the feature
query = activation[0][0]
values, indices = get_dist(deck["feat"], query, 5)
print(values, indices, deck["level"][indices], sep="\n")

tensor([1.3288, 1.3536, 1.3854, 1.3881, 1.4077])
tensor([19, 34, 72, 13, 99])
tensor([1, 1, 1, 1, 1])


In [57]:
# Five nearest deck entries for sample 1 of trainer.dl_test2.
activation = []  # reset so activation[0] is this sample's capture
id, text, otext = trainer.dl_test2.dataset[1]
model(text[None].cuda())  # forward pass; the hook stores the feature
query = activation[0][0]
values, indices = get_dist(deck["feat"], query, 5)
print(values, indices, deck["level"][indices], sep="\n")

tensor([17.3320, 19.1922, 19.2110, 19.3582, 19.5117])
tensor([77, 26, 33, 63, 85])
tensor([1, 0, 1, 1, 0])


In [58]:
# Five nearest deck entries for sample 2 of trainer.dl_test2.
activation = []  # reset so activation[0] is this sample's capture
id, text, otext = trainer.dl_test2.dataset[2]
model(text[None].cuda())  # forward pass; the hook stores the feature
query = activation[0][0]
values, indices = get_dist(deck["feat"], query, 5)
print(values, indices, deck["level"][indices], sep="\n")

tensor([7.1833, 7.3726, 7.4656, 7.4739, 7.4911])
tensor([14, 31, 46, 93, 90])
tensor([0, 0, 0, 0, 0])


## Train 데이터 deck 만들기

In [60]:
# Full deck: push every batch of trainer.tdl through the model. The forward
# hook collects the features in `activation`; labels are gathered in `tlevels`.
activation, tlevels = [], []
n_train = len(trainer.tdl.dataset)
with tqdm(total=n_train, ncols=100, file=sys.stdout) as t:
    for batch in trainer.tdl:
        id, text, tlevel, otext = batch
        model(text.cuda())
        tlevels.append(tlevel)
        t.update(len(id))  # advance the bar by the batch size

100%|██████████████████████████████████████████████████████| 378377/378377 [40:35<00:00, 155.38it/s]


In [65]:
deck = {"feat": torch.cat(activation), "level": torch.cat(tlevels)}

In [66]:
deck["feat"].shape, deck["level"].shape

(torch.Size([378377, 768]), torch.Size([378377]))

In [67]:
torch.save(deck, "results/distilbert-base-uncased/distilbert-base-uncased-focal-AdamW-lr1e-05_1-deck.pth")

## 전체 deck에서 Level7이랑 어떻게 나오는지

In [68]:
# Same lookup as before for dl_test2 sample 0, now against the full deck.
activation = []  # reset so activation[0] is this sample's capture
id, text, otext = trainer.dl_test2.dataset[0]
model(text[None].cuda())
query = activation[0][0]
values, indices = get_dist(deck["feat"], query, 5)
print(values, indices, deck["level"][indices], sep="\n")

tensor([0.8870, 0.8887, 0.9043, 0.9228, 0.9307])
tensor([187668,  38585,  94760, 118164, 193103])
tensor([1, 1, 1, 1, 1])


In [80]:
# Same lookup as before for dl_test2 sample 1, now against the full deck.
activation = []  # reset so activation[0] is this sample's capture
id, text, otext = trainer.dl_test2.dataset[1]
model(text[None].cuda())
query = activation[0][0]
values, indices = get_dist(deck["feat"], query, 5)
print(values, indices, deck["level"][indices], sep="\n")

tensor([2.9586, 3.1170, 3.1264, 3.1331, 3.1357])
tensor([187749, 221362, 301898, 258427,  44913])
tensor([3, 3, 3, 3, 3])


In [81]:
# Same lookup as before for dl_test2 sample 2, now against the full deck.
activation = []  # reset so activation[0] is this sample's capture
id, text, otext = trainer.dl_test2.dataset[2]
model(text[None].cuda())
query = activation[0][0]
values, indices = get_dist(deck["feat"], query, 5)
print(values, indices, deck["level"][indices], sep="\n")

tensor([3.5220, 3.5593, 3.5816, 3.6035, 3.6583])
tensor([323923, 217243, 225248, 156748, 256758])
tensor([0, 0, 0, 0, 0])


In [79]:
# Look up item 1005 of `vds` in the deck and print its own label first, so it
# can be compared with the labels of the five nearest deck entries.
activation = []
id, text, tlevel, otext = vds[1005]
model(text[None].cuda())
query = activation[0][0]
values, indices = get_dist(deck["feat"], query, 5)
print(tlevel, values, indices, deck["level"][indices], sep="\n")

tensor(0)
tensor([0.0043, 0.0043, 0.0043, 0.0043, 0.0043])
tensor([10, 19, 27, 57, 26])
tensor([0, 0, 0, 0, 0])


In [85]:
# Validation results saved as an .npz archive. allow_pickle=True is needed
# because the archive holds a pickled Python object (read later through
# data["leveldic"].item()); unpickling can execute arbitrary code, so only load
# archives produced by this project.
data = np.load("results/distilbert-base-uncased/distilbert-base-uncased-focal-AdamW-lr1e-05_1-valid.npz", allow_pickle=True)

In [131]:
# level 1
# Pick a random index from leveldic[1] (presumably the validation samples whose
# label is 1 — confirm against the code that wrote the .npz) and look it up.
activation = []
level_members = data["leveldic"].item()[1]
id, text, tlevel, otext = vds[random.choice(level_members)]
model(text[None].cuda())
values, indices = get_dist(deck["feat"], activation[0][0], 5)
print(tlevel, values, indices, deck["level"][indices], sep="\n")

tensor(1)
tensor([0.0309, 0.0342, 0.0413, 0.0419, 0.0424])
tensor([270788,  30202,  15762, 282319, 245663])
tensor([1, 1, 1, 1, 1])


In [152]:
# level 2
# Pick a random index from leveldic[2] (presumably the validation samples whose
# label is 2 — confirm against the code that wrote the .npz) and look it up.
activation = []
level_members = data["leveldic"].item()[2]
id, text, tlevel, otext = vds[random.choice(level_members)]
model(text[None].cuda())
values, indices = get_dist(deck["feat"], activation[0][0], 5)
print(tlevel, values, indices, deck["level"][indices], sep="\n")

tensor(2)
tensor([0.0765, 0.1138, 0.1471, 0.1519, 0.1554])
tensor([359829, 226381, 250211, 235407,  14151])
tensor([2, 2, 2, 2, 2])


In [154]:
# level 3
# Pick a random index from leveldic[3] (presumably the validation samples whose
# label is 3 — confirm against the code that wrote the .npz) and look it up.
activation = []
level_members = data["leveldic"].item()[3]
id, text, tlevel, otext = vds[random.choice(level_members)]
model(text[None].cuda())
values, indices = get_dist(deck["feat"], activation[0][0], 5)
print(tlevel, values, indices, deck["level"][indices], sep="\n")

tensor(3)
tensor([0.0863, 0.1290, 0.1640, 0.1813, 0.3446])
tensor([165023, 142063, 317437, 314570,  59031])
tensor([3, 3, 3, 3, 3])


In [156]:
# level 4
# Pick a random index from leveldic[4] (presumably the validation samples whose
# label is 4 — confirm against the code that wrote the .npz) and look it up.
activation = []
level_members = data["leveldic"].item()[4]
id, text, tlevel, otext = vds[random.choice(level_members)]
model(text[None].cuda())
values, indices = get_dist(deck["feat"], activation[0][0], 5)
print(tlevel, values, indices, deck["level"][indices], sep="\n")

tensor(4)
tensor([0.0044, 0.7365, 0.7365, 2.2895, 2.3222])
tensor([ 61153, 180196, 276271, 302171, 204333])
tensor([4, 4, 4, 4, 4])


In [158]:
# level 5
# Pick a random index from leveldic[5] (presumably the validation samples whose
# label is 5 — confirm against the code that wrote the .npz) and look it up.
activation = []
level_members = data["leveldic"].item()[5]
id, text, tlevel, otext = vds[random.choice(level_members)]
model(text[None].cuda())
values, indices = get_dist(deck["feat"], activation[0][0], 5)
print(tlevel, values, indices, deck["level"][indices], sep="\n")

tensor(5)
tensor([0.0106, 0.0107, 0.1543, 0.1569, 0.1584])
tensor([201502, 216081,  15429, 310271,  84694])
tensor([5, 5, 5, 5, 5])


In [160]:
# level 6
# Pick a random index from leveldic[6] (presumably the validation samples whose
# label is 6 — confirm against the code that wrote the .npz) and look it up.
activation = []
level_members = data["leveldic"].item()[6]
id, text, tlevel, otext = vds[random.choice(level_members)]
model(text[None].cuda())
values, indices = get_dist(deck["feat"], activation[0][0], 5)
print(tlevel, values, indices, deck["level"][indices], sep="\n")

tensor(6)
tensor([0.0034, 0.0034, 1.6767, 3.0890, 3.1557])
tensor([134746, 305939,  76630, 199656, 114107])
tensor([6, 6, 6, 6, 6])


In [184]:
# test
# Five nearest deck entries for sample 18 of trainer.dl_test (no label here).
activation = []  # reset so activation[0] is this sample's capture
id, text, otext = trainer.dl_test.dataset[18]
model(text[None].cuda())
query = activation[0][0]
values, indices = get_dist(deck["feat"], query, 5)
print(values, indices, deck["level"][indices], sep="\n")

tensor([0.0032, 0.0032, 0.0032, 0.0032, 0.0032])
tensor([ 78, 139, 478, 716, 305])
tensor([3, 3, 3, 3, 3])


## 모든 valid를 돌렸을 때 top1이 몇이나 되는지 확인

In [303]:
# One pass over trainer.vdl: the hook collects features in `activation`, while
# the labels and the argmax (over dim 1) of the model's first output are kept
# per batch.
activation, vlevels, fc_level = [], [], []
n_valid = len(trainer.vdl.dataset)
with tqdm(total=n_valid, ncols=100, file=sys.stdout) as t:
    for id, text, vlevel, otext in trainer.vdl:
        first_output = model(text.cuda())[0]
        fc_level.append(first_output.cpu().argmax(dim=1))
        vlevels.append(vlevel)
        t.update(len(id))  # advance the bar by the batch size

100%|████████████████████████████████████████████████████████| 94595/94595 [04:38<00:00, 340.07it/s]


In [305]:
# Merge the per-batch predictions into one tensor. Guarded so that re-running
# the cell once fc_level is already a tensor is a no-op instead of a TypeError
# from torch.cat.
if isinstance(fc_level, list):
    fc_level = torch.cat(fc_level)

In [307]:
# Stack the features captured by the hook into a single tensor.
vfeats = torch.cat(activation)
# Guarded so that re-running the cell once vlevels is already a tensor is a
# no-op instead of a TypeError from torch.cat.
if isinstance(vlevels, list):
    vlevels = torch.cat(vlevels)

In [312]:
(vlevels == fc_level).sum()

tensor(94425)

## FC로 정답을 틀린게 뭐가 있지?

In [316]:
correct = fc_level == vlevels

In [317]:
# Positions where the FC prediction differs from the label, as a list of ints.
# Vectorised: avoids iterating the boolean tensor element by element in Python.
incorrect_indices = (~correct).nonzero(as_tuple=True)[0].tolist()

In [321]:
# For every FC miss, print the true label, the FC prediction and the raw text.
for i in incorrect_indices:
    sample = trainer.vdl.dataset[i]
    level, otext = sample[2], sample[3]
    print(level, fc_level[i], otext)

tensor(5) tensor(0) level : 10, log : fru-name#fru-slot - command
tensor(1) tensor(0) level : 5, log : Ignoring RADIUS server radius-server: unable to bind to source address source-address (error-message)
tensor(1) tensor(0) level : 5, log : function-name operation failed for object-name: index1 index1 index2 index2 index3 index3 index4 index4 (error-message)
tensor(1) tensor(0) level : 3, log : Client aborted login
tensor(1) tensor(0) level : 5, log : Versions of BFD mirror software on Routing Engines are incompatible: error-message
tensor(0) tensor(1) level : 5, log : License grace period for feature feature-name(feature-id) will expire in time (limit = maximum-value)
tensor(1) tensor(0) level : 3, log : function-name: runtime was excessive (time) during action of module
tensor(0) tensor(1) level : 5, log : Socket listen (server) failed unexpectedly. error-message
tensor(1) tensor(0) level : 3, log : Unable to fork: error-message
tensor(1) tensor(0) level : 5, log : Error transferrin

## 거리 재기

In [248]:
dist, index = get_dist(deck["feat"], vfeats[5000], 10)
dist, index

(tensor([0.0306, 0.0308, 0.0324, 0.0330, 0.0334, 0.0339, 0.0342, 0.0342, 0.0351,
         0.0354]),
 tensor([183202, 154476, 359887,  51075, 252679, 120976, 136164, 339124, 168650,
         136352]))

In [245]:
deck["level"][index]

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [208]:
tfeats_ = deck["feat"].cuda()

In [211]:
vfeats_ = vfeats.cuda()

In [279]:
# 1-nearest-neighbour lookup for every validation feature against the deck on
# the GPU; the results are moved back to the CPU before being stored.
target_level, pred_level, pred_dist = [], [], []
with tqdm(total=len(vfeats), ncols=100, file=sys.stdout) as t:
    for vfeat_, vlevel in zip(vfeats_, vlevels):
        nearest_dist, nearest_idx = get_dist(tfeats_, vfeat_, 1)
        pred_dist.append(nearest_dist.cpu())
        # deck["level"] lives on the CPU, so index it with a CPU tensor
        pred_level.append(deck["level"][nearest_idx.cpu()])
        target_level.append(vlevel)
        t.update()

100%|████████████████████████████████████████████████████████| 94595/94595 [09:29<00:00, 166.04it/s]


In [None]:
# Merge the per-sample 1-NN labels into one tensor. Guarded so that re-running
# the cell once pred_level is already a tensor is a no-op instead of a
# TypeError from torch.cat.
if isinstance(pred_level, list):
    pred_level = torch.cat(pred_level)

In [281]:
# Stack the per-sample labels into one tensor. Guarded so that re-running the
# cell once target_level is already a tensor is a no-op instead of a TypeError
# from torch.stack.
if isinstance(target_level, list):
    target_level = torch.stack(target_level)

In [313]:
(pred_level == target_level).sum()

tensor(94361)

In [322]:
dist_correct = pred_level == target_level

In [323]:
# Positions where the 1-NN prediction differs from the label, as a list of
# ints. Vectorised: avoids iterating the boolean tensor in Python.
dist_incorrect = (~dist_correct).nonzero(as_tuple=True)[0].tolist()

In [325]:
# For every 1-NN miss, print the true label, the 1-NN prediction and the raw text.
for i in dist_incorrect:
    sample = trainer.vdl.dataset[i]
    level, otext = sample[2], sample[3]
    print(level, pred_level[i], otext)

tensor(5) tensor(0) level : 10, log : fru-name#fru-slot - command
tensor(0) tensor(1) level : 3, log : reason: error-message
tensor(0) tensor(1) level : 3, log : This is test alarm argument1 of argument2
tensor(0) tensor(1) level : 5, log : Failed to open profile database: error-code
tensor(0) tensor(1) level : 5, log : Unable to create session child: error-message
tensor(0) tensor(1) level : 10, log : Key-Pair has been generated
tensor(0) tensor(1) level : 5, log : Error reading midplane ID EEPROM, errno error-code
tensor(1) tensor(0) level : 5, log : Ignoring RADIUS server radius-server: unable to bind to source address source-address (error-message)
tensor(1) tensor(5) level : 5, log : function-name operation failed for object-name: index1 index1 index2 index2 index3 index3 index4 index4 (error-message)
tensor(1) tensor(0) level : 3, log : Client aborted login
tensor(1) tensor(0) level : 5, log : Versions of BFD mirror software on Routing Engines are incompatible: error-message
tens

## KNN으로 하면 Validation 정확도가 더 나아지는지 확인

어떻게 plevel을 결정할까?

top1 거리가 0.1 미만이면 그 level로 결정하고, 0.1 이상이면 level 7로 하기?



## KNN으로 submission 제작