In [2]:
import torch

In [3]:
import pickle

# Restore the pickled `args` object; later cells read attributes such as
# args.backbone, args.method and args.loss_fct from it.
# NOTE(review): pickle.load can execute arbitrary code while unpickling —
# only load 'ADB_args.pickle' if it comes from a trusted source.
with open('ADB_args.pickle', 'rb') as f:
    args = pickle.load(f)

In [4]:
from configs.base import ParamManager
from dataloaders.base import DataManager
from backbones.base import ModelManager
from methods import method_map
from utils.functions import save_results
import logging
import argparse
import sys
import os
import datetime

start
LibMR not installed or libmr.so not found
Install libmr: cd libMR/; ./compile.sh


In [13]:
from backbones.bert import BERT, BERT_Norm, BERT_MixUp, BERT_SEG, BERT_Disaware, BERT_DOC
# Lookup table from the `args.backbone` string to the backbone class imported
# from backbones.bert; indexed by set_model below.
backbones_map = {
                    'bert': BERT, 
                    'bert_norm': BERT_Norm,
                    'bert_mixup': BERT_MixUp,
                    'bert_seg': BERT_SEG,
                    'bert_disaware': BERT_Disaware,
                    'bert_doc': BERT_DOC
                }

class ModelManager:
    """Build the backbone model together with its optimizer and LR scheduler.

    This is a notebook-local re-definition; it shadows the ``ModelManager``
    imported from ``backbones.base`` in the import cell.

    Attributes:
        logger: ``logging`` logger named ``logger_name``.
        model, optimizer, scheduler: created in ``__init__`` only when
            ``args.backbone`` starts with ``'bert'``.
        device: ``torch.device`` chosen by ``set_model``.
    """

    def __init__(self, args, data, logger_name = 'Detection'):
        """Create model, optimizer and scheduler for a BERT-style backbone.

        Args:
            args: configuration object; reads ``backbone``, ``train_batch_size``,
                ``num_train_epochs``, ``lr`` and ``warmup_proportion`` here.
            data: object exposing ``data.dataloader.num_train_examples``.
            logger_name: name passed to ``logging.getLogger``.
        """
        self.logger = logging.getLogger(logger_name)
        # NOTE(review): for a backbone name that does not start with 'bert'
        # nothing below runs, so model/optimizer/scheduler/device stay unset.
        if args.backbone.startswith('bert'):
            self.model = self.set_model(args, 'bert')
            self.optimizer, self.scheduler = self.set_optimizer(
                self.model,
                data.dataloader.num_train_examples,
                args.train_batch_size,
                args.num_train_epochs,
                args.lr,
                args.warmup_proportion,
            )

    def set_optimizer(self, model, num_train_examples, train_batch_size, num_train_epochs, lr, warmup_proportion):
        """Create an AdamW optimizer and a linear warmup schedule for ``model``.

        Parameters whose name contains ``'bias'``, ``'LayerNorm.bias'`` or
        ``'LayerNorm.weight'`` get ``weight_decay`` 0.0; all others get 0.01.

        Args:
            model: module providing ``named_parameters()``.
            num_train_examples: number of training examples.
            train_batch_size: training batch size.
            num_train_epochs: number of training epochs.
            lr: learning rate passed to AdamW.
            warmup_proportion: fraction of the total step count used for warmup.

        Returns:
            Tuple ``(optimizer, scheduler)``.
        """
        # Imported here so the class does not rely on a later notebook cell
        # having been executed first (hidden kernel state).
        from transformers import AdamW, get_linear_schedule_with_warmup

        num_train_optimization_steps = int(num_train_examples / train_batch_size) * num_train_epochs

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]

        optimizer = AdamW(optimizer_grouped_parameters, lr = lr, correct_bias=False)
        num_warmup_steps = int(num_train_examples * num_train_epochs * warmup_proportion / train_batch_size)

        scheduler = get_linear_schedule_with_warmup(optimizer,
                                                    num_warmup_steps=num_warmup_steps,
                                                    num_training_steps=num_train_optimization_steps)

        return optimizer, scheduler

    def set_model(self, args, pattern):
        """Instantiate the backbone selected by ``args.backbone`` and move it to the device.

        The device is ``cuda:<args.gpu_id>`` when CUDA is available, otherwise
        ``cpu``; it is stored on both ``args.device`` and ``self.device``.

        Args:
            args: configuration object; reads ``backbone``, ``gpu_id`` and
                ``freeze_backbone_parameters``, and receives ``device``.
            pattern: backbone family; only ``'bert'`` is handled.

        Returns:
            The model loaded via ``from_pretrained('bert-base-uncased', args=args)``.

        Raises:
            ValueError: if ``pattern`` is not ``'bert'``.
        """
        # Fail early and explicitly: previously an unsupported pattern left
        # `model` unbound and crashed later with UnboundLocalError at model.to().
        if pattern != 'bert':
            raise ValueError("Unsupported backbone pattern: %r (only 'bert' is handled)" % (pattern,))

        backbone = backbones_map[args.backbone]
        print(backbone)  # shows which backbone class was selected
        args.device = self.device = torch.device('cuda:%d' % int(args.gpu_id) if torch.cuda.is_available() else 'cpu')

        model = backbone.from_pretrained('bert-base-uncased', args = args)
        if args.freeze_backbone_parameters:
            # Imported here so the class does not rely on a later notebook cell.
            from backbones.utils import freeze_bert_parameters
            self.logger.info('Freeze all parameters but the last layer for efficiency')
            model = freeze_bert_parameters(model)
        model.to(self.device)

        return model

In [6]:
# Build the data manager; later cells read data.dataloader.* from it.
data = DataManager(args, logger_name = args.logger_name)

In [14]:
# Instantiate the model manager; `model` here is the manager object, and the
# backbone itself is reachable as model.model.
model = ModelManager(args, data, logger_name = args.logger_name)

<class 'backbones.bert.BERT'>


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BERT: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BERT from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BERT from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BERT were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'dense.bias', 'dense.weight', 'c

위의 두 셀(`DataManager`, `ModelManager` 생성)을 실행한 후
```python
def run(args, data, model, logger):
```
이 함수를 실행하면 아래 코드로 이어진다.

``` methods/__init__.py ```
```python

method_map = {
                'ADB': ADBManager, 
                'DA-ADB': ADBManager, 
                'MSP': MSPManager, 
                'DeepUnk':DeepUnkManager, 
                'LOF': DeepUnkManager, 
                'DOC': DOCManager, 
                'OpenMax': OpenMaxManager, 
                'MixUp': MixUpManager,
                'SEG': SEGManager
            }

```
```run.py```
```python
def run(args, data, model, logger):
    method_manager = method_map[args.method]
    method = method_manager(args, data, model, logger_name = args.logger_name)
```

In [39]:
# Show which key of method_map this run selects.
args.method

'ADB'

method는 ```methods/ADB/manager.py``` 파일의 ```class ADBManager``` 에서 실행됨

```python
class ADBManager:
    print("start")
    
    def __init__(self, args, data, model, logger_name = 'Detection'):
        print("ADB init")
        self.logger = logging.getLogger(logger_name)
        
        pretrain_model = PretrainManager(args, data, model)
        self.model = pretrain_model.model
        self.centroids = pretrain_model.centroids
        self.pretrain_best_eval_score = pretrain_model.best_eval_score
```
```methods/ADB/pretrain.py``` 의 ```class PretrainManager```를 살펴보자
아래는 ```def __init__``` 의 내용

In [15]:
# Pull the pieces out of the model manager under PretrainManager_* names
# (per the note above, this replays PretrainManager.__init__ step by step).
# Requires `model` to be a ModelManager instance, not a bare backbone.
PretrainManager_model = model.model
PretrainManager_optimizer = model.optimizer
PretrainManager_scheduler = model.scheduler
PretrainManager_device = model.device

In [24]:
# Display the module tree of the backbone.
PretrainManager_model

BERT(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
       

In [16]:
# Display the device chosen by set_model.
PretrainManager_device

device(type='cuda', index=3)

In [17]:
# Alias the three loaders exposed by data.dataloader under PretrainManager_* names.
PretrainManager_train_dataloader = data.dataloader.train_labeled_loader
PretrainManager_eval_dataloader = data.dataloader.eval_loader
PretrainManager_test_dataloader = data.dataloader.test_loader

In [18]:
from losses.CosineFaceLoss import CosineFaceLoss
from torch import nn 
# Lookup table from the `args.loss_fct` string to a loss object.
# Every loss is instantiated eagerly when this dict is built.
loss_map = {
                'CrossEntropyLoss': nn.CrossEntropyLoss(), 
                'Binary_CrossEntropyLoss': nn.BCELoss(),
                'CosineFaceLoss': CosineFaceLoss()
            }

# Loss selected by the run configuration, plus placeholders that start as None.
PretrainManager_loss_fct = loss_map[args.loss_fct]  
PretrainManager_centroids = None
PretrainManager_best_eval_score = None

In [19]:
# Show the two settings that the branch quoted below tests.
args.pretrain, args.backbone

(True, 'bert')

if 문에서 ```train_plain```을 실행하게 됨
```python
        if args.pretrain or (not os.path.exists(args.model_output_dir)):
            print("pretrainmanager init if")
            self.logger.info('Pre-training Begin...')

            if args.backbone == 'bert_disaware':
                print("pretrainmanager init bert_disaware")
                self.train_disaware(args, data)
            else:
                print("pretrainmanager init else bert_disaware")
                self.train_plain(args, data)
```


```python
class PretrainManager:
    def train_plain(self, args, data):
```

In [20]:
from tqdm import trange, tqdm

# Bookkeeping variables initialised before the training loop; the truncated
# loop in the next cell does not update them.
wait = 0
best_model = None
best_eval_score = 0

In [21]:
# Single-batch dry run of the training loop: both loops `break` after the
# first batch, so `batch`, the unpacked tensors and `loss` remain available as
# notebook globals for the inspection cells below. No backward pass or
# optimizer step happens here.
for epoch in trange(int(args.num_train_epochs), desc="Epoch"):
    print(epoch)
    PretrainManager_model.train()
    # Counters are initialised but never updated in this truncated version.
    tr_loss = 0
    nb_tr_examples, nb_tr_steps = 0, 0
    for step, batch in enumerate(tqdm(PretrainManager_train_dataloader, desc="Iteration")):
        # Move every tensor of the batch to the model's device.
        batch = tuple(t.to(PretrainManager_device) for t in batch)
        input_ids, input_mask, segment_ids, label_ids = batch
        
        # Forward pass with gradient tracking explicitly enabled.
        with torch.set_grad_enabled(True):
            loss = PretrainManager_model(input_ids, segment_ids, input_mask, label_ids, mode = "train", loss_fct = PretrainManager_loss_fct)
            
        break  # stop after the first batch
    
    break  # stop after the first epoch

Epoch:   0%|                                                                                                                                                                       | 0/100 [00:00<?, ?it/s]

0



Iteration:   0%|                                                                                                                                                                    | 0/17 [00:00<?, ?it/s][A
Epoch:   0%|                                                                                                                                                                       | 0/100 [00:00<?, ?it/s]


In [79]:
# Inspect the captured batch: tuple length, the shape of each of its four
# tensors, the first row of the three input tensors, and the full label tensor.
len(batch), batch[0].shape, batch[1].shape, batch[2].shape, batch[3].shape, batch[0][0], batch[1][0], batch[2][0], batch[3]

(4,
 torch.Size([128, 55]),
 torch.Size([128, 55]),
 torch.Size([128, 55]),
 torch.Size([128]),
 tensor([  101,  7632,  1010,  1045,  2572,  2006, 10885,  1999,  3577,  1998,
          2619, 10312,  2026,  4524,  2007,  2026,  3042,  1998, 15882,  2007,
          5329,  1998,  2673,  1012,  2064,  2017,  3796,  2009, 17306,  2361,
          1998,  2059,  1045,  2215,  2000,  2344,  1037,  2047,  2028,  2036,
         17306,  2361,  1012,   102,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0]),
 tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0]),
 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0]),
 tensor([13, 10, 14,  1,  1, 14,  6, 16,  4, 12,  0,  4,  0, 11, 10, 15, 14

```backbones/bert.py```에 있는 ```class BERT```의 ```forward``` 과정을 한 단계씩 직접 실행해 봄

In [34]:
# Rename the batch tensors for the manual forward pass
# (presumably the parameter names of BERT.forward — confirm against backbones/bert.py).
token_type_ids = segment_ids
attention_mask = input_mask
labels = label_ids

# Call the inner encoder directly, requesting the hidden states of every layer.
outputs = PretrainManager_model.bert(input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask, output_hidden_states=True)

In [35]:
# All hidden states returned because output_hidden_states=True was requested.
encoded_layer_12 = outputs.hidden_states

# Mean of the last hidden state over dim=1, computed once. The earlier
# `pooled_output = outputs.pooler_output` assignment was overwritten before
# being used, so it is dropped.
pooled_output = encoded_layer_12[-1].mean(dim=1)

# Classification head applied step by step: dense -> activation -> dropout.
pooled_output = PretrainManager_model.dense(pooled_output)
pooled_output = PretrainManager_model.activation(pooled_output)
pooled_output = PretrainManager_model.dropout(pooled_output)

logits = PretrainManager_model.classifier(pooled_output)

In [36]:
# Show the shape of the logits and the first row.
logits.shape, logits[0]

(torch.Size([128, 19]),
 tensor([ 0.0128,  0.0217,  0.0274, -0.0431, -0.0917,  0.0591,  0.0121, -0.1554,
          0.0351, -0.0333, -0.0211, -0.0598,  0.0372, -0.0200, -0.1376, -0.0256,
         -0.0008,  0.0446,  0.0555], device='cuda:3', grad_fn=<SelectBackward0>))

In [38]:
# Apply the configured loss to the manually computed logits and display it.
loss_ce = PretrainManager_loss_fct(logits, labels)
loss_ce

tensor(2.9441, device='cuda:3', grad_fn=<NllLossBackward0>)

# class ModelManager:

In [14]:
from backbones.bert import BERT, BERT_Norm, BERT_MixUp, BERT_SEG, BERT_Disaware, BERT_DOC
# Lookup table from the `args.backbone` string to the backbone class.
# NOTE(review): identical to the backbones_map defined earlier in this
# notebook; this cell rebinds the same name.
backbones_map = {
                    'bert': BERT, 
                    'bert_norm': BERT_Norm,
                    'bert_mixup': BERT_MixUp,
                    'bert_seg': BERT_SEG,
                    'bert_disaware': BERT_Disaware,
                    'bert_doc': BERT_DOC
                }

In [10]:
import torch 
from backbones.utils import freeze_bert_parameters

def set_model(args, pattern):
    """Instantiate the backbone selected by ``args.backbone`` and move it to the device.

    The device is ``cuda:<args.gpu_id>`` when CUDA is available, otherwise
    ``cpu``; it is also stored on ``args.device``.

    Args:
        args: configuration object; reads ``backbone``, ``gpu_id`` and
            ``freeze_backbone_parameters``, and receives ``device``.
        pattern: backbone family; only ``'bert'`` is handled.

    Returns:
        The model loaded via ``from_pretrained('bert-base-uncased', args=args)``.

    Raises:
        ValueError: if ``pattern`` is not ``'bert'``.
    """
    # Fail early and explicitly: previously an unsupported pattern left `model`
    # unbound and crashed later with UnboundLocalError at model.to().
    if pattern != 'bert':
        raise ValueError("Unsupported backbone pattern: %r (only 'bert' is handled)" % (pattern,))

    backbone = backbones_map[args.backbone]
    args.device = device = torch.device('cuda:%d' % int(args.gpu_id) if torch.cuda.is_available() else 'cpu')

    # Author's note (translated): removing `cache_dir = "cache"` made this call
    # work... why? There is some "3090"-related error.
    model = backbone.from_pretrained('bert-base-uncased', args = args)
    if args.freeze_backbone_parameters:
        model = freeze_bert_parameters(model)
    model.to(device)

    return model

In [29]:
# NOTE(review): this rebinds `model` from the ModelManager instance created
# earlier to the bare backbone returned by set_model. Cells that read
# model.model / model.optimizer / model.scheduler expect the manager —
# confirm the intended object before running them after this cell.
model = set_model(args, 'bert')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BERT: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BERT from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BERT from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BERT were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['dense.bias', 'dense.weight', 'classifier.weight', 'c

In [12]:
from transformers import AdamW, get_linear_schedule_with_warmup

def set_optimizer(model, num_train_examples, train_batch_size, num_train_epochs, lr, warmup_proportion):
    """Create an AdamW optimizer and a linear warmup schedule for ``model``.

    Parameters whose name contains ``'bias'``, ``'LayerNorm.bias'`` or
    ``'LayerNorm.weight'`` are placed in a group with ``weight_decay`` 0.0;
    every other parameter uses ``weight_decay`` 0.01.

    Args:
        model: module providing ``named_parameters()``.
        num_train_examples: number of training examples.
        train_batch_size: training batch size.
        num_train_epochs: number of training epochs.
        lr: learning rate passed to AdamW.
        warmup_proportion: fraction of the total step count used for warmup.

    Returns:
        Tuple ``(optimizer, scheduler)``.
    """
    total_steps = int(num_train_examples / train_batch_size) * num_train_epochs

    exempt_markers = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']

    # Split the parameters in one pass, keeping their original order per group.
    decayed, undecayed = [], []
    for name, param in list(model.named_parameters()):
        if any(marker in name for marker in exempt_markers):
            undecayed.append(param)
        else:
            decayed.append(param)

    param_groups = [
        {'params': decayed, 'weight_decay': 0.01},
        {'params': undecayed, 'weight_decay': 0.0},
    ]
    optimizer = AdamW(param_groups, lr = lr, correct_bias=False)

    warmup_steps = int(num_train_examples * num_train_epochs * warmup_proportion / train_batch_size)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=warmup_steps,
        num_training_steps=total_steps,
    )

    return optimizer, scheduler

In [38]:
# Build the optimizer/scheduler pair for the current `model` using the
# training-set size and the hyper-parameters stored on `args`.
optimizer, scheduler = set_optimizer(model, data.dataloader.num_train_examples, args.train_batch_size, \
                args.num_train_epochs, args.lr, args.warmup_proportion) 

In [5]:
# Pick the manager class registered for the configured method name.
method_manager = method_map[args.method]

In [None]:
# NOTE(review): `model` was last rebound by `model = set_model(args, 'bert')`,
# i.e. a bare backbone. The PretrainManager walk-through in this notebook reads
# model.model / model.optimizer / model.scheduler / model.device, so a
# ModelManager instance is presumably expected here — confirm before running.
method = method_manager(args, data, model, logger_name = args.logger_name)