In [1]:
import sys

# Extend the import search path with two TextBrewer directories
# (both are relative to the notebook's working directory).
sys.path.extend((
    "./TextBrewer/examples/conll2003_example",
    "./TextBrewer/src/textbrewer",
))

# Show which Python interpreter this kernel is running.
print(sys.executable)

/home/hs3228/miniconda3/envs/prac/bin/python


In [2]:
from utils_ner import convert_examples_to_features, get_labels, read_examples_from_file
from transformers import WEIGHTS_NAME, BertConfig, BertForTokenClassification, BertTokenizer
from transformers import RobertaConfig, RobertaForTokenClassification, RobertaTokenizer
from transformers import DistilBertConfig, DistilBertForTokenClassification, DistilBertTokenizer
from transformers import CamembertConfig, CamembertForTokenClassification, CamembertTokenizer

# Lookup table: model family name -> (config class, token-classification
# model class, tokenizer class).
MODEL_CLASSES = dict(
    bert=(BertConfig, BertForTokenClassification, BertTokenizer),
    roberta=(RobertaConfig, RobertaForTokenClassification, RobertaTokenizer),
    distilbert=(DistilBertConfig, DistilBertForTokenClassification, DistilBertTokenizer),
    camembert=(CamembertConfig, CamembertForTokenClassification, CamembertTokenizer),
)

device = 'cpu'

# Load teacher model (conll2003)
model_type = 'bert'
model_name = 'bert-base-cased'

# get_labels is called with an empty path.
# NOTE(review): presumably it then falls back to a built-in label set -- confirm in utils_ner.
labels = get_labels("")
num_labels = len(labels)

config_class, model_class, tokenizer_class = MODEL_CLASSES[model_type]

config = config_class.from_pretrained(model_name, num_labels=num_labels, cache_dir=None)
tokenizer = tokenizer_class.from_pretrained(model_name, do_lower_case=False, cache_dir=None)
teacher_model = model_class.from_pretrained(model_name, from_tf=False, config=config, cache_dir=None)

# Kept as the cell's last expression so the notebook displays the result.
teacher_model.to(device)

  from .autonotebook import tqdm as notebook_tqdm


BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwis

In [9]:
# Load student model (conll2003)
student_output_dir = './TextBrewer/output_model/conll_distill/student_layer9'
# Rebinds `tokenizer` to the one stored alongside the student checkpoint.
tokenizer = tokenizer_class.from_pretrained(student_output_dir, do_lower_case=False)
checkpoints = [student_output_dir]

for checkpoint in checkpoints:
    # With several checkpoints the step is the text after the last "-" in the
    # path; with a single checkpoint it is left empty.
    if len(checkpoints) > 1:
        global_step = checkpoint.split("-")[-1]
    else:
        global_step = ""

    student_model = model_class.from_pretrained(checkpoint)
    student_model.to(device)

In [10]:
import textbrewer

from textbrewer import GeneralDistiller
from textbrewer import TrainingConfig, DistillationConfig

# Print a parameter summary (down to max_level=3) for the teacher, then the
# student. Each summary is preceded by its own header line.
for report_title, reported_model in (
    ("\nteacher_model's parametrers:", teacher_model),
    ("student_model's parametrers:", student_model),
):
    print(report_title)
    result, _ = textbrewer.utils.display_parameters(reported_model, max_level=3)
    print(result)


teacher_model's parametrers:

LAYER NAME                    	        #PARAMS	     RATIO	 MEM(MB)
--model:                      	    108,317,193	   100.00%	  413.20
  --bert:                     	    108,310,272	    99.99%	  413.17
    --embeddings:             	     22,665,216	    20.92%	   86.46
      --word_embeddings:      	     22,268,928	    20.56%	   84.95
      --position_embeddings:  	        393,216	     0.36%	    1.50
      --token_type_embeddings:	          1,536	     0.00%	    0.01
      --LayerNorm:            	          1,536	     0.00%	    0.01
    --encoder                 
      --layer:                	     85,054,464	    78.52%	  324.46
    --pooler                  
      --dense:                	        590,592	     0.55%	    2.25
  --classifier:               	          6,921	     0.01%	    0.03
    --weight:                 	          6,912	     0.01%	    0.03
    --bias:                   	              9	     0.00%	    0.00

student_model's parametrers:

LAYER

### Loss curve

In [17]:
import re

def extract_by_metric_name(line, metric='f1'):
    """Parse the float that follows ``"<metric> = "`` in a log line.

    Args:
        line: One line of the evaluation log, e.g. ``"f1 = 0.91"``.
        metric: Name of the metric to look for.

    Returns:
        The text after the first ``"<metric> = "`` on the line, as a float.

    Raises:
        ValueError: If the metric does not appear on the line, or the text
            after it cannot be converted to a float.
    """
    # Use a capture group rather than str.strip(): strip() removes a *set of
    # characters*, so stripping "f1 = " also removed leading/trailing '1'
    # digits of the value itself ("f1 = 1.0" -> ".0", "0.9111" -> "0.9").
    # re.escape keeps metric names containing regex metacharacters literal.
    match = re.search(r'{} = (.*)'.format(re.escape(metric)), line)
    if match is None:
        raise ValueError('metric {!r} not found in line: {!r}'.format(metric, line))

    return float(match.group(1).strip())

def extract_all_metric_values(loglines):
    """Collect the four evaluation metrics from consecutive log lines.

    ``loglines`` is read positionally: index 0 is parsed for ``f1``, index 1
    for ``loss``, index 2 for ``precision`` and index 3 for ``recall``.

    Returns:
        A dict with the keys ``'f1'``, ``'loss'``, ``'precision'`` and
        ``'recall'`` mapped to the parsed values.
    """
    metric_order = ('f1', 'loss', 'precision', 'recall')

    return {
        name: extract_by_metric_name(loglines[position], metric=name)
        for position, name in enumerate(metric_order)
    }

# NOTE: absolute, machine-specific path to the distillation log file.
log_dir = '/home/hs3228/TextBrewer/log/conll_distill_raw_layer7.log'

with open(log_dir, 'r') as f:
    logs = f.readlines()

epoch_eval_res = []    # one metrics dict per evaluated epoch, in log order
test_eval_res = None   # metrics of the first eval block not followed by an epoch line

for idx, line in enumerate(logs):
    if '***** Eval results  *****' not in line:
        continue

    # The four lines after the header carry f1 / loss / precision / recall.
    metric_lines = logs[idx+1:idx+5]

    # The fifth line after the header names the epoch for per-epoch evals.
    # Guard the lookup so an eval block at the very end of the file is
    # treated as "no epoch line" instead of raising IndexError.
    epoch_num_line = logs[idx+5] if idx + 5 < len(logs) else ''
    epoch_match = re.search(r'Epoch ([0-9]+)', epoch_num_line)

    if epoch_match is not None:
        epoch_res = extract_all_metric_values(metric_lines)
        epoch_res['epoch_num'] = int(epoch_match.group(1))
        epoch_eval_res.append(epoch_res)
    else:
        # No epoch number follows: record this block as the test result and
        # stop scanning.
        test_eval_res = extract_all_metric_values(metric_lines)
        break

# Number of per-epoch evals and the last one (None when the log had none).
print(len(epoch_eval_res), epoch_eval_res[-1] if epoch_eval_res else None)
print(test_eval_res)

20 {'f1': 0.9126710816777042, 'loss': 0.1343695681247465, 'precision': 0.9096989966555183, 'recall': 0.9156626506024096, 'epoch_num': 20}
{'f1': 0.9429459709268129, 'loss': 0.050306530181449094, 'precision': 0.9406538139145012, 'recall': 0.9452493261455526}


In [5]:
import pandas as pd

# Small example frame: five rows with a 'class' column and a 'num' column.
x = pd.DataFrame({'class': [1, 2, 3, 3, 4], 'num': [1, 2, 4, 7, 6]})
x.head()

Unnamed: 0,class,num
0,1,1
1,2,2
2,3,4
3,3,7
4,4,6


In [10]:
# Count occurrences of each value in df['student'] and keep only the values
# that occur exactly once.
# NOTE(review): `df` is not defined in any cell visible here, and the recorded
# output below is named 'class' rather than 'student' -- confirm which frame
# and column this cell is meant to read.
x = df['student'].value_counts().loc[lambda counts: counts == 1]
x.head()

4    1
2    1
1    1
Name: class, dtype: int64