In [1]:
import config
from dataset import CustomDataset, process_data, train_test_split
from model import create_model

import os
import math
import tensorflow as tf
import warnings
import numpy as np
import pandas as pd

# Limit the GPUs this process may see; the value is taken from config.
# NOTE(review): this runs after `import tensorflow` - confirm nothing imported
# above has already initialised the GPUs, otherwise the setting has no effect.
os.environ['CUDA_VISIBLE_DEVICES'] = config.CUDA_VISIBLE_DEVICES


# Read the training and validation splits into DataFrames and report their
# shapes as (rows, columns).
print('Loading dataset...')
train_df = pd.read_csv(config.TRAIN_FILE)
val_df = pd.read_csv(config.VALIDATION_FILE)
print('Training set shape: ', train_df.shape, sep='')
# NOTE(review): 'Validaiton' is a typo; kept verbatim so the printed text
# stays identical to the recorded output.
print('Validaiton set shape: ', val_df.shape, sep='')
print('Loading finished.')

Loading dataset...
Training set shape: (11391, 3)
Validaiton set shape: (2847, 3)
Loading finished.


In [2]:
print('Processing dataset...')


def _as_dataset(frame):
    """Wrap one split's content and label columns in a CustomDataset.

    The content column is converted to a NumPy array of strings; the label
    column is passed through unchanged.  Batch size comes from config.
    """
    texts = frame[config.CONTENT_FIELD].values.astype("str")
    targets = frame[config.LABEL_FIELD]
    return CustomDataset(sentences=texts, labels=targets, batch_size=config.BATCH_SIZE)


train_set = _as_dataset(train_df)
val_set = _as_dataset(val_df)
print('Processing finished.')

Processing dataset...
Processing finished.


In [9]:
# Index path: batch 0 -> element 0 -> element 0 -> row 0.
# The recorded output is a (196,) int32 tensor that starts with 101 and is
# zero-padded after a 102 - presumably BERT token ids; TODO confirm against
# CustomDataset.
first_val_batch = val_set[0]
first_val_batch[0][0][0]

<tf.Tensor: shape=(196,), dtype=int32, numpy=
array([  101,  2595,   817,  3683,  7770,  8024,  1912,  6225,  4692,
        4708,  7556,  4706,  8024,  6421,  3300,  4638,  1216,  5543,
        6963,  3300,   749,  8024,  2897,  1168,  4638,  3221,  2130,
        5401,  2242,  8024,  2458,  2552, 10139,   102,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,   

In [7]:
# Index path: batch 0 -> element 0 -> element 1 -> row 0.
# The recorded output is a (196,) int32 tensor of ones followed by zeros -
# presumably the attention mask; TODO confirm against CustomDataset.
first_val_batch = val_set[0]
first_val_batch[0][1][0]

<tf.Tensor: shape=(196,), dtype=int32, numpy=
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      dtype=int32)>

In [8]:
# Index path: batch 0 -> element 0 -> element 2 -> row 0.
# The recorded output is a (196,) int32 tensor of zeros - presumably the
# token-type (segment) ids; TODO confirm against CustomDataset.
first_val_batch = val_set[0]
first_val_batch[0][2][0]

<tf.Tensor: shape=(196,), dtype=int32, numpy=
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      dtype=int32)>

In [4]:
import config
from dataset import CustomDataset, process_data, train_test_split
from model import create_model

import os
import math
import datetime
import tensorflow as tf
import pandas as pd
from transformers.optimization_tf import create_optimizer
from tensorflow.keras.metrics import SparseCategoricalAccuracy
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers.schedules import ExponentialDecay

## Model init
# Number the checkpoints found under ROOT_PATH so one can be picked
# interactively.  Only names that END with '.index' are listed; a plain
# substring test would also pick up unrelated files such as 'x.index.bak'.
INDEX_SUFFIX = '.index'
filenames = {
    number: name
    for number, name in enumerate(
        (f for f in os.listdir(config.ROOT_PATH) if f.endswith(INDEX_SUFFIX)),
        start=1,
    )
}
print(filenames)
get_epoch = input('Choose a checkpoint (input 0 to train a new model):')
get_epoch = int(get_epoch)    # raises ValueError on non-numeric input
if get_epoch != 0 and get_epoch not in filenames:
    # Fail before the expensive model build rather than with a bare KeyError
    # after it.
    raise ValueError(
        'Unknown checkpoint number {}; expected 0 or one of {}'.format(
            get_epoch, sorted(filenames)
        )
    )

print('Initializing model...')
mirrored_strategy = tf.distribute.MirroredStrategy()     # multi-GPU config
with mirrored_strategy.scope():
    # Model creation, weight loading and compile all happen inside the
    # strategy scope.
    model = create_model()
    # Timestamp that keeps this run's checkpoint names unique.  '{epoch:02d}'
    # stays a literal placeholder in checkpoint_path - presumably filled in by
    # a checkpoint callback defined elsewhere; TODO confirm.
    run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    if get_epoch == 0:
        print('Init a new model...')
        checkpoint_path = os.path.join(config.ROOT_PATH, run_stamp + '-{epoch:02d}')    # save the model checkpoints
        # The warmup/decay schedule must span the whole run:
        # total steps = batches per epoch * epochs.  The row count is divided
        # by BATCH_SIZE (not EPOCHS) to obtain batches per epoch.
        # Assumes one optimizer step per batch of config.BATCH_SIZE rows -
        # TODO confirm against CustomDataset.
        steps_per_epoch = math.ceil(train_df.shape[0] / config.BATCH_SIZE)
        optimizer, lr_schedule = create_optimizer(
            init_lr=config.ADAM_LR,
            num_train_steps=steps_per_epoch * config.EPOCHS,
            num_warmup_steps=config.ADAM_WARMUP_STEPS,
            min_lr_ratio=config.ADAM_MIN_LR_RATIO,
            weight_decay_rate=config.ADAM_DECAY_RATE,
            power=config.ADAM_POWER
        )
    else:
        print('Init model from a checkpoint...')
        # Checkpoint prefix = chosen file name without its trailing '.index'.
        # Slicing removes only the suffix, never an earlier occurrence.
        resume_prefix = os.path.join(
            config.ROOT_PATH, filenames[get_epoch][:-len(INDEX_SUFFIX)]
        )
        checkpoint_path = resume_prefix + '__' + run_stamp + '-{epoch:02d}'    # save the model checkpoints
        # expect_partial() silences warnings about checkpoint values that are
        # not restored into this model.
        model.load_weights(resume_prefix).expect_partial()
        # Resumed runs switch to SGD with an exponentially decaying learning
        # rate.
        lr_schedule = ExponentialDecay(
            config.SGD_LR,
            decay_steps=config.SGD_DECAY_STEPS,
            decay_rate=config.SGD_DECAY_RATE
        )
        optimizer = SGD(
            learning_rate=lr_schedule,
            momentum=config.SGD_MOMENTUM,
            nesterov=config.SGD_NESTEROV
        )
    # Loss and metric are the same for both branches, so compile once here.
    metric = SparseCategoricalAccuracy()
    loss = SparseCategoricalCrossentropy()
    model.compile(loss=loss, optimizer=optimizer, metrics=[metric])
model.summary()
print('Initialization finished.')


{1: '20211013-114803-04__20211013-122045-04.index', 2: '20211013-114803-04.index'}
Initializing model...
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


Some layers from the model checkpoint at ../../experiments/model/bert-base-chinese were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at ../../experiments/model/bert-base-chinese.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: <cyfunction Socket.send at 0x7f8b5d3a85c0> is not a module, class, method, function, traceback, frame, or code object
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: <cyfunction Socket.send at 0x7f8b5d3a85c0> is not a module, class, method, function, traceback, frame, or code object

Init model from a checkpoint...
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/t

In [13]:
# Main BERT layer of the 'bert' sub-model, reached through the named `.bert`
# attribute instead of Keras' private `_layers` list, which is not a stable
# API.  Assumes get_layer('bert') is a transformers TFBertModel - TODO confirm
# against create_model.
model.get_layer('bert').bert

<transformers.models.bert.modeling_tf_bert.TFBertMainLayer at 0x7f8afff9f2b0>

In [30]:
# Parameter count of the encoder's third transformer block (index 2), reached
# through named attributes instead of Keras' private `_layers` list, which is
# not a stable API.  Assumes get_layer('bert') is a transformers TFBertModel -
# TODO confirm against create_model.
model.get_layer('bert').bert.encoder.layer[2].count_params()

7087872

In [41]:
# Pooler layer of the 'bert' sub-model, reached through named attributes
# instead of Keras' private `_layers` list, which is not a stable API.
# Assumes get_layer('bert') is a transformers TFBertModel - TODO confirm
# against create_model.
model.get_layer('bert').bert.pooler

<transformers.models.bert.modeling_tf_bert.TFBertPooler at 0x7f8a9be7f198>

In [3]:
import os

# Absolute path of the kernel's current working directory, i.e. the base that
# relative paths resolve against.
os.getcwd()

'/home/fengyuan/workspaceGServer/NLP/sentiment_analysis_bert/code'