In [1]:
### -------- Load libraries ------- ###

# Load Huggingface transformers
from transformers import TFBertModel, BertConfig, BertTokenizerFast

# Then what you need from tensorflow.keras
from tensorflow.keras.layers import Input, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint
# And pandas for data import + sklearn because you always need sklearn
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import matplotlib.pyplot as plt
from datetime import datetime
import numpy as np
import sklearn
import itertools
import io
import os



In [2]:
print(tf.__version__)
### --------- Setup GPU ---------- ###

# Workaround for the RTX 3080 / TF 2.4.0-rc4 issue: let TensorFlow grow GPU
# memory on demand. Applied to every visible GPU, and skipped entirely when no
# GPU is present so the notebook still runs on CPU-only machines.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
if gpu_devices:
    for gpu_device in gpu_devices:
        tf.config.experimental.set_memory_growth(gpu_device, True)
else:
    print("No GPU detected; running on CPU.")


2.4.0-rc4


# Run Configuration

In [3]:
### --------- Setup BERT ---------- ###

# Name of the BERT model to use (passed to the `from_pretrained` loaders and
# used as a component of the output directory path).
model_name = 'bert-large-uncased'
# Other checkpoints that can be substituted here:
# bert-base-uncased
# bert-large-uncased
# bert-base-cased
# bert-large-cased





# Max length of tokens: sequences are truncated to this length by the
# tokenizer and the model inputs are declared with this fixed shape.
max_length = 100

# Number of training epochs passed to `model.fit`; also part of the output path.
epochs=20

### --------- Run identifier ---------- ###
# Used only to build the "/lvl<lvl>/" component of the output paths.
# NOTE(review): presumably the category level being classified — confirm.
lvl="1"

# Program

In [4]:
### --------- Setup logs paths ---------- ###

def _ensure_dir(directory):
    """Create `directory` (with missing parents) and print whether it already existed.

    Unlike a blanket `except OSError`, genuine failures (permissions, invalid
    path, ...) propagate instead of being reported as "already exists".
    """
    if os.path.isdir(directory):
        print ("%s already exists" % directory)
    else:
        os.makedirs(directory, exist_ok=True)
        print ("Successfully created the directory %s " % directory)


def _next_run_number(entries):
    """Return 1 + the highest N among names of the form 'Run<N>' (1 if there are none).

    The full numeric suffix is parsed, so 'Run10' counts as 10, and entries that
    do not follow the 'Run<N>' pattern are ignored.
    """
    run_numbers = [
        int(name[3:])
        for name in entries
        if name.startswith("Run") and name[3:].isdecimal()
    ]
    return max(run_numbers, default=0) + 1


# Configuration-specific sub-path shared by the model and the data output trees.
path="/lvl"+lvl+"/"+model_name+"/"+ str(max_length) +"T_"+str(epochs)+"e/"

aux_path= os.getcwd()+"/saved_models"+path
_ensure_dir(aux_path)

aux_path= os.getcwd()+"/saved_data"+path
_ensure_dir(aux_path)

# Number this run after the highest existing "Run<N>" folder of this configuration.
dir_list=os.listdir("./saved_models"+path)
run=_next_run_number(dir_list)
path+="Run"+str(run)


# Model artefacts: TensorBoard logs, checkpoints and the exported model.
path_model="./saved_models"+path

logdir = path_model + "/logs"
path_svae_model=path_model+"/model/"
checkpoint_filepath = path_model+'/checkpoint/'
log_dir_custom_scalars=logdir + '/custom_metrics'



# Plots and raw predictions of this run.
path_data="./saved_data"+path
_ensure_dir(path_data)

path_model_plot=path_data + "/model.png"
path_confusion_mat=path_data+'/conf.png'
path_saved_data=path_data+"/test_pred_raw.npz"

print("Config: "+ path+ "\nRelative paths "+ 
      "\n \n##### Model data #####"+
      "\n \nlog dir:" + logdir+ 
      "\n \nlog custom scalars dir:" +log_dir_custom_scalars+
      "\n \nCheckpoint dir:" +checkpoint_filepath+
      "\n \nSaved model dir:" +path_svae_model+
      "\n \n \n##### Plots and predictions #####"+
      "\n \nPlots dir:" +path_model_plot+
      "\n \nConfusion matrix dir:" +path_confusion_mat+
      "\n \nSaved data dir:" +path_saved_data)

Successfully created the directory /home/lh/Documents/BERT_classifier/saved_models/lvl1/bert-large-uncased/100T_20e/ 
Successfully created the directory /home/lh/Documents/BERT_classifier/saved_data/lvl1/bert-large-uncased/100T_20e/ 
Successfully created the directory /home/lh/Documents/BERT_classifier/saved_data/lvl1/bert-large-uncased/100T_20e/ 
Config: /lvl1/bert-large-uncased/100T_20e/Run1
Relative paths 
 
##### Model data #####
 
log dir:./saved_models/lvl1/bert-large-uncased/100T_20e/Run1/logs
 
log custom scalars dir:./saved_models/lvl1/bert-large-uncased/100T_20e/Run1/logs/custom_metrics
 
Checkpoint dir:./saved_models/lvl1/bert-large-uncased/100T_20e/Run1/checkpoint/
 
Saved model dir:./saved_models/lvl1/bert-large-uncased/100T_20e/Run1/model/
 
 
##### Plots and predictions #####
 
Plots dir:./saved_data/lvl1/bert-large-uncased/100T_20e/Run1/model.png
 
Confusion matrix dir:./saved_data/lvl1/bert-large-uncased/100T_20e/Run1/conf.png
 
Saved data dir:./saved_data/lvl1/bert-la

In [5]:
def plot_confusion_matrix(cm,f1_score,accuracy_score, class_names):
    """
    Returns a matplotlib figure containing the plotted confusion matrix.

    Cells are coloured by the raw counts in `cm` and annotated with the
    row-normalized value (count divided by the row total, rounded to two
    decimals). Rows whose total is zero are annotated with 0.0 instead of NaN.

    Args:
        cm (array, shape = [n, n]): a confusion matrix of integer classes
        f1_score (float): F1 score displayed in the figure title
        accuracy_score (float): accuracy displayed in the figure title
        class_names (array, shape = [n]): String names of the integer classes

    Returns:
        The matplotlib figure the matrix was drawn on.
    """
    figure = plt.figure(figsize=(8, 8))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title("Confusion matrix, accuracy {:.4f} \n f1 Score {:.4f}".format(accuracy_score,f1_score))
    plt.colorbar()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names, rotation=45)
    plt.yticks(tick_marks, class_names)


    # Compute the labels from the normalized confusion matrix. The division is
    # masked so that a class without any true samples (row total 0) does not
    # trigger a divide-by-zero and NaN annotations.
    row_totals = cm.sum(axis=1)[:, np.newaxis]
    normalized = np.divide(
        cm.astype('float'),
        row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0)
    labels = np.around(normalized, decimals=2)


    # Use white text if squares are dark; otherwise black.
    threshold = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        color = "white" if cm[i, j] > threshold else "black"
        plt.text(j, i, labels[i, j], horizontalalignment="center", color=color)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    return figure

def plot_to_image(figure):
    """Converts the matplotlib plot specified by 'figure' to a PNG image and
    returns it. The supplied figure is closed and inaccessible after this call.

    Args:
        figure: the matplotlib figure to render.

    Returns:
        The decoded PNG as a 4-channel TF image tensor with a leading batch
        dimension of size 1.
    """
    # Save the plot to a PNG in memory. Render the figure that was passed in,
    # not whatever figure pyplot currently considers "current".
    buf = io.BytesIO()
    figure.savefig(buf, format='png')
    # Closing the figure prevents it from being displayed directly inside
    # the notebook.
    plt.close(figure)
    buf.seek(0)
    # Convert PNG buffer to TF image
    image = tf.image.decode_png(buf.getvalue(), channels=4)
    # Add the batch dimension
    image = tf.expand_dims(image, 0)
    return image


In [6]:
#######################################
### --------- Import data --------- ###

# Import data from csv
data = pd.read_csv("amazon/train.csv")
test = pd.read_csv("amazon/test.csv")

# Select required columns. Copy so that the label columns added below are
# written to independent frames rather than to slices of the loaded ones.
data = data[['Text', 'Cat1']].copy()
test = test[['Text', 'Cat1']].copy()

# Training data
# Set model output as categorical and save in new label col
data['Cat1_label'] = pd.Categorical(data['Cat1'])
# Transform your output to numeric
data['Cat1'] = data['Cat1_label'].cat.codes


# Setup test data for logging
# Encode the test labels with the *training* categories so that a given integer
# code means the same class in both sets, even if the test set lacks a class.
train_categories = data['Cat1_label'].cat.categories
test['Cat1_label'] = pd.Categorical(test['Cat1'], categories=train_categories)
if test['Cat1_label'].isna().any():
    # Such rows would get code -1 and silently corrupt the one-hot targets.
    raise ValueError("amazon/test.csv contains Cat1 values that are missing or not present in amazon/train.csv")
# Transform your output to numeric
test['Cat1'] = test['Cat1_label'].cat.codes

# Class names indexed by integer code
class_names = np.asarray(train_categories)
print("Class names:",class_names)

data.head()


Class names: ['baby products' 'beauty' 'grocery gourmet food' 'health personal care'
 'pet supplies' 'toys games']


Unnamed: 0,Text,Cat1,Cat1_label
0,The description and photo on this product need...,2,grocery gourmet food
1,This was a great book!!!! It is well thought t...,5,toys games
2,"I am a first year teacher, teaching 5th grade....",5,toys games
3,I got the book at my bookfair at school lookin...,5,toys games
4,Hi! I'm Martine Redman and I created this puzz...,5,toys games


In [7]:
### --------- Load BERT ---------- ###

# Fetch the checkpoint's configuration with output_hidden_states switched off
config = BertConfig.from_pretrained(model_name, output_hidden_states=False)

# Tokenizer matching the chosen checkpoint
tokenizer = BertTokenizerFast.from_pretrained(model_name, config=config)

# Pretrained Transformers BERT model (TensorFlow weights)
transformer_model = TFBertModel.from_pretrained(model_name, config=config)


Some layers from the model checkpoint at bert-large-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-large-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [8]:
### ------- Build the model ------- ###

# TF Keras documentation: https://www.tensorflow.org/api_docs/python/tf/keras/Model

# Load the MainLayer
bert = transformer_model.layers[0]

# Build your model input
input_ids = Input(shape=(max_length,), name='input_ids', dtype='int32')
attention_mask = Input(shape=(max_length,), name='attention_mask', dtype='int32') # Ignores padded part of sentences
inputs = {'input_ids': input_ids, 'attention_mask': attention_mask}

# Load the Transformers BERT model as a layer in a Keras model and take
# element 1 of its output, which feeds the classification head.
bert_model = bert(inputs)[1]
dropout = Dropout(config.hidden_dropout_prob, name='pooled_output')
# Do not pin `training=False` here: that forces the layer into inference mode,
# so it would never drop anything during `fit`. Leaving the flag unset lets
# Keras enable dropout while training and disable it for predict/evaluate.
pooled_output = dropout(bert_model)

# Then build your model output: one unit per class and no activation, i.e. the
# model emits logits (the loss is configured with from_logits=True).
output = Dense(units=len(data.Cat1_label.value_counts()), kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='Cat1')(pooled_output)

# And combine it all in a model object
model = Model(inputs=inputs, outputs=output, name='BERT_MultiClass')

# Take a look at the model
model.summary()

Model: "BERT_MultiClass"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
attention_mask (InputLayer)     [(None, 100)]        0                                            
__________________________________________________________________________________________________
input_ids (InputLayer)          [(None, 100)]        0                                            
__________________________________________________________________________________________________
bert (TFBertMainLayer)          TFBaseModelOutputWit 335141888   attention_mask[0][0]             
                                                                 input_ids[0][0]                  
__________________________________________________________________________________________________
pooled_output (Dropout)         (None, 1024)         0           bert[0][1]         

In [9]:
### ------- Setup training ------- ###

def _tokenize(texts):
    """Tokenize a list of strings into TF tensors of exactly `max_length` tokens.

    Sequences are truncated to `max_length` and padded with
    `padding='max_length'` so the result always matches the fixed
    `(max_length,)` shape of the model inputs, regardless of how long the
    longest text in `texts` happens to be.

    Args:
        texts (list of str): raw input texts.

    Returns:
        The tokenizer output containing 'input_ids' and 'attention_mask'.
    """
    return tokenizer(
        text=texts,
        add_special_tokens=True,
        max_length=max_length,
        truncation=True,
        padding='max_length',
        return_tensors='tf',
        return_token_type_ids = False,
        return_attention_mask = True,
        verbose = True)


# Set an optimizer
optimizer = Adam(
    learning_rate=5e-05,
    epsilon=1e-08,
    decay=0.01,
    clipnorm=1.0)

# Set loss and metrics (the model outputs logits, hence from_logits=True)
loss = CategoricalCrossentropy(from_logits = True)
metric = CategoricalAccuracy('accuracy')

# Compile the model
model.compile(
    optimizer = optimizer,
    loss = loss, 
    metrics = [metric])

# One-hot width is fixed to the number of label categories so that training and
# test targets always have the same number of columns, even if one of the sets
# does not contain the highest class code.
num_classes = len(data['Cat1_label'].cat.categories)

# Ready target data for the model
y_Cat1 = to_categorical(data['Cat1'], num_classes=num_classes)

# Ready target test data for logging
test_y_cat1 = to_categorical(test['Cat1'], num_classes=num_classes)
 
# Tokenize the input (takes some time) for training and test (for logging) data
x = _tokenize(data['Text'].to_list())

test_x = _tokenize(test['Text'].to_list())


In [10]:
### ------- Callbacks ------- ###
# TensorBoard logging for the training run
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=logdir,
    histogram_freq=1,
    write_graph=False,
    write_images=True,
    profile_batch='2,9')
# Separate summary writer for the custom scalars and the confusion-matrix image
file_writer_cm = tf.summary.create_file_writer(log_dir_custom_scalars)

# Keep only the weights of the epoch with the highest val_accuracy
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True)

def log_confusion_matrix(epoch, logs):
    """Evaluate the model on the test set and log the results for this epoch.

    Appends the test accuracy and macro F1 score to the global lists `accu`
    and `f1_score_list`, and writes both scalars plus the confusion-matrix
    image to the custom summary writer.

    Args:
        epoch: epoch index supplied by Keras, used as the summary step.
        logs: Keras metrics for the epoch (unused).
    """
    # Predict the class of every test sample.
    test_inputs = {'input_ids': test_x['input_ids'], 'attention_mask': test_x['attention_mask']}
    predicted_classes = np.argmax(model.predict(x=test_inputs), axis=1)
    true_classes = test['Cat1']

    # Confusion matrix and summary scores on the test set.
    confusion = sklearn.metrics.confusion_matrix(true_classes, predicted_classes)
    epoch_f1 = sklearn.metrics.f1_score(true_classes, predicted_classes, average='macro')
    epoch_accuracy = sklearn.metrics.accuracy_score(true_classes, predicted_classes)

    # Keep a per-epoch history in the notebook-level lists.
    accu.append(epoch_accuracy)
    f1_score_list.append(epoch_f1)

    # Render the confusion matrix to an image tensor.
    cm_image = plot_to_image(
        plot_confusion_matrix(confusion, epoch_f1, epoch_accuracy, class_names=class_names))

    # Write the image and both scalars to the custom summary writer.
    with file_writer_cm.as_default():
        tf.summary.image("Confusion Matrix", cm_image, step=epoch)
        tf.summary.scalar("Test accuracy", epoch_accuracy, step=epoch)
        tf.summary.scalar("f1_score", epoch_f1, step=epoch)

# Run the test-set logging at the end of every epoch.
cm_callback = tf.keras.callbacks.LambdaCallback(on_epoch_end=log_confusion_matrix)

In [None]:
### ------- Train the model ------- ###
# Per-epoch test metrics, filled in by log_confusion_matrix during training
accu, f1_score_list = [], []

train_inputs = {'input_ids': x['input_ids'], 'attention_mask': x['attention_mask']}
training_callbacks = [tensorboard_callback, cm_callback, model_checkpoint_callback]

# Fit the model, holding out 20% of the training data for validation
history = model.fit(
    x=train_inputs,
    y=y_Cat1,
    validation_split=0.2,
    batch_size=14, #100T base-uncased: 50/ 100T large-uncase: 14
    epochs=epochs,
    callbacks=training_callbacks)

Epoch 1/20
Epoch 2/20
Epoch 3/20


In [None]:
# Restore the weights written by the best-val_accuracy checkpoint, then export
# the complete model.
model.load_weights(filepath=checkpoint_filepath)
model.save(filepath=path_svae_model)

In [None]:
# Draw the model architecture and write it to path_model_plot.
plot_options = dict(
    to_file=path_model_plot,
    show_shapes=True,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=False,
    dpi=96,
)
tf.keras.utils.plot_model(model, **plot_options)

In [None]:
### ----- Evaluate the model ------ ###

# Raw model outputs for the test set and for the full training set
test_inputs = {'input_ids': test_x['input_ids'], 'attention_mask': test_x['attention_mask']}
train_inputs = {'input_ids': x['input_ids'], 'attention_mask': x['attention_mask']}
test_pred_raw = model.predict(x=test_inputs, verbose=1)
train_pred_raw = model.predict(x=train_inputs, verbose=1)

# Predicted class = index of the largest output
test_pred = np.argmax(test_pred_raw, axis=1)

# Confusion matrix on the test set; cells holding less than 5% of their row are
# zeroed before plotting.
# NOTE(review): plot_confusion_matrix normalizes the matrix it receives, so the
# plotted fractions are computed from the thresholded counts — confirm this is
# the intended presentation.
cm = sklearn.metrics.confusion_matrix(test['Cat1'] , test_pred)
row_fractions = np.around(cm.astype('float') / cm.sum(axis=1)[:, np.newaxis], decimals=2)
cm[row_fractions < 0.05] = 0

# Summary scores (computed from the predictions, not from the thresholded matrix)
f1_score = sklearn.metrics.f1_score(test['Cat1'], test_pred, average='macro')
accuracy_score = sklearn.metrics.accuracy_score(test['Cat1'], test_pred)

# Plot the confusion matrix, save it to disk and display it
figure = plot_confusion_matrix(cm, f1_score, accuracy_score, class_names=class_names)
figure.savefig(path_confusion_mat)
plt.show()

print("F1 macro score", f1_score)
print("accuracy score", accuracy_score)

In [None]:
# Per-class precision / recall / F1 on the test set, reported with 4 decimals
report = sklearn.metrics.classification_report(
    y_true=test['Cat1'],
    y_pred=test_pred,
    target_names=class_names,
    digits=4)
print(report)

In [None]:
#Save data for hierarchical runs
# `history` is stored as the plain per-epoch metrics dict (history.history)
# rather than the Keras History callback object: the callback is not array data,
# and NOTE(review) it presumably keeps a reference to the model, so pickling it
# into the archive is likely to fail or drag the model along — confirm.
# The dict is written as a pickled 0-d object array; read it back with
# np.load(path_saved_data, allow_pickle=True)['history'].item().
np.savez(
    path_saved_data,
    test_pred_raw=test_pred_raw,
    f1_score=f1_score,
    accuracy_score=accuracy_score,
    train_pred_raw=train_pred_raw,
    report=report,
    accu_list=np.array(accu),
    f1_score_list=np.array(f1_score_list),
    history=history.history)