In [33]:
#######################################
### -------- Load libraries ------- ###
# Load Huggingface transformers
from transformers import TFBertModel,  BertConfig, BertTokenizer
from transformers import TFAlbertModel,  AlbertConfig, AlbertTokenizer
from transformers import TFRobertaModel,  RobertaConfig, RobertaTokenizer
from transformers import TFDistilBertModel, BertTokenizer, DistilBertConfig
from transformers import TFXLMModel, XLMTokenizer, XLMConfig, TFSequenceSummary
from transformers import TFXLMRobertaModel, XLMRobertaTokenizer, XLMRobertaConfig
# Then what you need from tensorflow.keras
from tensorflow.keras.layers import Input, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from official.nlp import optimization  # to create AdamW optimizer
import official.nlp.modeling.layers as layers
import tensorflow_addons as tfa
# And pandas for data import + sklearn because you allways need sklearn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.utils import shuffle
import os
import time
from collections import defaultdict
import glob
#loggging
from datetime import datetime

from tensorflow import keras

import numpy as np
import random

#uncertainty
from robustness_metrics.metrics import uncertainty



seed= 99 #42 standard, 0, 21, 99, 365

random.seed(seed)

np.random.seed(seed)

tf.random.set_seed(seed)

os.environ['PYTHONHASHSEED']=str(seed)




##settings##
language = "EN" #EN, DE, RO, All languages only choice if standard bert_type

language_model_relation = "multi" # specific vs multi

size = "medium" #small, medium, large for roberta and albert maybe

bert_type = "Distilbert"  #Roberta, #Distilbert, #BERT # RobertaXLM

cased = False # True always cased for multilanguage

class_weighting = False

max_length = 40

epochs = 5 #5 is standard, 10 for roberta

batch_size = 32

learning_rate = 1e-4 #5e-5 or 2e-5, 1e-4, 4e-4


layer_strategy = 'last4' #last, last4

reduce_strategy = 'cls' # cls,mean, max

last_layer_strategy = "concat" #mean, max, concat --> only relevant for last4 layer strategy


decay_factor = 1 # only takes effect with layer wise lr

layer_wise_lr = False

val_size=0.2

freeze = False

training = True

num_ensemble = 10

tf.get_logger().setLevel('ERROR')

In [34]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 12119281466083229456
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 16194232736936346990
physical_device_desc: "device: XLA_CPU device"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 8578424813065476343
physical_device_desc: "device: XLA_GPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 10468833408
locality {
  bus_id: 1
  links {
  }
}
incarnation: 3717588638101577566
physical_device_desc: "device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:65:00.0, compute capability: 6.1"
]


2022-01-06 09:29:44.439516: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:65:00.0 name: GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.582GHz coreCount: 28 deviceMemorySize: 10.92GiB deviceMemoryBandwidth: 451.17GiB/s
2022-01-06 09:29:44.439626: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2022-01-06 09:29:44.439695: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2022-01-06 09:29:44.439725: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2022-01-06 09:29:44.439753: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2022-01-06 09:29:44.439781: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolv

In [35]:
if language != "All":
    df = pd.read_csv("/media/remote/jdeke/Dataframes/df_combined.csv", header=0)
    df = df[df["language key"]==language][["short text","material group"]]
else:
    df = pd.read_csv("/media/remote/jdeke/Dataframes/df_combined.csv", header=0)
    df = df[["short text","material group", "language key"]]

  exec(code_obj, self.user_global_ns, self.user_ns)


In [36]:
df["short text"] = df["short text"].astype(str)

In [37]:
df = df.groupby('material group').filter(lambda x : len(x) > 1) # filter out classes occuring only once

In [38]:
possible_labels = df["material group"].unique()

label_dict = {}
for index, possible_label in enumerate(possible_labels):
    label_dict[possible_label] = index
label_dict

{'1FSCONMAT': 0,
 '1ROMACHIN': 1,
 '1ITSVINSW': 2,
 '1ITHWAPPL': 3,
 '1COPDPRNT': 4,
 '1FSMAINTE': 5,
 '1ITSWBUSI': 6,
 '1FMOSOSF': 7,
 '1FSBUEQMT': 8,
 '1LOGSERWC': 9,
 '1MSRMESCO': 10,
 '1ITSVSTSW': 11,
 '1ITTCVOMO': 12,
 '1FMREGARD': 13,
 '1TFLEGAL': 14,
 '1ROLOAD': 15,
 '1STAMAINT': 16,
 '1ITSWOFFI': 17,
 '1FSCIVILW': 18,
 '1CIVENGIN': 19,
 '1LOGSERSF': 20,
 '1FMOSCMSV': 21,
 '1HRTRANSL': 22,
 '1COATLCRE': 23,
 '1CICOHSSE': 24,
 '1ELCOMAIN': 25,
 '1HSSSFSSE': 26,
 '1FSITCOMM': 27,
 '1MSRLRS': 28,
 '1HSSSSPRS': 29,
 '1FSENGNRG': 30,
 '1STAEAIRC': 31,
 '1FMOSCLPS': 32,
 '1WSRSLABA': 33,
 '1FSBUPREF': 34,
 '1FSCARWAS': 35,
 '1LOGVEHCL': 36,
 '1FSFUELTE': 37,
 '1FSSIGNW': 38,
 '1FSFCEQMT': 39,
 '1ITSVINF': 40,
 '1MSRSSECU': 41,
 '1CICOMNGM': 42,
 '1HSENSTAU': 43,
 '1ITHWDESK': 44,
 '1FMREBLDM': 45,
 '1COPRPROM': 46,
 '1HRTLTL': 47,
 '1HRTROMV': 48,
 '1FMOSARCH': 49,
 '1ITSVCONS': 50,
 '1HRSEREAS': 51,
 '1PRSPWAST': 52,
 '1FMREFURN': 53,
 '1FMOSCNTS': 54,
 '1FMRENTAL': 55,
 '1COMEDIAB':

In [39]:
n_classes = len(label_dict)
print(n_classes)

278


In [40]:
df['label'] = df["material group"].replace(label_dict)

In [41]:
df.head()

Unnamed: 0,short text,material group,label
373366,hygienic audits for FS 2019 - Šediváková,1FSCONMAT,0
373367,Missing offers from Bronti,1ROMACHIN,1
373368,Repair works of Thermooilpumps from Ferv,1ROMACHIN,1
373369,Bronti January 2019,1ROMACHIN,1
373370,Termoko service,1ROMACHIN,1


In [42]:
X_train, X_test, y_train, y_test = train_test_split(df["short text"].values, 
                                                  df.label.values, 
                                                  test_size=0.2, 
                                                    random_state=seed,
                                                    stratify=df["label"], shuffle=True
                                                  )

# End Optional

In [43]:
if layer_strategy != "last":
    output_hidden_states = True
    print('output hidden')
else:
    output_hidden_states = False

    
    
if bert_type == "BERT":
    if cased == False:
        # Load transformers config and set output_hidden_states to False
        if language != "All":

            language_size_model_dict = {"EN": {"specific":'bert-base-uncased', 
                                           "multi":'bert-base-multilingual-cased'}, 
                                        "DE":{"specific":"bert-base-german-cased", 
                                              "multi":'bert-base-multilingual-cased'}, 
                                        "RO":{"multi":'bert-base-multilingual-cased'}, 
                                        "All":{"multi":'bert-base-multilingual-cased'}}
            
            config = BertConfig.from_pretrained(language_size_model_dict[language][language_model_relation])
            config.output_hidden_states = output_hidden_states
            tokenizer = BertTokenizer.from_pretrained(language_size_model_dict[language][language_model_relation], config = config)
            transformer_model = TFBertModel.from_pretrained(language_size_model_dict[language][language_model_relation], config = config)

        else:
            config = BertConfig.from_pretrained('bert-base-multilingual-cased')
            config.output_hidden_states = output_hidden_states
            tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased', config=config)
            transformer_model = TFBertModel.from_pretrained('bert-base-multilingual-cased',config=config)

    else:
        if language != "All":

            language_size_model_dict = {"EN": {"specific":'bert-base-cased', 
                                           "multi":'bert-base-multilingual-cased'}, 
                                        "DE":{"specific":"bert-base-german-cased", 
                                              "multi":'bert-base-multilingual-cased'}, 
                                        "RO":{"multi":'bert-base-multilingual-cased'}, 
                                        "All":{"multi":'bert-base-multilingual-cased'}}
            config = BertConfig.from_pretrained(language_size_model_dict[language][language_model_relation])
            config.output_hidden_states = output_hidden_states
            tokenizer = BertTokenizer.from_pretrained(language_size_model_dict[language][language_model_relation], config=config)
            transformer_model = TFBertModel.from_pretrained(language_size_model_dict[language][language_model_relation],config=config)

        else:
            config = BertConfig.from_pretrained('bert-base-multilingual-cased')
            config.output_hidden_states = output_hidden_states
            tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased', config=config)
            transformer_model = TFBertModel.from_pretrained('bert-base-multilingual-cased',config=config)

elif bert_type == "Albert":
    size_model_dict = {"small":"","medium":'albert-base-v2',"large":'albert-large-v2'}
    
    config = AlbertConfig.from_pretrained(size_model_dict[size])
    config.output_hidden_states = output_hidden_states
    tokenizer = AlbertTokenizer.from_pretrained(size_model_dict[size], config=config)
    transformer_model = TFAlbertModel.from_pretrained(size_model_dict[size],config=config)

elif bert_type == "Roberta":
    size_model_dict = {"small":'roberta-base',"medium":'roberta-base',"large":'roberta-large'}
    
    config = RobertaConfig.from_pretrained(size_model_dict[size])
    config.output_hidden_states = output_hidden_states
    tokenizer = RobertaTokenizer.from_pretrained(size_model_dict[size],config=config)
    transformer_model = TFRobertaModel.from_pretrained(size_model_dict[size],config=config)
    
elif bert_type == "RobertaXLM":
    config = XLMRobertaConfig.from_pretrained('jplu/tf-xlm-roberta-base')
    config.output_hidden_states = output_hidden_states
    tokenizer = XLMRobertaTokenizer.from_pretrained('jplu/tf-xlm-roberta-base', config=config)
    transformer_model = TFXLMRobertaModel.from_pretrained('jplu/tf-xlm-roberta-base', config=config)

elif bert_type == "Distilbert":
    if cased == False:
        if language != "All":

            language_size_model_dict = {"EN": {"specific":'distilbert-base-uncased', 
                                           "multi":'distilbert-base-multilingual-cased'}, 
                                        "DE":{"specific":"distilbert-base-german-cased", 
                                              "multi":'distilbert-base-multilingual-cased'}, 
                                        "RO":{"multi":'distilbert-base-multilingual-cased'}, 
                                        "All":{"multi":'distilbert-base-multilingual-cased'}}
            
            config = DistilBertConfig.from_pretrained(language_size_model_dict[language][language_model_relation])
            config.output_hidden_states = output_hidden_states
            tokenizer = BertTokenizer.from_pretrained(language_size_model_dict[language][language_model_relation], config=config)
            transformer_model = TFDistilBertModel.from_pretrained(language_size_model_dict[language][language_model_relation],config=config)

        else:
            config = DistilBertConfig.from_pretrained('distilbert-base-multilingual-cased')
            config.output_hidden_states = output_hidden_states
            tokenizer = BertTokenizer.from_pretrained('distilbert-base-multilingual-cased',config=config)
            transformer_model = TFDistilBertModel.from_pretrained('bert-base-multilingual-cased',config=config)

    else:
        if language != "All":

            language_size_model_dict = {"EN": {"specific":'distilbert-base-cased', 
                                           "multi":'distilbert-base-multilingual-cased'}, 
                                        "DE":{"specific":"distilbert-base-german-cased", 
                                              "multi":'distilbert-base-multilingual-cased'}, 
                                        "RO":{"multi":'distilbert-base-multilingual-cased'}, 
                                        "All":{"multi":'distilbert-base-multilingual-cased'}}
            config = DistilBertConfig.from_pretrained(language_size_model_dict[language][language_model_relation])
            config.output_hidden_states = output_hidden_states
            tokenizer = BertTokenizer.from_pretrained(language_size_model_dict[language][language_model_relation], config=config)
            transformer_model = TFDistilBertModel.from_pretrained(language_size_model_dict[language][language_model_relation],config=config)

        else:
            config = DistilBertConfig.from_pretrained('distilbert-base-multilingual-cased')
            config.output_hidden_states = output_hidden_states
            tokenizer = BertTokenizer.from_pretrained('distilbert-base-multilingual-cased', config=config)
            transformer_model = TFDistilBertModel.from_pretrained('bert-base-multilingual-cased', config=config)
            
print(tokenizer)
print(transformer_model)

output hidden


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.
2022-01-06 09:29:51.500615: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
Some layers from the model checkpoint at distilbert-base-multilingual-cased were not used when initializing TFDistilBertModel: ['vocab_projector', 'vocab_transform', 'activation_13', 'vocab_layer_norm']
- This IS expected if you are initializing TFDistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from the checkpoint of a

PreTrainedTokenizer(name_or_path='distilbert-base-multilingual-cased', vocab_size=119547, model_max_len=1000000000000000019884624838656, is_fast=False, padding_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})
<transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertModel object at 0x7f51fc3fc910>


In [44]:
print(training)

True


In [45]:
#######################################
### ------- Build the model ------- ###
#implementation according to transformer library --> original setup as in paper

reduce = tf.math.reduce_mean if reduce_strategy=='mean' else tf.math.reduce_max

def build_classifier_model(training=training):
    transformer = transformer_model.layers[0]
    # Build your model input
    input_ids = Input(shape=(max_length,), name='input_ids', dtype='int32')
    attention_mask = Input(shape=(max_length,), name='attention_mask', dtype='int32') 
    inputs = {'input_ids': input_ids, 'attention_mask': attention_mask}
    
    if bert_type=="Roberta":
        roberta_model = transformer(inputs, training=training)[0]#sequence output because pooler output not good for roberta
        if reduce_strategy == 'cls':
            pooled_output = roberta_model[:,0,:]
        else:
            pooled_output = reduce(roberta_model,axis=1)
        #basically what follows is what is done to compute pooled output
        dropout_pooler = Dropout(config.hidden_dropout_prob, name='pooler_dropout', seed=seed)
        pooled_output_dropout = dropout_pooler(pooled_output, training=training)
        hidden_roberta = Dense(units=config.hidden_size, kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='hidden', activation='tanh')(pooled_output_dropout) #take s token
        #from here pooled output is used essentially
        dropout_hidden = Dropout(config.hidden_dropout_prob, name='hidden_output', seed=seed)
        hidden_output_roberta = dropout_hidden(hidden_roberta, training=training)
        material = Dense(units=len(label_dict), kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='material')(hidden_output_roberta)
        
    elif bert_type=="Distilbert":
        distilbert_model = transformer(inputs, training=training)[0]
        if reduce_strategy == 'cls':
            pooled_output = distilbert_model[:,0]
        else:
            pooled_output = reduce(distilbert_model, axis=1)
        #for distilbert, the pooler is a bit different, e.g. relu activation
        hidden_layer = Dense(units=config.dim, kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='hidden', activation='relu')(pooled_output)
        dropout_hidden = Dropout(config.seq_classif_dropout, name='hidden_output', seed=seed)
        hidden_output_distilbert = dropout_hidden(hidden_layer, training=training)
        material = Dense(units=len(label_dict), kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='material')(hidden_output_distilbert)
        
    else:
        # Load the Transformers BERT model as a layer in a Keras model
        if reduce_strategy == 'cls':
            pooled_output = transformer(inputs, training=training)[1] #pooler output in tensorflow
        else:
            bert_model = transformer(inputs, training=training)[0] #pooler output in tensorflow
            pooled_output = reduce(bert_model,axis=1)
            dropout_pooler = Dropout(config.hidden_dropout_prob, name='pooler_dropout', seed=seed)
            pooled_output = dropout_pooler(pooled_output, training=training)
            pooled_output = Dense(units=config.hidden_size, kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='hidden', activation='tanh')(pooled_output) #take s token
        # Then build your model output
        dropout = Dropout(config.hidden_dropout_prob, name='pooled_output', seed=seed)
        pooled_output = dropout(pooled_output, training=training) #training=False
        material = Dense(units=len(label_dict), kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='material')(pooled_output)
            
    #outputs = {'material': material}
    # And combine it all in a model object
    model = Model(inputs=inputs, outputs=material, name='BERT_MultiClass')
    return model


# Last x layers

In [46]:
#last4 concat strategy either cls, mean or max of respective hidden state

reduce = tf.math.reduce_mean if reduce_strategy=='mean' else tf.math.reduce_max
reduce_cross_layer = tf.math.reduce_mean if last_layer_strategy=='mean' else tf.math.reduce_max

def build_classifier_model_last4(training=training):
    transformer = transformer_model.layers[0]
    # Build your model input
    input_ids = Input(shape=(max_length,), name='input_ids', dtype='int32')
    attention_mask = Input(shape=(max_length,), name='attention_mask', dtype='int32') 
    inputs = {'input_ids': input_ids, 'attention_mask': attention_mask}
    
    if bert_type=="Roberta":
        roberta_model = transformer(inputs, training=training)[2]#models hidden states
        if last_layer_strategy=="concat":
            pooled_output = tf.concat(tuple([roberta_model[i] for i in [-4, -3, -2, -1]]), axis=-1) #4 last hidden states
        else:
            pooled_output = reduce_cross_layer(tuple([roberta_model[i] for i in [-4, -3, -2, -1]]), axis=0) #4 last hidden states
        if reduce_strategy == 'cls':
            pooled_output = pooled_output[:, 0, :]
        else:
            pooled_output = reduce(pooled_output,axis=1)
        dropout_pooler = Dropout(config.hidden_dropout_prob, name='pooler_dropout', seed=seed)
        pooled_output = dropout_pooler(pooled_output, training=training)
        pooled_output = Dense(units=config.hidden_size, kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='hidden', activation='tanh')(pooled_output) #take s token
        dropout_hidden = Dropout(config.hidden_dropout_prob, name='hidden_output', seed=seed)
        pooled_output = dropout_hidden(pooled_output, training=training)
        material = Dense(units=len(label_dict), kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='material')(pooled_output)
        
    elif bert_type=="Distilbert":
        distilbert_model = transformer(inputs, training=training)[1]
        if last_layer_strategy=="concat":
            pooled_output = tf.concat(tuple([distilbert_model[i] for i in [-2, -1]]), axis=-1)
        else:
            pooled_output = reduce_cross_layer(tuple([distilbert_model[i] for i in [-2, -1]]), axis=0)
        if reduce_strategy == 'cls':
            pooled_output = pooled_output[:, 0, :]
        else:
            pooled_output = reduce(pooled_output,axis=1)
        #no dropout here because pooler is not using dropout in distilbert
        #dropout_pooler = Dropout(config.seq_classif_dropout, name='pooler_dropout', seed=seed)
        #pooled_output = dropout_pooler(pooled_output)
        pooled_output = Dense(units=config.dim, kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='hidden', activation='relu')(pooled_output)
        dropout_hidden = Dropout(config.seq_classif_dropout, name='hidden_output', seed=seed)
        pooled_output = dropout_hidden(pooled_output, training=training)
        material = Dense(units=len(label_dict), kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='material')(pooled_output)
        
    else:
    # Load the Transformers BERT model as a layer in a Keras model
        bert_hidden_states = transformer(inputs, training=training)[2] #hidden states
        # Then build your model output
        if last_layer_strategy=="concat":
            pooled_output = tf.concat(tuple([bert_hidden_states[i] for i in [-4, -3, -2, -1]]), axis=-1)
        else:
            pooled_output = reduce_cross_layer(tuple([bert_hidden_states[i] for i in [-4, -3, -2, -1]]), axis=0)
        if reduce_strategy == 'cls':
            pooled_output = pooled_output[:, 0, :]
        else:
            pooled_output = reduce(pooled_output,axis=1)
        dropout = Dropout(config.hidden_dropout_prob, name='pooled_output', seed=seed)
        pooled_output = dropout(pooled_output, training=training) 
        pooled_output = Dense(units=config.hidden_size, kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='hidden', activation='relu')(pooled_output)
        dropout_hidden = Dropout(config.hidden_dropout_prob, name='hidden_output', seed=seed)
        pooled_output = dropout_hidden(pooled_output, training=training)
        material = Dense(units=len(label_dict), kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='material')(pooled_output)

    #outputs = {'material': material}
    # And combine it all in a model object
    model = Model(inputs=inputs, outputs=material, name='BERT_MultiClass')
    return model

# Build Model

In [47]:
print("Layer Strategy is:",layer_strategy)
print("Reduce Strategy is:", reduce_strategy)

# Take a look at the model
if layer_strategy == 'last':
    model = build_classifier_model(training=training)
elif layer_strategy == 'last4':
    model = build_classifier_model_last4(training=training)
model.summary()

Layer Strategy is: last4
Reduce Strategy is: cls
Model: "BERT_MultiClass"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
attention_mask (InputLayer)     [(None, 40)]         0                                            
__________________________________________________________________________________________________
input_ids (InputLayer)          [(None, 40)]         0                                            
__________________________________________________________________________________________________
distilbert (TFDistilBertMainLay TFBaseModelOutput(la 134734080   attention_mask[0][0]             
                                                                 input_ids[0][0]                  
__________________________________________________________________________________________________
tf_op_layer_concat_2 (TensorFlo [(N

# Calibration

# Calibration Metrics

In [48]:
#for vanilla BERT
model.load_weights('models/fit/EN_last4_concat_cls_1_multi_False_Distilbert_0.0001_5_20220104-233911_random_state42')



<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f523c6a6be0>

In [49]:
#######################################
### ----- Evaluate the model ------ ###
# Ready test data
test_y_material = to_categorical(y_test,num_classes=n_classes)
test_x = tokenizer(
    text=list(X_test),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True, 
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = True,
    verbose = True)

# Time Taken

Dropout

In [50]:
def mc_dropout_sampling(test_examples):
  # Enable dropout during inference.
  return model.predict(x={'input_ids': test_x['input_ids'],'attention_mask':test_x["attention_mask"]})

In [None]:
ensembles = [3,5,10,20,30,40,50,60,70,80,90,100] #3,5,10,20,30,40,50,60,70,80,90,100
metrics_dict = defaultdict(dict)
for num_ensemble in ensembles: 
    
    # Take a look at the model
    if layer_strategy == 'last':
        model = build_classifier_model(training=training)
    elif layer_strategy == 'last4':
        model = build_classifier_model_last4(training=training)
    
    #for vanilla BERT
    model.load_weights('models/fit/EN_last4_concat_cls_1_multi_False_Distilbert_0.0001_5_20220104-233911_random_state42')

    start_time = time.time()
    dropout_logit_samples = [model.predict(x={'input_ids': test_x['input_ids'],'attention_mask':test_x["attention_mask"]}) for _ in range(num_ensemble)]
    dropout_prob_samples = [tf.nn.softmax(dropout_logits, axis=-1) for dropout_logits in dropout_logit_samples]
    dropout_prob_samples = tf.stack([dropout_prob_samples])[0]
    dropout_probs_mean = tf.reduce_mean(dropout_prob_samples, axis=0)

    end_time = time.time()

    time_elapsed = (end_time - start_time)

    time_elapsed_per_example = time_elapsed/y_test.shape[0]
    
    metrics_dict[num_ensemble]["time_elapsed"] = time_elapsed
    metrics_dict[num_ensemble]["time_elapsed_per_example"] = time_elapsed_per_example




    print(time_elapsed)
    print(time_elapsed_per_example)

30.79034161567688
0.0025994378738435527


In [None]:
pd.DataFrame.from_dict(data=metrics_dict, orient='index').to_csv("DE_"+str(seed) + '_MC Dropout time.csv', header=True)

Vanilla

In [17]:
metrics_dict = defaultdict(dict)
start_time = time.time()

# Take a look at the model
if layer_strategy == 'last':
    model = build_classifier_model(training=training)
elif layer_strategy == 'last4':
    model = build_classifier_model_last4(training=training)

#for vanilla BERT
model.load_weights('models/fit/DE_last4_concat_mean_1_specific_False_BERT_0.0001_5_20211103-100737_random_state99')
dropout_logit_samples = model.predict(x={'input_ids': test_x['input_ids'],'attention_mask':test_x["attention_mask"]})
dropout_prob_samples = tf.nn.softmax(dropout_logit_samples)
end_time = time.time()

time_elapsed = (end_time - start_time)

time_elapsed_per_example = time_elapsed/y_test.shape[0]

metrics_dict["1"]["time_elapsed"] = time_elapsed
metrics_dict["1"]["time_elapsed_per_example"] = time_elapsed_per_example

pd.DataFrame.from_dict(data=metrics_dict, orient='index').to_csv("DE_"+ str(seed) + '_Vanilla time_more.csv', header=True)

print(time_elapsed)
print(time_elapsed_per_example)

71.23747539520264
0.003345110602704857


Deep Ensemble

In [None]:
ensembles = [3,4,5,6,7,8,9,10]
metrics_dict = defaultdict(dict)
x = glob.glob("/media/remote/jdeke/models/fit/*.index")

start_time = time.time()
for num_ensemble in ensembles:
    model_logit_samples = []
    for f in x[:num_ensemble]:

        print(os.path.basename(f)[:-6])
        if layer_strategy == 'last':
            model = build_classifier_model()
        elif layer_strategy == 'last4':
            model = build_classifier_model_last4()

        model.load_weights('models/fit/'+os.path.basename(f)[:-6])
        model_logit_samples.append(model.predict(x={'input_ids': test_x['input_ids'],'attention_mask':test_x["attention_mask"]}))
        
    dropout_prob_samples = [tf.nn.softmax(dropout_logits, axis=-1) for dropout_logits in model_logit_samples]
    dropout_prob_samples = tf.stack([dropout_prob_samples])[0]
    dropout_probs_mean = tf.reduce_mean(dropout_prob_samples, axis=0)
    
    end_time = time.time()
    
    time_elapsed = (end_time - start_time)

    time_elapsed_per_example = time_elapsed/y_test.shape[0]
    
    metrics_dict[num_ensemble]["time_elapsed"] = time_elapsed
    metrics_dict[num_ensemble]["time_elapsed_per_example"] = time_elapsed_per_example




    print(time_elapsed)
    print(time_elapsed_per_example)

RO_last4_concat_cls_1_multi_False_Distilbert_0.0001_5_20211021-073036_random_state42


In [None]:
pd.DataFrame.from_dict(data=metrics_dict, orient='index').to_csv("RO_"+str(seed) + '_Deep Ensemble time.csv', header=True)